Commit bf9e9b81 authored by Vitaliy Lyudvichenko's avatar Vitaliy Lyudvichenko

Adding OpenCL implementation and public interface for the Softmax layer

parent d8507fef
@@ -260,6 +260,13 @@ namespace dnn
    static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
};

class CV_EXPORTS_W SoftmaxLayer : public Layer
{
public:

    static Ptr<SoftmaxLayer> create(int axis = 1);
};

//! @}
//! @}
...
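For illustration only, a minimal sketch of driving the new public interface directly. The Blob/BlobShape constructors and the header path are assumptions about the dnn module API at this revision, not something this commit adds; only SoftmaxLayer::create and the allocate/forward signatures come from the diff itself.

// Hypothetical standalone usage of the new SoftmaxLayer factory (sketch, not part of this commit).
#include <opencv2/dnn.hpp>
#include <vector>

void runSoftmaxExample()
{
    using namespace cv;
    using namespace cv::dnn;

    Blob input(BlobShape(1, 3, 2, 2));                  // assumed Blob/BlobShape constructors: 1x3x2x2 float blob
    Ptr<SoftmaxLayer> layer = SoftmaxLayer::create(1);  // axis = 1: softmax over the channel dimension

    std::vector<Blob*> inputs(1, &input);
    std::vector<Blob> outputs;
    layer->allocate(inputs, outputs);                   // allocate/forward signatures as declared in this diff
    layer->forward(inputs, outputs);                    // outputs[0] now holds softmax(input) along axis 1
}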
@@ -77,7 +77,7 @@ void initModule()
    return;

    REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
-   REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
+   REG_STATIC_LAYER_FUNC(Softmax, createSoftmaxLayerFromCaffe)
    REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
    REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
    REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
...
@@ -269,7 +269,7 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
Ptr<Layer> createPoolingLayerFromCaffe(LayerParams &params)
{
    int type;
-   Size kernel, pad, stride;
+   Size kernel, stride, pad;
    if (params.has("pool"))
    {
...
@@ -42,6 +42,8 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "softmax_layer.hpp"
#include <opencv2/core/ocl.hpp>
#include "modules/dnn/opencl_kernels_dnn.hpp"
#include <algorithm>
#include <stdlib.h>
using std::max;
@@ -50,95 +52,179 @@ namespace cv
{
namespace dnn
{

SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
{
    axisRaw = axis;
}

void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 1);
    axis = inputs[0]->canonicalAxis(axisRaw);

    useOpenCL = ocl::useOpenCL();

    BlobShape shape = inputs[0]->shape();
    outerSize = shape.total(0, axis);
    channels = shape[axis];
    innerSize = shape.total(axis + 1);

    int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
    shape[axis] = 1;
    buf.create(shape, inputs[0]->type(), allocFlag);

    outputs.resize(1);
    outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
}

void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    Blob &src = *inputs[0];
    Blob &dst = outputs[0];

    if (!useOpenCL)
        forward_cpu(src, dst);
    else
    {
        CV_Assert(forward_ocl(src, dst));
    }
}

#ifdef HAVE_OPENCL
bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
{
    const UMat &srcMat = src.umatRefConst();
    UMat &dstMat = dst.umatRef();
    srcMat.copyTo(dstMat);
    UMat &bufMat = buf.umatRef();
    CV_Assert(dstMat.offset == 0);

    String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
    ocl::Kernel kmax, ksub, ksum, kdiv;

    if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
    size_t bufSize = buf.total();
    size_t totalSize = src.total();

    kmax.args((int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
    if (!kmax.run(1, &bufSize, &wgSize, true))
        return false;

    ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
    if (!ksub.run(1, &totalSize, &wgSize, true))
        return false;

    cv::exp(dstMat, dstMat);

    ksum.args((int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
    if (!ksum.run(1, &bufSize, &wgSize, true))
        return false;

    kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
    if (!kdiv.run(1, &totalSize, &wgSize, true))
        return false;

    return true;
}
#else
bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
{
    return false;
}
#endif

void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
{
    CV_Assert(src.type() == CV_32F);

    float *srcPtr = src.ptrf();
    float *dstPtr = dst.ptrf();
    float *bufPtr = buf.ptrf();

    size_t outerStep = src.total(axis);
    size_t cnStep = src.total(axis + 1);

    //compute max along axis
    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
    {
        size_t srcOffset = outerDim * outerStep;
        size_t bufOffset = outerDim * cnStep;

        memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));

        for (size_t cnDim = 1; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
        }
    }

    //subtract max
    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
    {
        size_t srcOffset = outerDim * outerStep;
        size_t bufOffset = outerDim * cnStep;

        for (size_t cnDim = 0; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i];
        }
    }

    cv::exp(dst.matRef(), dst.matRef());

    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
    {
        size_t srcOffset = outerDim * outerStep;
        size_t bufOffset = outerDim * cnStep;

        //sum exp along axis
        for (size_t i = 0; i < innerSize; i++)
            bufPtr[bufOffset + i] = 0.f;

        for (size_t cnDim = 0; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i];
        }

        //divide by computed sum
        for (size_t cnDim = 0; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i];
        }
    }
}

Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
{
    return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
}

Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params)
{
    int axis = params.get<int>("axis", 1);
    return Ptr<Layer>(SoftmaxLayer::create(axis));
}

}
}
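For reference, both the forward_cpu path and the OpenCL path above evaluate softmax along the selected axis in the usual numerically stable form; subtracting the per-position maximum before exponentiating does not change the result but keeps exp() from overflowing:

\[
\mathrm{softmax}(x)_c = \frac{\exp\left(x_c - \max_k x_k\right)}{\sum_j \exp\left(x_j - \max_k x_k\right)}
\]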
@@ -42,21 +42,33 @@
#ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>

namespace cv
{
namespace dnn
{

class SoftMaxLayerImpl : public SoftmaxLayer
{
    int axis, axisRaw;
    Blob buf;

    bool useOpenCL;
    size_t outerSize, channels, innerSize;

    bool forward_ocl(Blob &src, Blob &dst);
    void forward_cpu(Blob &src, Blob &dst);

public:

    SoftMaxLayerImpl(int axis = 1);

    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};

Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params);

}
}
#endif
/*************************************************************************************
* Copyright (c) 2015, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
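// One work-item per (outer, spatial) position: reduce the maximum over the channel axis into out.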
__kernel void kernel_channel_max(const int num, const int channels,
const int spatial_dim, __global const T* data, __global T* out) {
int index = get_global_id(0);
if(index < num * spatial_dim) {
int n = index / spatial_dim;
int s = index % spatial_dim;
T maxval = -FLT_MAX;
for (int c = 0; c < channels; ++c) {
maxval = max(data[(n * channels + c) * spatial_dim + s], maxval);
}
out[index] = maxval;
}
}
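// One work-item per element: subtract that position's channel maximum (numerical stability before exp).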
__kernel void kernel_channel_subtract(const int count,
const int num, const int channels,
const int spatial_dim, __global const T* channel_max, __global T* data) {
int index = get_global_id(0);
if(index < count) {
int n = index / channels / spatial_dim;
int s = index % spatial_dim;
data[index] -= channel_max[n * spatial_dim + s];
}
}
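// One work-item per (outer, spatial) position: sum values over the channel axis (run after exponentiation on the host side to form the softmax denominator).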
__kernel void kernel_channel_sum(const int num, const int channels,
const int spatial_dim, __global const T* data, __global T* channel_sum) {
int index = get_global_id(0);
if(index < num * spatial_dim) {
int n = index / spatial_dim;
int s = index % spatial_dim;
T sum = 0;
for (int c = 0; c < channels; ++c) {
sum += data[(n * channels + c) * spatial_dim + s];
}
channel_sum[index] = sum;
}
}
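// One work-item per element: divide by that position's channel sum, completing the normalization.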
__kernel void kernel_channel_div(const int count,
const int num, const int channels,
const int spatial_dim, __global const T* channel_sum, __global T* data) {
int index = get_global_id(0);
if(index < count) {
int n = index / channels / spatial_dim;
int s = index % spatial_dim;
data[index] /= channel_sum[n * spatial_dim + s];
}
}
\ No newline at end of file
@@ -87,7 +87,7 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
TEST(Layer_Test_Softmax, Accuracy)
{
-    testLayerUsingCaffeModels("layer_softmax");
+    OCL_OFF(testLayerUsingCaffeModels("layer_softmax"));
}

OCL_TEST(Layer_Test_Softmax, Accuracy)
{
...