Commit 78c5e41c authored by Manjunath Bhat, committed by Alexander Alekhin

Merge pull request #15808 from thebhatman:Mish_swish

* Added Swish and Mish activations

* Fixed whitespace errors

* Kernel implementation done

* Added function for launching kernel

* Changed type of 1.0

* Attempt to add test for Swish and Mish

* Resolving type mismatch for log

* exp from device

* Use log1pexp instead of adding 1

* Added openCL kernels
parent 01a28db9
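
For reference, the two activations added here are Swish, x * sigmoid(x), and Mish, x * tanh(softplus(x)) with softplus(x) = log(1 + e^x); the "Use log1pexp instead of adding 1" commit swaps the naive log(1 + exp(x)) for an overflow-safe softplus on the CUDA path. Below is a minimal standalone C++ sketch of both formulas (illustration only, not part of the patch; the softplus helper and the std::log1p-based stabilization are assumptions mirroring the intent of device::log1pexp):

    // Reference-only sketch of the activations added by this PR; not patch code.
    #include <cmath>
    #include <cstdio>

    // softplus(x) = log(1 + e^x), rearranged so exp() never overflows for large x
    // (illustrative stand-in for the device::log1pexp used by the CUDA kernels).
    static float softplus(float x)
    {
        return x > 0.f ? x + std::log1p(std::exp(-x)) : std::log1p(std::exp(x));
    }

    static float swish(float x) { return x / (1.f + std::exp(-x)); }  // x * sigmoid(x)
    static float mish(float x)  { return x * std::tanh(softplus(x)); }

    int main()
    {
        for (float x : {-4.f, -1.f, 0.f, 1.f, 4.f})
            std::printf("x = %5.1f  swish = %8.5f  mish = %8.5f\n", x, swish(x), mish(x));
        return 0;
    }
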
@@ -462,6 +462,18 @@ CV__DNN_INLINE_NS_BEGIN
        static Ptr<TanHLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS SwishLayer : public ActivationLayer
    {
    public:
        static Ptr<SwishLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS MishLayer : public ActivationLayer
    {
    public:
        static Ptr<MishLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS SigmoidLayer : public ActivationLayer
    {
    public:
......
@@ -62,6 +62,43 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
            }
        }

        template <class T, std::size_t N>
        __global__ void swish_vec(Span<T> output, View<T> input) {
            using vector_type = get_vector_type_t<T, N>;

            auto output_vPtr = vector_type::get_pointer(output.data());
            auto input_vPtr = vector_type::get_pointer(input.data());

            for (auto i : grid_stride_range(output.size() / vector_type::size())) {
                vector_type vec;
                v_load(vec, input_vPtr[i]);
                for (int j = 0; j < vector_type::size(); j++) {
                    using device::sigmoid;
                    vec.data[j] = vec.data[j] * sigmoid(vec.data[j]);
                }
                v_store(output_vPtr[i], vec);
            }
        }

        template <class T, std::size_t N>
        __global__ void mish_vec(Span<T> output, View<T> input) {
            using vector_type = get_vector_type_t<T, N>;

            auto output_vPtr = vector_type::get_pointer(output.data());
            auto input_vPtr = vector_type::get_pointer(input.data());

            for (auto i : grid_stride_range(output.size() / vector_type::size())) {
                vector_type vec;
                v_load(vec, input_vPtr[i]);
                for (int j = 0; j < vector_type::size(); j++) {
                    using device::tanh;
                    using device::log1pexp;
                    vec.data[j] = vec.data[j] * tanh(log1pexp(vec.data[j]));
                }
                v_store(output_vPtr[i], vec);
            }
        }

        template <class T, std::size_t N>
        __global__ void sigmoid_vec(Span<T> output, View<T> input) {
            using vector_type = get_vector_type_t<T, N>;
@@ -240,6 +277,58 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    template void tanh<__half>(const Stream&, Span<__half>, View<__half>);
    template void tanh<float>(const Stream&, Span<float>, View<float>);

    template <class T, std::size_t N>
    void launch_vectorized_swish(const Stream& stream, Span<T> output, View<T> input) {
        CV_Assert(is_fully_aligned<T>(output, N));
        CV_Assert(is_fully_aligned<T>(input, N));

        auto kernel = raw::swish_vec<T, N>;
        auto policy = make_policy(kernel, output.size() / N, 0, stream);
        launch_kernel(kernel, policy, output, input);
    }

    template <class T>
    void swish(const Stream& stream, Span<T> output, View<T> input) {
        CV_Assert(input.size() == output.size());

        if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
            launch_vectorized_swish<T, 4>(stream, output, input);
        } else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
            launch_vectorized_swish<T, 2>(stream, output, input);
        } else {
            launch_vectorized_swish<T, 1>(stream, output, input);
        }
    }

    template void swish<__half>(const Stream&, Span<__half>, View<__half>);
    template void swish<float>(const Stream&, Span<float>, View<float>);

    template <class T, std::size_t N>
    void launch_vectorized_mish(const Stream& stream, Span<T> output, View<T> input) {
        CV_Assert(is_fully_aligned<T>(output, N));
        CV_Assert(is_fully_aligned<T>(input, N));

        auto kernel = raw::mish_vec<T, N>;
        auto policy = make_policy(kernel, output.size() / N, 0, stream);
        launch_kernel(kernel, policy, output, input);
    }

    template <class T>
    void mish(const Stream& stream, Span<T> output, View<T> input) {
        CV_Assert(input.size() == output.size());

        if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
            launch_vectorized_mish<T, 4>(stream, output, input);
        } else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
            launch_vectorized_mish<T, 2>(stream, output, input);
        } else {
            launch_vectorized_mish<T, 1>(stream, output, input);
        }
    }

    template void mish<__half>(const Stream&, Span<__half>, View<__half>);
    template void mish<float>(const Stream&, Span<float>, View<float>);

    template <class T, std::size_t N>
    void launch_vectorized_sigmoid(const Stream& stream, Span<T> output, View<T> input) {
        CV_Assert(is_fully_aligned<T>(output, N));
......
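
A note on the dispatch above: swish() and mish() pick the widest vectorized kernel (4, 2, or 1 elements per lane) that both tensors can support, falling back to the scalar variant otherwise. A rough host-side illustration of that width selection follows (illustration only; the patch's is_fully_aligned presumably also ties the check to the tensor size and the CUDA vector types, which this sketch ignores):

    // Illustration only: pick the widest vector width (4, 2, else 1) whose
    // byte alignment both buffers satisfy, mimicking the dispatch logic above.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static int select_vector_width(const void* out, const void* in, std::size_t elem_size)
    {
        auto aligned = [&](const void* p, int n) {
            return reinterpret_cast<std::uintptr_t>(p) % (n * elem_size) == 0;
        };
        for (int n : {4, 2})
            if (aligned(out, n) && aligned(in, n))
                return n;
        return 1;
    }

    int main()
    {
        alignas(16) float buf[8] = {};
        std::printf("aligned buffers   -> width %d\n", select_vector_width(buf, buf, sizeof(float)));
        std::printf("offset by 1 float -> width %d\n", select_vector_width(buf + 1, buf + 1, sizeof(float)));
        return 0;
    }
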
@@ -18,6 +18,12 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    template <class T>
    void tanh(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

    template <class T>
    void swish(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

    template <class T>
    void mish(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

    template <class T>
    void sigmoid(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
......
@@ -143,6 +143,62 @@ namespace cv { namespace dnn { namespace cuda4dnn {
        csl::Stream stream;
    };

    template <class T>
    class SwishOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        SwishOp(csl::Stream stream_) : stream(std::move(stream_)) { }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            for (int i = 0; i < inputs.size(); i++)
            {
                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
                auto input = input_wrapper->getView();

                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
                auto output = output_wrapper->getSpan();

                kernels::swish<T>(stream, output, input);
            }
        }

    private:
        csl::Stream stream;
    };

    template <class T>
    class MishOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        MishOp(csl::Stream stream_) : stream(std::move(stream_)) { }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            for (int i = 0; i < inputs.size(); i++)
            {
                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
                auto input = input_wrapper->getView();

                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
                auto output = output_wrapper->getSpan();

                kernels::mish<T>(stream, output, input);
            }
        }

    private:
        csl::Stream stream;
    };

    template <class T>
    class SigmoidOp final : public CUDABackendNode {
    public:
......
@@ -103,6 +103,8 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(PReLU,          ChannelsPReLULayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sigmoid,        SigmoidLayer);
    CV_DNN_REGISTER_LAYER_CLASS(TanH,           TanHLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Swish,          SwishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Mish,           MishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ELU,            ELULayer);
    CV_DNN_REGISTER_LAYER_CLASS(BNLL,           BNLLLayer);
    CV_DNN_REGISTER_LAYER_CLASS(AbsVal,         AbsLayer);
......
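
With the registrations above, the new layers become reachable through the layer type strings "Swish" and "Mish" (the same strings exercised by the Halide test at the bottom of this diff). A hypothetical smoke test, not part of the patch, might look like the following (assumes a standard OpenCV build with the dnn module; the layer name "act" is arbitrary):

    // Hypothetical usage sketch: run a single newly registered activation layer
    // on the default OpenCV CPU backend.
    #include <opencv2/dnn.hpp>
    #include <iostream>

    int main()
    {
        cv::dnn::Net net;
        cv::dnn::LayerParams lp;
        lp.type = "Mish";                          // or "Swish"
        lp.name = "act";
        net.addLayerToPrev(lp.name, lp.type, lp);  // connect to the net input

        cv::Mat input = (cv::Mat_<float>(1, 4) << -2.f, -0.5f, 0.5f, 2.f);
        net.setInput(input);
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);

        std::cout << net.forward() << std::endl;   // elementwise mish of the input row
        return 0;
    }
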
@@ -613,6 +613,184 @@ struct TanHFunctor
    int64 getFLOPSPerElement() const { return 1; }
};

struct SwishFunctor
{
    typedef SwishLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE;
    }

    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = x / (1.0f + exp(-x));
            }
        }
    }

#ifdef HAVE_OPENCL
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("SwishForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::SwishOp>(target, stream);
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = input / (1.0f + exp(-input));
    }
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 3; }
};
struct MishFunctor
{
    typedef MishLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE;
    }

    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = x * tanh(log(1.0f + exp(x)));
            }
        }
    }

#ifdef HAVE_OPENCL
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("MishForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::MishOp>(target, stream);
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = input * tanh(log(1.0f + exp(input)));
    }
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 3; }
};

struct SigmoidFunctor
{
    typedef SigmoidLayer Layer;
@@ -1292,6 +1470,22 @@ Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
    return l;
}

Ptr<SwishLayer> SwishLayer::create(const LayerParams& params)
{
    Ptr<SwishLayer> l(new ElementWiseLayer<SwishFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<MishLayer> MishLayer::create(const LayerParams& params)
{
    Ptr<MishLayer> l(new ElementWiseLayer<MishFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
{
    Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());
......
@@ -95,6 +95,18 @@ __kernel void SigmoidForward(const int count, __global const T* in, __global T*
    out[index] = 1.0f / (1.0f + exp(-in[index]));
}

__kernel void SwishForward(const int count, __global const T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < count)
        out[index] = in[index] / (1.0f + exp(-in[index]));
}

__kernel void MishForward(const int count, __global const T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < count)
        out[index] = in[index] * tanh(log(1.0f + exp(in[index])));
}

__kernel void BNLLForward(const int n, __global const T* in, __global T* out) {
    int index = get_global_id(0);
    if (index < n) {
......
@@ -583,7 +583,7 @@ TEST_P(NoParamActivation, Accuracy)
    testInPlaceActivation(lp, backendId, targetId);
}

INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, NoParamActivation, Combine(
-/*type*/ Values("TanH", "Sigmoid", "AbsVal", "BNLL"),
+/*type*/ Values("TanH", "Sigmoid", "AbsVal", "BNLL", "Swish", "Mish"),
    dnnBackendsAndTargetsWithHalide()
));
......