opencv / Commits / 7b12cbd7

Commit 7b12cbd7, authored Dec 29, 2019 by Alexander Alekhin

Merge pull request #16254 from YashasSamaga:cuda4dnn-cudnn-optm-conv_bias_relu

Parents: d76f2456, 17a35587

Showing 4 changed files with 281 additions and 57 deletions:
  +80  -0   modules/dnn/src/cuda4dnn/csl/cudnn/activation.hpp
  +88  -0   modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp
  +30  -2   modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp
  +83  -55  modules/dnn/src/cuda4dnn/primitives/convolution.hpp
modules/dnn/src/cuda4dnn/csl/cudnn/activation.hpp (new file, mode 100644)
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_ACTIVATION_HPP
#define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_ACTIVATION_HPP

#include "cudnn.hpp" /* for CUDA4DNN_CHECK_CUDNN */

#include <cudnn.h>

namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {

    class ActivationDescriptor {
    public:
        enum class ActivationType {
            IDENTITY,
            RELU,
            CLIPPED_RELU,
            TANH,
            SIGMOID,
            ELU
        };

        ActivationDescriptor() noexcept : descriptor{ nullptr } { }
        ActivationDescriptor(const ActivationDescriptor&) = delete;
        ActivationDescriptor(ActivationDescriptor&& other) noexcept
            : descriptor{ other.descriptor } {
            other.descriptor = nullptr;
        }

        /* `relu_ceiling_or_elu_alpha`:
         * - `alpha` coefficient in ELU activation
         * - `ceiling` for CLIPPED_RELU activation
         */
        ActivationDescriptor(ActivationType type, double relu_ceiling_or_elu_alpha = 0.0) {
            CUDA4DNN_CHECK_CUDNN(cudnnCreateActivationDescriptor(&descriptor));
            try {
                const auto mode = [type] {
                    switch (type) {
                    case ActivationType::IDENTITY: return CUDNN_ACTIVATION_IDENTITY;
                    case ActivationType::RELU: return CUDNN_ACTIVATION_RELU;
                    case ActivationType::CLIPPED_RELU: return CUDNN_ACTIVATION_CLIPPED_RELU;
                    case ActivationType::SIGMOID: return CUDNN_ACTIVATION_SIGMOID;
                    case ActivationType::TANH: return CUDNN_ACTIVATION_TANH;
                    case ActivationType::ELU: return CUDNN_ACTIVATION_ELU;
                    }
                    CV_Assert(0);
                    return CUDNN_ACTIVATION_IDENTITY;
                }();
                CUDA4DNN_CHECK_CUDNN(cudnnSetActivationDescriptor(descriptor, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling_or_elu_alpha));
            } catch (...) {
                /* cudnnDestroyActivationDescriptor will not fail for a valid descriptor object */
                CUDA4DNN_CHECK_CUDNN(cudnnDestroyActivationDescriptor(descriptor));
                throw;
            }
        }

        ~ActivationDescriptor() noexcept {
            if (descriptor != nullptr) {
                /* cudnnDestroyActivationDescriptor will not fail */
                CUDA4DNN_CHECK_CUDNN(cudnnDestroyActivationDescriptor(descriptor));
            }
        }

        ActivationDescriptor& operator=(const ActivationDescriptor&) = delete;
        ActivationDescriptor& operator=(ActivationDescriptor&& other) noexcept {
            descriptor = other.descriptor;
            other.descriptor = nullptr;
            return *this;
        };

        cudnnActivationDescriptor_t get() const noexcept { return descriptor; }

    private:
        cudnnActivationDescriptor_t descriptor;
    };

}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */

#endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_ACTIVATION_HPP */
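ActivationDescriptor follows the same RAII pattern as the other cuda4dnn descriptor wrappers: the underlying cudnnActivationDescriptor_t is created on construction, destroyed on destruction, and the class is move-only, so exactly one object owns the handle at a time. A minimal usage sketch (hypothetical snippet, assuming the header above is included from within the cuda4dnn sources):

    using cv::dnn::cuda4dnn::csl::cudnn::ActivationDescriptor;

    // Clipped ReLU ("ReLU6"): the second constructor argument is the ceiling
    // for CLIPPED_RELU (and the alpha coefficient for ELU).
    ActivationDescriptor relu6{ActivationDescriptor::ActivationType::CLIPPED_RELU, 6.0};

    // Ownership moves; `relu6` is left holding nullptr, so its destructor is a no-op.
    ActivationDescriptor owner{std::move(relu6)};

    // The raw handle can be passed to cuDNN calls; `owner` retains ownership.
    cudnnActivationDescriptor_t raw = owner.get();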
modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp
@@ -6,6 +6,7 @@
 #define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_CONVOLUTION_HPP

 #include "cudnn.hpp"
+#include "activation.hpp"

 #include "../pointer.hpp"
 #include "../workspace.hpp"
@@ -405,6 +406,93 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
            );
        }

    /** @brief performs convolution, bias addition and activation simultaneously
     *
     * dstValue = act(alpha * conv(input) + bias)
     *
     * @tparam T convolution element type (must be `half` or `float`)
     *
     * @param handle valid cuDNN Handle
     * @param convDesc convolution description
     * @param convAlgo algorithm to use for convolution
     * @param workspace workspace memory which meets the requirements of \p convAlgo
     * @param filterDesc filter descriptor
     * @param[in] filterPtr pointer to device memory containing the filters
     * @param alpha convolution scale factor
     * @param inputDesc tensor descriptor describing the input
     * @param[in] inputPtr pointer to input tensor in device memory
     * @param biasDesc tensor descriptor describing the bias
     * @param[in] biasPtr pointer to bias tensor in device memory
     * @param actDesc activation descriptor
     * @param outputDesc tensor descriptor describing the output
     * @param[out] outputPtr pointer to output tensor in device memory
     *
     * Exception Guarantee: Basic
     */
    template <class T>
    void convolve_with_bias_activation(
        const Handle& handle,
        T alpha,
        const ConvolutionDescriptor<T>& convDesc,
        const ConvolutionAlgorithm<T>& convAlgo,
        WorkspaceInstance workspace,
        const FilterDescriptor<T>& filterDesc,
        DevicePtr<const T> filterPtr,
        const TensorDescriptor<T>& inputDesc,
        DevicePtr<const T> inputPtr,
        const TensorDescriptor<T>& biasDesc,
        DevicePtr<const T> biasPtr,
        const ActivationDescriptor& actDesc,
        const TensorDescriptor<T>& outputDesc,
        DevicePtr<T> outputPtr)
    {
        CV_Assert(handle);

        T alpha2 = 0.0;
        CUDA4DNN_CHECK_CUDNN(cudnnConvolutionBiasActivationForward(
            handle.get(),
            &alpha, inputDesc.get(), inputPtr.get(),
            filterDesc.get(), filterPtr.get(),
            convDesc.get(), convAlgo.get(),
            static_cast<void*>(workspace.get()), workspace.size_in_bytes(),
            &alpha2,
            outputDesc.get(), outputPtr.get(),
            biasDesc.get(), biasPtr.get(),
            actDesc.get(),
            outputDesc.get(), outputPtr.get()));
    }

    template <> inline
    void convolve_with_bias_activation(
        const Handle& handle,
        half alpha,
        const ConvolutionDescriptor<half>& convDesc,
        const ConvolutionAlgorithm<half>& convAlgo,
        WorkspaceInstance workspace,
        const FilterDescriptor<half>& filterDesc,
        DevicePtr<const half> filterPtr,
        const TensorDescriptor<half>& inputDesc,
        DevicePtr<const half> inputPtr,
        const TensorDescriptor<half>& biasDesc,
        DevicePtr<const half> biasPtr,
        const ActivationDescriptor& actDesc,
        const TensorDescriptor<half>& outputDesc,
        DevicePtr<half> outputPtr)
    {
        CV_Assert(handle);

        /* fp16 convolutions still take fp32 scaling factors in cuDNN */
        float alpha_ = alpha, alpha2 = 0.0;
        CUDA4DNN_CHECK_CUDNN(cudnnConvolutionBiasActivationForward(
            handle.get(),
            &alpha_, inputDesc.get(), inputPtr.get(),
            filterDesc.get(), filterPtr.get(),
            convDesc.get(), convAlgo.get(),
            static_cast<void*>(workspace.get()), workspace.size_in_bytes(),
            &alpha2,
            outputDesc.get(), outputPtr.get(),
            biasDesc.get(), biasPtr.get(),
            actDesc.get(),
            outputDesc.get(), outputPtr.get()));
    }

}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */

#endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_CONVOLUTION_HPP */
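Both overloads lower onto a single call to cudnnConvolutionBiasActivationForward, whose general contract is y = act(alpha1 * conv(x, w) + alpha2 * z + bias). The wrappers pass the output descriptor and pointer for both the z and y arguments and fix alpha2 to zero, which reduces that formula to the documented dstValue = act(alpha * conv(input) + bias). cuDNN supports only identity and ReLU epilogues in this fused call, which is why the layers above restrict fusion to ReLU. A sketch of the raw cuDNN call under those conventions (descriptor and pointer names are hypothetical and assumed to be already created and populated):

    /* Sketch: assumes handle, xDesc/x, wDesc/w, convDesc, algo, biasDesc/bias,
     * actDesc, yDesc/y, workspace and workspaceSize already exist (hypothetical names).
     * cuDNN computes: y = act(alpha1 * conv(x, w) + alpha2 * z + bias)
     */
    float alpha1 = 1.0f, alpha2 = 0.0f;         /* alpha2 = 0 disables the z term        */
    cudnnStatus_t status = cudnnConvolutionBiasActivationForward(
        handle,
        &alpha1, xDesc, x,                      /* input tensor                          */
        wDesc, w,                               /* filters                               */
        convDesc, algo,                         /* convolution and chosen algorithm      */
        workspace, workspaceSize,               /* scratch memory required by `algo`     */
        &alpha2, yDesc, y,                      /* z term: reuse output, scaled by zero  */
        biasDesc, bias,                         /* per-channel bias                      */
        actDesc,                                /* identity or ReLU                      */
        yDesc, y);                              /* output                                */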
modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp
@@ -135,17 +135,23 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
        using FilterDescriptor = cudnn::FilterDescriptor<T>;
        using ConvolutionDescriptor = cudnn::ConvolutionDescriptor<T>;
        using ConvolutionAlgorithm = cudnn::ConvolutionAlgorithm<T>;
        using ActivationDescriptor = cudnn::ActivationDescriptor;

    public:
        using ActivationType = ActivationDescriptor::ActivationType;

        struct params_type {
            /* convolution */
            std::vector<std::size_t> input_shape;
            std::vector<std::size_t> filter_shape;

            std::vector<std::size_t> padding;
            std::vector<std::size_t> stride;
            std::vector<std::size_t> dilation;

            std::size_t groups;

            /* bias and activation (only RELU supported) */
            std::vector<std::size_t> bias_shape;
            ActivationType activation_type; /* MUST BE identity if there is no bias and ReLU if there is bias */
        };

        Convolution() = default;

@@ -158,6 +164,14 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
            filterDesc = FilterDescriptor(params.filter_shape);
            convDesc = ConvolutionDescriptor(params.padding, params.stride, params.dilation, params.groups);

            if (!params.bias_shape.empty())
            {
                CV_Assert(params.activation_type == ActivationType::RELU);
                biasTensorDesc = TensorDescriptor(params.bias_shape);
                activationDesc = ActivationDescriptor(params.activation_type, 0.0);
            }
            else
            {
                CV_Assert(params.activation_type == ActivationType::IDENTITY);
            }

            std::vector<int> output_dims;
            getConvolutionForwardOutputDim(convDesc, filterDesc, inputTensorDesc, output_dims);
            outputTensorDesc = TensorDescriptor(output_dims);

@@ -182,12 +196,26 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
            );
        }

        void convolve_with_bias_activation(TensorSpan<T> output, TensorView<T> input, TensorView<T> filters, TensorView<T> bias, WorkspaceInstance scratchpad)
        {
            cudnn::convolve_with_bias_activation<T>(
                cudnnHandle,
                1.0, convDesc, algo, scratchpad,
                filterDesc, filters.get(),
                inputTensorDesc, input.get(),
                biasTensorDesc, bias.get(),
                activationDesc,
                outputTensorDesc, output.get()
            );
        }

    private:
        cudnn::Handle cudnnHandle;
        TensorDescriptor inputTensorDesc, outputTensorDesc;
        FilterDescriptor filterDesc;
        ConvolutionDescriptor convDesc;
        ConvolutionAlgorithm algo;
        TensorDescriptor biasTensorDesc;
        ActivationDescriptor activationDesc;
    };

    template <class T>
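The params_type invariant means a caller opts into cuDNN fusion simply by supplying a non-empty bias_shape together with RELU; every other configuration must leave both fields in their identity/empty state. A hypothetical setup inside the cuda4dnn backend (illustrative shapes; csl::Convolution<float> as declared above):

    // Sketch: configure a fused 3x3 conv + bias + ReLU over NCHW float tensors.
    // All shapes and the handle are hypothetical placeholder values.
    csl::Convolution<float>::params_type params;
    params.input_shape  = {1, 64, 56, 56};     // NCHW
    params.filter_shape = {128, 64, 3, 3};     // [out_channels, in_channels, kH, kW]
    params.padding      = {1, 1};
    params.stride       = {1, 1};
    params.dilation     = {1, 1};
    params.groups       = 1;

    params.bias_shape      = {1, 128, 1, 1};   // broadcast per output channel
    params.activation_type = csl::Convolution<float>::ActivationType::RELU;
    // ^ MUST be RELU when bias_shape is non-empty, IDENTITY otherwise

    csl::Convolution<float> conv(cudnnHandle, params);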
modules/dnn/src/cuda4dnn/primitives/convolution.hpp
@@ -204,6 +204,21 @@ namespace cv { namespace dnn { namespace cuda4dnn {
            params.dilation = dilations;
            params.groups = config.groups;

            /* check if we can perform fused convolution using cudnn */
            params.activation_type = csl::Convolution<T>::ActivationType::IDENTITY;
            fusion_location = InternalFusionLocation::NATIVE;
            if (!biasTensor.empty() && biasTensor.size() == output_feature_maps && /* cuDNN requirement */
                config.activation_type == ConvolutionConfiguration::ActivationType::RELU &&
                config.relu_negative_slope == 0.0)
            {
                fusion_location = InternalFusionLocation::CUDNN;

                auto bias_shape = std::vector<std::size_t>(rank, 1);
                bias_shape[1] = output_feature_maps;
                params.bias_shape = bias_shape;
                params.activation_type = csl::Convolution<T>::ActivationType::RELU;
            }

            convoluter = csl::Convolution<T>(cudnnHandle, params);

            activation = config.activation_type;
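The bias_shape construction builds a shape that broadcasts one bias value per output channel: a vector of `rank` ones with the channel slot set to the output feature-map count. For a rank-4 NCHW convolution with 128 output channels, for instance (illustrative values):

    // rank = 4 (NCHW), output_feature_maps = 128
    auto bias_shape = std::vector<std::size_t>(4, 1);  // {1, 1, 1, 1}
    bias_shape[1] = 128;                               // {1, 128, 1, 1}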
@@ -216,7 +231,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                activation = ConvolutionConfiguration::ActivationType::IDENTITY;

            csl::WorkspaceBuilder builder;
            if (!transformed_shape.empty())
            {
                auto& shape = transformed_shape;
                auto sz = std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<std::size_t>());
                builder.require<T>(sz);
@@ -248,65 +264,72 @@ namespace cv { namespace dnn { namespace cuda4dnn {
            auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

            if (fusion_location == InternalFusionLocation::CUDNN)
            {
                convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, allocator.get_instance());
            }
            else
            {
                convoluter.convolve(output, input, filtersTensor, allocator.get_instance());
                if (!biasTensor.empty())
                {
                    std::size_t inner_size = output.size_range(2, output.rank());
                    switch (activation)
                    {
                        case ConvolutionConfiguration::ActivationType::IDENTITY:
                            kernels::biasN<T>(stream, output, output, inner_size, biasTensor);
                            break;
                        case ConvolutionConfiguration::ActivationType::RELU:
                            kernels::biasN_relu_inplace<T>(stream, output, inner_size, biasTensor, relu_negative_slope);
                            break;
                        case ConvolutionConfiguration::ActivationType::CLIPPED_RELU:
                            kernels::biasN_clipped_relu_inplace<T>(stream, output, inner_size, biasTensor, crelu_floor, crelu_ceil);
                            break;
                        case ConvolutionConfiguration::ActivationType::POWER:
                            kernels::biasN_power_inplace<T>(stream, output, inner_size, biasTensor, power_exp);
                            break;
                        case ConvolutionConfiguration::ActivationType::TANH:
                            kernels::biasN_tanh_inplace<T>(stream, output, inner_size, biasTensor);
                            break;
                        case ConvolutionConfiguration::ActivationType::SIGMOID:
                            kernels::biasN_sigmoid_inplace<T>(stream, output, inner_size, biasTensor);
                            break;
                        case ConvolutionConfiguration::ActivationType::SWISH:
                            kernels::biasN_swish_inplace<T>(stream, output, inner_size, biasTensor);
                            break;
                        case ConvolutionConfiguration::ActivationType::MISH:
                            kernels::biasN_mish_inplace<T>(stream, output, inner_size, biasTensor);
                            break;
                    }
                }
                else
                {
                    switch (activation)
                    {
                        case ConvolutionConfiguration::ActivationType::IDENTITY:
                            break;
                        case ConvolutionConfiguration::ActivationType::RELU:
                            kernels::relu<T>(stream, output, output, relu_negative_slope);
                            break;
                        case ConvolutionConfiguration::ActivationType::CLIPPED_RELU:
                            kernels::clipped_relu<T>(stream, output, output, crelu_floor, crelu_ceil);
                            break;
                        case ConvolutionConfiguration::ActivationType::POWER:
                            kernels::power<T>(stream, output, output, power_exp, 1.0, 0.0);
                            break;
                        case ConvolutionConfiguration::ActivationType::TANH:
                            kernels::tanh<T>(stream, output, output);
                            break;
                        case ConvolutionConfiguration::ActivationType::SIGMOID:
                            kernels::sigmoid<T>(stream, output, output);
                            break;
                        case ConvolutionConfiguration::ActivationType::SWISH:
                            kernels::swish<T>(stream, output, output);
                            break;
                        case ConvolutionConfiguration::ActivationType::MISH:
                            kernels::mish<T>(stream, output, output);
                            break;
                    }
                }
            }
        }
@@ -326,6 +349,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
        ConvolutionConfiguration::ActivationType activation;
        float relu_negative_slope, crelu_floor, crelu_ceil, power_exp;

        enum class InternalFusionLocation {
            CUDNN,
            NATIVE
        } fusion_location;
    };

}}} /* namespace cv::dnn::cuda4dnn */
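End to end, this fused path is taken transparently whenever the DNN graph optimizer has folded a plain ReLU (zero negative slope) into a biased convolution and the network runs on the CUDA backend. A minimal sketch using the public OpenCV API (model path and input dimensions are placeholders):

    #include <opencv2/dnn.hpp>

    // Sketch: run a model on the CUDA backend so eligible conv + bias + ReLU
    // layers can take the cuDNN-fused path added by this commit.
    cv::dnn::Net net = cv::dnn::readNet("model.onnx");  // placeholder model
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);  // or DNN_TARGET_CUDA_FP16 for the `half` specialization

    cv::Mat input = cv::Mat::zeros(224, 224, CV_32FC3); // placeholder input
    net.setInput(cv::dnn::blobFromImage(input));
    cv::Mat out = net.forward();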