Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
59b91d6c
Commit
59b91d6c
authored
Feb 13, 2018
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #10821 from dkurt:dnn_layers_fusion
parents
a91a11e5
514e6df4
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
165 additions
and
293 deletions
+165
-293
all_layers.hpp
modules/dnn/include/opencv2/dnn/all_layers.hpp
+0
-1
dnn.hpp
modules/dnn/include/opencv2/dnn/dnn.hpp
+15
-9
dnn.cpp
modules/dnn/src/dnn.cpp
+21
-34
convolution_layer.cpp
modules/dnn/src/layers/convolution_layer.cpp
+105
-231
mvn_layer.cpp
modules/dnn/src/layers/mvn_layer.cpp
+12
-18
scale_layer.cpp
modules/dnn/src/layers/scale_layer.cpp
+6
-0
shift_layer.cpp
modules/dnn/src/layers/shift_layer.cpp
+6
-0
No files found.
modules/dnn/include/opencv2/dnn/all_layers.hpp
View file @
59b91d6c
...
@@ -472,7 +472,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
...
@@ -472,7 +472,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
bool
hasWeights
,
hasBias
;
bool
hasWeights
,
hasBias
;
float
epsilon
;
float
epsilon
;
virtual
void
getScaleShift
(
Mat
&
scale
,
Mat
&
shift
)
const
=
0
;
static
Ptr
<
BatchNormLayer
>
create
(
const
LayerParams
&
params
);
static
Ptr
<
BatchNormLayer
>
create
(
const
LayerParams
&
params
);
};
};
...
...
modules/dnn/include/opencv2/dnn/dnn.hpp
View file @
59b91d6c
...
@@ -281,20 +281,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
...
@@ -281,20 +281,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
virtual
bool
setActivation
(
const
Ptr
<
ActivationLayer
>&
layer
);
virtual
bool
setActivation
(
const
Ptr
<
ActivationLayer
>&
layer
);
/**
/**
* @brief Tries to attach to the layer the subsequent batch normalization layer, i.e. do the layer fusion in a partial case.
* @brief Tries to fuse the current layer with the next one.
* @param[in] layer The subsequent batch normalization layer.
* @param[in] top Next layer to be fused.
*
* @returns True if fusion was performed.
* Returns true if the batch normalization layer has been attached successfully.
*/
*/
virtual
bool
setBatchNorm
(
const
Ptr
<
BatchNormLayer
>&
layer
);
virtual
bool
tryFuse
(
Ptr
<
Layer
>&
top
);
/**
/**
* @brief Tries to attach to the layer the subsequent scaling layer, i.e. do the layer fusion in a partial case.
* @brief Returns parameters of layers with channel-wise multiplication and addition.
* @param[in] layer The subsequent scaling layer.
* @param[out] scale Channel-wise multipliers. Total number of values should
* be equal to number of channels.
* @param[out] shift Channel-wise offsets. Total number of values should
* be equal to number of channels.
*
*
* Returns true if the scaling layer has been attached successfully.
* Some layers can fuse their transformations with subsequent layers,
* for example, convolution + batch normalization. In this case the base layer
* uses the weights from the layer after it, and the fused layer is skipped.
* By default, @p scale and @p shift are empty, which means the layer has no
* element-wise multiplications or additions.
*/
*/
virtual
bool
setScale
(
const
Ptr
<
ScaleLayer
>&
layer
)
;
virtual
void
getScaleShift
(
Mat
&
scale
,
Mat
&
shift
)
const
;
/**
/**
* @brief "Deattaches" all the layers, attached to particular layer.
* @brief "Deattaches" all the layers, attached to particular layer.
...
...
modules/dnn/src/dnn.cpp
View file @
59b91d6c
...
@@ -1407,46 +1407,30 @@ struct Net::Impl
...
@@ -1407,46 +1407,30 @@ struct Net::Impl
if
(
ld
.
consumers
.
size
()
==
1
&&
pinsToKeep
.
count
(
LayerPin
(
lid
,
0
))
==
0
)
if
(
ld
.
consumers
.
size
()
==
1
&&
pinsToKeep
.
count
(
LayerPin
(
lid
,
0
))
==
0
)
{
{
LayerData
*
nextData
=
&
layers
[
ld
.
consumers
[
0
].
lid
];
LayerData
*
nextData
=
&
layers
[
ld
.
consumers
[
0
].
lid
];
Ptr
<
BatchNormLayer
>
nextBNormLayer
=
nextData
->
layerInstance
.
dynamicCast
<
BatchNormLayer
>
();
LayerPin
lpNext
(
ld
.
consumers
[
0
].
lid
,
0
);
LayerPin
lpNext
(
ld
.
consumers
[
0
].
lid
,
0
);
if
(
!
nextBNormLayer
.
empty
()
&&
pinsToKeep
.
count
(
lpNext
)
==
0
)
while
(
nextData
)
{
{
LayerData
*
bnormData
=
nextData
;
Ptr
<
Layer
>
nextLayer
=
nextData
->
layerInstance
;
nextData
=
0
;
if
(
currLayer
->
tryFuse
(
nextLayer
))
if
(
currLayer
->
setBatchNorm
(
nextBNormLayer
)
)
{
{
printf_
((
"
\t
fused with %s
\n
"
,
next
BNorm
Layer
->
name
.
c_str
()));
printf_
((
"
\t
fused with %s
\n
"
,
nextLayer
->
name
.
c_str
()));
bnorm
Data
->
skip
=
true
;
next
Data
->
skip
=
true
;
ld
.
outputBlobs
=
layers
[
lpNext
.
lid
].
outputBlobs
;
ld
.
outputBlobs
=
layers
[
lpNext
.
lid
].
outputBlobs
;
ld
.
outputBlobsWrappers
=
layers
[
lpNext
.
lid
].
outputBlobsWrappers
;
ld
.
outputBlobsWrappers
=
layers
[
lpNext
.
lid
].
outputBlobsWrappers
;
if
(
bnormData
->
consumers
.
size
()
==
1
)
if
(
nextData
->
consumers
.
size
()
==
1
)
{
{
nextData
=
&
layers
[
bnormData
->
consumers
[
0
].
lid
];
int
nextLayerId
=
nextData
->
consumers
[
0
].
lid
;
lpNext
=
LayerPin
(
bnormData
->
consumers
[
0
].
lid
,
0
);
nextData
=
&
layers
[
nextLayerId
];
lpNext
=
LayerPin
(
nextLayerId
,
0
);
}
}
}
else
}
Ptr
<
ScaleLayer
>
nextScaleLayer
;
if
(
nextData
)
nextScaleLayer
=
nextData
->
layerInstance
.
dynamicCast
<
ScaleLayer
>
();
if
(
!
nextScaleLayer
.
empty
()
&&
pinsToKeep
.
count
(
lpNext
)
==
0
)
{
LayerData
*
scaleData
=
nextData
;
nextData
=
0
;
if
(
currLayer
->
setScale
(
nextScaleLayer
)
)
{
printf_
((
"
\t
fused with %s
\n
"
,
nextScaleLayer
->
name
.
c_str
()));
scaleData
->
skip
=
true
;
ld
.
outputBlobs
=
layers
[
lpNext
.
lid
].
outputBlobs
;
ld
.
outputBlobsWrappers
=
layers
[
lpNext
.
lid
].
outputBlobsWrappers
;
if
(
scaleData
->
consumers
.
size
()
==
1
)
{
{
nextData
=
&
layers
[
scaleData
->
consumers
[
0
].
lid
]
;
nextData
=
0
;
lpNext
=
LayerPin
(
scaleData
->
consumers
[
0
].
lid
,
0
)
;
break
;
}
}
}
}
else
break
;
}
}
// For now, the OpenCL target supports fusion with ReLU/ChannelsPReLU/Power/Tanh activations
// For now, the OpenCL target supports fusion with ReLU/ChannelsPReLU/Power/Tanh activations
...
@@ -2627,13 +2611,16 @@ Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
...
@@ -2627,13 +2611,16 @@ Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
}
}
bool
Layer
::
setActivation
(
const
Ptr
<
ActivationLayer
>&
)
{
return
false
;
}
bool
Layer
::
setActivation
(
const
Ptr
<
ActivationLayer
>&
)
{
return
false
;
}
bool
Layer
::
setBatchNorm
(
const
Ptr
<
BatchNormLayer
>&
)
{
return
false
;
}
bool
Layer
::
tryFuse
(
Ptr
<
Layer
>&
)
{
return
false
;
}
bool
Layer
::
setScale
(
const
Ptr
<
ScaleLayer
>&
)
{
return
false
;
}
void
Layer
::
getScaleShift
(
Mat
&
scale
,
Mat
&
shift
)
const
{
scale
=
Mat
();
shift
=
Mat
();
}
void
Layer
::
unsetAttached
()
void
Layer
::
unsetAttached
()
{
{
setActivation
(
Ptr
<
ActivationLayer
>
());
setActivation
(
Ptr
<
ActivationLayer
>
());
setBatchNorm
(
Ptr
<
BatchNormLayer
>
());
setScale
(
Ptr
<
ScaleLayer
>
());
}
}
template
<
typename
T
>
template
<
typename
T
>
...
...
modules/dnn/src/layers/convolution_layer.cpp
View file @
59b91d6c
...
@@ -61,7 +61,23 @@ namespace dnn
...
@@ -61,7 +61,23 @@ namespace dnn
class
BaseConvolutionLayerImpl
:
public
ConvolutionLayer
class
BaseConvolutionLayerImpl
:
public
ConvolutionLayer
{
{
public
:
public
:
BaseConvolutionLayerImpl
()
{}
BaseConvolutionLayerImpl
(
const
LayerParams
&
params
)
{
setParamsFrom
(
params
);
getConvolutionKernelParams
(
params
,
kernel
.
height
,
kernel
.
width
,
pad
.
height
,
pad
.
width
,
stride
.
height
,
stride
.
width
,
dilation
.
height
,
dilation
.
width
,
padMode
);
numOutput
=
params
.
get
<
int
>
(
"num_output"
);
int
ngroups
=
params
.
get
<
int
>
(
"group"
,
1
);
adjustPad
.
height
=
params
.
get
<
int
>
(
"adj_h"
,
0
);
adjustPad
.
width
=
params
.
get
<
int
>
(
"adj_w"
,
0
);
CV_Assert
(
numOutput
%
ngroups
==
0
);
CV_Assert
(
adjustPad
.
width
<
stride
.
width
&&
adjustPad
.
height
<
stride
.
height
);
}
virtual
bool
supportBackend
(
int
backendId
)
virtual
bool
supportBackend
(
int
backendId
)
{
{
...
@@ -153,12 +169,10 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
...
@@ -153,12 +169,10 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
{
{
public
:
public
:
enum
{
VEC_ALIGN
=
8
,
DFT_TYPE
=
CV_32F
};
enum
{
VEC_ALIGN
=
8
,
DFT_TYPE
=
CV_32F
};
Mat
weightsMat
;
Mat
weightsMat
,
weightsMat_doubles
;
std
::
vector
<
float
>
biasvec
;
std
::
vector
<
float
>
biasvec
;
std
::
vector
<
float
>
reluslope
;
std
::
vector
<
float
>
reluslope
;
Ptr
<
ActivationLayer
>
activ
;
Ptr
<
ActivationLayer
>
activ
;
Ptr
<
BatchNormLayer
>
bnorm
;
Ptr
<
ScaleLayer
>
scaleLayer
;
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
Ptr
<
OCL4DNNConvSpatial
<
float
>
>
convolutionOp
;
Ptr
<
OCL4DNNConvSpatial
<
float
>
>
convolutionOp
;
...
@@ -169,7 +183,7 @@ public:
...
@@ -169,7 +183,7 @@ public:
ocl4dnnFusedActiv_t
activType
;
ocl4dnnFusedActiv_t
activType
;
float
power
;
float
power
;
#endif
#endif
ConvolutionLayerImpl
()
ConvolutionLayerImpl
(
const
LayerParams
&
params
)
:
BaseConvolutionLayerImpl
(
params
)
{
{
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
fusedBias
=
false
;
fusedBias
=
false
;
...
@@ -225,6 +239,42 @@ public:
...
@@ -225,6 +239,42 @@ public:
return
false
;
return
false
;
}
}
virtual
void
finalize
(
const
std
::
vector
<
Mat
*>
&
inputs
,
std
::
vector
<
Mat
>
&
outputs
)
{
BaseConvolutionLayerImpl
::
finalize
(
inputs
,
outputs
);
CV_Assert
(
!
blobs
.
empty
());
const
int
outCn
=
blobs
[
0
].
size
[
0
];
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing
Mat
wm
=
blobs
[
0
].
reshape
(
1
,
outCn
).
clone
();
if
(
wm
.
step1
()
%
VEC_ALIGN
!=
0
)
{
int
newcols
=
(
int
)
alignSize
(
wm
.
step1
(),
VEC_ALIGN
);
Mat
wm_buffer
=
Mat
(
outCn
,
newcols
,
wm
.
type
());
Mat
wm_padding
=
wm_buffer
.
colRange
(
wm
.
cols
,
newcols
);
wm_padding
.
setTo
(
Scalar
::
all
(
0.
));
Mat
wm_aligned
=
wm_buffer
.
colRange
(
0
,
wm
.
cols
);
wm
.
copyTo
(
wm_aligned
);
wm
=
wm_aligned
;
}
weightsMat
=
wm
;
weightsMat
.
convertTo
(
weightsMat_doubles
,
CV_64F
);
Mat
biasMat
=
hasBias
()
?
blobs
[
1
].
reshape
(
1
,
outCn
)
:
Mat
();
biasvec
.
resize
(
outCn
+
2
);
if
(
biasMat
.
empty
()
)
{
for
(
int
i
=
0
;
i
<
outCn
;
i
++
)
biasvec
[
i
]
=
0.
f
;
}
else
{
for
(
int
i
=
0
;
i
<
outCn
;
i
++
)
biasvec
[
i
]
=
biasMat
.
at
<
float
>
(
i
);
}
}
bool
setActivation
(
const
Ptr
<
ActivationLayer
>&
layer
)
bool
setActivation
(
const
Ptr
<
ActivationLayer
>&
layer
)
{
{
activ
=
layer
;
activ
=
layer
;
...
@@ -240,10 +290,11 @@ public:
...
@@ -240,10 +290,11 @@ public:
if
(
!
activ_power
.
empty
())
if
(
!
activ_power
.
empty
())
{
{
if
(
activ_power
->
scale
!=
1.
f
||
activ_power
->
shift
!=
0.
f
)
if
(
activ_power
->
scale
!=
1.
f
||
activ_power
->
shift
!=
0.
f
)
newWeightAndBias
=
true
;
{
const
int
outCh
=
blobs
[
0
].
size
[
0
];
if
(
activ_power
->
scale
!=
1.
f
)
fuseWeights
(
Mat
(
1
,
outCh
,
CV_32F
,
Scalar
(
activ_power
->
scale
)),
weightsMat
.
release
();
Mat
(
1
,
outCh
,
CV_32F
,
Scalar
(
activ_power
->
shift
)));
}
power
=
activ_power
->
power
;
power
=
activ_power
->
power
;
activType
=
OCL4DNN_CONV_FUSED_ACTIV_POWER
;
activType
=
OCL4DNN_CONV_FUSED_ACTIV_POWER
;
...
@@ -258,35 +309,49 @@ public:
...
@@ -258,35 +309,49 @@ public:
return
!
activ
.
empty
();
return
!
activ
.
empty
();
}
}
bool
setBatchNorm
(
const
Ptr
<
BatchNormLayer
>&
layer
)
virtual
bool
tryFuse
(
Ptr
<
Layer
>&
top
)
{
{
// for now the scale layer followed by the batch norm cannot be fused, only vice versa.
Mat
w
,
b
;
if
(
!
scaleLayer
.
empty
()
)
top
->
getScaleShift
(
w
,
b
);
return
false
;
if
(
!
w
.
empty
()
||
!
b
.
empty
())
bnorm
=
layer
;
{
// we will need to re-compute the weights with the batch
fuseWeights
(
w
,
b
);
// norm coefficients taken into account
return
true
;
weightsMat
.
release
();
}
#ifdef HAVE_OPENCL
return
false
;
newWeightAndBias
=
true
;
fusedBias
=
false
;
#endif
return
!
bnorm
.
empty
();
}
}
bool
setScale
(
const
Ptr
<
ScaleLayer
>&
layer
)
void
fuseWeights
(
const
Mat
&
w
,
const
Mat
&
b
)
{
{
if
(
layer
.
empty
()
||
layer
->
blobs
.
empty
())
// Convolution weights have OIHW data layout. Parameters fusion in case of
return
false
;
// (conv(I) + b1 ) * w + b2
scaleLayer
=
layer
;
// means replacing the convolution's weights with [w*conv(I)] and its bias with [b1 * w + b2]
// we will need to re-compute the weights with the scaling
const
int
outCn
=
weightsMat
.
size
[
0
];
// coefficients taken into account
CV_Assert
(
!
weightsMat
.
empty
(),
biasvec
.
size
()
==
outCn
+
2
,
weightsMat
.
release
();
w
.
empty
()
||
outCn
==
w
.
total
(),
b
.
empty
()
||
outCn
==
b
.
total
());
if
(
!
w
.
empty
())
{
for
(
int
i
=
0
;
i
<
outCn
;
++
i
)
{
double
wi
=
w
.
at
<
float
>
(
i
);
cv
::
multiply
(
slice
(
weightsMat_doubles
,
i
),
wi
,
slice
(
weightsMat_doubles
,
i
));
biasvec
[
i
]
*=
wi
;
}
weightsMat_doubles
.
convertTo
(
weightsMat
,
weightsMat
.
type
());
}
if
(
!
b
.
empty
())
{
for
(
int
i
=
0
;
i
<
outCn
;
++
i
)
biasvec
[
i
]
+=
b
.
at
<
float
>
(
i
);
}
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
newWeightAndBias
=
true
;
newWeightAndBias
=
!
w
.
empty
()
||
!
b
.
empty
()
;
fusedBias
=
false
;
fusedBias
=
hasBias
()
||
!
b
.
empty
()
;
#endif
#endif
return
true
;
biasvec
[
outCn
]
=
biasvec
[
outCn
+
1
]
=
biasvec
[
outCn
-
1
]
;
}
}
virtual
Ptr
<
BackendNode
>
initHalide
(
const
std
::
vector
<
Ptr
<
BackendWrapper
>
>
&
inputs
)
virtual
Ptr
<
BackendNode
>
initHalide
(
const
std
::
vector
<
Ptr
<
BackendWrapper
>
>
&
inputs
)
...
@@ -776,97 +841,7 @@ public:
...
@@ -776,97 +841,7 @@ public:
convolutionOp
=
Ptr
<
OCL4DNNConvSpatial
<
float
>
>
(
new
OCL4DNNConvSpatial
<
float
>
(
config
));
convolutionOp
=
Ptr
<
OCL4DNNConvSpatial
<
float
>
>
(
new
OCL4DNNConvSpatial
<
float
>
(
config
));
}
}
int
k
,
outCn
=
umat_blobs
[
0
].
size
[
0
];
int
outCn
=
umat_blobs
[
0
].
size
[
0
];
if
(
weightsMat
.
empty
()
)
{
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing
Mat
wm
=
blobs
[
0
].
reshape
(
1
,
outCn
).
clone
();
if
(
wm
.
step1
()
%
VEC_ALIGN
!=
0
)
{
int
newcols
=
(
int
)
alignSize
(
wm
.
step1
(),
VEC_ALIGN
);
Mat
wm_buffer
=
Mat
(
outCn
,
newcols
,
wm
.
type
());
Mat
wm_padding
=
wm_buffer
.
colRange
(
wm
.
cols
,
newcols
);
wm_padding
.
setTo
(
Scalar
::
all
(
0.
));
Mat
wm_aligned
=
wm_buffer
.
colRange
(
0
,
wm
.
cols
);
wm
.
copyTo
(
wm_aligned
);
wm
=
wm_aligned
;
}
weightsMat
=
wm
;
Mat
biasMat
=
hasBias
()
?
blobs
[
1
].
reshape
(
1
,
outCn
)
:
Mat
();
biasvec
.
resize
(
outCn
+
2
);
if
(
biasMat
.
empty
()
)
{
for
(
k
=
0
;
k
<
outCn
;
k
++
)
biasvec
[
k
]
=
0.
f
;
}
else
{
for
(
k
=
0
;
k
<
outCn
;
k
++
)
biasvec
[
k
]
=
biasMat
.
at
<
float
>
(
k
);
}
if
(
!
bnorm
.
empty
()
||
!
scaleLayer
.
empty
()
||
IS_POWER_LAYER
(
activ
))
{
Mat
scale
,
shift
,
scale2
,
shift2
;
const
float
*
scaleptr
=
0
,
*
shiftptr
=
0
;
const
float
*
scaleptr2
=
0
,
*
shiftptr2
=
0
;
float
a
=
1.
f
,
b
=
0.
f
;
if
(
!
bnorm
.
empty
()
)
{
bnorm
->
getScaleShift
(
scale
,
shift
);
CV_Assert
(
scale
.
isContinuous
()
&&
shift
.
isContinuous
()
&&
scale
.
type
()
==
CV_32F
&&
shift
.
type
()
==
CV_32F
&&
scale
.
total
()
==
(
size_t
)
outCn
&&
shift
.
total
()
==
(
size_t
)
outCn
);
scaleptr
=
scale
.
ptr
<
float
>
();
shiftptr
=
shift
.
ptr
<
float
>
();
}
if
(
!
scaleLayer
.
empty
()
)
{
scale2
=
scaleLayer
->
blobs
[
0
];
CV_Assert
(
scale2
.
isContinuous
()
&&
scale2
.
type
()
==
CV_32F
&&
scale2
.
total
()
==
(
size_t
)
outCn
);
scaleptr2
=
scale2
.
ptr
<
float
>
();
if
(
scaleLayer
->
hasBias
)
{
shift2
=
scaleLayer
->
blobs
[
1
];
CV_Assert
(
shift2
.
isContinuous
()
&&
shift2
.
type
()
==
CV_32F
&&
shift2
.
total
()
==
(
size_t
)
outCn
);
shiftptr2
=
shift2
.
ptr
<
float
>
();
}
}
if
(
IS_POWER_LAYER
(
activ
)
)
{
Ptr
<
PowerLayer
>
activ_power
=
activ
.
dynamicCast
<
PowerLayer
>
();
CV_Assert
(
activ_power
);
a
=
activ_power
->
scale
;
b
=
activ_power
->
shift
;
}
if
(
shiftptr
||
shiftptr2
||
b
!=
0.
f
)
fusedBias
=
true
;
for
(
int
i
=
0
;
i
<
outCn
;
i
++
)
{
float
s1
=
scaleptr
?
scaleptr
[
i
]
:
1.
f
;
float
delta1
=
shiftptr
?
shiftptr
[
i
]
:
0.
f
;
float
s2
=
scaleptr2
?
scaleptr2
[
i
]
:
1.
f
;
float
delta2
=
shiftptr2
?
shiftptr2
[
i
]
:
0.
f
;
float
*
w_i
=
weightsMat
.
ptr
<
float
>
(
i
);
int
j
,
wcols
=
weightsMat
.
cols
;
for
(
j
=
0
;
j
<
wcols
;
j
++
)
w_i
[
j
]
*=
(
s1
*
s2
*
a
);
biasvec
[
i
]
=
biasvec
[
i
]
*
(
s1
*
s2
*
a
)
+
(
delta1
*
s2
*
a
+
delta2
*
a
+
b
);
}
}
biasvec
[
outCn
]
=
biasvec
[
outCn
+
1
]
=
biasvec
[
outCn
-
1
];
}
reluslope
.
clear
();
reluslope
.
clear
();
if
(
activ
)
if
(
activ
)
...
@@ -973,86 +948,7 @@ public:
...
@@ -973,86 +948,7 @@ public:
int
ngroups
=
inputs
[
0
]
->
size
[
1
]
/
blobs
[
0
].
size
[
1
];
int
ngroups
=
inputs
[
0
]
->
size
[
1
]
/
blobs
[
0
].
size
[
1
];
CV_Assert
(
outputs
[
0
].
size
[
1
]
%
ngroups
==
0
);
CV_Assert
(
outputs
[
0
].
size
[
1
]
%
ngroups
==
0
);
int
k
,
outCn
=
blobs
[
0
].
size
[
0
];
int
outCn
=
blobs
[
0
].
size
[
0
];
if
(
weightsMat
.
empty
()
)
{
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing
Mat
wm
=
blobs
[
0
].
reshape
(
1
,
outCn
).
clone
();
if
(
wm
.
step1
()
%
VEC_ALIGN
!=
0
)
{
int
newcols
=
(
int
)
alignSize
(
wm
.
step1
(),
VEC_ALIGN
);
Mat
wm_buffer
=
Mat
(
outCn
,
newcols
,
wm
.
type
());
Mat
wm_padding
=
wm_buffer
.
colRange
(
wm
.
cols
,
newcols
);
wm_padding
.
setTo
(
Scalar
::
all
(
0.
));
Mat
wm_aligned
=
wm_buffer
.
colRange
(
0
,
wm
.
cols
);
wm
.
copyTo
(
wm_aligned
);
wm
=
wm_aligned
;
}
weightsMat
=
wm
;
Mat
biasMat
=
hasBias
()
?
blobs
[
1
].
reshape
(
1
,
outCn
)
:
Mat
();
biasvec
.
resize
(
outCn
+
2
);
if
(
biasMat
.
empty
()
)
{
for
(
k
=
0
;
k
<
outCn
;
k
++
)
biasvec
[
k
]
=
0.
f
;
}
else
{
for
(
k
=
0
;
k
<
outCn
;
k
++
)
biasvec
[
k
]
=
biasMat
.
at
<
float
>
(
k
);
}
if
(
!
bnorm
.
empty
()
||
!
scaleLayer
.
empty
()
)
{
Mat
scale
,
shift
,
scale2
,
shift2
;
const
float
*
scaleptr
=
0
,
*
shiftptr
=
0
;
const
float
*
scaleptr2
=
0
,
*
shiftptr2
=
0
;
if
(
!
bnorm
.
empty
()
)
{
bnorm
->
getScaleShift
(
scale
,
shift
);
CV_Assert
(
scale
.
isContinuous
()
&&
shift
.
isContinuous
()
&&
scale
.
type
()
==
CV_32F
&&
shift
.
type
()
==
CV_32F
&&
scale
.
total
()
==
(
size_t
)
outCn
&&
shift
.
total
()
==
(
size_t
)
outCn
);
scaleptr
=
scale
.
ptr
<
float
>
();
shiftptr
=
shift
.
ptr
<
float
>
();
}
if
(
!
scaleLayer
.
empty
()
)
{
scale2
=
scaleLayer
->
blobs
[
0
];
CV_Assert
(
scale2
.
isContinuous
()
&&
scale2
.
type
()
==
CV_32F
&&
scale2
.
total
()
==
(
size_t
)
outCn
);
scaleptr2
=
scale2
.
ptr
<
float
>
();
if
(
scaleLayer
->
hasBias
)
{
shift2
=
scaleLayer
->
blobs
[
1
];
CV_Assert
(
shift2
.
isContinuous
()
&&
shift2
.
type
()
==
CV_32F
&&
shift2
.
total
()
==
(
size_t
)
outCn
);
shiftptr2
=
shift2
.
ptr
<
float
>
();
}
}
for
(
int
i
=
0
;
i
<
outCn
;
i
++
)
{
float
s1
=
scaleptr
?
scaleptr
[
i
]
:
1.
f
;
float
delta1
=
shiftptr
?
shiftptr
[
i
]
:
0.
f
;
float
s2
=
scaleptr2
?
scaleptr2
[
i
]
:
1.
f
;
float
delta2
=
shiftptr2
?
shiftptr2
[
i
]
:
0.
f
;
float
*
w_i
=
weightsMat
.
ptr
<
float
>
(
i
);
int
j
,
wcols
=
weightsMat
.
cols
;
for
(
j
=
0
;
j
<
wcols
;
j
++
)
w_i
[
j
]
*=
(
s1
*
s2
);
biasvec
[
i
]
=
biasvec
[
i
]
*
(
s1
*
s2
)
+
(
delta1
*
s2
+
delta2
);
}
}
biasvec
[
outCn
]
=
biasvec
[
outCn
+
1
]
=
biasvec
[
outCn
-
1
];
}
reluslope
.
clear
();
reluslope
.
clear
();
if
(
activ
)
if
(
activ
)
...
@@ -1103,6 +999,8 @@ public:
...
@@ -1103,6 +999,8 @@ public:
UMat
umat_weights
;
UMat
umat_weights
;
UMat
umat_biases
;
UMat
umat_biases
;
DeConvolutionLayerImpl
(
const
LayerParams
&
params
)
:
BaseConvolutionLayerImpl
(
params
)
{}
MatShape
computeColRowShape
(
const
MatShape
&
inpShape
,
const
MatShape
&
outShape
)
const
MatShape
computeColRowShape
(
const
MatShape
&
inpShape
,
const
MatShape
&
outShape
)
const
{
{
int
inpCn
=
inpShape
[
1
];
int
inpCn
=
inpShape
[
1
];
...
@@ -1619,36 +1517,15 @@ public:
...
@@ -1619,36 +1517,15 @@ public:
}
}
};
};
//Convolution and Deconvolution
static
void
initConvDeconvLayerFromCaffe
(
Ptr
<
BaseConvolutionLayer
>
l
,
const
LayerParams
&
params
)
{
l
->
setParamsFrom
(
params
);
getConvolutionKernelParams
(
params
,
l
->
kernel
.
height
,
l
->
kernel
.
width
,
l
->
pad
.
height
,
l
->
pad
.
width
,
l
->
stride
.
height
,
l
->
stride
.
width
,
l
->
dilation
.
height
,
l
->
dilation
.
width
,
l
->
padMode
);
l
->
numOutput
=
params
.
get
<
int
>
(
"num_output"
);
int
ngroups
=
params
.
get
<
int
>
(
"group"
,
1
);
l
->
adjustPad
.
height
=
params
.
get
<
int
>
(
"adj_h"
,
0
);
l
->
adjustPad
.
width
=
params
.
get
<
int
>
(
"adj_w"
,
0
);
CV_Assert
(
l
->
numOutput
%
ngroups
==
0
);
CV_Assert
(
l
->
adjustPad
.
width
<
l
->
stride
.
width
&&
l
->
adjustPad
.
height
<
l
->
stride
.
height
);
}
Ptr
<
BaseConvolutionLayer
>
ConvolutionLayer
::
create
(
const
LayerParams
&
params
)
Ptr
<
BaseConvolutionLayer
>
ConvolutionLayer
::
create
(
const
LayerParams
&
params
)
{
{
ConvolutionLayerImpl
*
conv_ptr
=
new
ConvolutionLayerImpl
;
Ptr
<
ConvolutionLayerImpl
>
l
(
new
ConvolutionLayerImpl
(
params
));
Ptr
<
BaseConvolutionLayer
>
l
(
conv_ptr
);
initConvDeconvLayerFromCaffe
(
l
,
params
);
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
size_t
n
=
params
.
blobs
.
size
();
size_t
n
=
params
.
blobs
.
size
();
conv_ptr
->
umat_blobs
.
resize
(
n
);
l
->
umat_blobs
.
resize
(
n
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
for
(
int
i
=
0
;
i
<
n
;
i
++
)
conv_ptr
->
umat_blobs
[
i
]
=
params
.
blobs
[
i
].
getUMat
(
ACCESS_READ
);
l
->
umat_blobs
[
i
]
=
params
.
blobs
[
i
].
getUMat
(
ACCESS_READ
);
#endif
#endif
return
l
;
return
l
;
...
@@ -1656,10 +1533,7 @@ Ptr<BaseConvolutionLayer> ConvolutionLayer::create(const LayerParams &params)
...
@@ -1656,10 +1533,7 @@ Ptr<BaseConvolutionLayer> ConvolutionLayer::create(const LayerParams &params)
Ptr
<
BaseConvolutionLayer
>
DeconvolutionLayer
::
create
(
const
LayerParams
&
params
)
Ptr
<
BaseConvolutionLayer
>
DeconvolutionLayer
::
create
(
const
LayerParams
&
params
)
{
{
Ptr
<
BaseConvolutionLayer
>
l
(
new
DeConvolutionLayerImpl
);
return
Ptr
<
BaseConvolutionLayer
>
(
new
DeConvolutionLayerImpl
(
params
));
initConvDeconvLayerFromCaffe
(
l
,
params
);
return
l
;
}
}
}
}
...
...
modules/dnn/src/layers/mvn_layer.cpp
View file @
59b91d6c
...
@@ -65,16 +65,18 @@ public:
...
@@ -65,16 +65,18 @@ public:
relu_slope
=
0.
f
;
relu_slope
=
0.
f
;
}
}
Ptr
<
BatchNormLayer
>
bnorm
;
Mat
scale
,
shift
;
Mat
scale
,
shift
;
UMat
bnorm_weight
,
bnorm_bias
;
bool
fuse_batch_norm
;
bool
fuse_batch_norm
;
bool
setBatchNorm
(
const
Ptr
<
BatchNormLayer
>&
layer
)
virtual
bool
tryFuse
(
Ptr
<
Layer
>&
top
)
{
{
bnorm
=
layer
;
if
(
preferableTarget
==
DNN_TARGET_OPENCL
&&
!
fuse_batch_norm
)
fuse_batch_norm
=
!
bnorm
.
empty
()
&&
(
preferableTarget
==
DNN_TARGET_OPENCL
);
{
return
fuse_batch_norm
;
top
->
getScaleShift
(
scale
,
shift
);
fuse_batch_norm
=
!
scale
.
empty
()
||
!
shift
.
empty
();
return
fuse_batch_norm
;
}
return
false
;
}
}
Ptr
<
ReLULayer
>
activ_relu
;
Ptr
<
ReLULayer
>
activ_relu
;
...
@@ -95,12 +97,8 @@ public:
...
@@ -95,12 +97,8 @@ public:
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
bool
fast_forward_ocl
(
std
::
vector
<
UMat
>
&
inputs
,
std
::
vector
<
UMat
>
&
outputs
)
bool
fast_forward_ocl
(
std
::
vector
<
UMat
>
&
inputs
,
std
::
vector
<
UMat
>
&
outputs
)
{
{
if
(
fuse_batch_norm
&&
scale
.
empty
())
UMat
bnorm_weight
=
scale
.
empty
()
?
UMat
()
:
scale
.
getUMat
(
ACCESS_READ
);
{
UMat
bnorm_bias
=
shift
.
empty
()
?
UMat
()
:
shift
.
getUMat
(
ACCESS_READ
);
bnorm
->
getScaleShift
(
scale
,
shift
);
bnorm_weight
=
scale
.
getUMat
(
ACCESS_READ
);
bnorm_bias
=
shift
.
getUMat
(
ACCESS_READ
);
}
int
splitDim
=
(
acrossChannels
)
?
1
:
2
;
int
splitDim
=
(
acrossChannels
)
?
1
:
2
;
for
(
size_t
inpIdx
=
0
;
inpIdx
<
inputs
.
size
();
inpIdx
++
)
for
(
size_t
inpIdx
=
0
;
inpIdx
<
inputs
.
size
();
inpIdx
++
)
...
@@ -171,12 +169,8 @@ public:
...
@@ -171,12 +169,8 @@ public:
return
ret
;
return
ret
;
}
}
if
(
fuse_batch_norm
&&
scale
.
empty
())
UMat
bnorm_weight
=
scale
.
empty
()
?
UMat
()
:
scale
.
getUMat
(
ACCESS_READ
);
{
UMat
bnorm_bias
=
shift
.
empty
()
?
UMat
()
:
shift
.
getUMat
(
ACCESS_READ
);
bnorm
->
getScaleShift
(
scale
,
shift
);
bnorm_weight
=
scale
.
getUMat
(
ACCESS_READ
);
bnorm_bias
=
shift
.
getUMat
(
ACCESS_READ
);
}
for
(
size_t
inpIdx
=
0
;
inpIdx
<
inputs
.
size
();
inpIdx
++
)
for
(
size_t
inpIdx
=
0
;
inpIdx
<
inputs
.
size
();
inpIdx
++
)
{
{
...
...
modules/dnn/src/layers/scale_layer.cpp
View file @
59b91d6c
...
@@ -201,6 +201,12 @@ public:
...
@@ -201,6 +201,12 @@ public:
return
Ptr
<
BackendNode
>
();
return
Ptr
<
BackendNode
>
();
}
}
void
getScaleShift
(
Mat
&
scale
,
Mat
&
shift
)
const
{
scale
=
!
blobs
.
empty
()
?
blobs
[
0
]
:
Mat
();
shift
=
hasBias
?
blobs
[
1
]
:
Mat
();
}
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
{
...
...
modules/dnn/src/layers/shift_layer.cpp
View file @
59b91d6c
...
@@ -136,6 +136,12 @@ public:
...
@@ -136,6 +136,12 @@ public:
return
Ptr
<
BackendNode
>
();
return
Ptr
<
BackendNode
>
();
}
}
void
getScaleShift
(
Mat
&
scale
,
Mat
&
shift
)
const
{
scale
=
Mat
();
shift
=
blobs
[
0
];
}
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment