opencv / Commits / 86e8a105

Commit 86e8a105, authored Jul 14, 2017 by Alexander Alekhin

    Merge pull request #9090 from vpisarev:dnn_optim_scale_concat

Parents: a586ef72, 0488d9bd

Showing 5 changed files with 305 additions and 30 deletions:

    modules/dnn/include/opencv2/dnn/dnn.hpp        +21   -1
    modules/dnn/src/dnn.cpp                        +155  -20
    modules/dnn/src/layers/concat_layer.cpp        +81   -0
    modules/dnn/src/layers/convolution_layer.cpp   +43   -6
    modules/dnn/test/test_halide_layers.cpp        +5    -3
modules/dnn/include/opencv2/dnn/dnn.hpp

@@ -152,6 +152,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS ActivationLayer;
     class CV_EXPORTS BatchNormLayer;
+    class CV_EXPORTS ScaleLayer;

     /** @brief This interface class allows to build new Layers - are building blocks of networks.
      *
@@ -269,6 +270,19 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          */
         virtual bool setBatchNorm(const Ptr<BatchNormLayer>& layer);

+        /**
+         * @brief Tries to attach to the layer the subsequent scaling layer, i.e. do the layer fusion in a partial case.
+         * @param[in] layer The subsequent scaling layer.
+         *
+         * Returns true if the scaling layer has been attached successfully.
+         */
+        virtual bool setScale(const Ptr<ScaleLayer>& layer);
+
+        /**
+         * @brief "Deattaches" all the layers, attached to particular layer.
+         */
+        virtual void unsetAttached();
+
         virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                      const int requiredOutputs,
                                      std::vector<MatShape> &outputs,
@@ -498,6 +512,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
                               const int layerId,
                               std::vector<MatShape>* inLayerShapes,
                               std::vector<MatShape>* outLayerShapes) const;
+
         /** @brief Computes FLOP for whole loaded model with specified input shapes.
          * @param netInputShapes vector of shapes for all net inputs.
          * @returns computed FLOP.
@@ -557,8 +572,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         CV_WRAP void getMemoryConsumption(const MatShape& netInputShape,
                                           CV_OUT std::vector<int>& layerIds,
                                           CV_OUT std::vector<size_t>& weights,
                                           CV_OUT std::vector<size_t>& blobs) const;
-    private:
+        /** @brief Enables or disables layer fusion in the network.
+         * @param fusion true to enable the fusion, false to disable. The fusion is enabled by default.
+         */
+        CV_WRAP void enableFusion(bool fusion);
+
+    private:
         struct Impl;
         Ptr<Impl> impl;
     };
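The new setScale()/unsetAttached() virtuals follow the same contract as setActivation() and setBatchNorm(): a layer that can fold the attached layer into its own weights stores the pointer and returns true, and the default implementations (added in dnn.cpp below) return false. A minimal sketch of that contract for a hypothetical fusable layer (illustrative only, not part of this commit; the required forward()/getMemoryShapes() overrides are omitted):

    // Hypothetical layer showing the setScale() contract used by fuseLayers().
    class MyFusableLayer : public cv::dnn::Layer
    {
        cv::Ptr<cv::dnn::ScaleLayer> scaleLayer;
    public:
        virtual bool setScale(const cv::Ptr<cv::dnn::ScaleLayer>& layer)
        {
            scaleLayer = layer;          // remember the attached scaling layer
            return !scaleLayer.empty();  // true = fused; the net will skip that layer
        }
        // unsetAttached() calls setScale(Ptr<ScaleLayer>()) with an empty pointer,
        // which drops the attachment again.
    };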
modules/dnn/src/dnn.cpp

@@ -464,15 +464,19 @@ public:
         }
     }

-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
     {
+        Mat bestBlob;
+        LayerPin bestBlobPin;
+
+        if( !force )
+        {
             std::map<LayerPin, Mat>::iterator hostIt;
             std::map<LayerPin, int>::iterator refIt;

             const int targetTotal = total(shape);
-            Mat bestBlob;
             int bestBlobTotal = INT_MAX;
-            LayerPin bestBlobPin;
+
             for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
             {
                 refIt = refCounter.find(hostIt->first);
@@ -490,6 +494,7 @@ public:
                 }
             }
         }
+        }
         if (!bestBlob.empty())
         {
             reuse(bestBlobPin, lp);
@@ -505,7 +510,8 @@ public:
     }

     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs)
+                               std::vector<LayerPin>& pinsForInternalBlobs,
+                               bool maximizeReuse)
     {
         CV_TRACE_FUNCTION();
@@ -561,6 +567,7 @@ public:
         }

         std::map<int, std::vector<int> >::reverse_iterator it;
+        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
         for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
         {
             for(int j = 0; j < it->second.size(); j++)
@@ -569,7 +576,7 @@ public:
                 if (total(shapes[index]))
                 {
                     LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace)
+                    if (index < outShapes.size() && inPlace && !force)
                     {
                         CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                         ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
@@ -577,7 +584,7 @@ public:
                     }
                     else
                     {
-                        reuseOrCreate(shapes[index], blobPin, *blobs[index]);
+                        reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
                     }
                 }
             }
@@ -628,6 +635,7 @@ struct Net::Impl
         lastLayerId = 1;
         netWasAllocated = false;
+        fusion = true;
         preferableBackend = DNN_BACKEND_DEFAULT;
         preferableTarget = DNN_TARGET_CPU;
     }
@@ -647,6 +655,7 @@ struct Net::Impl
     int lastLayerId;

     bool netWasAllocated;
+    bool fusion;

     void compileHalide()
     {
@@ -695,8 +704,7 @@ struct Net::Impl
             if( currLayer.empty() )
                 continue;

-            currLayer->setActivation(Ptr<ActivationLayer>());
-            currLayer->setBatchNorm(Ptr<BatchNormLayer>());
+            currLayer->unsetAttached();

             Ptr<PoolingLayer> poolingLayer = currLayer.dynamicCast<PoolingLayer>();
             if( !poolingLayer.empty() )
@@ -704,9 +712,11 @@ struct Net::Impl
                 poolingLayer->computeMaxIdx = true;
             }
         }
+        it = layers.find(0);
+        CV_Assert(it != layers.end());
+        it->second.skipFlags[DNN_BACKEND_DEFAULT] = true;
     }

     void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
     {
         CV_TRACE_FUNCTION();
@@ -783,13 +793,11 @@ struct Net::Impl
     LayerData& getLayerData(const DictValue &layerDesc)
     {
+        CV_Assert(layerDesc.isInt() || layerDesc.isString());
         if (layerDesc.isInt())
             return getLayerData(layerDesc.get<int>());
-        else if (layerDesc.isString())
+        else /*if (layerDesc.isString())*/
             return getLayerData(layerDesc.get<String>());
-
-        CV_Assert(layerDesc.isInt() || layerDesc.isString());
-        return *((LayerData*)NULL);
     }

     static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
@@ -1021,7 +1029,8 @@ struct Net::Impl
         CV_Assert(layerShapesIt != layersShapes.end());

         std::vector<LayerPin> pinsForInternalBlobs;
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
+        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);

         Ptr<Layer> layerPtr = ld.getLayerInstance();
         {
@@ -1044,8 +1053,17 @@ struct Net::Impl
         ld.flag = 1;
     }

+#if 0
+#define printf_(args) printf args
+#else
+#define printf_(args)
+#endif
+
     void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
     {
+        if( !fusion || preferableBackend == DNN_BACKEND_HALIDE )
+            return;
+
         CV_TRACE_FUNCTION();

         // scan through all the layers. If there is convolution layer followed by the activation layer,
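The printf_ macro introduced above is a compile-time tracing switch: flipping #if 0 to #if 1 turns every printf_((...)) call in fuseLayers() into a real printf, while the default branch makes the calls compile away entirely. The double parentheses pass the whole variadic argument list as a single macro argument. A standalone illustration (with tracing switched on):

    #include <cstdio>

    #if 1                               // the commit's default is #if 0 (tracing off)
    #define printf_(args) printf args
    #else
    #define printf_(args)
    #endif

    int main()
    {
        // note the double parentheses: args expands to ("analyzing %s\n", "conv1")
        printf_(("analyzing %s\n", "conv1"));
        return 0;
    }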
@@ -1060,11 +1078,17 @@ struct Net::Impl
             LayerData& ld = layers[lid];
             if( ld.skipFlags[DNN_BACKEND_DEFAULT] )
             {
+                printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
                 continue;
             }
+            printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
             if( ld.consumers.size() == 0 )
                 outnames.push_back(ld.layerInstance->name);

+            // the optimization #1. try to fuse batch norm, scaling and/or activation layers
+            // with the current layer if they follow it. Normally, the are fused with the convolution layer,
+            // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
+            // some other layers.
             Ptr<Layer>& currLayer = ld.layerInstance;
             if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
             {
@@ -1078,10 +1102,29 @@ struct Net::Impl
                     nextData = 0;
                     if( currLayer->setBatchNorm(nextBNormLayer) )
                     {
+                        printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
                         bnormData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                         if( bnormData->consumers.size() == 1 )
                             nextData = &layers[bnormData->consumers[0].lid];
+                        lpNext = LayerPin(bnormData->consumers[0].lid, 0);
                     }
                 }
+
+                Ptr<ScaleLayer> nextScaleLayer;
+                if( nextData )
+                    nextScaleLayer = nextData->layerInstance.dynamicCast<ScaleLayer>();
+                if( !nextScaleLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+                {
+                    LayerData* scaleData = nextData;
+                    nextData = 0;
+                    if( currLayer->setScale(nextScaleLayer) )
+                    {
+                        printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
+                        scaleData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        if( scaleData->consumers.size() == 1 )
+                            nextData = &layers[scaleData->consumers[0].lid];
+                    }
+                }
@@ -1091,11 +1134,16 @@ struct Net::Impl
                 if( !nextActivLayer.empty() && currLayer->setActivation(nextActivLayer) )
                 {
-                    //printf("successfully merged %s and %s\n", currLayer->name.c_str(), nextActivLayer->name.c_str());
+                    printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                     nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                     ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                 }
             }
+
+            // the optimization #2. if there is no layer that takes max pooling layer's computed
+            // max indices (and only some semantical segmentation networks might need this;
+            // many others only take the maximum values), then we switch the max pooling
+            // layer to the faster operating mode.
             Ptr<PoolingLayer> poolingLayer = ld.layerInstance.dynamicCast<PoolingLayer>();
             if( !poolingLayer.empty() && !ld.consumers.empty() )
             {
@@ -1108,7 +1156,71 @@ struct Net::Impl
                 if( i >= nconsumers )
                 {
                     poolingLayer->computeMaxIdx = false;
-                    //printf("simplified pooling layer %s\n", poolingLayer->name.c_str());
+                    printf_(("\tsimplified pooling layer %s\n", poolingLayer->name.c_str()));
                 }
             }
+
+            // the optimization #3. if there is concat layer that concatenates channels
+            // from the inputs together (i.e. axis == 1) then we make the inputs of
+            // the concat layer to write to the concatetion output buffer
+            // (and so we eliminate the concatenation layer, because the channels
+            // are concatenated implicitly).
+            Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
+            if( !concatLayer.empty() && concatLayer->axis == 1 &&
+                ld.outputBlobs.size() == 1 )
+            {
+                Mat& output = ld.outputBlobs[0];
+
+                // TODO: in general, this optimization can always be done, but
+                // many layers currently check that the input/output blobs are
+                // continuous arrays. Unfortunately, this is not true when
+                // the concatenation optimization is applied with batch_size > 1.
+                // so, for now, we only apply this optimization in the most popular
+                // case batch_size == 1.
+                if( output.dims == 4 && output.size[0] == 1 )
+                {
+                    size_t i, ninputs = ld.inputBlobsId.size();
+                    std::vector<LayerPin> realinputs(ninputs);
+                    for( i = 0; i < ninputs; i++ )
+                    {
+                        LayerPin pin = ld.inputBlobsId[i];
+                        LayerData* inp_i_data = &layers[pin.lid];
+                        while(inp_i_data->skipFlags[DNN_BACKEND_DEFAULT] &&
+                              inp_i_data->inputBlobsId.size() == 1)
+                        {
+                            pin = inp_i_data->inputBlobsId[0];
+                            inp_i_data = &layers[pin.lid];
+                        }
+                        printf_(("\treal input for %s is %s\n",
+                               layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
+                               inp_i_data->getLayerInstance()->name.c_str()));
+
+                        if(inp_i_data->skipFlags[DNN_BACKEND_DEFAULT])
+                            break;
+                        realinputs[i] = pin;
+                    }
+
+                    if( i >= ninputs )
+                    {
+                        Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
+                        int ofs = 0;
+                        for( i = 0; i < ninputs; i++ )
+                        {
+                            LayerPin pin = realinputs[i];
+                            LayerData* inp_i_data = &layers[pin.lid];
+                            int channels_i = ld.inputBlobs[i]->size[1];
+                            chrange[1] = Range(ofs, ofs + channels_i);
+                            printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
+                                   pin.oid, ofs, ofs + channels_i));
+                            ofs += channels_i;
+                            Mat output_slice = output(chrange);
+                            Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
+                            CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
+                            curr_output = output_slice;
+                        }
+                        ld.skipFlags[DNN_BACKEND_DEFAULT] = true;
+                        printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
+                    }
+                }
+            }
         }
     }
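Optimization #3 above works because of cv::Mat's shared-data semantics: output(chrange) is a view into the Concat output blob, so once curr_output = output_slice; is done, each producer writes its result straight into the right channel range and the Concat layer itself can be skipped. A standalone sketch of that aliasing (shapes chosen arbitrarily):

    #include <opencv2/core.hpp>
    #include <cstdio>

    int main()
    {
        // a 1x6x2x2 blob standing in for the Concat layer's output
        int sz[] = {1, 6, 2, 2};
        cv::Mat output(4, sz, CV_32F, cv::Scalar(0));

        // view of channels [0, 4): shares memory with 'output', no copy
        cv::Range chrange[] = { cv::Range::all(), cv::Range(0, 4),
                                cv::Range::all(), cv::Range::all() };
        cv::Mat slice = output(chrange);

        slice.setTo(cv::Scalar(1));  // a "producer" writing into its slice...
        // ...lands directly in the shared concat buffer:
        printf("%.0f %.0f\n", output.ptr<float>(0, 0)[0],
                              output.ptr<float>(0, 5)[0]);  // prints "1 0"
        return 0;
    }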
@@ -1458,9 +1570,12 @@ void Net::setPreferableBackend(int backendId)
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG(backendId);

-    impl->netWasAllocated = impl->netWasAllocated &&
-                            impl->preferableBackend == backendId;
+    if( impl->preferableBackend != backendId )
+    {
         impl->preferableBackend = backendId;
+        impl->netWasAllocated = false;
+        impl->clear();
+    }
 }

 void Net::setPreferableTarget(int targetId)
@@ -1468,9 +1583,12 @@ void Net::setPreferableTarget(int targetId)
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG(targetId);

-    impl->netWasAllocated = impl->netWasAllocated &&
-                            impl->preferableTarget == targetId;
+    if( impl->preferableTarget != targetId )
+    {
         impl->preferableTarget = targetId;
+        impl->netWasAllocated = false;
+        impl->clear();
+    }
 }

 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
@@ -1825,6 +1943,16 @@ void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>&
                          weights, blobs);
 }

+void Net::enableFusion(bool fusion)
+{
+    if( impl->fusion != fusion )
+    {
+        impl->fusion = fusion;
+        impl->netWasAllocated = false;
+        impl->clear();
+    }
+}
+
 void Net::setHalideScheduler(const String& scheduler)
 {
     CV_TRACE_FUNCTION();
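Net::enableFusion() is the user-facing switch for the fusion machinery above; like the backend and target setters, it now resets the allocated net only when the value actually changes. A usage sketch (the model file names are placeholders):

    #include <opencv2/dnn.hpp>

    int main()
    {
        cv::dnn::Net net = cv::dnn::readNetFromCaffe("model.prototxt",
                                                     "model.caffemodel");
        net.enableFusion(false);  // e.g. to inspect per-layer outputs unfused
        // ... net.setInput(...); net.forward(); ...
        return 0;
    }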
@@ -1950,6 +2078,13 @@ Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
 bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
 bool Layer::setBatchNorm(const Ptr<BatchNormLayer>&) { return false; }
+bool Layer::setScale(const Ptr<ScaleLayer>&) { return false; }
+void Layer::unsetAttached()
+{
+    setActivation(Ptr<ActivationLayer>());
+    setBatchNorm(Ptr<BatchNormLayer>());
+    setScale(Ptr<ScaleLayer>());
+}

 template <typename T>
 static void vecToPVec(const std::vector<T>& v, std::vector<T*>& pv)
modules/dnn/src/layers/concat_layer.cpp

@@ -94,6 +94,78 @@ public:
                backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1;  // By channels
     }

+    class ChannelConcatInvoker : public ParallelLoopBody
+    {
+    public:
+        std::vector<Mat*>* inputs;
+        Mat* output;
+        int nstripes;
+        std::vector<const float*> chptrs;
+
+        static void run(std::vector<Mat*>& inputs, Mat& output, int nstripes)
+        {
+            ChannelConcatInvoker cc;
+            cc.inputs = &inputs;
+            cc.output = &output;
+            cc.nstripes = nstripes;
+
+            size_t i, ninputs = inputs.size();
+            int nchannels = 0, batchsz = output.size[0];
+            for( i = 0; i < ninputs; i++ )
+            {
+                Mat& inp = *inputs[i];
+                CV_Assert( inp.isContinuous() && inp.type() == CV_32F &&
+                           inp.dims == 4 && inp.size[0] == output.size[0] &&
+                           inp.size[2] == output.size[2] &&
+                           inp.size[3] == output.size[3] );
+                nchannels += inp.size[1];
+            }
+            CV_Assert( nchannels == output.size[1] );
+            CV_Assert( output.isContinuous() && output.type() == CV_32F );
+
+            cc.chptrs.resize(nchannels*batchsz);
+
+            int ofs = 0;
+            for( i = 0; i < ninputs; i++ )
+            {
+                Mat& inp = *inputs[i];
+                for( int j = 0; j < batchsz; j++ )
+                    for( int k = 0; k < inp.size[1]; k++ )
+                    {
+                        const float* ptr = inp.ptr<float>(j, k);
+                        cc.chptrs[ofs + j*nchannels + k] = ptr;
+                    }
+                ofs += inp.size[1];
+            }
+
+            parallel_for_(Range(0, nstripes), cc, nstripes);
+        }
+
+        ChannelConcatInvoker() {}
+
+        void operator()(const Range& r) const
+        {
+            size_t planeSize = (size_t)output->size[2]*output->size[3];
+            size_t nch = chptrs.size();
+            size_t total = nch*planeSize;
+            size_t stripeSize = (total + nstripes - 1)/nstripes;
+            size_t stripeStart = r.start*stripeSize;
+            size_t stripeEnd = std::min(total, r.end*stripeSize);
+            const float** ptrs = (const float**)&chptrs[0];
+            float* outptr = output->ptr<float>();
+            size_t blockSize0 = 1 << 16;
+
+            for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
+            {
+                size_t ch = ofs0/planeSize;
+                size_t ofs = ofs0 - ch*planeSize;
+                size_t blockSize = std::min(blockSize0, planeSize - ofs);
+                memcpy(outptr + ofs0, ptrs[ch] + ofs, blockSize*sizeof(outptr[0]));
+                ofs0 += blockSize;
+            }
+        }
+    };
+
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
     {
         CV_TRACE_FUNCTION();
@@ -101,6 +173,14 @@ public:
         int cAxis = clamp(axis, inputs[0]->dims);
         Mat& outMat = outputs[0];
+
+        if( cAxis == 1 && outMat.dims == 4 )
+        {
+            int nstripes = getNumThreads();
+            ChannelConcatInvoker::run(inputs, outMat, nstripes);
+        }
+        else
+        {
         std::vector<Range> ranges(outputs[0].dims, Range::all());

         ranges[cAxis].start = 0;
@@ -111,6 +191,7 @@ public:
             ranges[cAxis].start = ranges[cAxis].end;
         }
+        }
     }

     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input)
     {
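ChannelConcatInvoker follows the standard cv::parallel_for_ striping pattern: the total work (nch*planeSize floats) is split into nstripes equal stripes, the Range passed to operator() is a range of stripe indices, and each call converts its stripes back into the element range [stripeStart, stripeEnd). A minimal standalone version of the same pattern (a plain array fill instead of channel-wise memcpy):

    #include <opencv2/core.hpp>
    #include <algorithm>
    #include <vector>

    // Same striping scheme as ChannelConcatInvoker: the Range is in stripes.
    struct FillInvoker : public cv::ParallelLoopBody
    {
        float* out; size_t total; int nstripes;
        void operator()(const cv::Range& r) const
        {
            size_t stripeSize = (total + nstripes - 1) / nstripes;
            size_t start = r.start * stripeSize;
            size_t end = std::min(total, (size_t)r.end * stripeSize);
            for (size_t i = start; i < end; i++)
                out[i] = (float)i;   // each stripe touches a disjoint slice
        }
    };

    int main()
    {
        std::vector<float> buf(1 << 20);
        FillInvoker body;
        body.out = &buf[0];
        body.total = buf.size();
        body.nstripes = cv::getNumThreads();
        cv::parallel_for_(cv::Range(0, body.nstripes), body, body.nstripes);
        return 0;
    }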
modules/dnn/src/layers/convolution_layer.cpp

@@ -148,6 +148,7 @@ public:
     std::vector<float> reluslope;
     Ptr<ActivationLayer> activ;
     Ptr<BatchNormLayer> bnorm;
+    Ptr<ScaleLayer> scaleLayer;

     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
     {
@@ -202,6 +203,9 @@ public:
     bool setBatchNorm(const Ptr<BatchNormLayer>& layer )
     {
+        // for now the scale layer followed by the batch norm cannot be fused, only vice versa.
+        if( !scaleLayer.empty() )
+            return false;
         bnorm = layer;
         // we will need to re-compute the weights with the batch
         // norm coefficients taken into account
@@ -209,6 +213,15 @@ public:
         return !bnorm.empty();
     }

+    bool setScale(const Ptr<ScaleLayer>& layer)
+    {
+        scaleLayer = layer;
+        // we will need to re-compute the weights with the scaling
+        // coefficients taken into account
+        weightsMat.release();
+        return !scaleLayer.empty();
+    }
+
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
     {
 #ifdef HAVE_HALIDE
@@ -678,32 +691,56 @@ public:
                 biasvec[k] = biasMat.at<float>(k);
         }

-        if( !bnorm.empty() )
+        if( !bnorm.empty() || !scaleLayer.empty() )
         {
-            Mat scale, shift;
-            bnorm->getScaleShift(scale, shift);
-
-            CV_Assert( scale.isContinuous() && shift.isContinuous() &&
-                       scale.type() == CV_32F && shift.type() == CV_32F &&
-                       scale.total() == (size_t)outCn &&
-                       shift.total() == (size_t)outCn );
+            Mat scale, shift, scale2, shift2;
+            const float *scaleptr = 0, *shiftptr = 0;
+            const float *scaleptr2 = 0, *shiftptr2 = 0;
+
+            if( !bnorm.empty() )
+            {
+                bnorm->getScaleShift(scale, shift);
+                CV_Assert( scale.isContinuous() && shift.isContinuous() &&
+                           scale.type() == CV_32F && shift.type() == CV_32F &&
+                           scale.total() == (size_t)outCn &&
+                           shift.total() == (size_t)outCn );
+                scaleptr = scale.ptr<float>();
+                shiftptr = shift.ptr<float>();
+            }
+            if( !scaleLayer.empty() )
+            {
+                scale2 = scaleLayer->blobs[0];
+                CV_Assert( scale2.isContinuous() && scale2.type() == CV_32F &&
+                           scale2.total() == (size_t)outCn );
+                scaleptr2 = scale2.ptr<float>();
+                if( scaleLayer->hasBias )
+                {
+                    shift2 = scaleLayer->blobs[1];
+                    CV_Assert( shift2.isContinuous() && shift2.type() == CV_32F &&
+                               shift2.total() == (size_t)outCn );
+                    shiftptr2 = shift2.ptr<float>();
+                }
+            }

             for( int i = 0; i < outCn; i++ )
             {
-                float s = scale.at<float>(i);
-                float delta = shift.at<float>(i);
+                float s1 = scaleptr ? scaleptr[i] : 1.f;
+                float delta1 = shiftptr ? shiftptr[i] : 0.f;
+                float s2 = scaleptr2 ? scaleptr2[i] : 1.f;
+                float delta2 = shiftptr2 ? shiftptr2[i] : 0.f;
                 float* w_i = weightsMat.ptr<float>(i);
                 int j, wcols = weightsMat.cols;

                 for( j = 0; j < wcols; j++ )
-                    w_i[j] *= s;
+                    w_i[j] *= (s1*s2);

-                biasvec[i] = biasvec[i]*s + delta;
+                biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
             }
         }
         biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
     }

     reluslope.clear();
     if( activ )
     {
         Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
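The weight folding in the last hunk is just the composition of per-channel affine transforms with the convolution. For output channel i, batch norm contributes scale/shift (s1_i, delta1_i) via getScaleShift() and the scaling layer contributes (s2_i, delta2_i), so:

    y_i = s^{(2)}_i \bigl( s^{(1)}_i (w_i \cdot x + b_i) + \delta^{(1)}_i \bigr) + \delta^{(2)}_i
        = \bigl( s^{(1)}_i s^{(2)}_i \bigr) (w_i \cdot x)
          + \bigl( b_i\, s^{(1)}_i s^{(2)}_i + \delta^{(1)}_i s^{(2)}_i + \delta^{(2)}_i \bigr)

which is exactly w_i[j] *= (s1*s2) and biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2) above; a missing layer defaults to scale 1 and shift 0.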
modules/dnn/test/test_halide_layers.cpp

@@ -517,7 +517,8 @@ TEST_P(Concat, Accuracy)
     Net net;

-    std::vector<int> convLayerIds(numChannels.channels);
+    std::vector<int> convLayerIds;
+    convLayerIds.reserve(numChannels.channels);
     for (int i = 0, n = numChannels.channels; i < n; ++i)
     {
         if (!numChannels[i])
@@ -537,8 +538,9 @@ TEST_P(Concat, Accuracy)
         convParam.name = ss.str();
         convParam.blobs.push_back(weights);

-        convLayerIds[i] = net.addLayer(convParam.name, convParam.type, convParam);
-        net.connect(0, 0, convLayerIds[i], 0);
+        int layerId = net.addLayer(convParam.name, convParam.type, convParam);
+        convLayerIds.push_back(layerId);
+        net.connect(0, 0, layerId, 0);
     }

     LayerParams concatParam;