opencv commit 329abb5b (parent bb8ff2c4), authored Apr 26, 2018 by Li Peng

dnn fp16 support

Signed-off-by: Li Peng <peng.li@intel.com>

Showing 2 changed files with 113 additions and 22 deletions:
    modules/dnn/src/dnn.cpp      +112  -22
    modules/dnn/src/precomp.hpp    +1   -0
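This commit adds FP16 inference support to the dnn module's OpenCL path. For orientation, a minimal sketch of how an application would opt into the new target through the public API touched by this patch (setPreferableBackend, setPreferableTarget, setInput, forward); the model and image file names are placeholders, not part of the commit:

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Placeholder model files; any Caffe model readable by the dnn module works.
        Net net = readNetFromCaffe("model.prototxt", "model.caffemodel");
        net.setPreferableBackend(DNN_BACKEND_DEFAULT);
        // New half-precision OpenCL target; setPreferableTarget() downgrades it to
        // DNN_TARGET_OPENCL when the device does not report cl_khr_fp16.
        net.setPreferableTarget(DNN_TARGET_OPENCL_FP16);

        Mat img = imread("image.jpg");                       // placeholder input
        Mat blob = blobFromImage(img, 1.0, Size(224, 224));  // 4D input blob
        net.setInput(blob);
        Mat out = net.forward();                             // returned as CV_32F, converted back from fp16
        return 0;
    }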
modules/dnn/src/dnn.cpp
@@ -499,7 +499,7 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate, bool use_half)
     {
         if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate)
         {
@@ -540,14 +540,14 @@ public:
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
-            dst.create(shape, CV_32F);
+            dst.create(shape, use_half ? CV_16S : CV_32F);
             addHost(lp, dst);
         }
     }
 
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                                std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool forceCreate = false)
+                               bool forceCreate = false, bool use_half = false)
     {
         CV_TRACE_FUNCTION();
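A note on the CV_16S depth chosen above: OpenCV at this point has no dedicated half-float Mat depth, so the dnn module carries fp16 blobs in CV_16S buffers and uses cv::convertFp16 to move between CV_32F and that packed representation (the same helper the later hunks call when feeding inputs and reading outputs). A standalone round-trip sketch, not part of the patch:

    #include <opencv2/core.hpp>
    using namespace cv;

    Mat f32 = Mat::ones(2, 3, CV_32F);   // source data in single precision
    Mat f16, back;
    convertFp16(f32, f16);    // CV_32F -> CV_16S buffer holding packed half floats
    convertFp16(f16, back);   // CV_16S -> CV_32F
    CV_Assert(f16.depth() == CV_16S && back.depth() == CV_32F);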
@@ -618,7 +618,7 @@ public:
                 reuse(ld.inputBlobsId[0], blobPin);
             }
             else
-                reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate);
+                reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate, use_half);
         }
     }
 }
@@ -656,7 +656,7 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
     {
         if (targetId == DNN_TARGET_CPU)
             return Ptr<BackendWrapper>();
-        else if (targetId == DNN_TARGET_OPENCL)
+        else if (IS_DNN_OPENCL_TARGET(targetId))
             return OpenCLBackendWrapper::create(m);
         else
             CV_Error(Error::StsNotImplemented, "Unknown target identifier");
@@ -721,6 +721,7 @@ struct Net::Impl
     bool netWasAllocated;
     bool fusion;
     std::vector<int64> layersTimings;
+    Mat output_blob;
 
     Ptr<BackendWrapper> wrap(Mat& host)
     {
@@ -737,7 +738,7 @@ struct Net::Impl
             Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
             if (preferableBackend == DNN_BACKEND_DEFAULT)
             {
-                CV_Assert(preferableTarget == DNN_TARGET_OPENCL);
+                CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
                 return OpenCLBackendWrapper::create(baseBuffer, host);
             }
             else if (preferableBackend == DNN_BACKEND_HALIDE)
@@ -849,7 +850,7 @@ struct Net::Impl
         if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
         {
-            if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+            if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
 #ifndef HAVE_OPENCL
             {
                 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
@@ -1034,7 +1035,7 @@ struct Net::Impl
     {
         CV_TRACE_FUNCTION();
 
         if (preferableBackend == DNN_BACKEND_DEFAULT)
-            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
+            CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
         else if (preferableBackend == DNN_BACKEND_HALIDE)
             initHalideBackend();
         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
@@ -1369,7 +1370,9 @@ struct Net::Impl
         std::vector<LayerPin> pinsForInternalBlobs;
         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
-                                          preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
+                                          preferableBackend == DNN_BACKEND_INFERENCE_ENGINE,
+                                          preferableBackend == DNN_BACKEND_DEFAULT &&
+                                          preferableTarget == DNN_TARGET_OPENCL_FP16);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1439,7 +1442,7 @@ struct Net::Impl
             // some other layers.
 
             // TODO: OpenCL target support more fusion styles.
-            if ( preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL &&
+            if ( preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget) &&
                  (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
                  ld.layerInstance->type != "MVN")) )
                 continue;
@@ -1478,8 +1481,8 @@ struct Net::Impl
                     continue;  // Go to the next layer.
 
                 // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
-                if ( preferableTarget != DNN_TARGET_OPENCL ||
-                     (preferableTarget == DNN_TARGET_OPENCL &&
+                if ( !IS_DNN_OPENCL_TARGET(preferableTarget) ||
+                     (IS_DNN_OPENCL_TARGET(preferableTarget) &&
                       nextData &&
                       ((nextData->type == "ReLU") ||
                        (nextData->type == "ChannelsPReLU") ||
@@ -1502,7 +1505,7 @@ struct Net::Impl
                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                         ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
 
-                        if ( preferableTarget == DNN_TARGET_OPENCL )
+                        if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
                         {
                             if ( !activData->consumers.empty() )
                             {
@@ -1514,7 +1517,7 @@ struct Net::Impl
                 }
 
                 // fuse convlution layer followed by eltwise + relu
-                if ( preferableTarget == DNN_TARGET_OPENCL )
+                if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
                 {
                     Ptr<EltwiseLayer> nextEltwiseLayer;
                     if( nextData )
@@ -1727,6 +1730,13 @@ struct Net::Impl
         for(int i = 0; i < layers[0].outputBlobs.size(); i++)
         {
             CV_Assert(layers[0].outputBlobs[i].total());
+            if (layers[0].outputBlobs[i].depth() == CV_32F &&
+                preferableBackend == DNN_BACKEND_DEFAULT &&
+                preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                Mat mat = layers[0].outputBlobs[i].clone();
+                convertFp16(mat, layers[0].outputBlobs[i]);
+            }
             inputShapes.push_back(shape(layers[0].outputBlobs[i]));
         }
         LayersShapesMap layersShapes;
@@ -1772,7 +1782,7 @@ struct Net::Impl
     {
         if( !ld.skip )
         {
-            if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+            if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
             {
                 std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                 layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
@@ -1937,7 +1947,14 @@ struct Net::Impl
             // Transfer data to CPU if it's require.
             ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
-        return ld.outputBlobs[pin.oid];
+
+        if (ld.outputBlobs[pin.oid].depth() == CV_16S)
+        {
+            convertFp16(ld.outputBlobs[pin.oid], output_blob);
+            return output_blob;
+        }
+        else
+            return ld.outputBlobs[pin.oid];
     }
 
     Mat getBlob(String outputName)
@@ -2080,7 +2097,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
     if (outputBlobs.isUMat())
     {
-        outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW));
+        outputBlobs.assign(impl->getBlob(layerName).getUMat(ACCESS_RW));
     }
     else if (outputBlobs.isMat())
     {
@@ -2096,17 +2113,33 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
                 ld.outputBlobsWrappers[i]->copyToHost();
             }
         }
-        std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
-        outputvec = ld.outputBlobs;
+
+        if (ld.outputBlobs[0].depth() == CV_32F)
+        {
+            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+            outputvec = ld.outputBlobs;
+        }
+        else
+        {
+            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+            outputvec.resize(ld.outputBlobs.size());
+            for (int i = 0; i < outputvec.size(); i++)
+                convertFp16(ld.outputBlobs[i], outputvec[i]);
+        }
     }
     else if (outputBlobs.isUMatVector())
     {
         std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
 
         if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
-            impl->preferableTarget == DNN_TARGET_OPENCL)
-            outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+            IS_DNN_OPENCL_TARGET(impl->preferableTarget))
+        {
+            if (impl->preferableTarget == DNN_TARGET_OPENCL)
+                outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+            else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+                outputvec.resize(out_vec.size());
+                for (int i = 0; i < out_vec.size(); i++)
+                    convertFp16(out_vec[i], outputvec[i]);
+            }
+        }
         else
         {
@@ -2194,6 +2227,16 @@ void Net::setPreferableTarget(int targetId)
     if( impl->preferableTarget != targetId )
     {
         impl->preferableTarget = targetId;
+        if (IS_DNN_OPENCL_TARGET(targetId))
+        {
+#ifndef HAVE_OPENCL
+            impl->preferableTarget = DNN_TARGET_CPU;
+#else
+            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
+            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
+                impl->preferableTarget = DNN_TARGET_OPENCL;
+#endif
+        }
         impl->netWasAllocated = false;
         impl->clear();
     }
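The hunk above makes the FP16 target degrade silently: without OpenCL in the build it falls back to DNN_TARGET_CPU, and without the cl_khr_fp16 extension it falls back to DNN_TARGET_OPENCL. An application that wants to know up front whether half precision will actually be used can run the same capability probe itself; a small sketch using the check the patch relies on:

    #include <opencv2/core/ocl.hpp>

    bool fp16Available = cv::ocl::haveOpenCL() &&
                         cv::ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
    // If fp16Available is false, requesting DNN_TARGET_OPENCL_FP16 will silently use DNN_TARGET_OPENCL.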
@@ -2222,7 +2265,17 @@ void Net::setInput(InputArray blob, const String& name)
     ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
     ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
-    Mat blob_ = blob.getMat();
+    Mat blob_;
+    if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
+        impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+    {
+        Mat blob_mat = blob.getMat();
+        convertFp16(blob_mat, blob_);
+    }
+    else
+    {
+        blob_ = blob.getMat();
+    }
     bool oldShape = prevShape == shape(blob_);
     if (oldShape)
     {
@@ -2747,6 +2800,43 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+
+        std::vector<UMat> orig_inputs;
+        std::vector<UMat> orig_outputs;
+        std::vector<UMat> orig_internals;
+
+        inputs_arr.getUMatVector(orig_inputs);
+        outputs_arr.getUMatVector(orig_outputs);
+        internals_arr.getUMatVector(orig_internals);
+
+        inputs.resize(orig_inputs.size());
+        for (size_t i = 0; i < orig_inputs.size(); i++)
+            convertFp16(orig_inputs[i], inputs[i]);
+
+        outputs.resize(orig_outputs.size());
+        for (size_t i = 0; i < orig_outputs.size(); i++)
+            outputs[i].create(shape(orig_outputs[i]), CV_32F);
+
+        internals.resize(orig_internals.size());
+        for (size_t i = 0; i < orig_internals.size(); i++)
+            internals[i].create(shape(orig_internals[i]), CV_32F);
+
+        forward(inputs, outputs, internals);
+
+        for (size_t i = 0; i < outputs.size(); i++)
+            convertFp16(outputs[i], orig_outputs[i]);
+
+        // sync results back
+        outputs_arr.assign(orig_outputs);
+        internals_arr.assign(orig_internals);
+        return;
+    }
+
     std::vector<Mat> inpvec;
     std::vector<Mat> outputs;
     std::vector<Mat> internals;
modules/dnn/src/precomp.hpp
@@ -64,6 +64,7 @@
 namespace cv { namespace dnn {
 CV__DNN_EXPERIMENTAL_NS_BEGIN
 
+#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
 Mutex& getInitializationMutex();
 void initializeLayerFactory();
 CV__DNN_EXPERIMENTAL_NS_END