Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
62ba5d75
Commit
62ba5d75
authored
Jun 22, 2017
by
Dmitry Kurtaev
Committed by
Vadim Pisarevsky
Jun 22, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added Halide OpenCL target for deep learning networks (#1246)
parent
a4a8b84e
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
348 additions
and
193 deletions
+348
-193
dnn.hpp
modules/dnn/include/opencv2/dnn/dnn.hpp
+18
-3
perf_halide_net.cpp
modules/dnn/perf/perf_halide_net.cpp
+71
-54
dnn.cpp
modules/dnn/src/dnn.cpp
+94
-10
halide_scheduler.cpp
modules/dnn/src/halide_scheduler.cpp
+24
-0
batch_norm_layer.cpp
modules/dnn/src/layers/batch_norm_layer.cpp
+2
-0
concat_layer.cpp
modules/dnn/src/layers/concat_layer.cpp
+0
-23
convolution_layer.cpp
modules/dnn/src/layers/convolution_layer.cpp
+7
-1
elementwise_layers.cpp
modules/dnn/src/layers/elementwise_layers.cpp
+1
-1
eltwise_layer.cpp
modules/dnn/src/layers/eltwise_layer.cpp
+0
-23
fully_connected_layer.cpp
modules/dnn/src/layers/fully_connected_layer.cpp
+0
-25
lrn_layer.cpp
modules/dnn/src/layers/lrn_layer.cpp
+7
-1
max_unpooling_layer.cpp
modules/dnn/src/layers/max_unpooling_layer.cpp
+0
-20
padding_layer.cpp
modules/dnn/src/layers/padding_layer.cpp
+24
-0
pooling_layer.cpp
modules/dnn/src/layers/pooling_layer.cpp
+7
-1
softmax_layer.cpp
modules/dnn/src/layers/softmax_layer.cpp
+0
-27
op_halide.cpp
modules/dnn/src/op_halide.cpp
+38
-4
op_halide.hpp
modules/dnn/src/op_halide.hpp
+2
-0
test_halide_nets.cpp
modules/dnn/test/test_halide_nets.cpp
+53
-0
No files found.
modules/dnn/include/opencv2/dnn/dnn.hpp
View file @
62ba5d75
...
...
@@ -69,7 +69,8 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
enum
Target
{
DNN_TARGET_CPU
DNN_TARGET_CPU
,
DNN_TARGET_OPENCL
};
/** @brief Initialize dnn module and built-in layers.
...
...
@@ -138,6 +139,11 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
virtual
~
BackendWrapper
();
//!< Virtual destructor so that derived wrappers are destroyed correctly through a base pointer.
/**
* @brief Transfer data to CPU host memory.
*/
virtual
void
copyToHost
()
=
0
;
int
backendId
;
//!< Backend identifier.
int
targetId
;
//!< Target identifier.
};
...
...
@@ -220,14 +226,16 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @param[in] node Backend node with Halide functions.
* @param[in] inputs Blobs that will be used in forward invocations.
* @param[in] outputs Blobs that will be used in forward invocations.
* @see BackendNode
* @param[in] targetId Target identifier
* @see BackendNode, Target
*
* Layers don't use their own Halide::Func members because layer fusing may
* have been applied; in that case the fused function is the one to schedule.
*/
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
;
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
;
/**
* @brief Implement layers fusing.
...
...
@@ -394,6 +402,13 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
void
setPreferableBackend
(
int
backendId
);
/**
* @brief Ask network to make computations on a specific target device.
* @param[in] targetId target identifier.
* @see Target
*/
void
setPreferableTarget
(
int
targetId
);
/** @brief Sets the new value for the layer output blob
* @param name descriptor of the updating layer output blob.
* @param blob new blob.
...
...
modules/dnn/perf/perf_halide_net.cpp
View file @
62ba5d75
...
...
@@ -41,114 +41,131 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
net
->
setInput
(
blobFromImage
(
input
,
1.0
,
false
));
net
->
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
net
->
setPreferableTarget
(
targetId
);
net
->
setHalideScheduler
(
scheduler
);
net
->
forward
(
outputLayer
);
}
////////////////////////////////////////////////////////////////////////////////
// CPU target
////////////////////////////////////////////////////////////////////////////////
PERF_TEST
(
GoogLeNet
,
HalidePerfTest
)
{
try
{
Net
net
;
loadNet
(
"dnn/bvlc_googlenet
.caffemodel"
,
"dnn/bvlc_googlenet.prototxt"
,
loadNet
(
"dnn/bvlc_googlenet2
.caffemodel"
,
"dnn/bvlc_googlenet.prototxt"
,
""
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
PERF_TEST
(
AlexNet
,
HalidePerfTest
)
{
try
{
Net
net
;
loadNet
(
"dnn/bvlc_alexnet.caffemodel"
,
"dnn/bvlc_alexnet.prototxt"
,
"dnn/halide_scheduler_alexnet.yml"
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
PERF_TEST
(
ResNet50
,
HalidePerfTest
)
{
try
{
Net
net
;
loadNet
(
"dnn/ResNet-50-model.caffemodel"
,
"dnn/ResNet-50-deploy.prototxt"
,
"dnn/halide_scheduler_resnet_50.yml"
,
224
,
224
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
PERF_TEST
(
SqueezeNet_v1_1
,
HalidePerfTest
)
{
try
{
Net
net
;
loadNet
(
"dnn/squeezenet_v1_1.caffemodel"
,
"dnn/squeezenet_v1_1.prototxt"
,
"dnn/halide_scheduler_squeezenet_v1_1.yml"
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
PERF_TEST
(
Inception_5h
,
HalidePerfTest
)
{
try
{
Net
net
;
loadNet
(
"dnn/tensorflow_inception_graph.pb"
,
""
,
"dnn/halide_scheduler_inception_5h.yml"
,
224
,
224
,
"softmax2"
,
"tensorflow"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE_N
(
10
)
{
net
.
forward
(
"softmax2"
);
}
TEST_CYCLE
()
net
.
forward
(
"softmax2"
);
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
PERF_TEST
(
ENet
,
HalidePerfTest
)
{
try
{
Net
net
;
loadNet
(
"dnn/Enet-model-best.net"
,
""
,
"dnn/halide_scheduler_enet.yml"
,
512
,
256
,
"l367_Deconvolution"
,
"torch"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
////////////////////////////////////////////////////////////////////////////////
// OpenCL target
////////////////////////////////////////////////////////////////////////////////
// Times net.forward() for GoogLeNet loaded through loadNet() with the
// OpenCL target. No scheduler file is passed (empty string).
PERF_TEST(GoogLeNet_opencl, HalidePerfTest)
{
    const std::string weightsPath = "dnn/bvlc_googlenet.caffemodel";
    const std::string protoPath = "dnn/bvlc_googlenet.prototxt";
    const std::string schedulerPath = "";

    Net net;
    loadNet(weightsPath, protoPath, schedulerPath, 227, 227, "prob", "caffe",
            DNN_TARGET_OPENCL, &net);

    TEST_CYCLE()
    {
        net.forward();
    }

    SANITY_CHECK_NOTHING();
}
TEST_CYCLE_N
(
10
)
{
net
.
forward
(
"l367_Deconvolution"
);
}
// Times net.forward() for AlexNet loaded through loadNet() with the
// OpenCL target and the OpenCL-specific scheduler file.
PERF_TEST(AlexNet_opencl, HalidePerfTest)
{
    const std::string weightsPath = "dnn/bvlc_alexnet.caffemodel";
    const std::string protoPath = "dnn/bvlc_alexnet.prototxt";
    const std::string schedulerPath = "dnn/halide_scheduler_opencl_alexnet.yml";

    Net net;
    loadNet(weightsPath, protoPath, schedulerPath, 227, 227, "prob", "caffe",
            DNN_TARGET_OPENCL, &net);

    TEST_CYCLE()
    {
        net.forward();
    }

    SANITY_CHECK_NOTHING();
}
// Times net.forward() for ResNet-50 loaded through loadNet() with the
// OpenCL target and the OpenCL-specific scheduler file.
PERF_TEST(ResNet50_opencl, HalidePerfTest)
{
    const std::string weightsPath = "dnn/ResNet-50-model.caffemodel";
    const std::string protoPath = "dnn/ResNet-50-deploy.prototxt";
    const std::string schedulerPath = "dnn/halide_scheduler_opencl_resnet_50.yml";

    Net net;
    loadNet(weightsPath, protoPath, schedulerPath, 224, 224, "prob", "caffe",
            DNN_TARGET_OPENCL, &net);

    TEST_CYCLE()
    {
        net.forward();
    }

    SANITY_CHECK_NOTHING();
}
// Times net.forward() for SqueezeNet v1.1 loaded through loadNet() with the
// OpenCL target and the OpenCL-specific scheduler file.
PERF_TEST(SqueezeNet_v1_1_opencl, HalidePerfTest)
{
    const std::string weightsPath = "dnn/squeezenet_v1_1.caffemodel";
    const std::string protoPath = "dnn/squeezenet_v1_1.prototxt";
    const std::string schedulerPath = "dnn/halide_scheduler_opencl_squeezenet_v1_1.yml";

    Net net;
    loadNet(weightsPath, protoPath, schedulerPath, 227, 227, "prob", "caffe",
            DNN_TARGET_OPENCL, &net);

    TEST_CYCLE()
    {
        net.forward();
    }

    SANITY_CHECK_NOTHING();
}
// Times net.forward("softmax2") for the TensorFlow Inception 5h graph loaded
// through loadNet() with the OpenCL target. No prototxt is passed (empty string).
PERF_TEST(Inception_5h_opencl, HalidePerfTest)
{
    const std::string weightsPath = "dnn/tensorflow_inception_graph.pb";
    const std::string protoPath = "";
    const std::string schedulerPath = "dnn/halide_scheduler_opencl_inception_5h.yml";

    Net net;
    loadNet(weightsPath, protoPath, schedulerPath, 224, 224, "softmax2", "tensorflow",
            DNN_TARGET_OPENCL, &net);

    TEST_CYCLE()
    {
        net.forward("softmax2");
    }

    SANITY_CHECK_NOTHING();
}
// Times net.forward() for the Torch ENet model loaded through loadNet() with
// the OpenCL target. No prototxt is passed (empty string); input is 512x256.
PERF_TEST(ENet_opencl, HalidePerfTest)
{
    const std::string weightsPath = "dnn/Enet-model-best.net";
    const std::string protoPath = "";
    const std::string schedulerPath = "dnn/halide_scheduler_opencl_enet.yml";

    Net net;
    loadNet(weightsPath, protoPath, schedulerPath, 512, 256, "l367_Deconvolution", "torch",
            DNN_TARGET_OPENCL, &net);

    TEST_CYCLE()
    {
        net.forward();
    }

    SANITY_CHECK_NOTHING();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
#endif // HAVE_HALIDE
...
...
modules/dnn/src/dnn.cpp
View file @
62ba5d75
...
...
@@ -205,7 +205,7 @@ struct LayerPin
class
BackendWrapManager
{
public
:
Ptr
<
BackendWrapper
>
wrap
(
const
Mat
&
m
,
int
backendId
,
int
targetId
=
DNN_TARGET_CPU
)
Ptr
<
BackendWrapper
>
wrap
(
const
Mat
&
m
,
int
backendId
,
int
targetId
)
{
CV_Assert
(
backendId
!=
DNN_BACKEND_DEFAULT
);
...
...
@@ -236,7 +236,7 @@ public:
}
std
::
vector
<
Ptr
<
BackendWrapper
>
>
wrap
(
const
std
::
vector
<
Mat
*>&
mats
,
int
backendId
,
int
targetId
=
DNN_TARGET_CPU
)
int
backendId
,
int
targetId
)
{
const
int
num
=
mats
.
size
();
std
::
vector
<
Ptr
<
BackendWrapper
>
>
dst
(
num
);
...
...
@@ -248,7 +248,7 @@ public:
}
std
::
vector
<
Ptr
<
BackendWrapper
>
>
wrap
(
const
std
::
vector
<
Mat
>&
mats
,
int
backendId
,
int
targetId
=
DNN_TARGET_CPU
)
int
backendId
,
int
targetId
)
{
const
int
num
=
mats
.
size
();
std
::
vector
<
Ptr
<
BackendWrapper
>
>
dst
(
num
);
...
...
@@ -617,6 +617,7 @@ struct Net::Impl
lastLayerId
=
1
;
netWasAllocated
=
false
;
preferableBackend
=
DNN_BACKEND_DEFAULT
;
preferableTarget
=
DNN_TARGET_CPU
;
}
Ptr
<
DataLayer
>
netInputLayer
;
...
...
@@ -626,6 +627,7 @@ struct Net::Impl
std
::
map
<
String
,
int
>
layerNameToId
;
BlobManager
blobManager
;
int
preferableBackend
;
int
preferableTarget
;
String
halideConfigFile
;
// Backend-specific wrapping manager.
BackendWrapManager
backendWrapper
;
...
...
@@ -652,10 +654,11 @@ struct Net::Impl
{
// Use automatic scheduling provided by layer.
layer
->
applyHalideScheduler
(
ld
.
backendNodes
[
DNN_BACKEND_HALIDE
],
ld
.
inputBlobs
,
ld
.
outputBlobs
);
ld
.
inputBlobs
,
ld
.
outputBlobs
,
preferableTarget
);
}
dnn
::
compileHalide
(
ld
.
outputBlobs
,
ld
.
backendNodes
[
DNN_BACKEND_HALIDE
],
DNN_TARGET_CPU
);
preferableTarget
);
}
}
}
...
...
@@ -859,7 +862,10 @@ struct Net::Impl
{
backendWrapper
.
reset
();
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
)
{
CV_Assert
(
preferableTarget
==
DNN_TARGET_CPU
);
return
;
}
// Iterator to current layer.
MapIdToLayerData
::
iterator
it
=
layers
.
begin
();
...
...
@@ -905,7 +911,8 @@ struct Net::Impl
// No layers fusion.
ldTop
.
skipFlags
[
preferableBackend
]
=
false
;
std
::
vector
<
Ptr
<
BackendWrapper
>
>
inputs
=
backendWrapper
.
wrap
(
ldTop
.
inputBlobs
,
preferableBackend
);
backendWrapper
.
wrap
(
ldTop
.
inputBlobs
,
preferableBackend
,
preferableTarget
);
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
{
ldTop
.
backendNodes
[
DNN_BACKEND_HALIDE
]
=
layerTop
->
initHalide
(
inputs
);
...
...
@@ -1040,7 +1047,7 @@ struct Net::Impl
else
if
(
!
ld
.
skipFlags
[
preferableBackend
])
{
std
::
vector
<
Ptr
<
BackendWrapper
>
>
outputs
=
backendWrapper
.
wrap
(
ld
.
outputBlobs
,
preferableBackend
);
backendWrapper
.
wrap
(
ld
.
outputBlobs
,
preferableBackend
,
preferableTarget
);
Ptr
<
BackendNode
>
node
=
ld
.
backendNodes
[
preferableBackend
];
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
{
...
...
@@ -1154,6 +1161,16 @@ struct Net::Impl
CV_Error
(
Error
::
StsOutOfRange
,
"Layer
\"
"
+
ld
.
name
+
"
\"
produce only "
+
toString
(
ld
.
outputBlobs
.
size
())
+
" outputs, the #"
+
toString
(
pin
.
oid
)
+
" was requsted"
);
}
if
(
preferableBackend
!=
DNN_BACKEND_DEFAULT
)
{
// Transfer data to CPU if it's required.
backendWrapper
.
wrap
(
ld
.
outputBlobs
[
pin
.
oid
],
preferableBackend
,
preferableTarget
)
->
copyToHost
();
}
else
{
CV_Assert
(
preferableTarget
==
DNN_TARGET_CPU
);
}
return
ld
.
outputBlobs
[
pin
.
oid
];
}
...
...
@@ -1314,6 +1331,13 @@ void Net::setPreferableBackend(int backendId)
impl
->
preferableBackend
=
backendId
;
}
// Records the target device identifier the network should compute on.
// Choosing a target different from the current one clears the
// netWasAllocated flag; choosing the same target leaves it untouched.
void Net::setPreferableTarget(int targetId)
{
    if (impl->preferableTarget == targetId)
        return;  // Nothing changes: keep the current allocation state.

    impl->netWasAllocated = false;
    impl->preferableTarget = targetId;
}
void
Net
::
setInputsNames
(
const
std
::
vector
<
String
>
&
inputBlobNames
)
{
impl
->
netInputLayer
->
setNames
(
inputBlobNames
);
...
...
@@ -1702,10 +1726,70 @@ Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
}
void
Layer
::
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
CV_Error
(
Error
::
StsNotImplemented
,
"Scheduling of "
+
type
+
" layers is not implemented."
);
#ifdef HAVE_HALIDE
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
co
(
"co"
),
ci
(
"ci"
),
xo
(
"xo"
),
xi
(
"xi"
),
yo
(
"yo"
),
yi
(
"yi"
),
tile
(
"tile"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
if
(
targetId
==
DNN_TARGET_CPU
)
{
if
(
outW
==
1
&&
outH
==
1
)
{
if
(
outC
+
outN
==
1
)
return
;
if
(
outC
>
8
)
top
.
split
(
c
,
co
,
ci
,
8
)
.
fuse
(
x
,
y
,
tile
).
fuse
(
co
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
)
.
vectorize
(
ci
,
8
);
else
top
.
fuse
(
x
,
y
,
tile
).
fuse
(
c
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
);
}
else
{
if
(
outH
>
2
)
{
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
}
}
}
else
if
(
targetId
==
DNN_TARGET_OPENCL
)
{
int
c_split
=
outC
>
8
?
(
outC
>
16
?
8
:
4
)
:
outC
;
if
(
outW
==
1
&&
outH
==
1
)
{
top
.
split
(
c
,
co
,
ci
,
c_split
)
.
fuse
(
x
,
y
,
tile
).
fuse
(
co
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
gpu_blocks
(
tile
)
.
gpu_threads
(
ci
);
}
else
{
int
x_split
=
outW
>
8
?
(
outW
>=
32
?
16
:
8
)
:
outW
;
int
y_split
=
outH
>
8
?
(
outH
>=
32
?
16
:
8
)
:
outH
;
top
.
split
(
x
,
xo
,
xi
,
x_split
).
split
(
y
,
yo
,
yi
,
y_split
)
.
split
(
c
,
co
,
ci
,
c_split
)
.
gpu_blocks
(
xo
,
yo
,
co
)
.
gpu_threads
(
xi
,
yi
)
.
reorder
(
xi
,
yi
,
ci
,
xo
,
yo
,
co
)
.
vectorize
(
ci
);
}
}
else
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
#endif // HAVE_HALIDE
}
Ptr
<
BackendNode
>
Layer
::
tryAttach
(
const
Ptr
<
BackendNode
>&
node
)
...
...
modules/dnn/src/halide_scheduler.cpp
View file @
62ba5d75
...
...
@@ -143,6 +143,26 @@ static void applyComputeRoot(const FileNode& directive, Halide::Func& func)
func
.
compute_root
();
}
// Handles a "gpu_blocks" directive: each element of the directive node is
// read as a variable name and passed to func.gpu_blocks() as a Halide::Var.
static void applyGpuBlocks(const FileNode& directive, Halide::Func& func)
{
    std::string blockVarName;
    const int numVars = directive.size();
    int idx = 0;
    while (idx < numVars)
    {
        directive[idx] >> blockVarName;
        func.gpu_blocks(Halide::Var(blockVarName));
        ++idx;
    }
}
// Handles a "gpu_threads" directive: each element of the directive node is
// read as a variable name and passed to func.gpu_threads() as a Halide::Var.
static void applyGpuThreads(const FileNode& directive, Halide::Func& func)
{
    std::string threadVarName;
    const int numVars = directive.size();
    int idx = 0;
    while (idx < numVars)
    {
        directive[idx] >> threadVarName;
        func.gpu_threads(Halide::Var(threadVarName));
        ++idx;
    }
}
static
void
apply
(
const
FileNode
&
directives
,
Halide
::
Func
&
func
,
std
::
map
<
std
::
string
,
Halide
::
Func
>&
funcsMap
,
const
FileNode
&
params
)
...
...
@@ -167,6 +187,10 @@ static void apply(const FileNode& directives, Halide::Func& func,
applyComputeAt
(
directive
,
func
,
funcsMap
);
else
if
(
directive
.
name
()
==
"compute_root"
)
applyComputeRoot
(
directive
,
func
);
else
if
(
directive
.
name
()
==
"gpu_blocks"
)
applyGpuBlocks
(
directive
,
func
);
else
if
(
directive
.
name
()
==
"gpu_threads"
)
applyGpuThreads
(
directive
,
func
);
else
CV_Error
(
Error
::
StsNotImplemented
,
"Scheduling directive "
+
directive
.
name
()
+
" is not implemented."
);
...
...
modules/dnn/src/layers/batch_norm_layer.cpp
View file @
62ba5d75
...
...
@@ -157,6 +157,8 @@ public:
bias
(
i
)
=
(
hasBias
?
biasData
[
i
]
:
0.0
f
)
-
weights
(
i
)
*
meanData
[
i
]
*
varMeanScale
;
}
weights
.
set_host_dirty
();
bias
.
set_host_dirty
();
top
(
x
,
y
,
c
,
n
)
=
input
*
weights
(
c
)
+
bias
(
c
);
return
top
;
}
...
...
modules/dnn/src/layers/concat_layer.cpp
View file @
62ba5d75
...
...
@@ -130,29 +130,6 @@ public:
#endif // HAVE_HALIDE
return
Ptr
<
BackendNode
>
();
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
if
(
outW
==
1
||
outH
<=
2
)
return
;
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
#endif // HAVE_HALIDE
}
};
Ptr
<
ConcatLayer
>
ConcatLayer
::
create
(
const
LayerParams
&
params
)
...
...
modules/dnn/src/layers/convolution_layer.cpp
View file @
62ba5d75
...
...
@@ -99,9 +99,15 @@ public:
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
#ifdef HAVE_HALIDE
if
(
targetId
!=
DNN_TARGET_CPU
)
{
Layer
::
applyHalideScheduler
(
node
,
inputs
,
outputs
,
targetId
);
return
;
}
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
),
co
(
"co"
),
ci
(
"ci"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
[
1
];
Halide
::
Func
&
padded_input
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
[
0
];
...
...
modules/dnn/src/layers/elementwise_layers.cpp
View file @
62ba5d75
...
...
@@ -422,7 +422,7 @@ struct ChannelsPReLUFunctor
{
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
);
auto
weights
=
wrapToHalideBuffer
(
scale
,
{(
int
)
scale
.
total
()});
top
(
x
,
y
,
c
,
n
)
=
select
(
input
>
0.0
f
,
input
,
weights
(
c
)
*
input
);
top
(
x
,
y
,
c
,
n
)
=
select
(
input
>
=
0.0
f
,
input
,
weights
(
c
)
*
input
);
}
#endif // HAVE_HALIDE
...
...
modules/dnn/src/layers/eltwise_layer.cpp
View file @
62ba5d75
...
...
@@ -198,29 +198,6 @@ public:
return
Ptr
<
BackendNode
>
();
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
if
(
outW
==
1
||
outH
<=
2
)
return
;
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
#endif // HAVE_HALIDE
}
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
...
...
modules/dnn/src/layers/fully_connected_layer.cpp
View file @
62ba5d75
...
...
@@ -252,31 +252,6 @@ public:
return
Ptr
<
BackendNode
>
();
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
co
(
"co"
),
ci
(
"ci"
),
tile
(
"tile"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
if
(
outC
+
outN
==
1
)
return
;
if
(
outC
>
8
)
top
.
split
(
c
,
co
,
ci
,
8
)
.
fuse
(
x
,
y
,
tile
).
fuse
(
co
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
)
.
vectorize
(
ci
,
8
);
else
top
.
fuse
(
x
,
y
,
tile
).
fuse
(
c
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
);
#endif // HAVE_HALIDE
}
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
...
...
modules/dnn/src/layers/lrn_layer.cpp
View file @
62ba5d75
...
...
@@ -272,9 +272,15 @@ public:
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
#ifdef HAVE_HALIDE
if
(
targetId
!=
DNN_TARGET_CPU
)
{
Layer
::
applyHalideScheduler
(
node
,
inputs
,
outputs
,
targetId
);
return
;
}
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
...
...
modules/dnn/src/layers/max_unpooling_layer.cpp
View file @
62ba5d75
...
...
@@ -117,26 +117,6 @@ public:
#endif // HAVE_HALIDE
return
Ptr
<
BackendNode
>
();
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
#endif // HAVE_HALIDE
}
};
Ptr
<
MaxUnpoolLayer
>
MaxUnpoolLayer
::
create
(
const
LayerParams
&
params
)
...
...
modules/dnn/src/layers/padding_layer.cpp
View file @
62ba5d75
...
...
@@ -10,6 +10,7 @@ Implementation of padding layer, which adds paddings to input blob.
*/
#include "../precomp.hpp"
#include "op_halide.hpp"
#include <vector>
namespace
cv
...
...
@@ -52,6 +53,12 @@ public:
return
false
;
}
// Tells whether this layer can run on the given backend: always for the
// default backend, and for the Halide backend only when haveHalide() is true.
virtual bool supportBackend(int backendId)
{
    if (backendId == DNN_BACKEND_DEFAULT)
        return true;

    return backendId == DNN_BACKEND_HALIDE && haveHalide();
}
void
forward
(
std
::
vector
<
Mat
*>
&
inputs
,
std
::
vector
<
Mat
>
&
outputs
,
std
::
vector
<
Mat
>
&
internals
)
{
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
...
...
@@ -94,6 +101,23 @@ public:
return
inputDims
>
0
&&
(
int
)
shape
.
size
()
>
inputDims
?
paddingDim
+
1
:
paddingDim
;
}
// Builds the Halide backend node for this layer from the first input wrapper.
// The input buffer is wrapped with a constant-exterior boundary condition
// using paddingValue, and the output function reads from that wrapped input.
// Returns an empty node pointer when built without HAVE_HALIDE.
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
{
#ifdef HAVE_HALIDE
    Halide::Buffer<float> srcBuffer = halideBuffer(inputs[0]);

    // NOTE(review): the canonical sizes are queried but never used below, and
    // `padded` is sampled at unshifted coordinates — confirm that no offset by
    // the padding position is expected here.
    int inW, inH, inC, inN;
    getCanonicalSize(srcBuffer, &inW, &inH, &inC, &inN);

    Halide::Var x("x"), y("y"), c("c"), n("n");

    // Give the output function the layer name when one is set.
    Halide::Func top = name.empty() ? Halide::Func() : Halide::Func(name);

    Halide::Func padded =
        Halide::BoundaryConditions::constant_exterior(srcBuffer, paddingValue);
    top(x, y, c, n) = padded(x, y, c, n);

    return Ptr<BackendNode>(new HalideBackendNode(top));
#else
    return Ptr<BackendNode>();
#endif  // HAVE_HALIDE
}
int
paddingDim
,
padding
,
inputDims
,
index
;
float
paddingValue
;
};
...
...
modules/dnn/src/layers/pooling_layer.cpp
View file @
62ba5d75
...
...
@@ -388,9 +388,15 @@ public:
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
#ifdef HAVE_HALIDE
if
(
targetId
!=
DNN_TARGET_CPU
)
{
Layer
::
applyHalideScheduler
(
node
,
inputs
,
outputs
,
targetId
);
return
;
}
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
xi
(
"xi"
),
yi
(
"yi"
),
ci
(
"ci"
),
xo
(
"xo"
),
yo
(
"yo"
),
co
(
"co"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
...
...
modules/dnn/src/layers/softmax_layer.cpp
View file @
62ba5d75
...
...
@@ -187,33 +187,6 @@ public:
return
Ptr
<
BackendNode
>
();
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
// Most common case when SoftMax is a layer after fully-connected.
// So we just schedule it in the same way.
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
co
(
"co"
),
ci
(
"ci"
),
tile
(
"tile"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
if
(
outC
+
outN
==
1
)
return
;
if
(
outC
>
8
)
top
.
split
(
c
,
co
,
ci
,
8
)
.
fuse
(
x
,
y
,
tile
).
fuse
(
co
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
)
.
vectorize
(
ci
,
8
);
else
top
.
fuse
(
x
,
y
,
tile
).
fuse
(
c
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
);
#endif // HAVE_HALIDE
}
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
...
...
modules/dnn/src/op_halide.cpp
View file @
62ba5d75
...
...
@@ -7,6 +7,10 @@
#include "op_halide.hpp"
#ifdef HAVE_HALIDE
#include <HalideRuntimeOpenCL.h>
#endif // HAVE_HALIDE
namespace
cv
{
namespace
dnn
...
...
@@ -72,7 +76,15 @@ HalideBackendWrapper::HalideBackendWrapper(int targetId, const cv::Mat& m)
:
BackendWrapper
(
DNN_BACKEND_HALIDE
,
targetId
)
{
buffer
=
wrapToHalideBuffer
(
m
);
if
(
targetId
!=
DNN_TARGET_CPU
)
if
(
targetId
==
DNN_TARGET_CPU
)
{
return
;
}
else
if
(
targetId
==
DNN_TARGET_OPENCL
)
{
buffer
.
copy_to_device
(
halide_opencl_device_interface
());
}
else
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
}
...
...
@@ -80,15 +92,32 @@ HalideBackendWrapper::HalideBackendWrapper(const Ptr<BackendWrapper>& base,
const
MatShape
&
shape
)
:
BackendWrapper
(
DNN_BACKEND_HALIDE
,
base
->
targetId
)
{
if
(
base
->
targetId
!=
DNN_TARGET_CPU
)
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
int
w
,
h
,
c
,
n
;
getCanonicalSize
(
shape
,
&
w
,
&
h
,
&
c
,
&
n
);
Halide
::
Buffer
<
float
>
baseBuffer
=
halideBuffer
(
base
);
buffer
=
Halide
::
Buffer
<
float
>
((
float
*
)
baseBuffer
.
raw_buffer
()
->
host
,
{
w
,
h
,
c
,
n
});
if
(
baseBuffer
.
has_device_allocation
())
{
buffer
.
raw_buffer
()
->
device
=
baseBuffer
.
raw_buffer
()
->
device
;
buffer
.
raw_buffer
()
->
device_interface
=
baseBuffer
.
raw_buffer
()
->
device_interface
;
buffer
.
set_device_dirty
();
}
else
{
buffer
.
set_host_dirty
();
// Indicate that data is on CPU.
CV_Assert
(
targetId
==
DNN_TARGET_CPU
);
}
}
// Makes the host copy of the wrapped buffer current.
//
// When the buffer is marked device-dirty, pending device work is synchronized
// and the data is copied back to host memory. Otherwise the call is a no-op.
//
// The former CV_Assert(targetId == DNN_TARGET_CPU || buffer.device_dirty())
// was dropped: on a non-CPU target it fired whenever the host copy was
// already current (e.g. a second copyToHost() call on the same buffer),
// although the branch below already handles that state correctly.
void HalideBackendWrapper::copyToHost()
{
    if (buffer.device_dirty())
    {
        buffer.device_sync();
        buffer.copy_to_host();
    }
}
#endif // HAVE_HALIDE
...
...
@@ -144,6 +173,11 @@ void compileHalide(std::vector<Mat> &outputs, Ptr<BackendNode>& node, int target
Halide
::
Target
target
=
Halide
::
get_host_target
();
target
.
set_feature
(
Halide
::
Target
::
NoAsserts
);
if
(
targetId
==
DNN_TARGET_OPENCL
)
{
target
.
set_feature
(
Halide
::
Target
::
OpenCL
);
}
CV_Assert
(
target
.
supported
());
top
.
compile_jit
(
target
);
#endif // HAVE_HALIDE
}
...
...
modules/dnn/src/op_halide.hpp
View file @
62ba5d75
...
...
@@ -57,6 +57,8 @@ namespace dnn
HalideBackendWrapper
(
const
Ptr
<
BackendWrapper
>&
base
,
const
MatShape
&
shape
);
virtual
void
copyToHost
();
Halide
::
Buffer
<
float
>
buffer
;
};
#endif // HAVE_HALIDE
...
...
modules/dnn/test/test_halide_nets.cpp
View file @
62ba5d75
...
...
@@ -48,6 +48,7 @@ static void test(const std::string& weights, const std::string& proto,
netHalide
.
setInput
(
blobFromImage
(
input
.
clone
(),
1.0
f
,
false
));
netHalide
.
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
netHalide
.
setPreferableTarget
(
targetId
);
netHalide
.
setHalideScheduler
(
scheduler
);
outputHalide
=
netHalide
.
forward
(
outputLayer
).
clone
();
...
...
@@ -62,15 +63,20 @@ static void test(const std::string& weights, const std::string& proto,
// Swap backends.
netHalide
.
setPreferableBackend
(
DNN_BACKEND_DEFAULT
);
netHalide
.
setPreferableTarget
(
DNN_TARGET_CPU
);
outputDefault
=
netHalide
.
forward
(
outputLayer
).
clone
();
netDefault
.
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
netDefault
.
setPreferableTarget
(
targetId
);
netDefault
.
setHalideScheduler
(
scheduler
);
outputHalide
=
netDefault
.
forward
(
outputLayer
).
clone
();
normAssert
(
outputDefault
,
outputHalide
);
}
////////////////////////////////////////////////////////////////////////////////
// CPU target
////////////////////////////////////////////////////////////////////////////////
TEST
(
Reproducibility_GoogLeNet_Halide
,
Accuracy
)
{
test
(
findDataFile
(
"dnn/bvlc_googlenet.caffemodel"
,
false
),
...
...
@@ -115,6 +121,53 @@ TEST(Reproducibility_ENet_Halide, Accuracy)
findDataFile
(
"dnn/halide_scheduler_enet.yml"
,
false
),
512
,
512
,
"l367_Deconvolution"
,
"torch"
,
DNN_TARGET_CPU
);
};
////////////////////////////////////////////////////////////////////////////////
// OpenCL target
////////////////////////////////////////////////////////////////////////////////
// Runs the shared test() helper on GoogLeNet with the OpenCL target.
// No scheduler file is passed (empty string).
TEST(Reproducibility_GoogLeNet_Halide_opencl, Accuracy)
{
    const std::string weightsPath = findDataFile("dnn/bvlc_googlenet.caffemodel", false);
    const std::string protoPath = findDataFile("dnn/bvlc_googlenet.prototxt", false);
    const std::string schedulerPath = "";

    test(weightsPath, protoPath, schedulerPath, 227, 227, "prob", "caffe",
         DNN_TARGET_OPENCL);
}
// Runs the shared test() helper on AlexNet with the OpenCL target and the
// OpenCL-specific scheduler file.
TEST(Reproducibility_AlexNet_Halide_opencl, Accuracy)
{
    const std::string weightsPath = findDataFile("dnn/bvlc_alexnet.caffemodel", false);
    const std::string protoPath = findDataFile("dnn/bvlc_alexnet.prototxt", false);
    const std::string schedulerPath =
        findDataFile("dnn/halide_scheduler_opencl_alexnet.yml", false);

    test(weightsPath, protoPath, schedulerPath, 227, 227, "prob", "caffe",
         DNN_TARGET_OPENCL);
}
// Runs the shared test() helper on ResNet-50 with the OpenCL target and the
// OpenCL-specific scheduler file.
TEST(Reproducibility_ResNet_50_Halide_opencl, Accuracy)
{
    const std::string weightsPath = findDataFile("dnn/ResNet-50-model.caffemodel", false);
    const std::string protoPath = findDataFile("dnn/ResNet-50-deploy.prototxt", false);
    const std::string schedulerPath =
        findDataFile("dnn/halide_scheduler_opencl_resnet_50.yml", false);

    test(weightsPath, protoPath, schedulerPath, 224, 224, "prob", "caffe",
         DNN_TARGET_OPENCL);
}
// Runs the shared test() helper on SqueezeNet v1.1 with the OpenCL target and
// the OpenCL-specific scheduler file.
TEST(Reproducibility_SqueezeNet_v1_1_Halide_opencl, Accuracy)
{
    const std::string weightsPath = findDataFile("dnn/squeezenet_v1_1.caffemodel", false);
    const std::string protoPath = findDataFile("dnn/squeezenet_v1_1.prototxt", false);
    const std::string schedulerPath =
        findDataFile("dnn/halide_scheduler_opencl_squeezenet_v1_1.yml", false);

    test(weightsPath, protoPath, schedulerPath, 227, 227, "prob", "caffe",
         DNN_TARGET_OPENCL);
}
// Runs the shared test() helper on the TensorFlow Inception 5h graph with the
// OpenCL target. No prototxt is passed (empty string).
TEST(Reproducibility_Inception_5h_Halide_opencl, Accuracy)
{
    const std::string weightsPath = findDataFile("dnn/tensorflow_inception_graph.pb", false);
    const std::string protoPath = "";
    const std::string schedulerPath =
        findDataFile("dnn/halide_scheduler_opencl_inception_5h.yml", false);

    test(weightsPath, protoPath, schedulerPath, 224, 224, "softmax2", "tensorflow",
         DNN_TARGET_OPENCL);
}
// Runs the shared test() helper on the Torch ENet model with the OpenCL
// target. No prototxt is passed (empty string); input is 512x512.
TEST(Reproducibility_ENet_Halide_opencl, Accuracy)
{
    const std::string weightsPath = findDataFile("dnn/Enet-model-best.net", false);
    const std::string protoPath = "";
    const std::string schedulerPath =
        findDataFile("dnn/halide_scheduler_opencl_enet.yml", false);

    test(weightsPath, protoPath, schedulerPath, 512, 512, "l367_Deconvolution", "torch",
         DNN_TARGET_OPENCL);
}
#endif // HAVE_HALIDE
}
// namespace cvtest
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment