Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
62ba5d75
Commit
62ba5d75
authored
Jun 22, 2017
by
Dmitry Kurtaev
Committed by
Vadim Pisarevsky
Jun 22, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added Halide OpenCL target for deep learning networks (#1246)
parent
a4a8b84e
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
377 additions
and
222 deletions
+377
-222
dnn.hpp
modules/dnn/include/opencv2/dnn/dnn.hpp
+18
-3
perf_halide_net.cpp
modules/dnn/perf/perf_halide_net.cpp
+99
-82
dnn.cpp
modules/dnn/src/dnn.cpp
+94
-10
halide_scheduler.cpp
modules/dnn/src/halide_scheduler.cpp
+24
-0
batch_norm_layer.cpp
modules/dnn/src/layers/batch_norm_layer.cpp
+2
-0
concat_layer.cpp
modules/dnn/src/layers/concat_layer.cpp
+0
-23
convolution_layer.cpp
modules/dnn/src/layers/convolution_layer.cpp
+7
-1
elementwise_layers.cpp
modules/dnn/src/layers/elementwise_layers.cpp
+1
-1
eltwise_layer.cpp
modules/dnn/src/layers/eltwise_layer.cpp
+0
-23
fully_connected_layer.cpp
modules/dnn/src/layers/fully_connected_layer.cpp
+0
-25
lrn_layer.cpp
modules/dnn/src/layers/lrn_layer.cpp
+7
-1
max_unpooling_layer.cpp
modules/dnn/src/layers/max_unpooling_layer.cpp
+0
-20
padding_layer.cpp
modules/dnn/src/layers/padding_layer.cpp
+24
-0
pooling_layer.cpp
modules/dnn/src/layers/pooling_layer.cpp
+7
-1
softmax_layer.cpp
modules/dnn/src/layers/softmax_layer.cpp
+0
-27
op_halide.cpp
modules/dnn/src/op_halide.cpp
+39
-5
op_halide.hpp
modules/dnn/src/op_halide.hpp
+2
-0
test_halide_nets.cpp
modules/dnn/test/test_halide_nets.cpp
+53
-0
No files found.
modules/dnn/include/opencv2/dnn/dnn.hpp
View file @
62ba5d75
...
@@ -69,7 +69,8 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
...
@@ -69,7 +69,8 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
*/
enum
Target
enum
Target
{
{
DNN_TARGET_CPU
DNN_TARGET_CPU
,
DNN_TARGET_OPENCL
};
};
/** @brief Initialize dnn module and built-in layers.
/** @brief Initialize dnn module and built-in layers.
...
@@ -138,6 +139,11 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
...
@@ -138,6 +139,11 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
virtual
~
BackendWrapper
();
//!< Virtual destructor to make polymorphism.
virtual
~
BackendWrapper
();
//!< Virtual destructor to make polymorphism.
/**
* @brief Transfer data to CPU host memory.
*/
virtual
void
copyToHost
()
=
0
;
int
backendId
;
//!< Backend identifier.
int
backendId
;
//!< Backend identifier.
int
targetId
;
//!< Target identifier.
int
targetId
;
//!< Target identifier.
};
};
...
@@ -220,14 +226,16 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
...
@@ -220,14 +226,16 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @param[in] node Backend node with Halide functions.
* @param[in] node Backend node with Halide functions.
* @param[in] inputs Blobs that will be used in forward invocations.
* @param[in] inputs Blobs that will be used in forward invocations.
* @param[in] outputs Blobs that will be used in forward invocations.
* @param[in] outputs Blobs that will be used in forward invocations.
* @see BackendNode
* @param[in] targetId Target identifier
* @see BackendNode, Target
*
*
* Layer don't use own Halide::Func members because we can have applied
* Layer don't use own Halide::Func members because we can have applied
* layers fusing. In this way the fused function should be scheduled.
* layers fusing. In this way the fused function should be scheduled.
*/
*/
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
;
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
;
/**
/**
* @brief Implement layers fusing.
* @brief Implement layers fusing.
...
@@ -394,6 +402,13 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
...
@@ -394,6 +402,13 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
*/
void
setPreferableBackend
(
int
backendId
);
void
setPreferableBackend
(
int
backendId
);
/**
* @brief Ask network to make computations on specific target device.
* @param[in] targetId target identifier.
* @see Target
*/
void
setPreferableTarget
(
int
targetId
);
/** @brief Sets the new value for the layer output blob
/** @brief Sets the new value for the layer output blob
* @param name descriptor of the updating layer output blob.
* @param name descriptor of the updating layer output blob.
* @param blob new blob.
* @param blob new blob.
...
...
modules/dnn/perf/perf_halide_net.cpp
View file @
62ba5d75
...
@@ -41,114 +41,131 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
...
@@ -41,114 +41,131 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
net
->
setInput
(
blobFromImage
(
input
,
1.0
,
false
));
net
->
setInput
(
blobFromImage
(
input
,
1.0
,
false
));
net
->
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
net
->
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
net
->
setPreferableTarget
(
targetId
);
net
->
setHalideScheduler
(
scheduler
);
net
->
setHalideScheduler
(
scheduler
);
net
->
forward
(
outputLayer
);
net
->
forward
(
outputLayer
);
}
}
////////////////////////////////////////////////////////////////////////////////
// CPU target
////////////////////////////////////////////////////////////////////////////////
PERF_TEST
(
GoogLeNet
,
HalidePerfTest
)
PERF_TEST
(
GoogLeNet
,
HalidePerfTest
)
{
{
try
{
Net
net
;
Net
net
;
loadNet
(
"dnn/bvlc_googlenet2.caffemodel"
,
"dnn/bvlc_googlenet.prototxt"
,
loadNet
(
"dnn/bvlc_googlenet.caffemodel"
,
"dnn/bvlc_googlenet.prototxt"
,
""
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
""
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
}
PERF_TEST
(
AlexNet
,
HalidePerfTest
)
PERF_TEST
(
AlexNet
,
HalidePerfTest
)
{
{
try
{
Net
net
;
Net
net
;
loadNet
(
"dnn/bvlc_alexnet.caffemodel"
,
"dnn/bvlc_alexnet.prototxt"
,
loadNet
(
"dnn/bvlc_alexnet.caffemodel"
,
"dnn/bvlc_alexnet.prototxt"
,
"dnn/halide_scheduler_alexnet.yml"
,
227
,
227
,
"prob"
,
"caffe"
,
"dnn/halide_scheduler_alexnet.yml"
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
}
PERF_TEST
(
ResNet50
,
HalidePerfTest
)
PERF_TEST
(
ResNet50
,
HalidePerfTest
)
{
{
try
{
Net
net
;
Net
net
;
loadNet
(
"dnn/ResNet-50-model.caffemodel"
,
"dnn/ResNet-50-deploy.prototxt"
,
loadNet
(
"dnn/ResNet-50-model.caffemodel"
,
"dnn/ResNet-50-deploy.prototxt"
,
"dnn/halide_scheduler_resnet_50.yml"
,
224
,
224
,
"prob"
,
"caffe"
,
"dnn/halide_scheduler_resnet_50.yml"
,
224
,
224
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
}
PERF_TEST
(
SqueezeNet_v1_1
,
HalidePerfTest
)
PERF_TEST
(
SqueezeNet_v1_1
,
HalidePerfTest
)
{
{
try
{
Net
net
;
Net
net
;
loadNet
(
"dnn/squeezenet_v1_1.caffemodel"
,
"dnn/squeezenet_v1_1.prototxt"
,
loadNet
(
"dnn/squeezenet_v1_1.caffemodel"
,
"dnn/squeezenet_v1_1.prototxt"
,
"dnn/halide_scheduler_squeezenet_v1_1.yml"
,
227
,
227
,
"prob"
,
"dnn/halide_scheduler_squeezenet_v1_1.yml"
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
"caffe"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
TEST_CYCLE_N
(
10
)
{
net
.
forward
();
}
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
}
PERF_TEST
(
Inception_5h
,
HalidePerfTest
)
PERF_TEST
(
Inception_5h
,
HalidePerfTest
)
{
{
try
{
Net
net
;
Net
net
;
loadNet
(
"dnn/tensorflow_inception_graph.pb"
,
""
,
loadNet
(
"dnn/tensorflow_inception_graph.pb"
,
""
,
"dnn/halide_scheduler_inception_5h.yml"
,
"dnn/halide_scheduler_inception_5h.yml"
,
224
,
224
,
"softmax2"
,
"tensorflow"
,
DNN_TARGET_CPU
,
&
net
);
224
,
224
,
"softmax2"
,
"tensorflow"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE
()
net
.
forward
(
"softmax2"
);
SANITY_CHECK_NOTHING
();
TEST_CYCLE_N
(
10
)
{
net
.
forward
(
"softmax2"
);
}
SANITY_CHECK_NOTHING
();
}
catch
(
SkipTestException
&
e
)
{
throw
PerfSkipTestException
();
}
}
}
PERF_TEST
(
ENet
,
HalidePerfTest
)
PERF_TEST
(
ENet
,
HalidePerfTest
)
{
{
try
{
Net
net
;
Net
net
;
loadNet
(
"dnn/Enet-model-best.net"
,
""
,
"dnn/halide_scheduler_enet.yml"
,
loadNet
(
"dnn/Enet-model-best.net"
,
""
,
"dnn/halide_scheduler_enet.yml"
,
512
,
256
,
"l367_Deconvolution"
,
"torch"
,
DNN_TARGET_CPU
,
&
net
);
512
,
256
,
"l367_Deconvolution"
,
"torch"
,
DNN_TARGET_CPU
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
TEST_CYCLE_N
(
10
)
}
{
////////////////////////////////////////////////////////////////////////////////
net
.
forward
(
"l367_Deconvolution"
);
// OpenCL target
}
////////////////////////////////////////////////////////////////////////////////
SANITY_CHECK_NOTHING
();
PERF_TEST
(
GoogLeNet_opencl
,
HalidePerfTest
)
}
catch
(
SkipTestException
&
e
)
{
{
throw
PerfSkipTestException
();
Net
net
;
}
loadNet
(
"dnn/bvlc_googlenet.caffemodel"
,
"dnn/bvlc_googlenet.prototxt"
,
""
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_OPENCL
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
PERF_TEST
(
AlexNet_opencl
,
HalidePerfTest
)
{
Net
net
;
loadNet
(
"dnn/bvlc_alexnet.caffemodel"
,
"dnn/bvlc_alexnet.prototxt"
,
"dnn/halide_scheduler_opencl_alexnet.yml"
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_OPENCL
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
PERF_TEST
(
ResNet50_opencl
,
HalidePerfTest
)
{
Net
net
;
loadNet
(
"dnn/ResNet-50-model.caffemodel"
,
"dnn/ResNet-50-deploy.prototxt"
,
"dnn/halide_scheduler_opencl_resnet_50.yml"
,
224
,
224
,
"prob"
,
"caffe"
,
DNN_TARGET_OPENCL
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
PERF_TEST
(
SqueezeNet_v1_1_opencl
,
HalidePerfTest
)
{
Net
net
;
loadNet
(
"dnn/squeezenet_v1_1.caffemodel"
,
"dnn/squeezenet_v1_1.prototxt"
,
"dnn/halide_scheduler_opencl_squeezenet_v1_1.yml"
,
227
,
227
,
"prob"
,
"caffe"
,
DNN_TARGET_OPENCL
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
PERF_TEST
(
Inception_5h_opencl
,
HalidePerfTest
)
{
Net
net
;
loadNet
(
"dnn/tensorflow_inception_graph.pb"
,
""
,
"dnn/halide_scheduler_opencl_inception_5h.yml"
,
224
,
224
,
"softmax2"
,
"tensorflow"
,
DNN_TARGET_OPENCL
,
&
net
);
TEST_CYCLE
()
net
.
forward
(
"softmax2"
);
SANITY_CHECK_NOTHING
();
}
PERF_TEST
(
ENet_opencl
,
HalidePerfTest
)
{
Net
net
;
loadNet
(
"dnn/Enet-model-best.net"
,
""
,
"dnn/halide_scheduler_opencl_enet.yml"
,
512
,
256
,
"l367_Deconvolution"
,
"torch"
,
DNN_TARGET_OPENCL
,
&
net
);
TEST_CYCLE
()
net
.
forward
();
SANITY_CHECK_NOTHING
();
}
}
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
...
...
modules/dnn/src/dnn.cpp
View file @
62ba5d75
...
@@ -205,7 +205,7 @@ struct LayerPin
...
@@ -205,7 +205,7 @@ struct LayerPin
class
BackendWrapManager
class
BackendWrapManager
{
{
public
:
public
:
Ptr
<
BackendWrapper
>
wrap
(
const
Mat
&
m
,
int
backendId
,
int
targetId
=
DNN_TARGET_CPU
)
Ptr
<
BackendWrapper
>
wrap
(
const
Mat
&
m
,
int
backendId
,
int
targetId
)
{
{
CV_Assert
(
backendId
!=
DNN_BACKEND_DEFAULT
);
CV_Assert
(
backendId
!=
DNN_BACKEND_DEFAULT
);
...
@@ -236,7 +236,7 @@ public:
...
@@ -236,7 +236,7 @@ public:
}
}
std
::
vector
<
Ptr
<
BackendWrapper
>
>
wrap
(
const
std
::
vector
<
Mat
*>&
mats
,
std
::
vector
<
Ptr
<
BackendWrapper
>
>
wrap
(
const
std
::
vector
<
Mat
*>&
mats
,
int
backendId
,
int
targetId
=
DNN_TARGET_CPU
)
int
backendId
,
int
targetId
)
{
{
const
int
num
=
mats
.
size
();
const
int
num
=
mats
.
size
();
std
::
vector
<
Ptr
<
BackendWrapper
>
>
dst
(
num
);
std
::
vector
<
Ptr
<
BackendWrapper
>
>
dst
(
num
);
...
@@ -248,7 +248,7 @@ public:
...
@@ -248,7 +248,7 @@ public:
}
}
std
::
vector
<
Ptr
<
BackendWrapper
>
>
wrap
(
const
std
::
vector
<
Mat
>&
mats
,
std
::
vector
<
Ptr
<
BackendWrapper
>
>
wrap
(
const
std
::
vector
<
Mat
>&
mats
,
int
backendId
,
int
targetId
=
DNN_TARGET_CPU
)
int
backendId
,
int
targetId
)
{
{
const
int
num
=
mats
.
size
();
const
int
num
=
mats
.
size
();
std
::
vector
<
Ptr
<
BackendWrapper
>
>
dst
(
num
);
std
::
vector
<
Ptr
<
BackendWrapper
>
>
dst
(
num
);
...
@@ -617,6 +617,7 @@ struct Net::Impl
...
@@ -617,6 +617,7 @@ struct Net::Impl
lastLayerId
=
1
;
lastLayerId
=
1
;
netWasAllocated
=
false
;
netWasAllocated
=
false
;
preferableBackend
=
DNN_BACKEND_DEFAULT
;
preferableBackend
=
DNN_BACKEND_DEFAULT
;
preferableTarget
=
DNN_TARGET_CPU
;
}
}
Ptr
<
DataLayer
>
netInputLayer
;
Ptr
<
DataLayer
>
netInputLayer
;
...
@@ -626,6 +627,7 @@ struct Net::Impl
...
@@ -626,6 +627,7 @@ struct Net::Impl
std
::
map
<
String
,
int
>
layerNameToId
;
std
::
map
<
String
,
int
>
layerNameToId
;
BlobManager
blobManager
;
BlobManager
blobManager
;
int
preferableBackend
;
int
preferableBackend
;
int
preferableTarget
;
String
halideConfigFile
;
String
halideConfigFile
;
// Backend-specific wrapping manager.
// Backend-specific wrapping manager.
BackendWrapManager
backendWrapper
;
BackendWrapManager
backendWrapper
;
...
@@ -652,10 +654,11 @@ struct Net::Impl
...
@@ -652,10 +654,11 @@ struct Net::Impl
{
{
// Use automatic scheduling provided by layer.
// Use automatic scheduling provided by layer.
layer
->
applyHalideScheduler
(
ld
.
backendNodes
[
DNN_BACKEND_HALIDE
],
layer
->
applyHalideScheduler
(
ld
.
backendNodes
[
DNN_BACKEND_HALIDE
],
ld
.
inputBlobs
,
ld
.
outputBlobs
);
ld
.
inputBlobs
,
ld
.
outputBlobs
,
preferableTarget
);
}
}
dnn
::
compileHalide
(
ld
.
outputBlobs
,
ld
.
backendNodes
[
DNN_BACKEND_HALIDE
],
dnn
::
compileHalide
(
ld
.
outputBlobs
,
ld
.
backendNodes
[
DNN_BACKEND_HALIDE
],
DNN_TARGET_CPU
);
preferableTarget
);
}
}
}
}
}
}
...
@@ -859,7 +862,10 @@ struct Net::Impl
...
@@ -859,7 +862,10 @@ struct Net::Impl
{
{
backendWrapper
.
reset
();
backendWrapper
.
reset
();
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
)
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
)
{
CV_Assert
(
preferableTarget
==
DNN_TARGET_CPU
);
return
;
return
;
}
// Iterator to current layer.
// Iterator to current layer.
MapIdToLayerData
::
iterator
it
=
layers
.
begin
();
MapIdToLayerData
::
iterator
it
=
layers
.
begin
();
...
@@ -905,7 +911,8 @@ struct Net::Impl
...
@@ -905,7 +911,8 @@ struct Net::Impl
// No layers fusion.
// No layers fusion.
ldTop
.
skipFlags
[
preferableBackend
]
=
false
;
ldTop
.
skipFlags
[
preferableBackend
]
=
false
;
std
::
vector
<
Ptr
<
BackendWrapper
>
>
inputs
=
std
::
vector
<
Ptr
<
BackendWrapper
>
>
inputs
=
backendWrapper
.
wrap
(
ldTop
.
inputBlobs
,
preferableBackend
);
backendWrapper
.
wrap
(
ldTop
.
inputBlobs
,
preferableBackend
,
preferableTarget
);
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
{
{
ldTop
.
backendNodes
[
DNN_BACKEND_HALIDE
]
=
layerTop
->
initHalide
(
inputs
);
ldTop
.
backendNodes
[
DNN_BACKEND_HALIDE
]
=
layerTop
->
initHalide
(
inputs
);
...
@@ -1040,7 +1047,7 @@ struct Net::Impl
...
@@ -1040,7 +1047,7 @@ struct Net::Impl
else
if
(
!
ld
.
skipFlags
[
preferableBackend
])
else
if
(
!
ld
.
skipFlags
[
preferableBackend
])
{
{
std
::
vector
<
Ptr
<
BackendWrapper
>
>
outputs
=
std
::
vector
<
Ptr
<
BackendWrapper
>
>
outputs
=
backendWrapper
.
wrap
(
ld
.
outputBlobs
,
preferableBackend
);
backendWrapper
.
wrap
(
ld
.
outputBlobs
,
preferableBackend
,
preferableTarget
);
Ptr
<
BackendNode
>
node
=
ld
.
backendNodes
[
preferableBackend
];
Ptr
<
BackendNode
>
node
=
ld
.
backendNodes
[
preferableBackend
];
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
{
{
...
@@ -1154,6 +1161,16 @@ struct Net::Impl
...
@@ -1154,6 +1161,16 @@ struct Net::Impl
CV_Error
(
Error
::
StsOutOfRange
,
"Layer
\"
"
+
ld
.
name
+
"
\"
produce only "
+
toString
(
ld
.
outputBlobs
.
size
())
+
CV_Error
(
Error
::
StsOutOfRange
,
"Layer
\"
"
+
ld
.
name
+
"
\"
produce only "
+
toString
(
ld
.
outputBlobs
.
size
())
+
" outputs, the #"
+
toString
(
pin
.
oid
)
+
" was requsted"
);
" outputs, the #"
+
toString
(
pin
.
oid
)
+
" was requsted"
);
}
}
if
(
preferableBackend
!=
DNN_BACKEND_DEFAULT
)
{
// Transfer data to CPU if it's require.
backendWrapper
.
wrap
(
ld
.
outputBlobs
[
pin
.
oid
],
preferableBackend
,
preferableTarget
)
->
copyToHost
();
}
else
{
CV_Assert
(
preferableTarget
==
DNN_TARGET_CPU
);
}
return
ld
.
outputBlobs
[
pin
.
oid
];
return
ld
.
outputBlobs
[
pin
.
oid
];
}
}
...
@@ -1314,6 +1331,13 @@ void Net::setPreferableBackend(int backendId)
...
@@ -1314,6 +1331,13 @@ void Net::setPreferableBackend(int backendId)
impl
->
preferableBackend
=
backendId
;
impl
->
preferableBackend
=
backendId
;
}
}
void
Net
::
setPreferableTarget
(
int
targetId
)
{
impl
->
netWasAllocated
=
impl
->
netWasAllocated
&&
impl
->
preferableTarget
==
targetId
;
impl
->
preferableTarget
=
targetId
;
}
void
Net
::
setInputsNames
(
const
std
::
vector
<
String
>
&
inputBlobNames
)
void
Net
::
setInputsNames
(
const
std
::
vector
<
String
>
&
inputBlobNames
)
{
{
impl
->
netInputLayer
->
setNames
(
inputBlobNames
);
impl
->
netInputLayer
->
setNames
(
inputBlobNames
);
...
@@ -1702,10 +1726,70 @@ Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
...
@@ -1702,10 +1726,70 @@ Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
}
}
void
Layer
::
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
void
Layer
::
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
{
CV_Error
(
Error
::
StsNotImplemented
,
"Scheduling of "
+
type
+
#ifdef HAVE_HALIDE
" layers is not implemented."
);
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
co
(
"co"
),
ci
(
"ci"
),
xo
(
"xo"
),
xi
(
"xi"
),
yo
(
"yo"
),
yi
(
"yi"
),
tile
(
"tile"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
if
(
targetId
==
DNN_TARGET_CPU
)
{
if
(
outW
==
1
&&
outH
==
1
)
{
if
(
outC
+
outN
==
1
)
return
;
if
(
outC
>
8
)
top
.
split
(
c
,
co
,
ci
,
8
)
.
fuse
(
x
,
y
,
tile
).
fuse
(
co
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
)
.
vectorize
(
ci
,
8
);
else
top
.
fuse
(
x
,
y
,
tile
).
fuse
(
c
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
);
}
else
{
if
(
outH
>
2
)
{
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
}
}
}
else
if
(
targetId
==
DNN_TARGET_OPENCL
)
{
int
c_split
=
outC
>
8
?
(
outC
>
16
?
8
:
4
)
:
outC
;
if
(
outW
==
1
&&
outH
==
1
)
{
top
.
split
(
c
,
co
,
ci
,
c_split
)
.
fuse
(
x
,
y
,
tile
).
fuse
(
co
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
gpu_blocks
(
tile
)
.
gpu_threads
(
ci
);
}
else
{
int
x_split
=
outW
>
8
?
(
outW
>=
32
?
16
:
8
)
:
outW
;
int
y_split
=
outH
>
8
?
(
outH
>=
32
?
16
:
8
)
:
outH
;
top
.
split
(
x
,
xo
,
xi
,
x_split
).
split
(
y
,
yo
,
yi
,
y_split
)
.
split
(
c
,
co
,
ci
,
c_split
)
.
gpu_blocks
(
xo
,
yo
,
co
)
.
gpu_threads
(
xi
,
yi
)
.
reorder
(
xi
,
yi
,
ci
,
xo
,
yo
,
co
)
.
vectorize
(
ci
);
}
}
else
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
#endif // HAVE_HALIDE
}
}
Ptr
<
BackendNode
>
Layer
::
tryAttach
(
const
Ptr
<
BackendNode
>&
node
)
Ptr
<
BackendNode
>
Layer
::
tryAttach
(
const
Ptr
<
BackendNode
>&
node
)
...
...
modules/dnn/src/halide_scheduler.cpp
View file @
62ba5d75
...
@@ -143,6 +143,26 @@ static void applyComputeRoot(const FileNode& directive, Halide::Func& func)
...
@@ -143,6 +143,26 @@ static void applyComputeRoot(const FileNode& directive, Halide::Func& func)
func
.
compute_root
();
func
.
compute_root
();
}
}
static
void
applyGpuBlocks
(
const
FileNode
&
directive
,
Halide
::
Func
&
func
)
{
std
::
string
varName
;
for
(
int
i
=
0
,
n
=
directive
.
size
();
i
<
n
;
++
i
)
{
directive
[
i
]
>>
varName
;
func
.
gpu_blocks
(
Halide
::
Var
(
varName
));
}
}
static
void
applyGpuThreads
(
const
FileNode
&
directive
,
Halide
::
Func
&
func
)
{
std
::
string
varName
;
for
(
int
i
=
0
,
n
=
directive
.
size
();
i
<
n
;
++
i
)
{
directive
[
i
]
>>
varName
;
func
.
gpu_threads
(
Halide
::
Var
(
varName
));
}
}
static
void
apply
(
const
FileNode
&
directives
,
Halide
::
Func
&
func
,
static
void
apply
(
const
FileNode
&
directives
,
Halide
::
Func
&
func
,
std
::
map
<
std
::
string
,
Halide
::
Func
>&
funcsMap
,
std
::
map
<
std
::
string
,
Halide
::
Func
>&
funcsMap
,
const
FileNode
&
params
)
const
FileNode
&
params
)
...
@@ -167,6 +187,10 @@ static void apply(const FileNode& directives, Halide::Func& func,
...
@@ -167,6 +187,10 @@ static void apply(const FileNode& directives, Halide::Func& func,
applyComputeAt
(
directive
,
func
,
funcsMap
);
applyComputeAt
(
directive
,
func
,
funcsMap
);
else
if
(
directive
.
name
()
==
"compute_root"
)
else
if
(
directive
.
name
()
==
"compute_root"
)
applyComputeRoot
(
directive
,
func
);
applyComputeRoot
(
directive
,
func
);
else
if
(
directive
.
name
()
==
"gpu_blocks"
)
applyGpuBlocks
(
directive
,
func
);
else
if
(
directive
.
name
()
==
"gpu_threads"
)
applyGpuThreads
(
directive
,
func
);
else
else
CV_Error
(
Error
::
StsNotImplemented
,
"Scheduling directive "
+
CV_Error
(
Error
::
StsNotImplemented
,
"Scheduling directive "
+
directive
.
name
()
+
" is not implemented."
);
directive
.
name
()
+
" is not implemented."
);
...
...
modules/dnn/src/layers/batch_norm_layer.cpp
View file @
62ba5d75
...
@@ -157,6 +157,8 @@ public:
...
@@ -157,6 +157,8 @@ public:
bias
(
i
)
=
(
hasBias
?
biasData
[
i
]
:
0.0
f
)
-
bias
(
i
)
=
(
hasBias
?
biasData
[
i
]
:
0.0
f
)
-
weights
(
i
)
*
meanData
[
i
]
*
varMeanScale
;
weights
(
i
)
*
meanData
[
i
]
*
varMeanScale
;
}
}
weights
.
set_host_dirty
();
bias
.
set_host_dirty
();
top
(
x
,
y
,
c
,
n
)
=
input
*
weights
(
c
)
+
bias
(
c
);
top
(
x
,
y
,
c
,
n
)
=
input
*
weights
(
c
)
+
bias
(
c
);
return
top
;
return
top
;
}
}
...
...
modules/dnn/src/layers/concat_layer.cpp
View file @
62ba5d75
...
@@ -130,29 +130,6 @@ public:
...
@@ -130,29 +130,6 @@ public:
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
return
Ptr
<
BackendNode
>
();
return
Ptr
<
BackendNode
>
();
}
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
if
(
outW
==
1
||
outH
<=
2
)
return
;
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
#endif // HAVE_HALIDE
}
};
};
Ptr
<
ConcatLayer
>
ConcatLayer
::
create
(
const
LayerParams
&
params
)
Ptr
<
ConcatLayer
>
ConcatLayer
::
create
(
const
LayerParams
&
params
)
...
...
modules/dnn/src/layers/convolution_layer.cpp
View file @
62ba5d75
...
@@ -99,9 +99,15 @@ public:
...
@@ -99,9 +99,15 @@ public:
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
{
#ifdef HAVE_HALIDE
#ifdef HAVE_HALIDE
if
(
targetId
!=
DNN_TARGET_CPU
)
{
Layer
::
applyHalideScheduler
(
node
,
inputs
,
outputs
,
targetId
);
return
;
}
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
),
co
(
"co"
),
ci
(
"ci"
);
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
),
co
(
"co"
),
ci
(
"ci"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
[
1
];
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
[
1
];
Halide
::
Func
&
padded_input
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
[
0
];
Halide
::
Func
&
padded_input
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
[
0
];
...
...
modules/dnn/src/layers/elementwise_layers.cpp
View file @
62ba5d75
...
@@ -422,7 +422,7 @@ struct ChannelsPReLUFunctor
...
@@ -422,7 +422,7 @@ struct ChannelsPReLUFunctor
{
{
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
);
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
);
auto
weights
=
wrapToHalideBuffer
(
scale
,
{(
int
)
scale
.
total
()});
auto
weights
=
wrapToHalideBuffer
(
scale
,
{(
int
)
scale
.
total
()});
top
(
x
,
y
,
c
,
n
)
=
select
(
input
>
0.0
f
,
input
,
weights
(
c
)
*
input
);
top
(
x
,
y
,
c
,
n
)
=
select
(
input
>
=
0.0
f
,
input
,
weights
(
c
)
*
input
);
}
}
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
...
...
modules/dnn/src/layers/eltwise_layer.cpp
View file @
62ba5d75
...
@@ -198,29 +198,6 @@ public:
...
@@ -198,29 +198,6 @@ public:
return
Ptr
<
BackendNode
>
();
return
Ptr
<
BackendNode
>
();
}
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
if
(
outW
==
1
||
outH
<=
2
)
return
;
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
#endif // HAVE_HALIDE
}
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
{
...
...
modules/dnn/src/layers/fully_connected_layer.cpp
View file @
62ba5d75
...
@@ -252,31 +252,6 @@ public:
...
@@ -252,31 +252,6 @@ public:
return
Ptr
<
BackendNode
>
();
return
Ptr
<
BackendNode
>
();
}
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
co
(
"co"
),
ci
(
"ci"
),
tile
(
"tile"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
if
(
outC
+
outN
==
1
)
return
;
if
(
outC
>
8
)
top
.
split
(
c
,
co
,
ci
,
8
)
.
fuse
(
x
,
y
,
tile
).
fuse
(
co
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
)
.
vectorize
(
ci
,
8
);
else
top
.
fuse
(
x
,
y
,
tile
).
fuse
(
c
,
tile
,
tile
).
fuse
(
n
,
tile
,
tile
)
.
parallel
(
tile
);
#endif // HAVE_HALIDE
}
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
virtual
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
{
...
...
modules/dnn/src/layers/lrn_layer.cpp
View file @
62ba5d75
...
@@ -272,9 +272,15 @@ public:
...
@@ -272,9 +272,15 @@ public:
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
{
#ifdef HAVE_HALIDE
#ifdef HAVE_HALIDE
if
(
targetId
!=
DNN_TARGET_CPU
)
{
Layer
::
applyHalideScheduler
(
node
,
inputs
,
outputs
,
targetId
);
return
;
}
int
outW
,
outH
,
outC
,
outN
;
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
...
...
modules/dnn/src/layers/max_unpooling_layer.cpp
View file @
62ba5d75
...
@@ -117,26 +117,6 @@ public:
...
@@ -117,26 +117,6 @@ public:
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
return
Ptr
<
BackendNode
>
();
return
Ptr
<
BackendNode
>
();
}
}
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
{
#ifdef HAVE_HALIDE
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
yi
(
"yi"
),
yo
(
"yo"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
int
outW
,
outH
,
outC
,
outN
;
getCanonicalSize
(
outputs
[
0
].
size
,
&
outW
,
&
outH
,
&
outC
,
&
outN
);
top
.
reorder
(
x
,
c
,
y
)
.
split
(
y
,
yo
,
yi
,
2
)
.
fuse
(
yo
,
n
,
tile
)
.
parallel
(
tile
)
.
unroll
(
yi
)
.
vectorize
(
x
,
outW
>=
16
?
16
:
outW
);
#endif // HAVE_HALIDE
}
};
};
Ptr
<
MaxUnpoolLayer
>
MaxUnpoolLayer
::
create
(
const
LayerParams
&
params
)
Ptr
<
MaxUnpoolLayer
>
MaxUnpoolLayer
::
create
(
const
LayerParams
&
params
)
...
...
modules/dnn/src/layers/padding_layer.cpp
View file @
62ba5d75
...
@@ -10,6 +10,7 @@ Implementation of padding layer, which adds paddings to input blob.
...
@@ -10,6 +10,7 @@ Implementation of padding layer, which adds paddings to input blob.
*/
*/
#include "../precomp.hpp"
#include "../precomp.hpp"
#include "op_halide.hpp"
#include <vector>
#include <vector>
namespace
cv
namespace
cv
...
@@ -52,6 +53,12 @@ public:
...
@@ -52,6 +53,12 @@ public:
return
false
;
return
false
;
}
}
virtual
bool
supportBackend
(
int
backendId
)
{
return
backendId
==
DNN_BACKEND_DEFAULT
||
backendId
==
DNN_BACKEND_HALIDE
&&
haveHalide
();
}
void
forward
(
std
::
vector
<
Mat
*>
&
inputs
,
std
::
vector
<
Mat
>
&
outputs
,
std
::
vector
<
Mat
>
&
internals
)
void
forward
(
std
::
vector
<
Mat
*>
&
inputs
,
std
::
vector
<
Mat
>
&
outputs
,
std
::
vector
<
Mat
>
&
internals
)
{
{
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
...
@@ -94,6 +101,23 @@ public:
...
@@ -94,6 +101,23 @@ public:
return
inputDims
>
0
&&
(
int
)
shape
.
size
()
>
inputDims
?
paddingDim
+
1
:
paddingDim
;
return
inputDims
>
0
&&
(
int
)
shape
.
size
()
>
inputDims
?
paddingDim
+
1
:
paddingDim
;
}
}
// Builds the Halide pipeline node for this layer.
//
// inputs: backend wrappers of the layer inputs; only inputs[0] is read.
// Returns a HalideBackendNode wrapping the generated function when built
// with HAVE_HALIDE, otherwise an empty Ptr<BackendNode>.
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
{
#ifdef HAVE_HALIDE
    int inW, inH, inC, inN;
    Halide::Buffer<float> src = halideBuffer(inputs[0]);
    // The canonical sizes are fetched here but not referenced afterwards
    // in this function.
    getCanonicalSize(src, &inW, &inH, &inC, &inN);

    Halide::Var x("x"), y("y"), c("c"), n("n");
    // Give the Halide function the layer's name when one is set.
    Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
    // Reads that fall outside the input buffer produce paddingValue.
    Halide::Func padded =
        Halide::BoundaryConditions::constant_exterior(src, paddingValue);
    // NOTE(review): the output is sampled at the same coordinates as the
    // input (no offset is applied here) -- confirm this matches forward().
    top(x, y, c, n) = padded(x, y, c, n);
    return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
    return Ptr<BackendNode>();
}
int
paddingDim
,
padding
,
inputDims
,
index
;
int
paddingDim
,
padding
,
inputDims
,
index
;
float
paddingValue
;
float
paddingValue
;
};
};
...
...
modules/dnn/src/layers/pooling_layer.cpp
View file @
62ba5d75
...
@@ -388,9 +388,15 @@ public:
...
@@ -388,9 +388,15 @@ public:
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
virtual
void
applyHalideScheduler
(
Ptr
<
BackendNode
>&
node
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
*>
&
inputs
,
const
std
::
vector
<
Mat
>
&
outputs
)
const
const
std
::
vector
<
Mat
>
&
outputs
,
int
targetId
)
const
{
{
#ifdef HAVE_HALIDE
#ifdef HAVE_HALIDE
if
(
targetId
!=
DNN_TARGET_CPU
)
{
Layer
::
applyHalideScheduler
(
node
,
inputs
,
outputs
,
targetId
);
return
;
}
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
Halide
::
Var
x
(
"x"
),
y
(
"y"
),
c
(
"c"
),
n
(
"n"
),
tile
(
"tile"
),
xi
(
"xi"
),
yi
(
"yi"
),
ci
(
"ci"
),
xo
(
"xo"
),
yo
(
"yo"
),
co
(
"co"
);
xi
(
"xi"
),
yi
(
"yi"
),
ci
(
"ci"
),
xo
(
"xo"
),
yo
(
"yo"
),
co
(
"co"
);
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
Halide
::
Func
&
top
=
node
.
dynamicCast
<
HalideBackendNode
>
()
->
funcs
.
back
();
...
...
modules/dnn/src/layers/softmax_layer.cpp
View file @
62ba5d75
...
@@ -187,33 +187,6 @@ public:
...
@@ -187,33 +187,6 @@ public:
return
Ptr
<
BackendNode
>
();
return
Ptr
<
BackendNode
>
();
}
}
// Applies a manual Halide schedule to the last function stored in `node`.
//
// node:    backend node; cast to HalideBackendNode and its last Func is
//          the one being scheduled.
// inputs:  not referenced by this schedule.
// outputs: outputs[0] supplies the canonical output size.
virtual void applyHalideScheduler(Ptr<BackendNode>& node,
                                  const std::vector<Mat*> &inputs,
                                  const std::vector<Mat> &outputs) const
{
#ifdef HAVE_HALIDE
    int outW, outH, outC, outN;
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

    // Most common case when SoftMax is a layer after fully-connected.
    // So we just schedule it in the same way.
    Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"), tile("tile");
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    // NOTE(review): this sum equals 1 only if one of the two sizes is 0;
    // confirm whether a different condition was intended.
    if (outC + outN == 1)
    {
        return;
    }

    if (outC > 8)
    {
        // Split channels into groups of 8, collapse every other dimension
        // into one parallel loop, and vectorize the inner channel group.
        top.split(c, co, ci, 8)
           .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
           .parallel(tile)
           .vectorize(ci, 8);
    }
    else
    {
        // Too few channels to vectorize: a single fused parallel loop.
        top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
           .parallel(tile);
    }
#endif  // HAVE_HALIDE
}
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
int64
getFLOPS
(
const
std
::
vector
<
MatShape
>
&
inputs
,
const
std
::
vector
<
MatShape
>
&
outputs
)
const
const
std
::
vector
<
MatShape
>
&
outputs
)
const
{
{
...
...
modules/dnn/src/op_halide.cpp
View file @
62ba5d75
...
@@ -7,6 +7,10 @@
...
@@ -7,6 +7,10 @@
#include "op_halide.hpp"
#include "op_halide.hpp"
#ifdef HAVE_HALIDE
#include <HalideRuntimeOpenCL.h>
#endif // HAVE_HALIDE
namespace
cv
namespace
cv
{
{
namespace
dnn
namespace
dnn
...
@@ -72,7 +76,15 @@ HalideBackendWrapper::HalideBackendWrapper(int targetId, const cv::Mat& m)
...
@@ -72,7 +76,15 @@ HalideBackendWrapper::HalideBackendWrapper(int targetId, const cv::Mat& m)
:
BackendWrapper
(
DNN_BACKEND_HALIDE
,
targetId
)
:
BackendWrapper
(
DNN_BACKEND_HALIDE
,
targetId
)
{
{
buffer
=
wrapToHalideBuffer
(
m
);
buffer
=
wrapToHalideBuffer
(
m
);
if
(
targetId
!=
DNN_TARGET_CPU
)
if
(
targetId
==
DNN_TARGET_CPU
)
{
return
;
}
else
if
(
targetId
==
DNN_TARGET_OPENCL
)
{
buffer
.
copy_to_device
(
halide_opencl_device_interface
());
}
else
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
}
}
...
@@ -80,15 +92,32 @@ HalideBackendWrapper::HalideBackendWrapper(const Ptr<BackendWrapper>& base,
...
@@ -80,15 +92,32 @@ HalideBackendWrapper::HalideBackendWrapper(const Ptr<BackendWrapper>& base,
const
MatShape
&
shape
)
const
MatShape
&
shape
)
:
BackendWrapper
(
DNN_BACKEND_HALIDE
,
base
->
targetId
)
:
BackendWrapper
(
DNN_BACKEND_HALIDE
,
base
->
targetId
)
{
{
if
(
base
->
targetId
!=
DNN_TARGET_CPU
)
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
int
w
,
h
,
c
,
n
;
int
w
,
h
,
c
,
n
;
getCanonicalSize
(
shape
,
&
w
,
&
h
,
&
c
,
&
n
);
getCanonicalSize
(
shape
,
&
w
,
&
h
,
&
c
,
&
n
);
Halide
::
Buffer
<
float
>
baseBuffer
=
halideBuffer
(
base
);
Halide
::
Buffer
<
float
>
baseBuffer
=
halideBuffer
(
base
);
buffer
=
Halide
::
Buffer
<
float
>
((
float
*
)
baseBuffer
.
raw_buffer
()
->
host
,
buffer
=
Halide
::
Buffer
<
float
>
((
float
*
)
baseBuffer
.
raw_buffer
()
->
host
,
{
w
,
h
,
c
,
n
});
{
w
,
h
,
c
,
n
});
buffer
.
set_host_dirty
();
// Indicate that data is on CPU.
if
(
baseBuffer
.
has_device_allocation
())
{
buffer
.
raw_buffer
()
->
device
=
baseBuffer
.
raw_buffer
()
->
device
;
buffer
.
raw_buffer
()
->
device_interface
=
baseBuffer
.
raw_buffer
()
->
device_interface
;
buffer
.
set_device_dirty
();
}
else
{
buffer
.
set_host_dirty
();
// Indicate that data is on CPU.
CV_Assert
(
targetId
==
DNN_TARGET_CPU
);
}
}
// Makes the host copy of the wrapped buffer current.
// For a non-CPU target the buffer is required to be marked device-dirty
// on entry; when it is, pending device work is synchronized and the data
// is copied back to host memory. Otherwise nothing is done.
void HalideBackendWrapper::copyToHost()
{
    CV_Assert(targetId == DNN_TARGET_CPU || buffer.device_dirty());
    if (!buffer.device_dirty())
        return;

    buffer.device_sync();
    buffer.copy_to_host();
}
}
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
...
@@ -144,6 +173,11 @@ void compileHalide(std::vector<Mat> &outputs, Ptr<BackendNode>& node, int target
...
@@ -144,6 +173,11 @@ void compileHalide(std::vector<Mat> &outputs, Ptr<BackendNode>& node, int target
Halide
::
Target
target
=
Halide
::
get_host_target
();
Halide
::
Target
target
=
Halide
::
get_host_target
();
target
.
set_feature
(
Halide
::
Target
::
NoAsserts
);
target
.
set_feature
(
Halide
::
Target
::
NoAsserts
);
if
(
targetId
==
DNN_TARGET_OPENCL
)
{
target
.
set_feature
(
Halide
::
Target
::
OpenCL
);
}
CV_Assert
(
target
.
supported
());
top
.
compile_jit
(
target
);
top
.
compile_jit
(
target
);
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
}
}
...
...
modules/dnn/src/op_halide.hpp
View file @
62ba5d75
...
@@ -57,6 +57,8 @@ namespace dnn
...
@@ -57,6 +57,8 @@ namespace dnn
HalideBackendWrapper
(
const
Ptr
<
BackendWrapper
>&
base
,
const
MatShape
&
shape
);
HalideBackendWrapper
(
const
Ptr
<
BackendWrapper
>&
base
,
const
MatShape
&
shape
);
virtual
void
copyToHost
();
Halide
::
Buffer
<
float
>
buffer
;
Halide
::
Buffer
<
float
>
buffer
;
};
};
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
...
...
modules/dnn/test/test_halide_nets.cpp
View file @
62ba5d75
...
@@ -48,6 +48,7 @@ static void test(const std::string& weights, const std::string& proto,
...
@@ -48,6 +48,7 @@ static void test(const std::string& weights, const std::string& proto,
netHalide
.
setInput
(
blobFromImage
(
input
.
clone
(),
1.0
f
,
false
));
netHalide
.
setInput
(
blobFromImage
(
input
.
clone
(),
1.0
f
,
false
));
netHalide
.
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
netHalide
.
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
netHalide
.
setPreferableTarget
(
targetId
);
netHalide
.
setHalideScheduler
(
scheduler
);
netHalide
.
setHalideScheduler
(
scheduler
);
outputHalide
=
netHalide
.
forward
(
outputLayer
).
clone
();
outputHalide
=
netHalide
.
forward
(
outputLayer
).
clone
();
...
@@ -62,15 +63,20 @@ static void test(const std::string& weights, const std::string& proto,
...
@@ -62,15 +63,20 @@ static void test(const std::string& weights, const std::string& proto,
// Swap backends.
// Swap backends.
netHalide
.
setPreferableBackend
(
DNN_BACKEND_DEFAULT
);
netHalide
.
setPreferableBackend
(
DNN_BACKEND_DEFAULT
);
netHalide
.
setPreferableTarget
(
DNN_TARGET_CPU
);
outputDefault
=
netHalide
.
forward
(
outputLayer
).
clone
();
outputDefault
=
netHalide
.
forward
(
outputLayer
).
clone
();
netDefault
.
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
netDefault
.
setPreferableBackend
(
DNN_BACKEND_HALIDE
);
netDefault
.
setPreferableTarget
(
targetId
);
netDefault
.
setHalideScheduler
(
scheduler
);
netDefault
.
setHalideScheduler
(
scheduler
);
outputHalide
=
netDefault
.
forward
(
outputLayer
).
clone
();
outputHalide
=
netDefault
.
forward
(
outputLayer
).
clone
();
normAssert
(
outputDefault
,
outputHalide
);
normAssert
(
outputDefault
,
outputHalide
);
}
}
////////////////////////////////////////////////////////////////////////////////
// CPU target
////////////////////////////////////////////////////////////////////////////////
TEST
(
Reproducibility_GoogLeNet_Halide
,
Accuracy
)
TEST
(
Reproducibility_GoogLeNet_Halide
,
Accuracy
)
{
{
test
(
findDataFile
(
"dnn/bvlc_googlenet.caffemodel"
,
false
),
test
(
findDataFile
(
"dnn/bvlc_googlenet.caffemodel"
,
false
),
...
@@ -115,6 +121,53 @@ TEST(Reproducibility_ENet_Halide, Accuracy)
...
@@ -115,6 +121,53 @@ TEST(Reproducibility_ENet_Halide, Accuracy)
findDataFile
(
"dnn/halide_scheduler_enet.yml"
,
false
),
findDataFile
(
"dnn/halide_scheduler_enet.yml"
,
false
),
512
,
512
,
"l367_Deconvolution"
,
"torch"
,
DNN_TARGET_CPU
);
512
,
512
,
"l367_Deconvolution"
,
"torch"
,
DNN_TARGET_CPU
);
};
};
////////////////////////////////////////////////////////////////////////////////
// OpenCL target
////////////////////////////////////////////////////////////////////////////////
// GoogLeNet (Caffe) on the OpenCL target; an empty scheduler path is passed.
TEST(Reproducibility_GoogLeNet_Halide_opencl, Accuracy)
{
    const std::string weights = findDataFile("dnn/bvlc_googlenet.caffemodel", false);
    const std::string proto = findDataFile("dnn/bvlc_googlenet.prototxt", false);
    test(weights, proto, "", 227, 227, "prob", "caffe", DNN_TARGET_OPENCL);
};
// AlexNet (Caffe) on the OpenCL target with its OpenCL scheduler file.
TEST(Reproducibility_AlexNet_Halide_opencl, Accuracy)
{
    const std::string weights = findDataFile("dnn/bvlc_alexnet.caffemodel", false);
    const std::string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
    const std::string scheduler = findDataFile("dnn/halide_scheduler_opencl_alexnet.yml", false);
    test(weights, proto, scheduler, 227, 227, "prob", "caffe", DNN_TARGET_OPENCL);
};
// ResNet-50 (Caffe) on the OpenCL target with its OpenCL scheduler file.
TEST(Reproducibility_ResNet_50_Halide_opencl, Accuracy)
{
    const std::string weights = findDataFile("dnn/ResNet-50-model.caffemodel", false);
    const std::string proto = findDataFile("dnn/ResNet-50-deploy.prototxt", false);
    const std::string scheduler = findDataFile("dnn/halide_scheduler_opencl_resnet_50.yml", false);
    test(weights, proto, scheduler, 224, 224, "prob", "caffe", DNN_TARGET_OPENCL);
};
// SqueezeNet v1.1 (Caffe) on the OpenCL target with its OpenCL scheduler file.
TEST(Reproducibility_SqueezeNet_v1_1_Halide_opencl, Accuracy)
{
    const std::string weights = findDataFile("dnn/squeezenet_v1_1.caffemodel", false);
    const std::string proto = findDataFile("dnn/squeezenet_v1_1.prototxt", false);
    const std::string scheduler = findDataFile("dnn/halide_scheduler_opencl_squeezenet_v1_1.yml", false);
    test(weights, proto, scheduler, 227, 227, "prob", "caffe", DNN_TARGET_OPENCL);
};
// Inception-5h (TensorFlow graph, so the second path argument is empty)
// on the OpenCL target with its OpenCL scheduler file.
TEST(Reproducibility_Inception_5h_Halide_opencl, Accuracy)
{
    const std::string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
    const std::string scheduler = findDataFile("dnn/halide_scheduler_opencl_inception_5h.yml", false);
    test(model, "", scheduler, 224, 224, "softmax2", "tensorflow", DNN_TARGET_OPENCL);
};
// ENet (Torch model, so the second path argument is empty) on the OpenCL
// target with its OpenCL scheduler file.
TEST(Reproducibility_ENet_Halide_opencl, Accuracy)
{
    const std::string model = findDataFile("dnn/Enet-model-best.net", false);
    const std::string scheduler = findDataFile("dnn/halide_scheduler_opencl_enet.yml", false);
    test(model, "", scheduler, 512, 512, "l367_Deconvolution", "torch", DNN_TARGET_OPENCL);
};
#endif // HAVE_HALIDE
#endif // HAVE_HALIDE
}
// namespace cvtest
}
// namespace cvtest
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment