Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e0e40405
Commit
e0e40405
authored
Oct 27, 2017
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #9847 from wzw-intel:ocl4dnn_fusion
parents
ff037ebe
2d8f2c2a
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
721 additions
and
707 deletions
+721
-707
dnn.cpp
modules/dnn/src/dnn.cpp
+26
-10
convolution_layer.cpp
modules/dnn/src/layers/convolution_layer.cpp
+82
-15
default_kernel_config.hpp
modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp
+459
-447
ocl4dnn.hpp
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+19
-4
ocl4dnn_conv_spatial.cpp
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+82
-8
conv_layer_spatial.cl
modules/dnn/src/opencl/conv_layer_spatial.cl
+53
-223
No files found.
modules/dnn/src/dnn.cpp
View file @
e0e40405
...
...
@@ -1028,7 +1028,7 @@ struct Net::Impl
void
fuseLayers
(
const
std
::
vector
<
LayerPin
>&
blobsToKeep_
)
{
if
(
!
fusion
||
!
(
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
preferableTarget
==
DNN_TARGET_CPU
)
)
if
(
!
fusion
||
preferableBackend
!=
DNN_BACKEND_DEFAULT
)
return
;
CV_TRACE_FUNCTION
();
...
...
@@ -1056,6 +1056,11 @@ struct Net::Impl
// with the current layer if they follow it. Normally, the are fused with the convolution layer,
// but some of them (like activation) may be fused with fully-connected, elemwise (+) and
// some other layers.
// TODO: OpenCL target support more fusion styles.
if
(
preferableTarget
==
DNN_TARGET_OPENCL
&&
ld
.
layerInstance
->
type
.
compare
(
"Convolution"
)
)
continue
;
Ptr
<
Layer
>&
currLayer
=
ld
.
layerInstance
;
if
(
ld
.
consumers
.
size
()
==
1
&&
pinsToKeep
.
count
(
LayerPin
(
lid
,
0
))
==
0
)
{
...
...
@@ -1100,16 +1105,27 @@ struct Net::Impl
}
}
Ptr
<
ActivationLayer
>
nextActivLayer
;
if
(
nextData
)
nextActivLayer
=
nextData
->
layerInstance
.
dynamicCast
<
ActivationLayer
>
();
if
(
!
nextActivLayer
.
empty
()
&&
pinsToKeep
.
count
(
lpNext
)
==
0
&&
currLayer
->
setActivation
(
nextActivLayer
)
)
// For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU
if
(
preferableTarget
!=
DNN_TARGET_OPENCL
||
(
preferableTarget
==
DNN_TARGET_OPENCL
&&
nextData
&&
(
!
nextData
->
type
.
compare
(
"ReLU"
)
||
!
nextData
->
type
.
compare
(
"ChannelsPReLU"
))
)
)
{
printf_
((
"
\t
fused with %s
\n
"
,
nextActivLayer
->
name
.
c_str
()));
nextData
->
skipFlags
[
DNN_BACKEND_DEFAULT
]
=
true
;
ld
.
outputBlobs
=
layers
[
lpNext
.
lid
].
outputBlobs
;
Ptr
<
ActivationLayer
>
nextActivLayer
;
if
(
nextData
)
nextActivLayer
=
nextData
->
layerInstance
.
dynamicCast
<
ActivationLayer
>
();
if
(
!
nextActivLayer
.
empty
()
&&
pinsToKeep
.
count
(
lpNext
)
==
0
&&
currLayer
->
setActivation
(
nextActivLayer
)
)
{
LayerData
*
activData
=
nextData
;
printf_
((
"
\t
fused with %s
\n
"
,
nextActivLayer
->
name
.
c_str
()));
activData
->
skipFlags
[
DNN_BACKEND_DEFAULT
]
=
true
;
ld
.
outputBlobs
=
layers
[
lpNext
.
lid
].
outputBlobs
;
}
}
}
...
...
modules/dnn/src/layers/convolution_layer.cpp
View file @
e0e40405
...
...
@@ -157,7 +157,20 @@ public:
#ifdef HAVE_OPENCL
Ptr
<
OCL4DNNConvSpatial
<
float
>
>
convolutionOp
;
std
::
vector
<
UMat
>
umat_blobs
;
bool
fusedBias
;
bool
newWeightAndBias
;
bool
newActiv
;
ocl4dnnFusedActiv_t
activType
;
#endif
ConvolutionLayerImpl
()
{
#ifdef HAVE_OPENCL
fusedBias
=
false
;
newWeightAndBias
=
false
;
newActiv
=
false
;
activType
=
OCL4DNN_CONV_FUSED_ACTIV_NONE
;
#endif
}
MatShape
computeColRowShape
(
const
MatShape
&
inpShape
,
const
MatShape
&
outShape
)
const
{
...
...
@@ -209,6 +222,10 @@ public:
activ
=
layer
;
if
(
activ
.
empty
())
reluslope
.
clear
();
#ifdef HAVE_OPENCL
newActiv
=
true
;
activType
=
OCL4DNN_CONV_FUSED_ACTIV_NONE
;
#endif
return
!
activ
.
empty
();
}
...
...
@@ -221,6 +238,10 @@ public:
// we will need to re-compute the weights with the batch
// norm coefficients taken into account
weightsMat
.
release
();
#ifdef HAVE_OPENCL
newWeightAndBias
=
true
;
fusedBias
=
false
;
#endif
return
!
bnorm
.
empty
();
}
...
...
@@ -230,6 +251,10 @@ public:
// we will need to re-compute the weights with the scaling
// coefficients taken into account
weightsMat
.
release
();
#ifdef HAVE_OPENCL
newWeightAndBias
=
true
;
fusedBias
=
false
;
#endif
return
!
scaleLayer
.
empty
();
}
...
...
@@ -665,19 +690,49 @@ public:
convolutionOp
=
Ptr
<
OCL4DNNConvSpatial
<
float
>
>
(
new
OCL4DNNConvSpatial
<
float
>
(
config
));
}
for
(
size_t
ii
=
0
;
ii
<
outputs
.
size
();
ii
++
)
if
(
newWeightAndBias
)
{
UMat
inpMat
,
outMat
;
inpMat
=
inputs
[
ii
]
->
getUMat
(
ACCESS_READ
);
outMat
=
outputs
[
ii
].
getUMat
(
ACCESS_WRITE
);
int
batch_size
=
inpMat
.
size
[
0
];
weightsMat
.
copyTo
(
umat_blobs
[
0
]);
if
(
fusedBias
)
{
if
(
umat_blobs
.
size
()
<
2
)
umat_blobs
.
resize
(
2
);
umat_blobs
[
1
]
=
UMat
(
biasvec
,
true
);
}
convolutionOp
->
setBias
(
fusedBias
||
hasBias
());
newWeightAndBias
=
false
;
}
if
(
!
convolutionOp
->
Forward
(
inpMat
,
umat_blobs
[
0
],
hasBias
()
?
umat_blobs
[
1
]
:
UMat
(),
outMat
,
batch_size
))
return
false
;
if
(
newActiv
)
{
if
(
activType
==
OCL4DNN_CONV_FUSED_ACTIV_RELU
)
{
CV_Assert
(
!
reluslope
.
empty
());
convolutionOp
->
setActivReLU
(
true
,
reluslope
[
0
]);
}
else
if
(
activType
==
OCL4DNN_CONV_FUSED_ACTIV_PRELU
)
{
CV_Assert
(
!
reluslope
.
empty
());
convolutionOp
->
setActivPReLU
(
true
,
reluslope
);
}
else
{
convolutionOp
->
setActivReLU
(
false
,
0
);
convolutionOp
->
setActivPReLU
(
false
,
reluslope
);
}
newActiv
=
false
;
}
return
true
;
UMat
inpMat
,
outMat
;
inpMat
=
inputs
[
0
]
->
getUMat
(
ACCESS_READ
);
outMat
=
outputs
[
0
].
getUMat
(
ACCESS_WRITE
);
int
batch_size
=
inpMat
.
size
[
0
];
return
convolutionOp
->
Forward
(
inpMat
,
umat_blobs
[
0
],
(
hasBias
()
||
fusedBias
)
?
umat_blobs
[
1
]
:
UMat
(),
outMat
,
batch_size
);
}
#endif
...
...
@@ -693,11 +748,6 @@ public:
CV_Assert
(
inputs
.
size
()
==
(
size_t
)
1
&&
inputs
[
0
]
->
size
[
1
]
%
blobs
[
0
].
size
[
1
]
==
0
);
int
ngroups
=
inputs
[
0
]
->
size
[
1
]
/
blobs
[
0
].
size
[
1
];
CV_Assert
(
outputs
[
0
].
size
[
1
]
%
ngroups
==
0
);
CV_OCL_RUN
((
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs
,
outputs
,
internals
))
int
k
,
outCn
=
blobs
[
0
].
size
[
0
];
if
(
weightsMat
.
empty
()
)
...
...
@@ -761,6 +811,11 @@ public:
}
}
#ifdef HAVE_OPENCL
if
(
shiftptr
||
shiftptr2
)
fusedBias
=
true
;
#endif
for
(
int
i
=
0
;
i
<
outCn
;
i
++
)
{
float
s1
=
scaleptr
?
scaleptr
[
i
]
:
1.
f
;
...
...
@@ -784,7 +839,12 @@ public:
{
Ptr
<
ReLULayer
>
activ_relu
=
activ
.
dynamicCast
<
ReLULayer
>
();
if
(
!
activ_relu
.
empty
()
)
{
reluslope
.
assign
(
outCn
+
2
,
activ_relu
->
negativeSlope
);
#ifdef HAVE_OPENCL
activType
=
OCL4DNN_CONV_FUSED_ACTIV_RELU
;
#endif
}
Ptr
<
ChannelsPReLULayer
>
activ_chprelu
=
activ
.
dynamicCast
<
ChannelsPReLULayer
>
();
if
(
!
activ_chprelu
.
empty
()
)
...
...
@@ -795,9 +855,16 @@ public:
reluslope
.
resize
(
outCn
+
2
);
std
::
copy
(
mdata
,
mdata
+
outCn
,
reluslope
.
begin
());
reluslope
[
outCn
]
=
reluslope
[
outCn
+
1
]
=
reluslope
[
outCn
-
1
];
#ifdef HAVE_OPENCL
activType
=
OCL4DNN_CONV_FUSED_ACTIV_PRELU
;
#endif
}
}
CV_OCL_RUN
((
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs
,
outputs
,
internals
))
int
nstripes
=
std
::
max
(
getNumThreads
(),
1
);
ParallelConv
::
run
(
*
inputs
[
0
],
outputs
[
0
],
weightsMat
,
biasvec
,
reluslope
,
...
...
modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp
View file @
e0e40405
...
...
@@ -25,7 +25,7 @@ Number of devices 1
Max clock frequency 950MHz
Device Partition (core)
Max number of sub-devices 0
Supported partition types by <unknown> (0x7F
E0
00000000)
Supported partition types by <unknown> (0x7F
2F
00000000)
Max work item dimensions 3
Max work item sizes 256x256x256
Max work group size 256
...
...
@@ -66,15 +66,15 @@ Number of devices 1
Support is emulated in software No
Correctly-rounded divide and sqrt operations No
Address bits 64, Little-Endian
Global memory size 2688
7677543
(25.04GiB)
Global memory size 2688
8119911
(25.04GiB)
Error Correction support No
Max memory allocation 4294959103 (4GiB)
Unified memory for Host and Device Yes
Shared Virtual Memory (SVM) capabilities (core)
Coarse-grained buffer sharing Yes
Fine-grained buffer sharing
No
Fine-grained buffer sharing
Yes
Fine-grained system sharing No
Atomics
No
Atomics
Yes
Minimum alignment for any data type 128 bytes
Alignment of base address 1024 bits (128 bytes)
Preferred alignment for atomics
...
...
@@ -140,150 +140,154 @@ NULL platform behavior
clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform
clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform
********************************************************************************/
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64"
,
"4 1 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4"
,
"14 1 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4"
,
"4 4 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208"
,
"2 6 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32"
,
"4 6 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4"
,
"12 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32"
,
"3 6 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96"
,
"4 5 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192"
,
"10 2 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256"
,
"2 6 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5"
,
"2 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192"
,
"10 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU72_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32"
,
"8 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU72_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32"
,
"4 6 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2"
,
"1 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2"
,
"3 3 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1"
,
"2 4 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0"
,
"2 3 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1"
,
"4 4 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1"
,
"2 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2"
,
"8 3 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU72_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0"
,
"4 6 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2"
,
"4 7 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0"
,
"4 1 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0"
,
"1 4 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2"
,
"12 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0"
,
"8 3 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1"
,
"6 2 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2"
,
"2 6 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU72_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU72_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1"
,
"2 7 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
// Below is the information for OpenCL based on which these configurations tuned
/*******************************************************************************
Number of platforms 1
...
...
@@ -291,7 +295,7 @@ Number of platforms 1
Platform Vendor Intel(R) Corporation
Platform Version OpenCL 2.0
Platform Profile FULL_PROFILE
Platform Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_d
river_diagnostics cl_intel_motion_estimation
cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
Platform Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_d
evice_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv
cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
Platform Extensions function suffix INTEL
Platform Name Intel(R) OpenCL
...
...
@@ -300,15 +304,15 @@ Number of devices 1
Device Vendor Intel(R) Corporation
Device Vendor ID 0x8086
Device Version OpenCL 2.0
Driver Version
16.5.56875
Device OpenCL C Version OpenCL C 2.0
( using IGC )
Driver Version
r4.1.61547
Device OpenCL C Version OpenCL C 2.0
Device Type GPU
Device Profile FULL_PROFILE
Max compute units 48
Max clock frequency 950MHz
Device Partition (core)
Max number of sub-devices 0
Supported partition types by <unknown> (0x7F
4B
00000000)
Supported partition types by <unknown> (0x7F
22
00000000)
Max work item dimensions 3
Max work item sizes 256x256x256
Max work group size 256
...
...
@@ -410,7 +414,7 @@ Number of devices 1
Device Available Yes
Compiler Available Yes
Linker Available Yes
Device Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_d
river_diagnostics cl_intel_motion_estimation
cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
Device Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_d
evice_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv
cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
NULL platform behavior
clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...) No platform
...
...
@@ -423,150 +427,154 @@ NULL platform behavior
clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform
clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform
********************************************************************************/
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU48_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4"
,
"6 4 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512"
,
"2 7 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU48_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32"
,
"4 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU48_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64"
,
"4 1 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32"
,
"3 3 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"4 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128"
,
"6 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4"
,
"4 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384"
,
"2 4 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288"
,
"2 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128"
,
"4 5 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256"
,
"2 6 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64"
,
"10 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"4 5 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16"
,
"4 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5"
,
"2 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128"
,
"8 2 16 2 1 1 16 1 0 "
,
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128"
,
"10 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU48_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2"
,
"1 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1"
,
"2 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0"
,
"2 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1"
,
"4 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1"
,
"2 4 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2"
,
"8 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1"
,
"14 2 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0"
,
"1 2 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0"
,
"4 1 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1"
,
"4 2 8 2 1 1 8 1 0 "
,
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1"
,
"4 4 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2"
,
"8 3 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU48_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2"
,
"10 2 8 2 1 1 8 1 0 "
,
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1"
,
"12 1 8 2 1 1 8 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU48_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU48_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
// Below is the information for OpenCL based on which these configurations tuned
/*******************************************************************************
Number of platforms 1
...
...
@@ -591,7 +599,7 @@ Number of devices 1
Max clock frequency 1050MHz
Device Partition (core)
Max number of sub-devices 0
Supported partition types by <unknown> (0x7F
51
00000000)
Supported partition types by <unknown> (0x7F
C3
00000000)
Max work item dimensions 3
Max work item sizes 256x256x256
Max work group size 256
...
...
@@ -632,9 +640,9 @@ Number of devices 1
Support is emulated in software No
Correctly-rounded divide and sqrt operations No
Address bits 64, Little-Endian
Global memory size 658880
2663
(6.136GiB)
Global memory size 658880
9216
(6.136GiB)
Error Correction support No
Max memory allocation 329440
1331
(3.068GiB)
Max memory allocation 329440
4608
(3.068GiB)
Unified memory for Host and Device Yes
Shared Virtual Memory (SVM) capabilities (core)
Coarse-grained buffer sharing Yes
...
...
@@ -648,13 +656,13 @@ Number of devices 1
Global 64 bytes
Local 64 bytes
Max size for global variable 65536 (64KiB)
Preferred total size of global vars 329440
1331
(3.068GiB)
Preferred total size of global vars 329440
4608
(3.068GiB)
Global Memory cache type Read/Write
Global Memory cache size 524288
Global Memory cache line 64 bytes
Image support Yes
Max number of samplers per kernel 16
Max size for 1D images from buffer 205900
083
pixels
Max size for 1D images from buffer 205900
288
pixels
Max 1D or 2D image array size 2048 images
Base address alignment for 2D image buffers 4 bytes
Pitch alignment for 2D image buffers 4 bytes
...
...
@@ -668,7 +676,7 @@ Number of devices 1
Max pipe packet size 1024
Local memory type Local
Local memory size 65536 (64KiB)
Max constant buffer size 329440
1331
(3.068GiB)
Max constant buffer size 329440
4608
(3.068GiB)
Max number of constant args 8
Max size of kernel argument 1024
Queue properties (on host)
...
...
@@ -706,149 +714,153 @@ NULL platform behavior
clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform
clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform
********************************************************************************/
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32"
,
"4 6 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32"
,
"3 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048"
,
"4 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU24_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256"
,
"2 5 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16"
,
"6 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4"
,
"10 2 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU24_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32"
,
"4 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48"
,
"8 1 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128"
,
"10 2 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5"
,
"2 3 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2"
,
"1 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64"
,
"4 1 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192"
,
"6 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU24_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2"
,
"8 3 16 2 1 1 16 1 0 "
,
"EU24_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0"
,
"2 4 8 2 1 1 8 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2"
,
"10 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"8 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2"
,
"2 4 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1"
,
"2 6 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0"
,
"4 4 8 2 1 1 8 1 0 "
,
"EU24_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0"
,
"4 1 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"4 7 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1"
,
"14 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1"
,
"14 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1"
,
"8 3 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1"
,
"2 8 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1"
,
"14 2 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1"
,
"4 6 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1"
,
"4 3 16 2 1 1 16 1 0 "
,
"EU24_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0"
,
"1 3 8 2 1 1 8 1 0 "
,
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0"
,
"1 1 1 4 1 1 1 0 1 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1"
,
"12 2 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1"
,
"1 16 32 5 1 16 1 1 0 "
,
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1"
,
"4 2 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1"
,
"4 3 8 2 1 1 8 1 0 "
,
"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0"
,
"2 8 32 5 1 8 1 1 0 "
,
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1"
,
"4 4 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2"
,
"1 8 32 5 1 8 1 1 0 "
,
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1"
,
"4 7 16 2 1 1 16 1 0 "
,
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1"
,
"2 7 16 2 1 1 16 1 0 "
,
};
#endif
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
View file @
e0e40405
...
...
@@ -73,6 +73,11 @@ struct OCL4DNNConvConfig
bool
bias_term
;
// = false;
};
// Kind of activation fused into the OpenCL convolution kernel.
// The numeric values are significant: generateKey() streams fused_activ_
// into the tuning key as "activ<N>", so they must stay in sync with the
// "_activ0/_activ1/_activ2" suffixes used by the kernel-config keys.
typedef enum {
    OCL4DNN_CONV_FUSED_ACTIV_NONE  = 0,  // no activation fused
    OCL4DNN_CONV_FUSED_ACTIV_RELU  = 1,  // ReLU with a single scalar slope
    OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2   // PReLU with a buffer of slopes
} ocl4dnnFusedActiv_t;
template
<
typename
Dtype
>
class
OCL4DNNConvSpatial
...
...
@@ -80,9 +85,13 @@ class OCL4DNNConvSpatial
public
:
explicit
OCL4DNNConvSpatial
(
OCL4DNNConvConfig
config
);
~
OCL4DNNConvSpatial
();
bool
Forward
(
const
UMat
&
bottom_data
,
const
UMat
&
weight
,
bool
Forward
(
const
UMat
&
bottom_data
,
const
UMat
&
weight
,
const
UMat
&
bias
,
UMat
&
top_data
,
int32_t
batch_size
);
void
setActivReLU
(
bool
fuse_activ
,
float
slope
);
void
setActivPReLU
(
bool
fuse_activ
,
std
::
vector
<
float
>
&
slope
);
void
setBias
(
bool
bias_term
);
private
:
struct
kernelConfig
...
...
@@ -194,9 +203,9 @@ class OCL4DNNConvSpatial
int32_t
blockWidth
,
int32_t
blockHeight
,
int32_t
blockDepth
);
bool
setupIDLF
(
int32_t
blockWidth
,
int32_t
blockHeight
,
int32_t
blockDepth
);
bool
createIDLFKernel
(
int32_t
blockWidth
,
int32_t
blockHeight
,
int32_t
blockDepth
);
bool
createBasicKernel
(
int32_t
blockWidth
,
int32_t
blockHeight
,
int32_t
blockDepth
);
...
...
@@ -244,10 +253,13 @@ class OCL4DNNConvSpatial
int
lx
,
int
ly
,
int
lz
,
bool
swizzle
,
bool
nullLocal
);
void
generateTunerItems
(
std
::
vector
<
cv
::
Ptr
<
tunerParam
>
>
&
tunerItems
);
void
setFusionDefine
(
ocl4dnnFusedActiv_t
fused_activ
);
void
setFusionArg
(
ocl4dnnFusedActiv_t
fused_activ
,
ocl
::
Kernel
&
kernel
,
cl_uint
&
argIdx
);
int32_t
group_
;
bool
bias_term_
;
UMat
swizzled_weights_umat
;
UMat
bottom_data2_
;
int32_t
bottom_index_
;
int32_t
output_h_
;
...
...
@@ -291,6 +303,9 @@ class OCL4DNNConvSpatial
std
::
stringstream
options_
;
cv
::
ocl
::
ProgramSource
src_
;
int32_t
prev_kernel_type_
;
// Slope applied to negative inputs by the fused ReLU.
// Must be a floating-point value: setActivReLU() stores its `float slope`
// argument here and setFusionArg() forwards it to the kernel as a float.
// Declaring it `bool` would collapse every non-zero slope (e.g. 0.1) to 1.
float negative_slope_;
UMat
negative_slope_umat_
;
ocl4dnnFusedActiv_t
fused_activ_
;
};
typedef
enum
{
...
...
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
View file @
e0e40405
...
...
@@ -78,6 +78,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
num_output_
=
config
.
out_shape
[
dims
-
spatial_dims
-
1
];
group_
=
config
.
group
;
fused_activ_
=
OCL4DNN_CONV_FUSED_ACTIV_NONE
;
negative_slope_
=
0
;
prev_kernel_type_
=
-
1
;
tuned_
=
false
;
...
...
@@ -138,6 +140,38 @@ OCL4DNNConvSpatial<Dtype>::~OCL4DNNConvSpatial()
}
}
// Registers (via addDef) the kernel build definition that corresponds to the
// requested fused activation:
//   OCL4DNN_CONV_FUSED_ACTIV_RELU  -> "FUSED_CONV_RELU"  = 1
//   OCL4DNN_CONV_FUSED_ACTIV_PRELU -> "FUSED_CONV_PRELU" = 1
// Any other value (including OCL4DNN_CONV_FUSED_ACTIV_NONE) adds nothing.
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ)
{
    if (fused_activ == OCL4DNN_CONV_FUSED_ACTIV_RELU)
    {
        addDef("FUSED_CONV_RELU", 1);
    }
    else if (fused_activ == OCL4DNN_CONV_FUSED_ACTIV_PRELU)
    {
        addDef("FUSED_CONV_PRELU", 1);
    }
}
// Binds the extra kernel argument required by the fused activation, if any,
// at position `argIdx` and advances `argIdx` past it.
//   RELU  : the scalar negative_slope_, passed as a float.
//   PRELU : the cl_mem handle of negative_slope_umat_, opened for reading.
//   other : no argument is set and `argIdx` is left unchanged.
// Callers invoke this right after initialising argIdx to 0, so the fusion
// argument (when present) precedes the regular convolution arguments.
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx)
{
    if (fused_activ == OCL4DNN_CONV_FUSED_ACTIV_RELU)
    {
        kernel.set(argIdx++, (float)negative_slope_);
    }
    else if (fused_activ == OCL4DNN_CONV_FUSED_ACTIV_PRELU)
    {
        kernel.set(argIdx++, (cl_mem)negative_slope_umat_.handle(ACCESS_READ));
    }
}
template
<
typename
Dtype
>
void
OCL4DNNConvSpatial
<
Dtype
>::
collectCommonInformation
()
{
...
...
@@ -221,6 +255,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
addDef
(
"ALIGNED_NUM_FILTERS"
,
(
int
)
alignSize
(
M_
,
simd_size
));
addDef
(
"OUT_BLOCK_SIZE"
,
(
output_block_width
*
output_block_height
));
addDef
(
"APPLY_BIAS"
,
bias_term_
);
setFusionDefine
(
fused_activ_
);
src_
=
cv
::
ocl
::
dnn
::
conv_layer_spatial_oclsrc
;
}
...
...
@@ -242,6 +277,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
addDef
(
"APPLY_BIAS"
,
bias_term_
);
addDef
(
"OUTPUT_Z"
,
M_
);
addDef
(
"ZPAR"
,
1
);
setFusionDefine
(
fused_activ_
);
src_
=
cv
::
ocl
::
dnn
::
conv_layer_spatial_oclsrc
;
}
...
...
@@ -278,6 +314,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
addDef
(
"TILE_N_LAST"
,
M_
%
32
);
addDef
(
"TILE_N_LAST_DIV8"
,
(
M_
%
32
)
/
8
);
addDef
(
"APPLY_BIAS"
,
bias_term_
);
setFusionDefine
(
fused_activ_
);
src_
=
ocl
::
dnn
::
conv_layer_spatial_oclsrc
;
}
}
...
...
@@ -302,6 +339,37 @@ void OCL4DNNConvSpatial<Dtype>::setupKernel()
setupKernelDetails
(
kernelType_
,
blockM_
,
blockK_
,
blockN_
);
}
// Records whether a bias term is used. The stored flag is later emitted as
// the "APPLY_BIAS" kernel definition by setupKernelDetails().
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setBias(bool bias_term)
{
    this->bias_term_ = bias_term;
}
// Enables or disables fusing a ReLU activation into the convolution.
//   fuse_activ : true  -> select OCL4DNN_CONV_FUSED_ACTIV_RELU and store `slope`
//                false -> select OCL4DNN_CONV_FUSED_ACTIV_NONE; `slope` is ignored
//                         and the previously stored slope is left untouched.
//   slope      : slope used for negative inputs when fusion is enabled.
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
{
    if (!fuse_activ)
    {
        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
        return;
    }

    fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU;
    negative_slope_ = slope;
}
// Enables or disables fusing a PReLU activation into the convolution.
//   fuse_activ : true  -> select OCL4DNN_CONV_FUSED_ACTIV_PRELU and copy the
//                         slopes into negative_slope_umat_
//                false -> select OCL4DNN_CONV_FUSED_ACTIV_NONE; `slope` is not read.
//   slope      : slope values; the first num_output_ floats are copied.
// NOTE(review): nothing here checks slope.size(); this assumes the caller
// supplies at least num_output_ elements (and a non-empty vector) whenever
// fuse_activ is true -- confirm at the call site.
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
{
    if (!fuse_activ)
    {
        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
        return;
    }

    fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_PRELU;

    // View the caller's storage as a num_output_ x 1 single-channel float
    // matrix, then copy its contents into the UMat used as the kernel argument.
    Mat slopeView(num_output_, 1, CV_32FC1, (uchar*)&slope[0]);
    slopeView.copyTo(negative_slope_umat_);
}
template
<
typename
Dtype
>
bool
OCL4DNNConvSpatial
<
Dtype
>::
Forward
(
const
UMat
&
bottom
,
const
UMat
&
weight
,
...
...
@@ -310,7 +378,6 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
int32_t
numImages
)
{
num_
=
numImages
;
prepareKernel
(
bottom
,
top
,
weight
,
bias
,
numImages
);
return
convolve
(
bottom
,
top
,
weight
,
bias
,
numImages
,
bestKernelConfig
,
cv
::
ocl
::
Queue
::
getDefault
());
}
...
...
@@ -358,7 +425,9 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
<<
"in"
<<
TUNING_SIZE
(
width_
)
<<
"x"
<<
TUNING_SIZE
(
height_
)
<<
"_"
<<
"p"
<<
pad_w_
<<
"x"
<<
pad_h_
<<
"_"
<<
"num"
<<
num_
<<
"_"
<<
"M"
<<
M_
;
<<
"M"
<<
M_
<<
"_"
<<
"activ"
<<
fused_activ_
;
key_
=
ocl
::
Device
::
getDefault
().
vendorName
()
+
"_EU"
+
cv
::
format
(
"%d"
,
ocl
::
Device
::
getDefault
().
maxComputeUnits
())
+
"_"
+
keyBuilder
.
str
();
key_sanitized_
=
key_
;
...
...
@@ -608,6 +677,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
return
false
;
cl_uint
argIdx
=
0
;
setFusionArg
(
fused_activ_
,
kernel
,
argIdx
);
UMat
img_buffer
;
if
(
image_offset
)
...
...
@@ -700,6 +770,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
return
false
;
cl_uint
argIdx
=
0
;
setFusionArg
(
fused_activ_
,
kernel
,
argIdx
);
UMat
img_buffer
;
if
(
image_offset
)
...
...
@@ -807,13 +878,16 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
int32_t
output_image_offset
=
n
*
top_dim_
+
output_w_
*
output_h_
*
M_
*
g
;
cl_uint
argIdx
=
0
;
int32_t
kernel_offset
=
kernel_h_
*
kernel_w_
*
(
channels_
/
group_
)
*
M_
*
g
;
int32_t
kernel_offset
=
kernel_h_
*
kernel_w_
*
(
channels_
/
group_
)
*
M_
*
g
;
ocl
::
Kernel
kernel
(
config
->
kernelName
.
c_str
(),
program
);
if
(
kernel
.
empty
())
return
false
;
cl_uint
argIdx
=
0
;
setFusionArg
(
fused_activ_
,
kernel
,
argIdx
);
kernel
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bottom
));
kernel
.
set
(
argIdx
++
,
image_offset
);
kernel
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
weight
));
...
...
@@ -1058,9 +1132,9 @@ bool OCL4DNNConvSpatial<float>::createGEMMLikeConvKernel(int32_t blockM,
}
template
<>
bool
OCL4DNNConvSpatial
<
float
>::
setupIDLF
(
int32_t
blockWidth
,
int32_t
blockHeight
,
int32_t
simd_size
)
bool
OCL4DNNConvSpatial
<
float
>::
createIDLFKernel
(
int32_t
blockWidth
,
int32_t
blockHeight
,
int32_t
simd_size
)
{
int32_t
workItemOutput
[
3
]
=
{
blockWidth
,
blockHeight
,
simd_size
};
const
int32_t
num_output_maps
=
M_
;
...
...
@@ -1122,7 +1196,7 @@ bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
src_
=
ocl
::
ProgramSource
();
if
(
kernelType
==
KERNEL_TYPE_INTEL_IDLF
)
return
setupIDLF
(
blockWidth
,
blockHeight
,
blockDepth
);
return
createIDLFKernel
(
blockWidth
,
blockHeight
,
blockDepth
);
else
if
(
kernelType
==
KERNEL_TYPE_BASIC
)
return
createBasicKernel
(
blockWidth
,
blockHeight
,
blockDepth
);
else
if
(
kernelType
==
KERNEL_TYPE_GEMM_LIKE
)
...
...
modules/dnn/src/opencl/conv_layer_spatial.cl
View file @
e0e40405
...
...
@@ -46,7 +46,19 @@
#
define
BIAS_KERNEL_ARG
#
endif
#
define
ACTIVATION_FUNCTION
(
_dst_,
_offset_,
_data_
)
do
{
(
_dst_
)
[
(
_offset_
)
]
=
(
_data_
)
;} while(0)
#
if
defined
(
FUSED_CONV_RELU
)
#
define
ACTIVATION_RELU_FUNCTION
(
x,
c
)
((
Dtype
)(
x
)
>
0
?
(
Dtype
)(
x
)
:
((
Dtype
)(
x
)
*
(
Dtype
)(
negative_slope
)))
#
define
NEGATIVE_SLOPE_ARG
Dtype
negative_slope,
#
elif
defined
(
FUSED_CONV_PRELU
)
#
define
ACTIVATION_RELU_FUNCTION
(
x,
c
)
((
Dtype
)(
x
)
>
0
?
(
Dtype
)(
x
)
:
((
Dtype
)(
x
)
*
(
Dtype
)(
negative_slope[c]
)))
#
define
NEGATIVE_SLOPE_ARG
__global
const
Dtype
*negative_slope,
#
else
#
define
ACTIVATION_RELU_FUNCTION
(
x,
c
)
(
x
)
#
define
NEGATIVE_SLOPE_ARG
#
endif
#
define
ACTIVATION_FUNCTION
(
_dst_,
_offset_,
_data_,
_channel_
)
do
{
(
_dst_
)
[
(
_offset_
)
]
=
ACTIVATION_RELU_FUNCTION
(
_data_,
_channel_
)
;} while(0)
#
define
__CAT
(
x,
y
)
x##y
#
define
CAT
(
x,
y
)
__CAT
(
x,
y
)
...
...
@@ -87,6 +99,7 @@
#ifdef KERNEL_BASIC
__kernel void ConvolveBasic(
NEGATIVE_SLOPE_ARG
__global Dtype* image_data,
int image_offset,
__global Dtype* kernel_data,
...
...
@@ -152,9 +165,9 @@ __kernel void ConvolveBasic(
{
int offset = convolved_image_offset + (kernelNum+kern)*output_height*output_width + outputY*output_width + outputX;
#if APPLY_BIAS
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern] + bias[biasIndex + kern]);
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern] + bias[biasIndex + kern]
, biasIndex + kern
);
#else
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern]);
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern]
, biasIndex + kern
);
#endif
}
}
...
...
@@ -180,6 +193,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
#endif
__kernel void
convolve_simd(
NEGATIVE_SLOPE_ARG
__global Dtype* inputs_base,
filter_qualifier Dtype* weights_base,
BIAS_KERNEL_ARG
...
...
@@ -359,7 +373,7 @@ convolve_simd(
for(unsigned int c = 0; c < OUT_BLOCK_WIDTH; c++) {
if (c + oc >= output_width) break;
// this does a scattered write to SIMD_SIZE different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c]);
ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c]
, fm
);
}
}
...
...
@@ -399,6 +413,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
#define ROW_PITCH input_width
#define GEMM_LIKE_KERNEL_ARGS \
NEGATIVE_SLOPE_ARG \
const __global Dtype *src0, \
const __global Dtype *src1, \
BIAS_KERNEL_ARG \
...
...
@@ -591,35 +606,15 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
Dtype4 *bias_vec;
bias_vec = (Dtype4*)bias;
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if (global_y * TILE_M < output_width * output_height )
{
for (int i = 0; i < 8; i++)
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 0 + i ) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 8 + i ) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 16 + i ) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 24 + i ) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out_offset + ( 0 + i ) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
ACTIVATION_FUNCTION(dst, out_offset + ( 8 + i ) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + 8 + i);
ACTIVATION_FUNCTION(dst, out_offset + ( 16 + i ) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + 16 + i);
ACTIVATION_FUNCTION(dst, out_offset + ( 24 + i ) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + 24 + i);
}
}
}
...
...
@@ -773,46 +768,25 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if (global_y * TILE_M < output_width * output_height )
{
for (int i = 0; i < 8; i++)
{
if ( TILE_N_LAST_DIV8 > 0 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 0+i) * out_pitch_y, blockC[0][i] + SUBGROUP_GET_BIAS(0, i));
ACTIVATION_FUNCTION(dst, out_offset + ( 0+i) * out_pitch_y, blockC[0][i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
}
if ( TILE_N_LAST_DIV8 > 1 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 8+i) * out_pitch_y, blockC[1][i] + SUBGROUP_GET_BIAS(1, i));
ACTIVATION_FUNCTION(dst, out_offset + ( 8+i) * out_pitch_y, blockC[1][i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
}
if ( TILE_N_LAST_DIV8 > 2 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + (16+i) * out_pitch_y, blockC[2][i] + SUBGROUP_GET_BIAS(2, i));
ACTIVATION_FUNCTION(dst, out_offset + (16+i) * out_pitch_y, blockC[2][i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
}
if ( TILE_N_LAST_DIV8 > 3 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + (24+i) * out_pitch_y, blockC[3][i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out_offset + (24+i) * out_pitch_y, blockC[3][i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
...
...
@@ -1038,60 +1012,24 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if( global_y * TILE_M < output_width * output_height )
{
for( int i = 0; i < 8; i++ )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
if( global_y * TILE_M + 1 < output_width * output_height )
{
for( int i = 0; i < 8; i++ )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC01[i] + SUBGROUP_GET_BIAS(0, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC11[i] + SUBGROUP_GET_BIAS(1, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC21[i] + SUBGROUP_GET_BIAS(2, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC31[i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC01[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC11[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC21[i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC31[i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
...
...
@@ -1281,13 +1219,6 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
Dtype4 *bias_vec;
bias_vec = (Dtype4*)bias;
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if( global_y * TILE_M < output_width * output_height )
{
...
...
@@ -1295,32 +1226,19 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
{
if ( TILE_N_LAST_DIV8 > 0 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC0[0][i] + SUBGROUP_GET_BIAS(0, i));
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC0[0][i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
}
if ( TILE_N_LAST_DIV8 > 1 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC0[1][i] + SUBGROUP_GET_BIAS(1, i));
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC0[1][i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
}
if ( TILE_N_LAST_DIV8 > 2 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC0[2][i] + SUBGROUP_GET_BIAS(2, i));
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC0[2][i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
}
if ( TILE_N_LAST_DIV8 > 3 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC0[3][i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC0[3][i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
...
...
@@ -1330,31 +1248,19 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
{
if ( TILE_N_LAST_DIV8 > 0 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC1[0][i] + SUBGROUP_GET_BIAS(0, i));
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC1[0][i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
}
if ( TILE_N_LAST_DIV8 > 1 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC1[1][i] + SUBGROUP_GET_BIAS(1, i));
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC1[1][i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
}
if ( TILE_N_LAST_DIV8 > 2 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC1[2][i] + SUBGROUP_GET_BIAS(2, i));
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC1[2][i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
}
if ( TILE_N_LAST_DIV8 > 3 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC1[3][i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC1[3][i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
...
...
@@ -1364,95 +1270,28 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
#endif
#if defined(GEMM_LIKE_CONV_32_2_SIMD16) || defined(GEMM_LIKE_CONV_32_1_SIMD16)
#ifdef FUSED_CONV_CHANNEL_RELU
#define INTERLEAVED_SIMD16_OUTPUT(_out_, _offset_, _m_) do {\
if (global_y * TILE_M < output_width * output_height ) \
{ \
if ( ( OUT_DEPTH % TILE_N ) == 0 ) {\
for (int i = 0; i < 16; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else if( ( OUT_DEPTH % 16 ) == 0 ) { \
if ( ( global_x + 1 ) < get_global_size(0) ) { \
for ( int i = 0; i < 16; i++ ) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else { \
for (int i = 0; i < 16; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
} \
} \
} \
else { \
if ( ( global_x + 1 ) < get_global_size(0) ) \
{ \
for ( int i = 0; i < 16; i++ ) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else { \
if ( (OUT_DEPTH % TILE_N) > 16 ) { \
for (int i = 0; i < 16 ; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
} \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else { \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
} \
} \
} \
} \
} \
}while(0)
#else
#define INTERLEAVED_SIMD16_OUTPUT(_out_, _offset_, _m_) do {\
if (global_y * TILE_M < output_width * output_height ) \
{ \
if ( ( OUT_DEPTH % TILE_N ) == 0 ) {\
for (int i = 0; i < 16; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)
, group_x * TILE_N + i
); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)
, group_x * TILE_N + i + 16
); \
} \
} \
else if( ( OUT_DEPTH % 16 ) == 0 ) { \
if ( ( global_x + 1 ) < get_global_size(0) ) { \
for ( int i = 0; i < 16; i++ ) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)
, group_x * TILE_N + i
); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)
, group_x * TILE_N + i + 16
); \
} \
} \
else { \
for (int i = 0; i < 16; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)
, group_x * TILE_N + i
); \
} \
} \
} \
...
...
@@ -1461,25 +1300,25 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
{ \
for ( int i = 0; i < 16; i++ ) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)
, group_x * TILE_N + i
); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)
, group_x * TILE_N + i + 16
); \
} \
} \
else { \
if ( (OUT_DEPTH % TILE_N) > 16 ) { \
for (int i = 0; i < 16 ; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)
, group_x * TILE_N + i
); \
} \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)
, group_x * TILE_N + i + 16
); \
} \
} \
else { \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)
, group_x * TILE_N + i
); \
} \
} \
} \
...
...
@@ -1487,7 +1326,6 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
} \
}while(0)
#endif
#endif
#ifdef GEMM_LIKE_CONV_32_1_SIMD16
#define TILE_M 1
...
...
@@ -1656,14 +1494,6 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
bias_vec
=
(
Dtype2*
)
bias
;
*bias_vec
=
as_Dtype2
(
SUB_GROUP_BLOCK_READ2
((
__global
INT_TYPE
*
)
biases_base
+
group_x
*
TILE_N
))
;
#
endif
#
ifdef
FUSED_CONV_CHANNEL_RELU
Dtype
slope[2]
;
Dtype2
*slope_vec
;
slope_vec
=
(
Dtype2*
)
slope
;
*slope_vec
=
as_Dtype2
(
SUB_GROUP_BLOCK_READ2
((
__global
INT_TYPE
*
)
negative_slope_base
+
group_x
*
TILE_N
))
;
Dtype
negative_slope
;
#
endif
INTERLEAVED_SIMD16_OUTPUT
(
dst,
out_offset,
0
)
;
}
#
endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment