Commit e0e40405 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #9847 from wzw-intel:ocl4dnn_fusion

parents ff037ebe 2d8f2c2a
......@@ -1028,7 +1028,7 @@ struct Net::Impl
void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
{
if( !fusion || !(preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU))
if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT)
return;
CV_TRACE_FUNCTION();
......@@ -1056,6 +1056,11 @@ struct Net::Impl
// with the current layer if they follow it. Normally, the are fused with the convolution layer,
// but some of them (like activation) may be fused with fully-connected, elemwise (+) and
// some other layers.
// TODO: OpenCL target support more fusion styles.
if ( preferableTarget == DNN_TARGET_OPENCL && ld.layerInstance->type.compare("Convolution") )
continue;
Ptr<Layer>& currLayer = ld.layerInstance;
if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
{
......@@ -1100,16 +1105,27 @@ struct Net::Impl
}
}
Ptr<ActivationLayer> nextActivLayer;
if( nextData )
nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0
&& currLayer->setActivation(nextActivLayer) )
// For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU
if ( preferableTarget != DNN_TARGET_OPENCL ||
(preferableTarget == DNN_TARGET_OPENCL &&
nextData &&
(!nextData->type.compare("ReLU") ||
!nextData->type.compare("ChannelsPReLU"))) )
{
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
Ptr<ActivationLayer> nextActivLayer;
if( nextData )
nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0
&& currLayer->setActivation(nextActivLayer) )
{
LayerData *activData = nextData;
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
}
}
}
......
......@@ -157,7 +157,20 @@ public:
#ifdef HAVE_OPENCL
Ptr<OCL4DNNConvSpatial<float> > convolutionOp;
std::vector<UMat> umat_blobs;
bool fusedBias;
bool newWeightAndBias;
bool newActiv;
ocl4dnnFusedActiv_t activType;
#endif
ConvolutionLayerImpl()
{
#ifdef HAVE_OPENCL
fusedBias = false;
newWeightAndBias = false;
newActiv = false;
activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
#endif
}
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
{
......@@ -209,6 +222,10 @@ public:
activ = layer;
if (activ.empty())
reluslope.clear();
#ifdef HAVE_OPENCL
newActiv = true;
activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
#endif
return !activ.empty();
}
......@@ -221,6 +238,10 @@ public:
// we will need to re-compute the weights with the batch
// norm coefficients taken into account
weightsMat.release();
#ifdef HAVE_OPENCL
newWeightAndBias = true;
fusedBias = false;
#endif
return !bnorm.empty();
}
......@@ -230,6 +251,10 @@ public:
// we will need to re-compute the weights with the scaling
// coefficients taken into account
weightsMat.release();
#ifdef HAVE_OPENCL
newWeightAndBias = true;
fusedBias = false;
#endif
return !scaleLayer.empty();
}
......@@ -665,19 +690,49 @@ public:
convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
}
for (size_t ii = 0; ii < outputs.size(); ii++)
if ( newWeightAndBias )
{
UMat inpMat, outMat;
inpMat = inputs[ii]->getUMat(ACCESS_READ);
outMat = outputs[ii].getUMat(ACCESS_WRITE);
int batch_size = inpMat.size[0];
weightsMat.copyTo(umat_blobs[0]);
if ( fusedBias )
{
if ( umat_blobs.size() < 2 )
umat_blobs.resize(2);
umat_blobs[1] = UMat(biasvec, true);
}
convolutionOp->setBias(fusedBias || hasBias());
newWeightAndBias = false;
}
if (!convolutionOp->Forward(inpMat, umat_blobs[0], hasBias() ? umat_blobs[1] : UMat(),
outMat, batch_size))
return false;
if ( newActiv )
{
if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU )
{
CV_Assert(!reluslope.empty());
convolutionOp->setActivReLU(true, reluslope[0]);
}
else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_PRELU)
{
CV_Assert(!reluslope.empty());
convolutionOp->setActivPReLU(true, reluslope);
}
else
{
convolutionOp->setActivReLU(false, 0);
convolutionOp->setActivPReLU(false, reluslope);
}
newActiv = false;
}
return true;
UMat inpMat, outMat;
inpMat = inputs[0]->getUMat(ACCESS_READ);
outMat = outputs[0].getUMat(ACCESS_WRITE);
int batch_size = inpMat.size[0];
return convolutionOp->Forward(inpMat,
umat_blobs[0],
(hasBias() || fusedBias) ? umat_blobs[1] : UMat(),
outMat,
batch_size);
}
#endif
......@@ -693,11 +748,6 @@ public:
CV_Assert(inputs.size() == (size_t)1 && inputs[0]->size[1] % blobs[0].size[1] == 0);
int ngroups = inputs[0]->size[1]/blobs[0].size[1];
CV_Assert(outputs[0].size[1] % ngroups == 0);
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals))
int k, outCn = blobs[0].size[0];
if( weightsMat.empty() )
......@@ -761,6 +811,11 @@ public:
}
}
#ifdef HAVE_OPENCL
if (shiftptr || shiftptr2)
fusedBias = true;
#endif
for( int i = 0; i < outCn; i++ )
{
float s1 = scaleptr ? scaleptr[i] : 1.f;
......@@ -784,7 +839,12 @@ public:
{
Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
if( !activ_relu.empty() )
{
reluslope.assign(outCn+2, activ_relu->negativeSlope);
#ifdef HAVE_OPENCL
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
#endif
}
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
if( !activ_chprelu.empty() )
......@@ -795,9 +855,16 @@ public:
reluslope.resize(outCn+2);
std::copy(mdata, mdata + outCn, reluslope.begin());
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
#ifdef HAVE_OPENCL
activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
#endif
}
}
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals))
int nstripes = std::max(getNumThreads(), 1);
ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope,
......
......@@ -25,7 +25,7 @@ Number of devices 1
Max clock frequency 950MHz
Device Partition (core)
Max number of sub-devices 0
Supported partition types by <unknown> (0x7FE000000000)
Supported partition types by <unknown> (0x7F2F00000000)
Max work item dimensions 3
Max work item sizes 256x256x256
Max work group size 256
......@@ -66,15 +66,15 @@ Number of devices 1
Support is emulated in software No
Correctly-rounded divide and sqrt operations No
Address bits 64, Little-Endian
Global memory size 26887677543 (25.04GiB)
Global memory size 26888119911 (25.04GiB)
Error Correction support No
Max memory allocation 4294959103 (4GiB)
Unified memory for Host and Device Yes
Shared Virtual Memory (SVM) capabilities (core)
Coarse-grained buffer sharing Yes
Fine-grained buffer sharing No
Fine-grained buffer sharing Yes
Fine-grained system sharing No
Atomics No
Atomics Yes
Minimum alignment for any data type 128 bytes
Alignment of base address 1024 bits (128 bytes)
Preferred alignment for atomics
......@@ -140,150 +140,154 @@ NULL platform behavior
clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform
clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform
********************************************************************************/
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","12 2 8 2 1 1 8 1 0 ",
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192","2 7 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48","4 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
"EU72_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64","4 1 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","4 6 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4","14 1 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4","4 4 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208","2 6 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32","4 6 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4","12 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32","3 6 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384","2 7 16 2 1 1 16 1 0 ",
"EU72_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4","1 1 1 4 1 1 1 0 1 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96","4 5 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192","10 2 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192","6 4 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96","8 3 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32","8 1 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384","4 7 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256","2 6 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 4 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48","4 3 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5","2 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24","8 2 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112","8 2 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","8 2 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","4 6 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96","4 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192","10 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","12 2 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512","4 6 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16","8 2 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","8 3 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288","2 7 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16","2 5 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4","8 3 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256","2 7 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256","2 5 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU72_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16","6 4 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192","2 5 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 2 8 2 1 1 8 1 0 ",
"EU72_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32","8 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","12 2 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16","12 1 8 2 1 1 8 1 0 ",
"EU72_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","4 7 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16","12 1 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","12 1 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","12 2 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24","12 1 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13","1 1 1 4 1 1 1 0 1 ",
"EU72_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32","6 4 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024","2 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32","4 6 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","12 2 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128","2 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112","1 8 32 5 1 8 1 1 0 ",
"EU72_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2","1 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","8 1 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1","6 4 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0","1 1 1 4 1 1 1 0 1 ",
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 2 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2","3 3 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 4 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","2 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1","12 2 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2","2 8 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1","8 3 8 2 1 1 8 1 0 ",
"EU72_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2","4 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","4 4 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ",
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 4 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0","1 16 32 5 1 16 1 1 0 ",
"EU72_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0","2 3 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1","4 4 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1","2 8 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0","1 16 32 5 1 16 1 1 0 ",
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1","2 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2","8 3 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1","4 7 8 2 1 1 8 1 0 ",
"EU72_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1","8 2 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0","4 6 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2","2 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1","6 4 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1","2 8 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2","12 2 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2","4 7 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1","4 4 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ",
"EU72_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0","4 1 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU72_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0","1 4 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1","4 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2","12 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","8 1 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","12 2 8 2 1 1 8 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 3 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0","8 3 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1","6 2 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0","1 8 32 5 1 8 1 1 0 ",
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","8 3 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2","2 6 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","2 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","6 4 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2","2 7 16 2 1 1 16 1 0 ",
"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1","2 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","12 1 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","8 1 16 2 1 1 16 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1","4 2 16 2 1 1 16 1 0 ",
"EU72_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0","1 1 1 4 1 1 1 0 1 ",
"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1","2 7 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1","4 3 16 2 1 1 16 1 0 ",
"EU72_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1","2 7 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1","2 5 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1","4 6 8 2 1 1 8 1 0 ",
"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ",
"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0","2 8 32 5 1 8 1 1 0 ",
"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 8 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1","4 4 16 2 1 1 16 1 0 ",
"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1","1 16 32 5 1 16 1 1 0 ",
"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1","2 7 16 2 1 1 16 1 0 ",
// Below is the information for OpenCL based on which these configurations tuned
/*******************************************************************************
Number of platforms 1
......@@ -291,7 +295,7 @@ Number of platforms 1
Platform Vendor Intel(R) Corporation
Platform Version OpenCL 2.0
Platform Profile FULL_PROFILE
Platform Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_driver_diagnostics cl_intel_motion_estimation cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
Platform Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_device_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
Platform Extensions function suffix INTEL
Platform Name Intel(R) OpenCL
......@@ -300,15 +304,15 @@ Number of devices 1
Device Vendor Intel(R) Corporation
Device Vendor ID 0x8086
Device Version OpenCL 2.0
Driver Version 16.5.56875
Device OpenCL C Version OpenCL C 2.0 ( using IGC )
Driver Version r4.1.61547
Device OpenCL C Version OpenCL C 2.0
Device Type GPU
Device Profile FULL_PROFILE
Max compute units 48
Max clock frequency 950MHz
Device Partition (core)
Max number of sub-devices 0
Supported partition types by <unknown> (0x7F4B00000000)
Supported partition types by <unknown> (0x7F2200000000)
Max work item dimensions 3
Max work item sizes 256x256x256
Max work group size 256
......@@ -410,7 +414,7 @@ Number of devices 1
Device Available Yes
Compiler Available Yes
Linker Available Yes
Device Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_driver_diagnostics cl_intel_motion_estimation cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
Device Extensions cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_device_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
NULL platform behavior
clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...) No platform
......@@ -423,150 +427,154 @@ NULL platform behavior
clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform
clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform
********************************************************************************/
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32","8 1 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96","1 16 32 5 1 16 1 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128","2 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","8 1 16 2 1 1 16 1 0 ",
"EU48_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4","6 4 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112","8 3 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512","2 7 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16","8 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","4 7 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320","2 7 16 2 1 1 16 1 0 ",
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48","4 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","2 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192","2 8 16 2 1 1 16 1 0 ",
"EU48_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","12 1 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","12 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","8 2 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256","2 5 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32","4 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","4 7 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13","1 1 1 4 1 1 1 0 1 ",
"EU48_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64","4 1 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32","3 3 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96","2 8 32 5 1 8 1 1 0 ",
"EU48_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32","4 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","4 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128","2 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128","6 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4","4 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048","1 16 32 5 1 16 1 1 0 ",
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","4 7 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192","2 5 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","12 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512","4 7 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","12 2 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 4 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288","2 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48","4 6 8 2 1 1 8 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","8 1 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","12 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","4 6 8 2 1 1 8 1 0 ",
"EU48_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128","4 5 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256","2 6 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 3 8 2 1 1 8 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48","4 2 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64","10 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","4 5 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208","2 5 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256","1 16 32 5 1 16 1 1 0 ",
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192","2 7 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32","4 3 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96","4 2 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208","2 5 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96","4 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24","12 1 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16","4 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320","2 8 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192","6 4 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 3 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5","2 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64","1 16 32 5 1 16 1 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","8 2 16 2 1 1 16 1 0 ",
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","12 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4","1 1 1 4 1 1 1 0 1 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24","8 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16","12 1 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128","10 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","4 7 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16","12 2 8 2 1 1 8 1 0 ",
"EU48_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2","1 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4","8 2 8 2 1 1 8 1 0 ",
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 2 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32","2 8 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","8 1 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1","4 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1","2 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 3 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0","1 16 32 5 1 16 1 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1","4 4 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1","4 3 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0","2 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1","2 8 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1","12 1 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1","12 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1","4 6 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","8 1 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2","2 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ",
"EU48_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1","4 2 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0","1 1 1 4 1 1 1 0 1 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1","12 1 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 4 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1","6 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ",
"EU48_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2","8 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1","12 1 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1","14 2 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0","1 2 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0","4 1 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1","2 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 2 8 2 1 1 8 1 0 ",
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1","4 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","4 4 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2","8 3 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","6 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","8 3 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0","4 4 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0","1 1 1 4 1 1 1 0 1 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2","2 8 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","2 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1","4 2 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","8 1 16 2 1 1 16 1 0 ",
"EU48_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","4 6 8 2 1 1 8 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2","10 2 8 2 1 1 8 1 0 ",
"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","2 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1","12 1 8 2 1 1 8 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2","2 8 16 2 1 1 16 1 0 ",
"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1","2 8 16 2 1 1 16 1 0 ",
"EU48_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1","4 3 16 2 1 1 16 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1","2 8 32 5 1 8 1 1 0 ",
"EU48_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU48_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
// Below is the information for OpenCL based on which these configurations tuned
/*******************************************************************************
Number of platforms 1
......@@ -591,7 +599,7 @@ Number of devices 1
Max clock frequency 1050MHz
Device Partition (core)
Max number of sub-devices 0
Supported partition types by <unknown> (0x7F5100000000)
Supported partition types by <unknown> (0x7FC300000000)
Max work item dimensions 3
Max work item sizes 256x256x256
Max work group size 256
......@@ -632,9 +640,9 @@ Number of devices 1
Support is emulated in software No
Correctly-rounded divide and sqrt operations No
Address bits 64, Little-Endian
Global memory size 6588802663 (6.136GiB)
Global memory size 6588809216 (6.136GiB)
Error Correction support No
Max memory allocation 3294401331 (3.068GiB)
Max memory allocation 3294404608 (3.068GiB)
Unified memory for Host and Device Yes
Shared Virtual Memory (SVM) capabilities (core)
Coarse-grained buffer sharing Yes
......@@ -648,13 +656,13 @@ Number of devices 1
Global 64 bytes
Local 64 bytes
Max size for global variable 65536 (64KiB)
Preferred total size of global vars 3294401331 (3.068GiB)
Preferred total size of global vars 3294404608 (3.068GiB)
Global Memory cache type Read/Write
Global Memory cache size 524288
Global Memory cache line 64 bytes
Image support Yes
Max number of samplers per kernel 16
Max size for 1D images from buffer 205900083 pixels
Max size for 1D images from buffer 205900288 pixels
Max 1D or 2D image array size 2048 images
Base address alignment for 2D image buffers 4 bytes
Pitch alignment for 2D image buffers 4 bytes
......@@ -668,7 +676,7 @@ Number of devices 1
Max pipe packet size 1024
Local memory type Local
Local memory size 65536 (64KiB)
Max constant buffer size 3294401331 (3.068GiB)
Max constant buffer size 3294404608 (3.068GiB)
Max number of constant args 8
Max size of kernel argument 1024
Queue properties (on host)
......@@ -706,149 +714,153 @@ NULL platform behavior
clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform
clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform
********************************************************************************/
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
"EU24_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32","4 6 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 2 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224","2 5 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU24_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208","2 7 16 2 1 1 16 1 0 ",
"EU24_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","1 8 32 5 1 8 1 1 0 ",
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32","3 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24","8 3 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208","2 7 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48","4 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048","4 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192","6 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
"EU24_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","4 6 8 2 1 1 8 1 0 ",
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048","1 16 32 5 1 16 1 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4","1 1 1 4 1 1 1 0 1 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192","6 4 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320","2 8 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256","2 5 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16","8 3 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32","4 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96","8 3 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16","6 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96","4 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288","2 8 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288","2 7 16 2 1 1 16 1 0 ",
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4","10 2 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16","8 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
"EU24_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32","4 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","4 6 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","4 6 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32","1 8 32 5 1 8 1 1 0 ",
"EU24_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32","2 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48","8 1 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4","8 3 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192","2 7 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96","4 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","4 6 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32","2 8 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128","10 2 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64","2 8 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5","2 3 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48","4 6 8 2 1 1 8 1 0 ",
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4","8 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48","4 6 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4","4 4 16 2 1 1 16 1 0 ",
"EU24_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2","1 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64","4 1 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192","6 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16","8 3 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384","4 7 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13","1 1 1 4 1 1 1 0 1 ",
"EU24_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24","8 3 8 2 1 1 8 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32","4 3 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1","8 3 8 2 1 1 8 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2","8 3 16 2 1 1 16 1 0 ",
"EU24_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 4 16 2 1 1 16 1 0 ",
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0","2 4 8 2 1 1 8 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1","4 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2","10 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1","12 2 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1","4 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2","2 8 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 2 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1","4 6 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","8 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1","4 4 16 2 1 1 16 1 0 ",
"EU24_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0","1 1 1 4 1 1 1 0 1 ",
"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","4 4 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2","2 4 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 6 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","4 2 16 2 1 1 16 1 0 ",
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0","4 4 8 2 1 1 8 1 0 ",
"EU24_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0","4 1 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1","4 3 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1","12 2 8 2 1 1 8 1 0 ",
"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 7 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1","4 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1","14 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","12 2 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1","14 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1","8 3 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1","2 8 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1","4 6 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1","14 2 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1","12 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2","12 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 3 16 2 1 1 16 1 0 ",
"EU24_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0","1 8 32 5 1 8 1 1 0 ",
"EU24_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0","1 3 8 2 1 1 8 1 0 ",
"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0","2 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0","1 1 1 4 1 1 1 0 1 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1","2 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","12 2 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ",
"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1","4 2 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 3 8 2 1 1 8 1 0 ",
"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0","2 8 32 5 1 8 1 1 0 ",
"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1","4 4 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1","2 7 16 2 1 1 16 1 0 ",
"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ",
"EU24_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ",
"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1","4 7 16 2 1 1 16 1 0 ",
"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ",
};
#endif
......@@ -73,6 +73,11 @@ struct OCL4DNNConvConfig
bool bias_term; // = false;
};
typedef enum {
OCL4DNN_CONV_FUSED_ACTIV_NONE = 0,
OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
} ocl4dnnFusedActiv_t;
template<typename Dtype>
class OCL4DNNConvSpatial
......@@ -80,9 +85,13 @@ class OCL4DNNConvSpatial
public:
explicit OCL4DNNConvSpatial(OCL4DNNConvConfig config);
~OCL4DNNConvSpatial();
bool Forward(const UMat& bottom_data, const UMat& weight,
bool Forward(const UMat& bottom_data,
const UMat& weight,
const UMat& bias,
UMat& top_data, int32_t batch_size);
void setActivReLU(bool fuse_activ, float slope);
void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
void setBias(bool bias_term);
private:
struct kernelConfig
......@@ -194,9 +203,9 @@ class OCL4DNNConvSpatial
int32_t blockWidth,
int32_t blockHeight,
int32_t blockDepth);
bool setupIDLF(int32_t blockWidth,
int32_t blockHeight,
int32_t blockDepth);
bool createIDLFKernel(int32_t blockWidth,
int32_t blockHeight,
int32_t blockDepth);
bool createBasicKernel(int32_t blockWidth,
int32_t blockHeight,
int32_t blockDepth);
......@@ -244,10 +253,13 @@ class OCL4DNNConvSpatial
int lx, int ly, int lz,
bool swizzle, bool nullLocal);
void generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems);
void setFusionDefine(ocl4dnnFusedActiv_t fused_activ);
void setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx);
int32_t group_;
bool bias_term_;
UMat swizzled_weights_umat;
UMat bottom_data2_;
int32_t bottom_index_;
int32_t output_h_;
......@@ -291,6 +303,9 @@ class OCL4DNNConvSpatial
std::stringstream options_;
cv::ocl::ProgramSource src_;
int32_t prev_kernel_type_;
bool negative_slope_;
UMat negative_slope_umat_;
ocl4dnnFusedActiv_t fused_activ_;
};
typedef enum {
......
......@@ -78,6 +78,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
num_output_ = config.out_shape[dims - spatial_dims - 1];
group_ = config.group;
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
negative_slope_ = 0;
prev_kernel_type_ = -1;
tuned_ = false;
......@@ -138,6 +140,38 @@ OCL4DNNConvSpatial<Dtype>::~OCL4DNNConvSpatial()
}
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ)
{
switch (fused_activ) {
case OCL4DNN_CONV_FUSED_ACTIV_RELU:
addDef("FUSED_CONV_RELU", 1);
break;
case OCL4DNN_CONV_FUSED_ACTIV_PRELU:
addDef("FUSED_CONV_PRELU", 1);
break;
default:
;
}
return;
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx)
{
switch (fused_activ) {
case OCL4DNN_CONV_FUSED_ACTIV_RELU:
kernel.set(argIdx++, (float)negative_slope_);
break;
case OCL4DNN_CONV_FUSED_ACTIV_PRELU:
kernel.set(argIdx++, (cl_mem)negative_slope_umat_.handle(ACCESS_READ));
break;
default:
;
}
return;
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
{
......@@ -221,6 +255,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
addDef("ALIGNED_NUM_FILTERS", (int)alignSize(M_, simd_size));
addDef("OUT_BLOCK_SIZE", (output_block_width*output_block_height));
addDef("APPLY_BIAS", bias_term_);
setFusionDefine(fused_activ_);
src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
}
......@@ -242,6 +277,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
addDef("APPLY_BIAS", bias_term_);
addDef("OUTPUT_Z", M_);
addDef("ZPAR", 1);
setFusionDefine(fused_activ_);
src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
}
......@@ -278,6 +314,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
addDef("TILE_N_LAST", M_ % 32);
addDef("TILE_N_LAST_DIV8", (M_ % 32) / 8);
addDef("APPLY_BIAS", bias_term_);
setFusionDefine(fused_activ_);
src_ = ocl::dnn::conv_layer_spatial_oclsrc;
}
}
......@@ -302,6 +339,37 @@ void OCL4DNNConvSpatial<Dtype>::setupKernel()
setupKernelDetails(kernelType_, blockM_, blockK_, blockN_);
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setBias(bool bias_term)
{
bias_term_ = bias_term;
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
{
if ( fuse_activ )
{
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU;
negative_slope_ = slope;
}
else
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
{
if ( fuse_activ )
{
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
Mat tmpMat = Mat(num_output_, 1, CV_32FC1, (uchar*)&slope[0]);
tmpMat.copyTo(negative_slope_umat_);
}
else
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
}
template<typename Dtype>
bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
const UMat& weight,
......@@ -310,7 +378,6 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
int32_t numImages)
{
num_ = numImages;
prepareKernel(bottom, top, weight, bias, numImages);
return convolve(bottom, top, weight, bias, numImages, bestKernelConfig, cv::ocl::Queue::getDefault());
}
......@@ -358,7 +425,9 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
<< "in" << TUNING_SIZE(width_) << "x" << TUNING_SIZE(height_) << "_"
<< "p" << pad_w_ << "x" << pad_h_ << "_"
<< "num" << num_ << "_"
<< "M" << M_;
<< "M" << M_ << "_"
<< "activ" << fused_activ_;
key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
key_sanitized_ = key_;
......@@ -608,6 +677,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
return false;
cl_uint argIdx = 0;
setFusionArg(fused_activ_, kernel, argIdx);
UMat img_buffer;
if (image_offset)
......@@ -700,6 +770,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
return false;
cl_uint argIdx = 0;
setFusionArg(fused_activ_, kernel, argIdx);
UMat img_buffer;
if (image_offset)
......@@ -807,13 +878,16 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
int32_t output_image_offset = n * top_dim_
+ output_w_ * output_h_ * M_ * g;
cl_uint argIdx = 0;
int32_t kernel_offset = kernel_h_ * kernel_w_ * (channels_ / group_) * M_ * g;
int32_t kernel_offset = kernel_h_ * kernel_w_ *
(channels_ / group_) * M_
* g;
ocl::Kernel kernel(config->kernelName.c_str(), program);
if (kernel.empty())
return false;
cl_uint argIdx = 0;
setFusionArg(fused_activ_, kernel, argIdx);
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
kernel.set(argIdx++, image_offset);
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
......@@ -1058,9 +1132,9 @@ bool OCL4DNNConvSpatial<float>::createGEMMLikeConvKernel(int32_t blockM,
}
template<>
bool OCL4DNNConvSpatial<float>::setupIDLF(int32_t blockWidth,
int32_t blockHeight,
int32_t simd_size)
bool OCL4DNNConvSpatial<float>::createIDLFKernel(int32_t blockWidth,
int32_t blockHeight,
int32_t simd_size)
{
int32_t workItemOutput[3] = { blockWidth, blockHeight, simd_size };
const int32_t num_output_maps = M_;
......@@ -1122,7 +1196,7 @@ bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
src_ = ocl::ProgramSource();
if (kernelType == KERNEL_TYPE_INTEL_IDLF)
return setupIDLF(blockWidth, blockHeight, blockDepth);
return createIDLFKernel(blockWidth, blockHeight, blockDepth);
else if (kernelType == KERNEL_TYPE_BASIC)
return createBasicKernel(blockWidth, blockHeight, blockDepth);
else if (kernelType == KERNEL_TYPE_GEMM_LIKE)
......
......@@ -46,7 +46,19 @@
#define BIAS_KERNEL_ARG
#endif
#define ACTIVATION_FUNCTION(_dst_, _offset_, _data_) do { (_dst_)[(_offset_)] = (_data_);} while(0)
#if defined(FUSED_CONV_RELU)
#define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
#elif defined(FUSED_CONV_PRELU)
#define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
#else
#define ACTIVATION_RELU_FUNCTION(x, c) (x)
#define NEGATIVE_SLOPE_ARG
#endif
#define ACTIVATION_FUNCTION(_dst_, _offset_, _data_, _channel_) do { (_dst_)[(_offset_)] = ACTIVATION_RELU_FUNCTION(_data_, _channel_);} while(0)
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
......@@ -87,6 +99,7 @@
#ifdef KERNEL_BASIC
__kernel void ConvolveBasic(
NEGATIVE_SLOPE_ARG
__global Dtype* image_data,
int image_offset,
__global Dtype* kernel_data,
......@@ -152,9 +165,9 @@ __kernel void ConvolveBasic(
{
int offset = convolved_image_offset + (kernelNum+kern)*output_height*output_width + outputY*output_width + outputX;
#if APPLY_BIAS
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern] + bias[biasIndex + kern]);
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern] + bias[biasIndex + kern], biasIndex + kern);
#else
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern]);
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern], biasIndex + kern);
#endif
}
}
......@@ -180,6 +193,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
#endif
__kernel void
convolve_simd(
NEGATIVE_SLOPE_ARG
__global Dtype* inputs_base,
filter_qualifier Dtype* weights_base,
BIAS_KERNEL_ARG
......@@ -359,7 +373,7 @@ convolve_simd(
for(unsigned int c = 0; c < OUT_BLOCK_WIDTH; c++) {
if (c + oc >= output_width) break;
// this does a scattered write to SIMD_SIZE different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c]);
ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c], fm);
}
}
......@@ -399,6 +413,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
#define ROW_PITCH input_width
#define GEMM_LIKE_KERNEL_ARGS \
NEGATIVE_SLOPE_ARG \
const __global Dtype *src0, \
const __global Dtype *src1, \
BIAS_KERNEL_ARG \
......@@ -591,35 +606,15 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
Dtype4 *bias_vec;
bias_vec = (Dtype4*)bias;
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if (global_y * TILE_M < output_width * output_height )
{
for (int i = 0; i < 8; i++)
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 0 + i ) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 8 + i ) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 16 + i ) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 24 + i ) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out_offset + ( 0 + i ) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
ACTIVATION_FUNCTION(dst, out_offset + ( 8 + i ) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + 8 + i);
ACTIVATION_FUNCTION(dst, out_offset + ( 16 + i ) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + 16 + i);
ACTIVATION_FUNCTION(dst, out_offset + ( 24 + i ) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + 24 + i);
}
}
}
......@@ -773,46 +768,25 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if (global_y * TILE_M < output_width * output_height )
{
for (int i = 0; i < 8; i++)
{
if ( TILE_N_LAST_DIV8 > 0 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 0+i) * out_pitch_y, blockC[0][i] + SUBGROUP_GET_BIAS(0, i));
ACTIVATION_FUNCTION(dst, out_offset + ( 0+i) * out_pitch_y, blockC[0][i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
}
if ( TILE_N_LAST_DIV8 > 1 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + ( 8+i) * out_pitch_y, blockC[1][i] + SUBGROUP_GET_BIAS(1, i));
ACTIVATION_FUNCTION(dst, out_offset + ( 8+i) * out_pitch_y, blockC[1][i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
}
if ( TILE_N_LAST_DIV8 > 2 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + (16+i) * out_pitch_y, blockC[2][i] + SUBGROUP_GET_BIAS(2, i));
ACTIVATION_FUNCTION(dst, out_offset + (16+i) * out_pitch_y, blockC[2][i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
}
if ( TILE_N_LAST_DIV8 > 3 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out_offset + (24+i) * out_pitch_y, blockC[3][i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out_offset + (24+i) * out_pitch_y, blockC[3][i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
......@@ -1038,60 +1012,24 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if( global_y * TILE_M < output_width * output_height )
{
for( int i = 0; i < 8; i++ )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
if( global_y * TILE_M + 1 < output_width * output_height )
{
for( int i = 0; i < 8; i++ )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC01[i] + SUBGROUP_GET_BIAS(0, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC11[i] + SUBGROUP_GET_BIAS(1, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC21[i] + SUBGROUP_GET_BIAS(2, i));
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC31[i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC01[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC11[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC21[i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC31[i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
......@@ -1281,13 +1219,6 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
Dtype4 *bias_vec;
bias_vec = (Dtype4*)bias;
*bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[4];
Dtype4 *slope_vec;
slope_vec = (Dtype4*)slope;
*slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
if( global_y * TILE_M < output_width * output_height )
{
......@@ -1295,32 +1226,19 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
{
if ( TILE_N_LAST_DIV8 > 0 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC0[0][i] + SUBGROUP_GET_BIAS(0, i));
ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC0[0][i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
}
if ( TILE_N_LAST_DIV8 > 1 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC0[1][i] + SUBGROUP_GET_BIAS(1, i));
ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC0[1][i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
}
if ( TILE_N_LAST_DIV8 > 2 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC0[2][i] + SUBGROUP_GET_BIAS(2, i));
ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC0[2][i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
}
if ( TILE_N_LAST_DIV8 > 3 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC0[3][i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC0[3][i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
......@@ -1330,31 +1248,19 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
{
if ( TILE_N_LAST_DIV8 > 0 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[0], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC1[0][i] + SUBGROUP_GET_BIAS(0, i));
ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC1[0][i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i);
}
if ( TILE_N_LAST_DIV8 > 1 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[1], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC1[1][i] + SUBGROUP_GET_BIAS(1, i));
ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC1[1][i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 8);
}
if ( TILE_N_LAST_DIV8 > 2 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[2], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC1[2][i] + SUBGROUP_GET_BIAS(2, i));
ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC1[2][i] + SUBGROUP_GET_BIAS(2, i), group_x * TILE_N + i + 16);
}
if ( TILE_N_LAST_DIV8 > 3 )
{
#ifdef FUSED_CONV_CHANNEL_RELU
negative_slope = intel_sub_group_shuffle(slope[3], i);
#endif
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC1[3][i] + SUBGROUP_GET_BIAS(3, i));
ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC1[3][i] + SUBGROUP_GET_BIAS(3, i), group_x * TILE_N + i + 24);
}
}
}
......@@ -1364,95 +1270,28 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
#endif
#if defined(GEMM_LIKE_CONV_32_2_SIMD16) || defined(GEMM_LIKE_CONV_32_1_SIMD16)
#ifdef FUSED_CONV_CHANNEL_RELU
#define INTERLEAVED_SIMD16_OUTPUT(_out_, _offset_, _m_) do {\
if (global_y * TILE_M < output_width * output_height ) \
{ \
if ( ( OUT_DEPTH % TILE_N ) == 0 ) {\
for (int i = 0; i < 16; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else if( ( OUT_DEPTH % 16 ) == 0 ) { \
if ( ( global_x + 1 ) < get_global_size(0) ) { \
for ( int i = 0; i < 16; i++ ) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else { \
for (int i = 0; i < 16; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
} \
} \
} \
else { \
if ( ( global_x + 1 ) < get_global_size(0) ) \
{ \
for ( int i = 0; i < 16; i++ ) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else { \
if ( (OUT_DEPTH % TILE_N) > 16 ) { \
for (int i = 0; i < 16 ; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
} \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[1], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
} \
} \
else { \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
negative_slope = intel_sub_group_shuffle(slope[0], i); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
} \
} \
} \
} \
} \
}while(0)
#else
#define INTERLEAVED_SIMD16_OUTPUT(_out_, _offset_, _m_) do {\
if (global_y * TILE_M < output_width * output_height ) \
{ \
if ( ( OUT_DEPTH % TILE_N ) == 0 ) {\
for (int i = 0; i < 16; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 16); \
} \
} \
else if( ( OUT_DEPTH % 16 ) == 0 ) { \
if ( ( global_x + 1 ) < get_global_size(0) ) { \
for ( int i = 0; i < 16; i++ ) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 16); \
} \
} \
else { \
for (int i = 0; i < 16; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i); \
} \
} \
} \
......@@ -1461,25 +1300,25 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
{ \
for ( int i = 0; i < 16; i++ ) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 16); \
} \
} \
else { \
if ( (OUT_DEPTH % TILE_N) > 16 ) { \
for (int i = 0; i < 16 ; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i); \
} \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i), group_x * TILE_N + i + 16); \
} \
} \
else { \
for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
{ \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i), group_x * TILE_N + i); \
} \
} \
} \
......@@ -1487,7 +1326,6 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
} \
}while(0)
#endif
#endif
#ifdef GEMM_LIKE_CONV_32_1_SIMD16
#define TILE_M 1
......@@ -1656,14 +1494,6 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
bias_vec = (Dtype2*)bias;
*bias_vec = as_Dtype2(SUB_GROUP_BLOCK_READ2((__global INT_TYPE *)biases_base + group_x * TILE_N));
#endif
#ifdef FUSED_CONV_CHANNEL_RELU
Dtype slope[2];
Dtype2 *slope_vec;
slope_vec = (Dtype2*)slope;
*slope_vec = as_Dtype2(SUB_GROUP_BLOCK_READ2((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
Dtype negative_slope;
#endif
INTERLEAVED_SIMD16_OUTPUT(dst, out_offset, 0);
}
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment