Project: opencv
Commit e0e40405, authored Oct 27, 2017 by Vadim Pisarevsky

Merge pull request #9847 from wzw-intel:ocl4dnn_fusion

Parents: ff037ebe, 2d8f2c2a

Showing 6 changed files with 209 additions and 37 deletions:
modules/dnn/src/dnn.cpp                                      +26  -10
modules/dnn/src/layers/convolution_layer.cpp                 +82  -15
modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp     +0   -0
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp                  +19   -4
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp         +82   -8
modules/dnn/src/opencl/conv_layer_spatial.cl                  +0   -0
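In short, this merge lets the OCL4DNN (OpenCL) convolution path fold the bias term and a following ReLU / ChannelsPReLU activation into the convolution kernel itself, instead of running the activation as a separate layer. As a rough CPU-side illustration of what such fusion buys (a hypothetical helper, not code from this patch): the activation is applied while each output value is still at hand, saving a second full pass over the output tensor.

#include <cstddef>
#include <vector>

// Hypothetical illustration of conv/bias/ReLU fusion: one pass over the output
// instead of conv -> write -> re-read -> activate -> write.
static void addBiasAndReluFused(std::vector<float>& convOut,          // conv result, CHW layout
                                const std::vector<float>& bias,       // one bias per channel
                                std::size_t channels, std::size_t planeSize,
                                float negativeSlope)                  // 0 => plain ReLU
{
    for (std::size_t c = 0; c < channels; ++c)
        for (std::size_t i = 0; i < planeSize; ++i)
        {
            float v = convOut[c * planeSize + i] + bias[c];
            convOut[c * planeSize + i] = v > 0.f ? v : v * negativeSlope;
        }
}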
modules/dnn/src/dnn.cpp
@@ -1028,7 +1028,7 @@ struct Net::Impl
     void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
     {
-        if( !fusion || !(preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU))
+        if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT)
             return;
 
         CV_TRACE_FUNCTION();
@@ -1056,6 +1056,11 @@ struct Net::Impl
             // with the current layer if they follow it. Normally, the are fused with the convolution layer,
             // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
             // some other layers.
+            // TODO: OpenCL target support more fusion styles.
+            if ( preferableTarget == DNN_TARGET_OPENCL &&
+                 ld.layerInstance->type.compare("Convolution") )
+                continue;
+
             Ptr<Layer>& currLayer = ld.layerInstance;
             if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
             {
@@ -1100,16 +1105,27 @@ struct Net::Impl
                 }
             }
 
-                Ptr<ActivationLayer> nextActivLayer;
-                if( nextData )
-                    nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
-
-                if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0
-                        && currLayer->setActivation(nextActivLayer) )
-                {
-                    printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
-                    nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
-                }
+                // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU
+                if ( preferableTarget != DNN_TARGET_OPENCL ||
+                     (preferableTarget == DNN_TARGET_OPENCL &&
+                      nextData &&
+                      (!nextData->type.compare("ReLU") ||
+                       !nextData->type.compare("ChannelsPReLU"))) )
+                {
+                    Ptr<ActivationLayer> nextActivLayer;
+                    if( nextData )
+                        nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
+
+                    if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0
+                            && currLayer->setActivation(nextActivLayer) )
+                    {
+                        LayerData *activData = nextData;
+                        printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
+                        activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    }
+                }
             }
         }
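Net effect of the hunk above: fusion now runs for any DNN_BACKEND_DEFAULT target, but on the OpenCL target only a Convolution layer may absorb its successor, and only when that successor is a ReLU or ChannelsPReLU activation. A compact restatement of the new gate (a hypothetical helper, not part of the patch; DNN_TARGET_OPENCL is the existing OpenCV constant):

#include <string>
#include <opencv2/dnn.hpp>

// Hypothetical predicate mirroring the condition added in fuseLayers() above.
static bool activationFusableOnTarget(int preferableTarget, const std::string& nextLayerType)
{
    if (preferableTarget != cv::dnn::DNN_TARGET_OPENCL)
        return true;                          // non-OpenCL targets keep the old fusion rules
    return nextLayerType == "ReLU" ||         // OpenCL target: only these activations fuse
           nextLayerType == "ChannelsPReLU";
}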
modules/dnn/src/layers/convolution_layer.cpp
@@ -157,7 +157,20 @@ public:
 #ifdef HAVE_OPENCL
     Ptr<OCL4DNNConvSpatial<float> > convolutionOp;
     std::vector<UMat> umat_blobs;
+    bool fusedBias;
+    bool newWeightAndBias;
+    bool newActiv;
+    ocl4dnnFusedActiv_t activType;
 #endif
 
+    ConvolutionLayerImpl()
+    {
+#ifdef HAVE_OPENCL
+        fusedBias = false;
+        newWeightAndBias = false;
+        newActiv = false;
+        activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+#endif
+    }
+
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
     {
@@ -209,6 +222,10 @@ public:
         activ = layer;
         if (activ.empty())
             reluslope.clear();
+#ifdef HAVE_OPENCL
+        newActiv = true;
+        activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+#endif
         return !activ.empty();
     }
@@ -221,6 +238,10 @@ public:
         // we will need to re-compute the weights with the batch
         // norm coefficients taken into account
         weightsMat.release();
+#ifdef HAVE_OPENCL
+        newWeightAndBias = true;
+        fusedBias = false;
+#endif
         return !bnorm.empty();
     }
@@ -230,6 +251,10 @@ public:
         // we will need to re-compute the weights with the scaling
         // coefficients taken into account
         weightsMat.release();
+#ifdef HAVE_OPENCL
+        newWeightAndBias = true;
+        fusedBias = false;
+#endif
         return !scaleLayer.empty();
     }
@@ -665,19 +690,49 @@ public:
             convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
         }
 
-        for (size_t ii = 0; ii < outputs.size(); ii++)
-        {
-            UMat inpMat, outMat;
-            inpMat = inputs[ii]->getUMat(ACCESS_READ);
-            outMat = outputs[ii].getUMat(ACCESS_WRITE);
-
-            int batch_size = inpMat.size[0];
-
-            if (!convolutionOp->Forward(inpMat, umat_blobs[0], hasBias() ? umat_blobs[1] : UMat(),
-                                        outMat, batch_size))
-                return false;
-        }
-        return true;
+        if ( newWeightAndBias )
+        {
+            weightsMat.copyTo(umat_blobs[0]);
+            if ( fusedBias )
+            {
+                if ( umat_blobs.size() < 2 )
+                    umat_blobs.resize(2);
+                umat_blobs[1] = UMat(biasvec, true);
+            }
+            convolutionOp->setBias(fusedBias || hasBias());
+            newWeightAndBias = false;
+        }
+
+        if ( newActiv )
+        {
+            if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU )
+            {
+                CV_Assert(!reluslope.empty());
+                convolutionOp->setActivReLU(true, reluslope[0]);
+            }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_PRELU )
+            {
+                CV_Assert(!reluslope.empty());
+                convolutionOp->setActivPReLU(true, reluslope);
+            }
+            else
+            {
+                convolutionOp->setActivReLU(false, 0);
+                convolutionOp->setActivPReLU(false, reluslope);
+            }
+            newActiv = false;
+        }
+
+        UMat inpMat, outMat;
+        inpMat = inputs[0]->getUMat(ACCESS_READ);
+        outMat = outputs[0].getUMat(ACCESS_WRITE);
+
+        int batch_size = inpMat.size[0];
+
+        return convolutionOp->Forward(inpMat, umat_blobs[0],
+                                      (hasBias() || fusedBias) ? umat_blobs[1] : UMat(),
+                                      outMat, batch_size);
     }
 #endif
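Two things change in forward_ocl() above: the per-output loop is gone (the whole batch now goes through one Forward() call on inputs[0]/outputs[0] with batch_size), and weights are re-uploaded or the fused activation re-programmed only when the newWeightAndBias / newActiv flags were set by an earlier fusion step. A minimal sketch of that dirty-flag pattern in isolation (names invented for the example, not OpenCV API):

// Sketch of the lazy re-configuration pattern used above: expensive setup runs
// only when the corresponding state actually changed since the last forward pass.
template <typename Op>
struct LazyConvState
{
    bool newWeightAndBias = false;   // set when weights/bias were re-folded (BatchNorm/Scale fusion)
    bool newActiv = false;           // set when a new activation was fused in

    void sync(Op& op)
    {
        if (newWeightAndBias) { op.uploadWeights();     newWeightAndBias = false; }
        if (newActiv)         { op.programActivation(); newActiv = false; }
    }
};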
@@ -693,11 +748,6 @@ public:
         CV_Assert(inputs.size() == (size_t)1 && inputs[0]->size[1] % blobs[0].size[1] == 0);
         int ngroups = inputs[0]->size[1] / blobs[0].size[1];
         CV_Assert(outputs[0].size[1] % ngroups == 0);
 
-        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
-                   forward_ocl(inputs, outputs, internals))
-
         int k, outCn = blobs[0].size[0];
 
         if( weightsMat.empty() )
@@ -761,6 +811,11 @@ public:
                 }
             }
 
+#ifdef HAVE_OPENCL
+            if (shiftptr || shiftptr2)
+                fusedBias = true;
+#endif
+
             for( int i = 0; i < outCn; i++ )
             {
                 float s1 = scaleptr ? scaleptr[i] : 1.f;
@@ -784,7 +839,12 @@ public:
         {
             Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
             if( !activ_relu.empty() )
+            {
                 reluslope.assign(outCn+2, activ_relu->negativeSlope);
+#ifdef HAVE_OPENCL
+                activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
+#endif
+            }
 
             Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
             if( !activ_chprelu.empty() )
@@ -795,9 +855,16 @@ public:
                 reluslope.resize(outCn+2);
                 std::copy(mdata, mdata + outCn, reluslope.begin());
                 reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
+#ifdef HAVE_OPENCL
+                activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
+#endif
             }
         }
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs, outputs, internals))
+
         int nstripes = std::max(getNumThreads(), 1);
 
         ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope,
modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp
(large diff collapsed, not shown)
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -73,6 +73,11 @@ struct OCL4DNNConvConfig
     bool bias_term; // = false;
 };
 
+typedef enum {
+    OCL4DNN_CONV_FUSED_ACTIV_NONE  = 0,
+    OCL4DNN_CONV_FUSED_ACTIV_RELU  = 1,
+    OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
+} ocl4dnnFusedActiv_t;
+
 template<typename Dtype>
 class OCL4DNNConvSpatial
@@ -80,9 +85,13 @@ class OCL4DNNConvSpatial
 public:
     explicit OCL4DNNConvSpatial(OCL4DNNConvConfig config);
     ~OCL4DNNConvSpatial();
     bool Forward(const UMat& bottom_data, const UMat& weight,
                  const UMat& bias, UMat& top_data, int32_t batch_size);
+    void setActivReLU(bool fuse_activ, float slope);
+    void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
+    void setBias(bool bias_term);
 
 private:
     struct kernelConfig
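With the declarations above, the host-side flow in convolution_layer.cpp becomes: build the op from an OCL4DNNConvConfig, push the fusion state through setBias()/setActivReLU()/setActivPReLU(), then call Forward() once per batch. A hedged usage sketch follows (the surrounding function and its arguments are assumptions for illustration, not verbatim OpenCV code):

// Illustrative only: drive the fusion API declared above from a hypothetical caller.
static bool runFusedConv(const OCL4DNNConvConfig& config,
                         const UMat& input, const UMat& weights, const UMat& bias,
                         std::vector<float>& perChannelSlopes,   // non-empty => fuse ChannelsPReLU
                         UMat& output, int batch_size)
{
    Ptr<OCL4DNNConvSpatial<float> > op(new OCL4DNNConvSpatial<float>(config));

    op->setBias(!bias.empty());                  // bias gets applied inside the kernel
    if (!perChannelSlopes.empty())
        op->setActivPReLU(true, perChannelSlopes);
    else
        op->setActivReLU(true, 0.f);             // slope 0 behaves as a plain ReLU

    return op->Forward(input, weights, bias, output, batch_size);
}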
@@ -194,9 +203,9 @@ class OCL4DNNConvSpatial
                                   int32_t blockWidth,
                                   int32_t blockHeight,
                                   int32_t blockDepth);
-    bool setupIDLF(int32_t blockWidth,
-                   int32_t blockHeight,
-                   int32_t blockDepth);
+    bool createIDLFKernel(int32_t blockWidth,
+                          int32_t blockHeight,
+                          int32_t blockDepth);
     bool createBasicKernel(int32_t blockWidth,
                            int32_t blockHeight,
                            int32_t blockDepth);
@@ -244,10 +253,13 @@ class OCL4DNNConvSpatial
                       int lx, int ly, int lz,
                       bool swizzle, bool nullLocal);
     void generateTunerItems(std::vector<cv::Ptr<tunerParam> > &tunerItems);
+    void setFusionDefine(ocl4dnnFusedActiv_t fused_activ);
+    void setFusionArg(ocl4dnnFusedActiv_t fused_activ,
+                      ocl::Kernel &kernel, cl_uint &argIdx);
 
     int32_t group_;
     bool bias_term_;
     UMat swizzled_weights_umat;
     UMat bottom_data2_;
     int32_t bottom_index_;
     int32_t output_h_;
@@ -291,6 +303,9 @@ class OCL4DNNConvSpatial
     std::stringstream options_;
     cv::ocl::ProgramSource src_;
     int32_t prev_kernel_type_;
+    bool negative_slope_;
+    UMat negative_slope_umat_;
+    ocl4dnnFusedActiv_t fused_activ_;
 };
 
 typedef enum {
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -78,6 +78,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     num_output_ = config.out_shape[dims - spatial_dims - 1];
     group_ = config.group;
+    fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+    negative_slope_ = 0;
     prev_kernel_type_ = -1;
     tuned_ = false;
@@ -138,6 +140,38 @@ OCL4DNNConvSpatial<Dtype>::~OCL4DNNConvSpatial()
     }
 }
 
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ)
+{
+    switch (fused_activ) {
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU:
+            addDef("FUSED_CONV_RELU", 1);
+            break;
+        case OCL4DNN_CONV_FUSED_ACTIV_PRELU:
+            addDef("FUSED_CONV_PRELU", 1);
+            break;
+        default:
+            ;
+    }
+    return;
+}
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx)
+{
+    switch (fused_activ) {
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU:
+            kernel.set(argIdx++, (float)negative_slope_);
+            break;
+        case OCL4DNN_CONV_FUSED_ACTIV_PRELU:
+            kernel.set(argIdx++, (cl_mem)negative_slope_umat_.handle(ACCESS_READ));
+            break;
+        default:
+            ;
+    }
+    return;
+}
+
 template<typename Dtype>
 void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
 {
@@ -221,6 +255,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         addDef("ALIGNED_NUM_FILTERS", (int)alignSize(M_, simd_size));
         addDef("OUT_BLOCK_SIZE", (output_block_width*output_block_height));
         addDef("APPLY_BIAS", bias_term_);
+        setFusionDefine(fused_activ_);
         src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
     }
@@ -242,6 +277,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         addDef("APPLY_BIAS", bias_term_);
         addDef("OUTPUT_Z", M_);
         addDef("ZPAR", 1);
+        setFusionDefine(fused_activ_);
         src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
     }
@@ -278,6 +314,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         addDef("TILE_N_LAST", M_ % 32);
         addDef("TILE_N_LAST_DIV8", (M_ % 32) / 8);
         addDef("APPLY_BIAS", bias_term_);
+        setFusionDefine(fused_activ_);
         src_ = ocl::dnn::conv_layer_spatial_oclsrc;
     }
 }
@@ -302,6 +339,37 @@ void OCL4DNNConvSpatial<Dtype>::setupKernel()
     setupKernelDetails(kernelType_, blockM_, blockK_, blockN_);
 }
 
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setBias(bool bias_term)
+{
+    bias_term_ = bias_term;
+}
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU;
+        negative_slope_ = slope;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
+        Mat tmpMat = Mat(num_output_, 1, CV_32FC1, (uchar*)&slope[0]);
+        tmpMat.copyTo(negative_slope_umat_);
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
                                         const UMat& weight,
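One detail worth noting in setActivPReLU() above: the per-channel slopes are wrapped in a CV_32FC1 Mat header over the caller's vector and copied into negative_slope_umat_, so the device-side buffer is populated without the caller managing any OpenCL memory. The same idea in isolation (a minimal sketch, names invented for the example):

#include <opencv2/core.hpp>
#include <vector>

// Minimal sketch: upload a host-side float vector into a UMat, mirroring what
// setActivPReLU() does with the PReLU slopes above.
static void uploadSlopes(const std::vector<float>& slopes, cv::UMat& dst)
{
    // The Mat header points at the vector's storage; copyTo() transfers it to the UMat buffer.
    cv::Mat tmp((int)slopes.size(), 1, CV_32FC1, (void*)slopes.data());
    tmp.copyTo(dst);
}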
@@ -310,7 +378,6 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
                                         int32_t numImages)
 {
     num_ = numImages;
-
     prepareKernel(bottom, top, weight, bias, numImages);
     return convolve(bottom, top, weight, bias, numImages, bestKernelConfig, cv::ocl::Queue::getDefault());
 }
@@ -358,7 +425,9 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
                << "in" << TUNING_SIZE(width_) << "x" << TUNING_SIZE(height_) << "_"
                << "p" << pad_w_ << "x" << pad_h_ << "_"
                << "num" << num_ << "_"
-               << "M" << M_;
+               << "M" << M_ << "_"
+               << "activ" << fused_activ_;
 
     key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
     key_sanitized_ = key_;
@@ -608,6 +677,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
             return false;
 
         cl_uint argIdx = 0;
+        setFusionArg(fused_activ_, kernel, argIdx);
 
         UMat img_buffer;
         if (image_offset)
@@ -700,6 +770,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
             return false;
 
         cl_uint argIdx = 0;
+        setFusionArg(fused_activ_, kernel, argIdx);
 
         UMat img_buffer;
         if (image_offset)
@@ -807,13 +878,16 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
                 int32_t output_image_offset = n * top_dim_ + output_w_ * output_h_ * M_ * g;
 
-                cl_uint argIdx = 0;
-                int32_t kernel_offset = kernel_h_ * kernel_w_ * (channels_ / group_) * M_ * g;
+                int32_t kernel_offset = kernel_h_ * kernel_w_ * (channels_ / group_) * M_ * g;
 
                 ocl::Kernel kernel(config->kernelName.c_str(), program);
                 if (kernel.empty())
                     return false;
 
+                cl_uint argIdx = 0;
+                setFusionArg(fused_activ_, kernel, argIdx);
                 kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
                 kernel.set(argIdx++, image_offset);
                 kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
@@ -1058,9 +1132,9 @@ bool OCL4DNNConvSpatial<float>::createGEMMLikeConvKernel(int32_t blockM,
 }
 
 template<>
-bool OCL4DNNConvSpatial<float>::setupIDLF(int32_t blockWidth,
-                                          int32_t blockHeight,
-                                          int32_t simd_size)
+bool OCL4DNNConvSpatial<float>::createIDLFKernel(int32_t blockWidth,
+                                                 int32_t blockHeight,
+                                                 int32_t simd_size)
 {
     int32_t workItemOutput[3] = { blockWidth, blockHeight, simd_size };
     const int32_t num_output_maps = M_;
@@ -1122,7 +1196,7 @@ bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
     src_ = ocl::ProgramSource();
 
     if (kernelType == KERNEL_TYPE_INTEL_IDLF)
-        return setupIDLF(blockWidth, blockHeight, blockDepth);
+        return createIDLFKernel(blockWidth, blockHeight, blockDepth);
     else if (kernelType == KERNEL_TYPE_BASIC)
         return createBasicKernel(blockWidth, blockHeight, blockDepth);
     else if (kernelType == KERNEL_TYPE_GEMM_LIKE)
modules/dnn/src/opencl/conv_layer_spatial.cl
(large diff collapsed, not shown)