opencv / Commits / 436d7e4e

Commit 436d7e4e, authored 7 years ago by Li Peng
add depthwise convolution kernel

Signed-off-by: Li Peng <peng.li@intel.com>

parent 910d7dab
Showing 3 changed files with 160 additions and 4 deletions (+160 / -4)
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp           +5   -0
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp  +98  -2
modules/dnn/src/opencl/conv_layer_spatial.cl          +57  -2
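For context (an editorial note, not part of the commit message): a depthwise convolution is a grouped convolution in which the group count equals the number of input and output channels, so each channel c is filtered independently by its own KERNEL_H x KERNEL_W kernel. For every output position, the kernel added by this commit computes

    out(c, y, x) = sum_{i=0}^{KERNEL_H-1} sum_{j=0}^{KERNEL_W-1}
                   in(c, y*STRIDE_Y - INPUT_PAD_H + i*DILATION_Y,
                         x*STRIDE_X - INPUT_PAD_W + j*DILATION_X) * w(c, i, j)

with out-of-bounds taps treated as zero padding, followed by an optional per-channel bias and the fused activation.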
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -215,6 +215,9 @@ class OCL4DNNConvSpatial
         bool createGEMMLikeConvKernel(int32_t blockWidth,
                                       int32_t blockHeight,
                                       int32_t blockDepth);
+        bool createDWConvKernel(int32_t blockWidth,
+                                int32_t blockHeight,
+                                int32_t blockDepth);
         void CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
                              int32_t offset, int32_t size, bool write_only);
         bool convolve(const UMat &bottom, UMat &top,
@@ -282,6 +285,8 @@ class OCL4DNNConvSpatial
         int32_t M_;

         bool tuned_;
+        bool dwconv_;
+
         std::string key_, key_sanitized_;
         std::string short_key_;
         std::string kernel_name_;
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -103,6 +103,7 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     top_dim_ = num_output_ * output_w_ * output_h_;

     cache_path_ = utils::getConfigurationParameterString("OPENCV_OCL4DNN_CONFIG_PATH", "");
+    dwconv_ = (num_output_ == channels_ && channels_ == group_);

     use_cache_path_ = false;
     if (!cache_path_.empty())
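The dwconv_ flag above is the entire detection step: a layer takes the depthwise path only when the group count equals both the input and the output channel counts. A minimal standalone C++ sketch of the same predicate, with hypothetical example shapes (not taken from this commit):

    #include <cstdint>
    #include <iostream>

    // Same condition as the dwconv_ assignment above, factored into a helper.
    static bool isDepthwise(int32_t num_output, int32_t channels, int32_t group)
    {
        return num_output == channels && channels == group;
    }

    int main()
    {
        std::cout << isDepthwise(32, 32, 32) << "\n"; // 1: e.g. a 32-channel depthwise layer
        std::cout << isDepthwise(64, 32, 1)  << "\n"; // 0: an ordinary dense convolution
        std::cout << isDepthwise(64, 64, 2)  << "\n"; // 0: grouped, but not depthwise
    }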
@@ -203,7 +204,8 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
 typedef enum {
     KERNEL_TYPE_INTEL_IDLF = 2,
     KERNEL_TYPE_BASIC = 4,
-    KERNEL_TYPE_GEMM_LIKE = 5
+    KERNEL_TYPE_GEMM_LIKE = 5,
+    KERNEL_TYPE_DWCONV = 6
 } ocl4dnnConvSpatialKernelType_t;

 template<typename Dtype>
@@ -313,6 +315,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         if (clOptionSupport("-cl-no-subgroup-ifp"))
             options_ << " -cl-no-subgroup-ifp ";

         addDef("KERNEL_GEMM_LIKE");
         addDef("INPUT_DEPTH", channels_);
         addDef("WIDTH1", M_);
         addDef("OUT_PADDING_LEFT", 0);
@@ -329,6 +332,28 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         setFusionDefine(fused_activ_, fused_eltwise_);

         src_ = ocl::dnn::conv_layer_spatial_oclsrc;
     }
+    else if (kernelType == KERNEL_TYPE_DWCONV)
+    {
+        kernelUKey = generateSpecificKey(KERNEL_TYPE_DWCONV, blockM, blockK, blockN);
+        kernel_name_ = "DWCONV_";
+        kernel_name_ += kernelUKey.c_str();
+
+        options_ << " -cl-fast-relaxed-math ";
+        if (clOptionSupport("-cl-no-subgroup-ifp"))
+            options_ << " -cl-no-subgroup-ifp ";
+
+        addDef("KERNEL_DWCONV");
+        addDef("KERNEL_SIZE", kernel_w_ * kernel_h_);
+        addDef("KERNEL_W", kernel_w_);
+        addDef("KERNEL_H", kernel_h_);
+        addDef("APPLY_BIAS", bias_term_);
+        addDef("OUTPUT_Z", num_output_ * num_);
+        addDef("CHANNELS", num_output_);
+        setFusionDefine(fused_activ_, fused_eltwise_);
+
+        options_ << " -D DWCONV=" << kernel_name_;
+        src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
+    }
 }

 template<typename Dtype>
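To make the build-time defines above concrete, here is a small C++ sketch that evaluates the same expressions for a hypothetical 3x3 depthwise layer with 32 channels and a batch of 2 (the shapes are illustrative, not from this commit):

    #include <cstdint>
    #include <iostream>

    int main()
    {
        // Hypothetical layer geometry (stand-ins for kernel_w_, kernel_h_, num_output_, num_).
        int32_t kernel_w = 3, kernel_h = 3;
        int32_t num_output = 32;   // == input channels for a depthwise layer
        int32_t num = 2;           // batch size

        std::cout << "KERNEL_SIZE = " << kernel_w * kernel_h << "\n";  // 9
        std::cout << "OUTPUT_Z    = " << num_output * num    << "\n";  // 64: one Z slice per (image, channel)
        std::cout << "CHANNELS    = " << num_output          << "\n";  // 32
    }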
@@ -906,6 +931,33 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
                 return false;
             }
         }
     }
+    else if (config->kernelType == KERNEL_TYPE_DWCONV)
+    {
+        ocl::Kernel kernel(config->kernelName.c_str(), program);
+        if (kernel.empty())
+            return false;
+
+        cl_uint argIdx = 0;
+        setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
+        kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+        kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
+        if (bias_term_)
+            kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
+        kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+        kernel.set(argIdx++, (uint16_t)width_);
+        kernel.set(argIdx++, (uint16_t)height_);
+        kernel.set(argIdx++, (uint16_t)output_w_);
+        kernel.set(argIdx++, (uint16_t)output_h_);
+
+        size_t global_size[3];
+        global_size[0] = output_w_;
+        global_size[1] = output_h_;
+        global_size[2] = num_output_ * num_;
+
+        if (!kernel.run(3, global_size, NULL, false))
+        {
+            std::cout << "DWCONV kernel run failed." << std::endl;
+            return false;
+        }
+    }
     else
     {
         for (int32_t n = 0; n < numImages; ++n)
         {
             for (int32_t g = 0; g < group_; ++g)
             {
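The launch configuration above assigns one work-item per output value: the global size is { output_w_, output_h_, num_output_ * num_ }, so the third dimension enumerates (image, channel) pairs. A quick sanity check with made-up dimensions (placeholders, not values from this commit):

    #include <cstddef>
    #include <iostream>

    int main()
    {
        // Hypothetical output geometry (stand-ins for output_w_, output_h_, num_output_, num_).
        size_t output_w = 112, output_h = 112, num_output = 32, num = 2;

        size_t global_size[3] = { output_w, output_h, num_output * num };
        std::cout << global_size[0] << " x " << global_size[1] << " x " << global_size[2]
                  << " = " << global_size[0] * global_size[1] * global_size[2]
                  << " work-items\n";   // 112 x 112 x 64 = 802816
    }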
@@ -1222,6 +1274,39 @@ bool OCL4DNNConvSpatial<float>::createIDLFKernel(int32_t blockWidth,
         return false;
 }

+template<>
+bool OCL4DNNConvSpatial<float>::createDWConvKernel(int32_t blockWidth,
+                                                   int32_t blockHeight,
+                                                   int32_t blockDepth)
+{
+    if (!dwconv_)
+        return false;
+
+    int workItemOutput[3] = { 1, 1, 1 };
+    size_t local_size[3] = { 1, 1, 1 };
+    size_t global_size[3];
+    global_size[0] = divUp(output_w_, workItemOutput[0]);
+    global_size[1] = divUp(output_h_, workItemOutput[1]);
+    global_size[2] = divUp(M_ * num_, workItemOutput[2]);
+
+    kernelType_ = KERNEL_TYPE_DWCONV;
+    blockM_ = blockWidth;
+    blockK_ = blockHeight;
+    blockN_ = blockDepth;
+    setupKernel();
+
+    ocl::Program program = compileKernel();
+    if (program.ptr())
+    {
+        kernelQueue.push_back(makePtr<kernelConfig>(kernel_name_, &global_size[0], &local_size[0],
+                                                    &workItemOutput[0], false, KERNEL_TYPE_DWCONV));
+        return true;
+    }
+    else
+        return false;
+}
+
 template<>
 bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
                                                         int32_t blockWidth,
@@ -1238,6 +1323,8 @@ bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
         return createBasicKernel(blockWidth, blockHeight, blockDepth);
     else if (kernelType == KERNEL_TYPE_GEMM_LIKE)
         return createGEMMLikeConvKernel(blockWidth, blockHeight, blockDepth);
+    else if (kernelType == KERNEL_TYPE_DWCONV)
+        return createDWConvKernel(blockWidth, blockHeight, blockDepth);
     else
         CV_Assert(0 && "Internal error");

     return false;
@@ -1246,7 +1333,16 @@ bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
 template<>
 void OCL4DNNConvSpatial<float>::generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems)
 {
     if (ocl::Device::getDefault().intelSubgroupsSupport())
     {
+        //depth_wise kernels
+        if (dwconv_)
+        {
+            tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_DWCONV, 1, 1, 1));
+            if (group_ > 8)
+                return;
+        }
+
         /* IDLF kernels are using Intel specific extension which make
            them intel only. */
         // Generates static key_
modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -383,7 +383,7 @@ convolve_simd(
     }
 }
-#else //KERNEL_GEMM_LIKE
+#elif defined KERNEL_GEMM_LIKE

 #if APPLY_BIAS
 // Dtype bias[4];
@@ -1501,4 +1501,59 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
     INTERLEAVED_SIMD16_OUTPUT(dst, out_offset, 0);
 }
 #endif
-#endif //KERNEL_BASIC/IDLF/GEMM_LIKE
+
+#elif defined KERNEL_DWCONV
+__kernel void DWCONV(
+    ELTWISE_DATA_ARG
+    NEGATIVE_SLOPE_ARG
+    __global Dtype* image_data,
+    __global Dtype* kernel_data,
+    BIAS_KERNEL_ARG
+    __global Dtype* convolved_image,
+    const ushort input_width,
+    const ushort input_height,
+    const ushort output_width,
+    const ushort output_height)
+{
+    const int outputX = get_global_id(0);
+    const int outputY = get_global_id(1);
+    const int outputZ = get_global_id(2);
+    if (outputX < output_width && outputY < output_height)
+    {
+        Dtype sum = 0.;
+
+        const int org_y = outputY * STRIDE_Y - INPUT_PAD_H;
+        const int org_x = outputX * STRIDE_X - INPUT_PAD_W;
+        const int currentKernelOffset = KERNEL_SIZE*(outputZ%CHANNELS);
+        const int biasIndex=outputZ%CHANNELS;
+        const int local_image_offset = org_y*input_width + org_x;
+        const int imageSize = input_width*input_height;
+
+        __global Dtype* image_dataPtrFloat = (image_data + (imageSize*outputZ + local_image_offset));
+        __global Dtype* kernel_dataPtrFloat = (kernel_data + (currentKernelOffset));
+
+        for (int y = 0; y < KERNEL_H; y++)
+        {
+            for (int x = 0; x < KERNEL_W; x++)
+            {
+                if (!(org_y + y * DILATION_Y >= 0 && org_y + y * DILATION_Y < input_height &&
+                      org_x + x * DILATION_X >= 0 && org_x + x * DILATION_X < input_width))
+                {
+                    continue;
+                }
+                sum += image_dataPtrFloat[x * DILATION_X] * kernel_dataPtrFloat[x];
+            }
+            image_dataPtrFloat += input_width * DILATION_Y;
+            kernel_dataPtrFloat += KERNEL_W;
+        }
+
+#if APPLY_BIAS
+        int offset = outputZ*output_height*output_width + outputY*output_width + outputX;
+        ACTIVATION_FUNCTION(convolved_image, offset, sum + biases_base[biasIndex], biasIndex);
+#else
+        int offset = outputZ*output_height*output_width + outputY*output_width + outputX;
+        ACTIVATION_FUNCTION(convolved_image, offset, sum, biasIndex);
+#endif
+    }
+}
+#endif //KERNEL_BASIC/IDLF/GEMM_LIKE/DWCONV
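To cross-check the OpenCL kernel above, the following is a hedged, single-threaded C++ reference for one image. The function name, the CHW memory layout, and the float element type are assumptions made for this sketch; the bounds test mirrors the padding/dilation check inside DWCONV.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Naive depthwise convolution for one image: 'in' is CHW (channels x in_h x in_w),
    // one k_h x k_w filter per channel, one optional bias per channel.
    // Illustrative sketch only; not code from the commit.
    static std::vector<float> depthwiseConvRef(const std::vector<float>& in,
                                               const std::vector<float>& weights,
                                               const std::vector<float>& bias,
                                               int channels, int in_h, int in_w,
                                               int k_h, int k_w,
                                               int stride_y, int stride_x,
                                               int pad_h, int pad_w,
                                               int dil_y, int dil_x,
                                               int out_h, int out_w)
    {
        std::vector<float> out(static_cast<size_t>(channels) * out_h * out_w, 0.f);
        for (int c = 0; c < channels; ++c)
            for (int oy = 0; oy < out_h; ++oy)
                for (int ox = 0; ox < out_w; ++ox)
                {
                    float sum = 0.f;
                    const int org_y = oy * stride_y - pad_h;   // same origin as in the kernel
                    const int org_x = ox * stride_x - pad_w;
                    for (int y = 0; y < k_h; ++y)
                        for (int x = 0; x < k_w; ++x)
                        {
                            const int iy = org_y + y * dil_y;
                            const int ix = org_x + x * dil_x;
                            if (iy < 0 || iy >= in_h || ix < 0 || ix >= in_w)
                                continue;                      // zero padding
                            sum += in[(c * in_h + iy) * in_w + ix]
                                 * weights[(c * k_h + y) * k_w + x];
                        }
                    if (!bias.empty())
                        sum += bias[c];
                    out[(c * out_h + oy) * out_w + ox] = sum;
                }
        return out;
    }

    int main()
    {
        // 1 channel, 3x3 input, 3x3 kernel of ones, no padding -> single output = sum of inputs.
        std::vector<float> in = {1,2,3,4,5,6,7,8,9}, w(9, 1.f), b;
        std::vector<float> out = depthwiseConvRef(in, w, b, 1, 3, 3, 3, 3, 1, 1, 0, 0, 1, 1, 1, 1);
        std::cout << out[0] << "\n";  // 45
    }

The OpenCL version performs the same per-element computation, except that the c/oy/ox loops become the global work-item grid and the result is stored through the fused ACTIVATION_FUNCTION macro.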