Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bd6a6690
Commit
bd6a6690
authored
Nov 24, 2017
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #10150 from alalek:dnn_ocl_refactor_pooling
parents
f071a48e
13f37466
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
107 additions
and
116 deletions
+107
-116
ocl4dnn.hpp
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+0
-3
ocl4dnn_pool.cpp
modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
+63
-63
ocl4dnn_pooling.cl
modules/dnn/src/opencl/ocl4dnn_pooling.cl
+44
-50
No files found.
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
View file @
bd6a6690
...
...
@@ -351,8 +351,6 @@ class OCL4DNNPool
UMat
&
top_data
,
UMat
&
top_mask
);
private
:
UMat
mask_idx_
;
// Pooling parameters
std
::
vector
<
int32_t
>
pad_
;
std
::
vector
<
int32_t
>
stride_
;
...
...
@@ -362,7 +360,6 @@ class OCL4DNNPool
ocl4dnnPoolingMethod_t
pool_method_
;
int32_t
count_
;
int32_t
batch_size_
;
int32_t
channels_
;
int32_t
kernel_h_
;
int32_t
kernel_w_
;
...
...
modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
View file @
bd6a6690
...
...
@@ -54,7 +54,6 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
int
dims
=
config
.
in_shape
.
size
();
int
spatial_dims
=
2
;
batch_size_
=
config
.
in_shape
[
0
];
channels_
=
config
.
channels
;
pool_method_
=
config
.
pool_method
;
...
...
@@ -88,7 +87,7 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
template
<
typename
Dtype
>
OCL4DNNPool
<
Dtype
>::~
OCL4DNNPool
()
{
mask_idx_
.
release
();
// nothing
}
template
<
typename
Dtype
>
...
...
@@ -99,99 +98,100 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
bool
ret
=
true
;
size_t
global
[]
=
{
128
*
128
};
size_t
local
[]
=
{
128
};
cl_uint
argIdx
=
0
;
// support 2D case
switch
(
pool_method_
)
{
case
LIBDNN_POOLING_METHOD_MAX
:
{
if
(
top_mask
.
empty
()
&&
mask_idx_
.
empty
())
{
mask_idx_
.
create
(
1
,
count_
,
CV_32FC1
);
}
ocl
::
Kernel
oclk_max_pool_forward
(
CL_KERNEL_SELECT
(
"max_pool_forward"
),
cv
::
ocl
::
dnn
::
ocl4dnn_pooling_oclsrc
);
bool
haveMask
=
!
top_mask
.
empty
();
ocl
::
Kernel
oclk_max_pool_forward
(
haveMask
?
CL_KERNEL_SELECT
(
"max_pool_forward_mask"
)
:
CL_KERNEL_SELECT
(
"max_pool_forward"
),
ocl
::
dnn
::
ocl4dnn_pooling_oclsrc
,
format
(
"-D KERNEL_MAX_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
" -D STRIDE_W=%d -D STRIDE_H=%d"
" -D PAD_W=%d -D PAD_H=%d%s"
,
kernel_w_
,
kernel_h_
,
stride_w_
,
stride_h_
,
pad_w_
,
pad_h_
,
haveMask
?
" -D HAVE_MASK=1"
:
""
));
if
(
oclk_max_pool_forward
.
empty
())
return
false
;
argIdx
=
0
;
oclk_max_pool_forward
.
set
(
argIdx
++
,
count_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bottom
));
oclk_max_pool_forward
.
set
(
argIdx
++
,
batch_size_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
channels_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
height_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
width_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
pooled_height_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
pooled_width_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
kernel_h_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
kernel_w_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
stride_h_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
stride_w_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
pad_h_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
pad_w_
);
oclk_max_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
top
));
oclk_max_pool_forward
.
set
(
argIdx
++
,
mask_idx_
.
empty
()
?
0
:
1
);
if
(
mask_idx_
.
empty
())
oclk_max_pool_forward
.
set
(
argIdx
++
,
(
void
*
)
NULL
);
else
oclk_max_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
mask_idx_
));
oclk_max_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
top_mask
));
oclk_max_pool_forward
.
args
(
count_
,
ocl
::
KernelArg
::
PtrReadOnly
(
bottom
),
channels_
,
height_
,
width_
,
pooled_height_
,
pooled_width_
,
ocl
::
KernelArg
::
PtrWriteOnly
(
top
),
ocl
::
KernelArg
::
PtrWriteOnly
(
top_mask
)
);
ret
=
oclk_max_pool_forward
.
run
(
1
,
global
,
local
,
false
);
}
break
;
case
LIBDNN_POOLING_METHOD_AVE
:
{
CV_Assert
(
top_mask
.
empty
());
ocl
::
Kernel
oclk_ave_pool_forward
(
CL_KERNEL_SELECT
(
"ave_pool_forward"
),
cv
::
ocl
::
dnn
::
ocl4dnn_pooling_oclsrc
);
ocl
::
dnn
::
ocl4dnn_pooling_oclsrc
,
format
(
"-D KERNEL_AVE_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
" -D STRIDE_W=%d -D STRIDE_H=%d"
" -D PAD_W=%d -D PAD_H=%d"
,
kernel_w_
,
kernel_h_
,
stride_w_
,
stride_h_
,
pad_w_
,
pad_h_
));
if
(
oclk_ave_pool_forward
.
empty
())
return
false
;
argIdx
=
0
;
oclk_ave_pool_forward
.
set
(
argIdx
++
,
count_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bottom
));
oclk_ave_pool_forward
.
set
(
argIdx
++
,
batch_size_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
channels_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
height_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
width_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
pooled_height_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
pooled_width_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
kernel_h_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
kernel_w_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
stride_h_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
stride_w_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
pad_h_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
pad_w_
);
oclk_ave_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
top
));
oclk_ave_pool_forward
.
args
(
count_
,
ocl
::
KernelArg
::
PtrReadOnly
(
bottom
),
channels_
,
height_
,
width_
,
pooled_height_
,
pooled_width_
,
ocl
::
KernelArg
::
PtrWriteOnly
(
top
)
);
ret
=
oclk_ave_pool_forward
.
run
(
1
,
global
,
local
,
false
);
}
break
;
case
LIBDNN_POOLING_METHOD_STO
:
{
CV_Assert
(
top_mask
.
empty
());
ocl
::
Kernel
oclk_sto_pool_forward
(
CL_KERNEL_SELECT
(
"sto_pool_forward_test"
),
cv
::
ocl
::
dnn
::
ocl4dnn_pooling_oclsrc
);
ocl
::
dnn
::
ocl4dnn_pooling_oclsrc
,
format
(
"-D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
" -D STRIDE_W=%d -D STRIDE_H=%d"
,
kernel_w_
,
kernel_h_
,
stride_w_
,
stride_h_
));
if
(
oclk_sto_pool_forward
.
empty
())
return
false
;
argIdx
=
0
;
oclk_sto_pool_forward
.
set
(
argIdx
++
,
count_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bottom
));
oclk_sto_pool_forward
.
set
(
argIdx
++
,
batch_size_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
channels_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
height_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
width_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
pooled_height_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
pooled_width_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
kernel_h_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
kernel_w_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
stride_h_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
stride_w_
);
oclk_sto_pool_forward
.
set
(
argIdx
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
top
));
oclk_sto_pool_forward
.
args
(
count_
,
ocl
::
KernelArg
::
PtrReadOnly
(
bottom
),
channels_
,
height_
,
width_
,
pooled_height_
,
pooled_width_
,
ocl
::
KernelArg
::
PtrWriteOnly
(
top
)
);
ret
=
oclk_sto_pool_forward
.
run
(
1
,
global
,
local
,
false
);
}
...
...
modules/dnn/src/opencl/ocl4dnn_pooling.cl
View file @
bd6a6690
...
...
@@ -44,14 +44,23 @@
#
define
TEMPLATE
(
name,type
)
CONCAT
(
name,type
)
#
define
Dtype
float
void
TEMPLATE
(
max_pool_forward_impl,
Dtype
)(
const
int
nthreads,
__global
const
Dtype*
bottom_data,
const
int
num,
#
if
defined
KERNEL_MAX_POOL
__kernel
void
#
ifdef
HAVE_MASK
TEMPLATE
(
max_pool_forward_mask,
Dtype
)
#
else
TEMPLATE
(
max_pool_forward,
Dtype
)
#
endif
(
const
int
nthreads,
__global
const
Dtype*
bottom_data,
const
int
channels,
const
int
height,
const
int
width,
const
int
pooled_height,
const
int
pooled_width,
const
int
kernel_h,
const
int
kernel_w,
const
int
stride_h,
const
int
stride_w,
const
int
pad_h,
const
int
pad_w,
__global
Dtype*
top_data,
const
int
use_mask,
__global
int*
mask,
__global
Dtype*
top_mask,
bool
no_mask
)
const
int
pooled_height,
const
int
pooled_width,
__global
Dtype*
top_data
#
ifdef
HAVE_MASK
,
__global
Dtype*
mask
#
endif
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < nthreads;
index
+=
get_global_size
(
0
))
...
...
@@ -60,10 +69,10 @@ void TEMPLATE(max_pool_forward_impl, Dtype)(
const
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
const
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
const
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
int
hstart
=
ph
*
stride_h
-
pad_h
;
int
wstart
=
pw
*
stride_w
-
pad_w
;
const
int
hend
=
min
(
hstart
+
kernel_h
,
height
)
;
const
int
wend
=
min
(
wstart
+
kernel_w
,
width
)
;
int
hstart
=
ph
*
STRIDE_H
-
PAD_H
;
int
wstart
=
pw
*
STRIDE_W
-
PAD_W
;
const
int
hend
=
min
(
hstart
+
KERNEL_H
,
height
)
;
const
int
wend
=
min
(
wstart
+
KERNEL_W
,
width
)
;
hstart
=
max
(
hstart,
(
int
)
0
)
;
wstart
=
max
(
wstart,
(
int
)
0
)
;
Dtype
maxval
=
-FLT_MAX
;
...
...
@@ -79,38 +88,19 @@ void TEMPLATE(max_pool_forward_impl, Dtype)(
}
}
top_data[index]
=
maxval
;
if
(
!no_mask
)
{
if
(
use_mask
==
1
)
{
mask[index]
=
maxidx
;
}
else
{
top_mask[index]
=
maxidx
;
}
}
#
ifdef
HAVE_MASK
mask[index]
=
maxidx
;
#
endif
}
}
__kernel
void
TEMPLATE
(
max_pool_forward,
Dtype
)(
const
int
nthreads,
__global
const
Dtype*
bottom_data,
const
int
num,
const
int
channels,
const
int
height,
const
int
width,
const
int
pooled_height,
const
int
pooled_width,
const
int
kernel_h,
const
int
kernel_w,
const
int
stride_h,
const
int
stride_w,
const
int
pad_h,
const
int
pad_w,
__global
Dtype*
top_data,
const
int
use_mask,
__global
int*
mask,
__global
Dtype*
top_mask
)
{
TEMPLATE
(
max_pool_forward_impl,
Dtype
)(
nthreads,
bottom_data,
num,
channels,
height,
width,
pooled_height,
pooled_width,
kernel_h,
kernel_w,
stride_h,
stride_w,
pad_h,
pad_w,
top_data,
use_mask,
mask,
top_mask,
false
)
;
}
#
elif
defined
KERNEL_AVE_POOL
__kernel
void
TEMPLATE
(
ave_pool_forward,
Dtype
)(
const
int
nthreads,
__global
const
Dtype*
const
bottom_data,
const
int
num,
const
int
nthreads,
__global
const
Dtype*
const
bottom_data,
const
int
channels,
const
int
height,
const
int
width,
const
int
pooled_height,
const
int
pooled_width,
const
int
kernel_h,
const
int
kernel_w,
const
int
stride_h,
const
int
stride_w,
const
int
pad_h,
const
int
pad_w,
__global
Dtype*
top_data
)
const
int
pooled_height,
const
int
pooled_width,
__global
Dtype*
top_data
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < nthreads;
index
+=
get_global_size
(
0
))
...
...
@@ -120,10 +110,10 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
const
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
const
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
const
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
int
hstart
=
ph
*
stride_h
-
pad_h
;
int
wstart
=
pw
*
stride_w
-
pad_w
;
int
hend
=
min
(
hstart
+
kernel_h,
height
+
pad_h
)
;
int
wend
=
min
(
wstart
+
kernel_w,
width
+
pad_w
)
;
int
hstart
=
ph
*
STRIDE_H
-
PAD_H
;
int
wstart
=
pw
*
STRIDE_W
-
PAD_W
;
int
hend
=
min
(
hstart
+
KERNEL_H,
height
+
PAD_H
)
;
int
wend
=
min
(
wstart
+
KERNEL_W,
width
+
PAD_W
)
;
const
int
pool_size
=
(
hend
-
hstart
)
*
(
wend
-
wstart
)
;
hstart
=
max
(
hstart,
(
int
)
0
)
;
wstart
=
max
(
wstart,
(
int
)
0
)
;
...
...
@@ -142,11 +132,12 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
}
}
#
elif
defined
KERNEL_STO_POOL
__kernel
void
TEMPLATE
(
sto_pool_forward_test,Dtype
)(
const
int
nthreads,
__global
const
Dtype*
const
bottom_data,
const
int
num,
const
int
nthreads,
__global
const
Dtype*
const
bottom_data,
const
int
channels,
const
int
height,
const
int
width,
const
int
pooled_height,
const
int
pooled_width,
const
int
kernel_h,
const
int
kernel_w,
const
int
stride_h,
const
int
stride_w,
const
int
pooled_height,
const
int
pooled_width,
__global
Dtype*
top_data
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < nthreads;
...
...
@@ -156,10 +147,10 @@ __kernel void TEMPLATE(sto_pool_forward_test,Dtype)(
const
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
const
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
const
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
int
hstart
=
ph
*
stride_h
;
const
int
hend
=
min
(
hstart
+
kernel_h
,
height
)
;
const
int
wstart
=
pw
*
stride_w
;
const
int
wend
=
min
(
wstart
+
kernel_w
,
width
)
;
const
int
hstart
=
ph
*
STRIDE_H
;
const
int
hend
=
min
(
hstart
+
KERNEL_H
,
height
)
;
const
int
wstart
=
pw
*
STRIDE_W
;
const
int
wend
=
min
(
wstart
+
KERNEL_W
,
width
)
;
//
We
set
cumsum
to
be
0
to
avoid
divide-by-zero
problems
Dtype
cumsum
=
FLT_MIN
;
Dtype
cumvalues
=
0.
;
...
...
@@ -168,10 +159,13 @@ __kernel void TEMPLATE(sto_pool_forward_test,Dtype)(
//
First
pass:
get
sum
for
(
int
h
=
hstart
; h < hend; ++h) {
for
(
int
w
=
wstart
; w < wend; ++w) {
cumsum
+=
bottom_slice[h
*
width
+
w]
;
cumvalues
+=
bottom_slice[h
*
width
+
w]
*
bottom_slice[h
*
width
+
w]
;
Dtype
v
=
bottom_slice[h
*
width
+
w]
;
cumsum
+=
v
;
cumvalues
+=
v
*
v
;
}
}
top_data[index]
=
cumvalues
/
cumsum
;
}
}
#
endif
//
KERNEL_*
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment