Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ec3f22ce
Commit
ec3f22ce
authored
Dec 19, 2013
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
renamed OpenCL kernel filename; made some final changes
parent
b7553d4e
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
12 additions
and
337 deletions
+12
-337
cascadedetect.cpp
modules/objdetect/src/cascadedetect.cpp
+2
-2
cascadedetect.cl
modules/objdetect/src/opencl/cascadedetect.cl
+1
-2
haarobjectdetect_scaled2.cl
modules/objdetect/src/opencl/haarobjectdetect_scaled2.cl
+0
-323
ufacedetect.cpp
samples/cpp/ufacedetect.cpp
+9
-10
No files found.
modules/objdetect/src/cascadedetect.cpp
View file @
ec3f22ce
...
...
@@ -1133,7 +1133,7 @@ bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processin
bool
CascadeClassifierImpl
::
ocl_detectSingleScale
(
InputArray
_image
,
Size
processingRectSize
,
int
yStep
,
double
factor
,
Size
sumSize0
)
{
const
int
VECTOR_SIZE
=
4
;
const
int
VECTOR_SIZE
=
1
;
Ptr
<
HaarEvaluator
>
haar
=
featureEvaluator
.
dynamicCast
<
HaarEvaluator
>
();
if
(
haar
.
empty
()
)
return
false
;
...
...
@@ -1142,7 +1142,7 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce
if
(
cascadeKernel
.
empty
()
)
{
cascadeKernel
.
create
(
"runHaarClassifierStump"
,
ocl
::
objdetect
::
haarobject
detect_oclsrc
,
cascadeKernel
.
create
(
"runHaarClassifierStump"
,
ocl
::
objdetect
::
cascade
detect_oclsrc
,
format
(
"-D VECTOR_SIZE=%d"
,
VECTOR_SIZE
));
if
(
cascadeKernel
.
empty
()
)
return
false
;
...
...
modules/objdetect/src/opencl/
haarobject
detect.cl
→
modules/objdetect/src/opencl/
cascade
detect.cl
View file @
ec3f22ce
...
...
@@ -40,7 +40,7 @@ __kernel void runHaarClassifierStump(
int2
imgsize,
int
xyscale,
float
factor,
int4
normrect,
int2
windowsize,
int
maxFaces
)
{
int
ix
=
get_global_id
(
0
)
*xyscale
*
VECTOR_SIZE
;
int
ix
=
get_global_id
(
0
)
*xyscale
;
int
iy
=
get_global_id
(
1
)
*xyscale
;
sumstep
/=
sizeof
(
int
)
;
sqsumstep
/=
sizeof
(
int
)
;
...
...
@@ -94,7 +94,6 @@ __kernel void runHaarClassifierStump(
if
(
stageIdx
==
nstages
)
{
int
nfaces
=
atomic_inc
(
facepos
)
;
//printf
(
"detected face #d!!!!\n"
,
nfaces
)
;
if
(
nfaces
<
maxFaces
)
{
volatile
__global
int*
face
=
facepos
+
1
+
nfaces*4
;
...
...
modules/objdetect/src/opencl/haarobjectdetect_scaled2.cl
deleted
100644 → 0
View file @
b7553d4e
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Wu
Xinglong,
wxl370@126.com
//
Sen
Liu,
swjtuls1987@126.com
//
Peng
Xiao,
pengxiao@outlook.com
//
Erping
Pang,
erping@multicorewareinc.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
define
CV_HAAR_FEATURE_MAX
3
typedef
int
sumtype
;
typedef
float
sqsumtype
;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarTreeNode
{
int
p[CV_HAAR_FEATURE_MAX][4]
__attribute__
((
aligned
(
64
)))
;
float
weight[CV_HAAR_FEATURE_MAX]
/*__attribute__
((
aligned
(
16
)))
*/
;
float
threshold
/*__attribute__
((
aligned
(
4
)))
*/
;
float
alpha[3]
__attribute__
((
aligned
(
16
)))
;
int
left
__attribute__
((
aligned
(
4
)))
;
int
right
__attribute__
((
aligned
(
4
)))
;
}
GpuHidHaarTreeNode
;
//typedef
struct
__attribute__
((
aligned
(
32
)))
GpuHidHaarClassifier
//{
//
int
count
__attribute__
((
aligned
(
4
)))
;
//
GpuHidHaarTreeNode
*node
__attribute__
((
aligned
(
8
)))
;
//
float
*alpha
__attribute__
((
aligned
(
8
)))
;
//}
//GpuHidHaarClassifier
;
typedef
struct
__attribute__
((
aligned
(
64
)))
GpuHidHaarStageClassifier
{
int
count
__attribute__
((
aligned
(
4
)))
;
float
threshold
__attribute__
((
aligned
(
4
)))
;
int
two_rects
__attribute__
((
aligned
(
4
)))
;
int
reserved0
__attribute__
((
aligned
(
8
)))
;
int
reserved1
__attribute__
((
aligned
(
8
)))
;
int
reserved2
__attribute__
((
aligned
(
8
)))
;
int
reserved3
__attribute__
((
aligned
(
8
)))
;
}
GpuHidHaarStageClassifier
;
//typedef
struct
__attribute__
((
aligned
(
64
)))
GpuHidHaarClassifierCascade
//{
//
int
count
__attribute__
((
aligned
(
4
)))
;
//
int
is_stump_based
__attribute__
((
aligned
(
4
)))
;
//
int
has_tilted_features
__attribute__
((
aligned
(
4
)))
;
//
int
is_tree
__attribute__
((
aligned
(
4
)))
;
//
int
pq0
__attribute__
((
aligned
(
4
)))
;
//
int
pq1
__attribute__
((
aligned
(
4
)))
;
//
int
pq2
__attribute__
((
aligned
(
4
)))
;
//
int
pq3
__attribute__
((
aligned
(
4
)))
;
//
int
p0
__attribute__
((
aligned
(
4
)))
;
//
int
p1
__attribute__
((
aligned
(
4
)))
;
//
int
p2
__attribute__
((
aligned
(
4
)))
;
//
int
p3
__attribute__
((
aligned
(
4
)))
;
//
float
inv_window_area
__attribute__
((
aligned
(
4
)))
;
//}
GpuHidHaarClassifierCascade
;
__kernel
void
gpuRunHaarClassifierCascade_scaled2
(
global
GpuHidHaarStageClassifier
*stagecascadeptr_,
global
int4
*info,
global
GpuHidHaarTreeNode
*nodeptr_,
global
const
int
*restrict
sum,
global
const
float
*restrict
sqsum,
global
int4
*candidate,
const
int
rows,
const
int
cols,
const
int
step,
const
int
loopcount,
const
int
start_stage,
const
int
split_stage,
const
int
end_stage,
const
int
startnode,
global
int4
*p,
global
float
*correction,
const
int
nodecount
)
{
int
grpszx
=
get_local_size
(
0
)
;
int
grpszy
=
get_local_size
(
1
)
;
int
grpnumx
=
get_num_groups
(
0
)
;
int
grpidx
=
get_group_id
(
0
)
;
int
lclidx
=
get_local_id
(
0
)
;
int
lclidy
=
get_local_id
(
1
)
;
int
lcl_id
=
mad24
(
lclidy,
grpszx,
lclidx
)
;
__local
int
glboutindex[1]
;
__local
int
lclcount[1]
;
__local
int
lcloutindex[64]
;
glboutindex[0]
=
0
;
int
outputoff
=
mul24
(
grpidx,
256
)
;
candidate[outputoff
+
(
lcl_id
<<
2
)
]
=
(
int4
)
0
;
candidate[outputoff
+
(
lcl_id
<<
2
)
+
1]
=
(
int4
)
0
;
candidate[outputoff
+
(
lcl_id
<<
2
)
+
2]
=
(
int4
)
0
;
candidate[outputoff
+
(
lcl_id
<<
2
)
+
3]
=
(
int4
)
0
;
int
max_idx
=
rows
*
cols
-
1
;
for
(
int
scalei
=
0
; scalei < loopcount; scalei++)
{
int4
scaleinfo1
=
info[scalei]
;
int
grpnumperline
=
(
scaleinfo1.y
&
0xffff0000
)
>>
16
;
int
totalgrp
=
scaleinfo1.y
&
0xffff
;
float
factor
=
as_float
(
scaleinfo1.w
)
;
float
correction_t
=
correction[scalei]
;
float
ystep
=
max
(
2.0f,
factor
)
;
for
(
int
grploop
=
get_group_id
(
0
)
; grploop < totalgrp; grploop += grpnumx)
{
int4
cascadeinfo
=
p[scalei]
;
int
grpidy
=
grploop
/
grpnumperline
;
int
grpidx
=
grploop
-
mul24
(
grpidy,
grpnumperline
)
;
int
ix
=
mad24
(
grpidx,
grpszx,
lclidx
)
;
int
iy
=
mad24
(
grpidy,
grpszy,
lclidy
)
;
int
x
=
round
(
ix
*
ystep
)
;
int
y
=
round
(
iy
*
ystep
)
;
lcloutindex[lcl_id]
=
0
;
lclcount[0]
=
0
;
int
nodecounter
;
float
mean,
variance_norm_factor
;
//if
((
ix
<
width
)
&&
(
iy
<
height
))
{
const
int
p_offset
=
mad24
(
y,
step,
x
)
;
cascadeinfo.x
+=
p_offset
;
cascadeinfo.z
+=
p_offset
;
mean
=
(
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
)
*
correction_t
;
variance_norm_factor
=
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
;
variance_norm_factor
=
variance_norm_factor
*
correction_t
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=
0.f
?
sqrt
(
variance_norm_factor
)
:
1.f
;
bool
result
=
true
;
nodecounter
=
startnode
+
nodecount
*
scalei
;
for
(
int
stageloop
=
start_stage
; (stageloop < end_stage) && result; stageloop++)
{
float
stage_sum
=
0.f
;
__global
GpuHidHaarStageClassifier*
stageinfo
=
(
__global
GpuHidHaarStageClassifier*
)
(((
__global
uchar*
)
stagecascadeptr_
)
+stageloop*sizeof
(
GpuHidHaarStageClassifier
))
;
int
stagecount
=
stageinfo->count
;
for
(
int
nodeloop
=
0
; nodeloop < stagecount;)
{
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
__global
GpuHidHaarTreeNode*
)
(((
__global
uchar*
)
nodeptr_
)
+
nodecounter
*
sizeof
(
GpuHidHaarTreeNode
))
;
int4
info1
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info2
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4
*
)(
&
(
currentnodeptr->weight[0]
))
;
float3
alpha3
=
*
(
__global
float3*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
info1.x
+=
p_offset
;
info1.z
+=
p_offset
;
info2.x
+=
p_offset
;
info2.z
+=
p_offset
;
info3.x
+=
p_offset
;
info3.z
+=
p_offset
;
float
classsum
=
(
sum[clamp
(
mad24
(
info1.y,
step,
info1.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.y,
step,
info1.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.w,
step,
info1.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info1.w,
step,
info1.z
)
,
0
,
max_idx
)
]
)
*
w.x
;
classsum
+=
(
sum[clamp
(
mad24
(
info2.y,
step,
info2.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.y,
step,
info2.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.w,
step,
info2.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info2.w,
step,
info2.z
)
,
0
,
max_idx
)
]
)
*
w.y
;
classsum
+=
(
sum[clamp
(
mad24
(
info3.y,
step,
info3.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.y,
step,
info3.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.w,
step,
info3.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info3.w,
step,
info3.z
)
,
0
,
max_idx
)
]
)
*
w.z
;
bool
passThres
=
(
classsum
>=
nodethreshold
)
?
1
:
0
;
#
if
STUMP_BASED
stage_sum
+=
passThres
?
alpha3.y
:
alpha3.x
;
nodecounter++
;
nodeloop++
;
#
else
bool
isRootNode
=
(
nodecounter
&
1
)
==
0
;
if
(
isRootNode
)
{
if
(
(
passThres
&&
currentnodeptr->right
)
|
|
(!passThres && currentnodeptr->left))
{
nodecounter ++;
}
else
{
stage_sum += alpha3.x;
nodecounter += 2;
nodeloop ++;
}
}
else
{
stage_sum += (passThres ? alpha3.z : alpha3.y);
nodecounter ++;
nodeloop ++;
}
#endif
}
result = (stage_sum >= stageinfo->threshold) ? 1 : 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (result)
{
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex] = (y << 16) |
x
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
queuecount
=
lclcount[0]
;
if
(
lcl_id
<
queuecount
)
{
int
temp
=
lcloutindex[lcl_id]
;
int
x
=
temp
&
0xffff
;
int
y
=
(
temp
&
(
int
)
0xffff0000
)
>>
16
;
temp
=
atomic_inc
(
glboutindex
)
;
int4
candidate_result
;
candidate_result.zw
=
(
int2
)
convert_int_rte
(
factor
*
20.f
)
;
candidate_result.x
=
x
;
candidate_result.y
=
y
;
int
i
=
outputoff+temp+lcl_id
;
if
(
candidate[i].z
==
0
)
{
candidate[i]
=
candidate_result
;
}
else
{
for
(
i=i+1
;;i++)
{
if
(
candidate[i].z
==
0
)
{
candidate[i]
=
candidate_result
;
break
;
}
}
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
}
}
__kernel
void
gpuscaleclassifier
(
global
GpuHidHaarTreeNode
*orinode,
global
GpuHidHaarTreeNode
*newnode,
float
scale,
float
weight_scale,
const
int
nodenum
)
{
const
int
counter
=
get_global_id
(
0
)
;
int
tr_x[3],
tr_y[3],
tr_h[3],
tr_w[3],
i
=
0
;
GpuHidHaarTreeNode
t1
=
*
(
__global
GpuHidHaarTreeNode*
)
(((
__global
uchar*
)
orinode
)
+
counter
*
sizeof
(
GpuHidHaarTreeNode
))
;
__global
GpuHidHaarTreeNode*
pNew
=
(
__global
GpuHidHaarTreeNode*
)
(((
__global
uchar*
)
newnode
)
+
(
counter
+
nodenum
)
*
sizeof
(
GpuHidHaarTreeNode
))
;
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
{
tr_x[i]
=
(
int
)(
t1.p[i][0]
*
scale
+
0.5f
)
;
tr_y[i]
=
(
int
)(
t1.p[i][1]
*
scale
+
0.5f
)
;
tr_w[i]
=
(
int
)(
t1.p[i][2]
*
scale
+
0.5f
)
;
tr_h[i]
=
(
int
)(
t1.p[i][3]
*
scale
+
0.5f
)
;
}
t1.weight[0]
=
-
(
t1.weight[1]
*
tr_h[1]
*
tr_w[1]
+
t1.weight[2]
*
tr_h[2]
*
tr_w[2]
)
/
(
tr_h[0]
*
tr_w[0]
)
;
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
{
pNew->p[i][0]
=
tr_x[i]
;
pNew->p[i][1]
=
tr_y[i]
;
pNew->p[i][2]
=
tr_x[i]
+
tr_w[i]
;
pNew->p[i][3]
=
tr_y[i]
+
tr_h[i]
;
pNew->weight[i]
=
t1.weight[i]
*
weight_scale
;
}
pNew->left
=
t1.left
;
pNew->right
=
t1.right
;
pNew->threshold
=
t1.threshold
;
pNew->alpha[0]
=
t1.alpha[0]
;
pNew->alpha[1]
=
t1.alpha[1]
;
pNew->alpha[2]
=
t1.alpha[2]
;
}
samples/cpp/ufacedetect.cpp
View file @
ec3f22ce
...
...
@@ -201,13 +201,12 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
t
=
(
double
)
getTickCount
();
cvtColor
(
img
,
gray
,
COLOR_BGR2GRAY
);
resize
(
gray
,
smallImg
,
Size
(),
scale0
,
scale0
,
INTER_LINEAR
);
cvtColor
(
smallImg
,
canvas
,
COLOR_GRAY2BGR
);
equalizeHist
(
smallImg
,
smallImg
);
resize
(
img
,
smallImg
,
Size
(),
scale0
,
scale0
,
INTER_LINEAR
);
cvtColor
(
smallImg
,
gray
,
COLOR_BGR2GRAY
);
equalizeHist
(
gray
,
gray
);
cascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
2
,
0
cascade
.
detectMultiScale
(
gray
,
faces
,
1.1
,
3
,
0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
|
CASCADE_SCALE_IMAGE
...
...
@@ -215,8 +214,8 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
Size
(
30
,
30
)
);
if
(
tryflip
)
{
flip
(
smallImg
,
smallImg
,
1
);
cascade
.
detectMultiScale
(
smallImg
,
faces2
,
flip
(
gray
,
gray
,
1
);
cascade
.
detectMultiScale
(
gray
,
faces2
,
1.1
,
2
,
0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
...
...
@@ -229,7 +228,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
}
}
t
=
(
double
)
getTickCount
()
-
t
;
cvtColor
(
smallImg
,
canvas
,
COLOR_GRAY2BGR
);
smallImg
.
copyTo
(
canvas
);
double
fps
=
getTickFrequency
()
/
t
;
...
...
@@ -257,7 +256,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
color
,
3
,
8
,
0
);
if
(
nestedCascade
.
empty
()
)
continue
;
UMat
smallImgROI
=
smallImg
(
*
r
);
UMat
smallImgROI
=
gray
(
*
r
);
nestedCascade
.
detectMultiScale
(
smallImgROI
,
nestedObjects
,
1.1
,
2
,
0
//|CASCADE_FIND_BIGGEST_OBJECT
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment