Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
15409105
Commit
15409105
authored
Dec 20, 2013
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
started adding OpenCL acceleration of LBP-based object detectors
parent
b4bd5bab
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
139 additions
and
110 deletions
+139
-110
cascadedetect.cpp
modules/objdetect/src/cascadedetect.cpp
+49
-20
cascadedetect.hpp
modules/objdetect/src/cascadedetect.hpp
+45
-37
cascadedetect.cl
modules/objdetect/src/opencl/cascadedetect.cl
+45
-53
No files found.
modules/objdetect/src/cascadedetect.cpp
View file @
15409105
...
...
@@ -654,6 +654,7 @@ bool LBPEvaluator::Feature :: read(const FileNode& node )
LBPEvaluator
::
LBPEvaluator
()
{
features
=
makePtr
<
std
::
vector
<
Feature
>
>
();
optfeatures
=
makePtr
<
std
::
vector
<
OptFeature
>
>
();
}
LBPEvaluator
::~
LBPEvaluator
()
{
...
...
@@ -662,11 +663,12 @@ LBPEvaluator::~LBPEvaluator()
bool
LBPEvaluator
::
read
(
const
FileNode
&
node
)
{
features
->
resize
(
node
.
size
());
featuresPtr
=
&
(
*
features
)[
0
];
optfeaturesPtr
=
&
(
*
opt
features
)[
0
];
FileNodeIterator
it
=
node
.
begin
(),
it_end
=
node
.
end
();
std
::
vector
<
Feature
>&
ff
=
*
features
;
for
(
int
i
=
0
;
it
!=
it_end
;
++
it
,
i
++
)
{
if
(
!
f
eaturesPtr
[
i
].
read
(
*
it
))
if
(
!
f
f
[
i
].
read
(
*
it
))
return
false
;
}
return
true
;
...
...
@@ -677,31 +679,58 @@ Ptr<FeatureEvaluator> LBPEvaluator::clone() const
Ptr
<
LBPEvaluator
>
ret
=
makePtr
<
LBPEvaluator
>
();
ret
->
origWinSize
=
origWinSize
;
ret
->
features
=
features
;
ret
->
featuresPtr
=
&
(
*
ret
->
features
)[
0
];
ret
->
optfeatures
=
optfeatures
;
ret
->
optfeaturesPtr
=
ret
->
optfeatures
.
empty
()
?
0
:
&
(
*
ret
->
optfeatures
)[
0
];
ret
->
sum0
=
sum0
,
ret
->
sum
=
sum
;
ret
->
normrect
=
normrect
;
ret
->
offset
=
offset
;
ret
->
pwin
=
pwin
;
return
ret
;
}
bool
LBPEvaluator
::
setImage
(
InputArray
_image
,
Size
_origWinSize
,
Size
)
bool
LBPEvaluator
::
setImage
(
InputArray
_image
,
Size
_origWinSize
,
Size
_sumSize
)
{
Mat
image
=
_image
.
getMat
();
int
rn
=
image
.
rows
+
1
,
cn
=
image
.
cols
+
1
;
origWinSize
=
_origWinSize
;
if
(
image
.
cols
<
origWinSize
.
width
||
image
.
rows
<
origWinSize
.
height
)
Size
imgsz
=
_image
.
size
();
int
cols
=
imgsz
.
width
,
rows
=
imgsz
.
height
;
if
(
imgsz
.
width
<
origWinSize
.
width
||
imgsz
.
height
<
origWinSize
.
height
)
return
false
;
if
(
sum0
.
rows
<
rn
||
sum0
.
cols
<
cn
)
origWinSize
=
_origWinSize
;
int
rn
=
_sumSize
.
height
,
cn
=
_sumSize
.
width
;
int
sumStep
;
CV_Assert
(
rn
>=
rows
+
1
&&
cn
>=
cols
+
1
);
if
(
_image
.
isUMat
()
)
{
usum0
.
create
(
rn
,
cn
,
CV_32S
);
usum
=
UMat
(
usum0
,
Rect
(
0
,
0
,
cols
+
1
,
rows
+
1
));
integral
(
_image
,
usum
,
noArray
(),
noArray
(),
CV_32S
);
sumStep
=
(
int
)(
usum
.
step
/
usum
.
elemSize
());
}
else
{
sum0
.
create
(
rn
,
cn
,
CV_32S
);
sum
=
Mat
(
rn
,
cn
,
CV_32S
,
sum0
.
data
);
integral
(
image
,
sum
);
sum
=
sum0
(
Rect
(
0
,
0
,
cols
+
1
,
rows
+
1
));
integral
(
_image
,
sum
,
noArray
(),
noArray
(),
CV_32S
);
sumStep
=
(
int
)(
sum
.
step
/
sum
.
elemSize
());
}
size_t
fi
,
nfeatures
=
features
->
size
();
for
(
fi
=
0
;
fi
<
nfeatures
;
fi
++
)
featuresPtr
[
fi
].
updatePtrs
(
sum
);
const
std
::
vector
<
Feature
>&
ff
=
*
features
;
if
(
sumSize0
!=
_sumSize
)
{
optfeatures
->
resize
(
nfeatures
);
optfeaturesPtr
=
&
(
*
optfeatures
)[
0
];
for
(
fi
=
0
;
fi
<
nfeatures
;
fi
++
)
optfeaturesPtr
[
fi
].
setOffsets
(
ff
[
fi
],
sumStep
);
}
if
(
_image
.
isUMat
()
&&
(
sumSize0
!=
_sumSize
||
ufbuf
.
empty
())
)
copyVectorToUMat
(
*
optfeatures
,
ufbuf
);
sumSize0
=
_sumSize
;
return
true
;
}
...
...
@@ -711,7 +740,7 @@ bool LBPEvaluator::setWindow( Point pt )
pt
.
x
+
origWinSize
.
width
>=
sum
.
cols
||
pt
.
y
+
origWinSize
.
height
>=
sum
.
rows
)
return
false
;
offset
=
pt
.
y
*
((
int
)
sum
.
step
/
sizeof
(
int
))
+
pt
.
x
;
pwin
=
&
sum
.
at
<
int
>
(
pt
)
;
return
true
;
}
...
...
modules/objdetect/src/cascadedetect.hpp
View file @
15409105
...
...
@@ -250,13 +250,11 @@ public:
struct
Feature
{
Feature
();
bool
read
(
const
FileNode
&
node
);
bool
tilted
;
enum
{
RECT_NUM
=
3
};
struct
{
Rect
r
;
...
...
@@ -369,14 +367,20 @@ public:
{
Feature
();
Feature
(
int
x
,
int
y
,
int
_block_w
,
int
_block_h
)
:
rect
(
x
,
y
,
_block_w
,
_block_h
)
{}
rect
(
x
,
y
,
_block_w
,
_block_h
)
{}
int
calc
(
int
offset
)
const
;
void
updatePtrs
(
const
Mat
&
sum
);
bool
read
(
const
FileNode
&
node
);
Rect
rect
;
// weight and height for block
const
int
*
p
[
16
];
// fast
};
struct
OptFeature
{
OptFeature
();
int
calc
(
const
int
*
pwin
)
const
;
void
setOffsets
(
const
Feature
&
_f
,
int
step
);
int
ofs
[
16
];
};
LBPEvaluator
();
...
...
@@ -390,53 +394,57 @@ public:
virtual
bool
setWindow
(
Point
pt
);
int
operator
()(
int
featureIdx
)
const
{
return
featuresPtr
[
featureIdx
].
calc
(
offset
);
}
{
return
optfeaturesPtr
[
featureIdx
].
calc
(
pwin
);
}
virtual
int
calcCat
(
int
featureIdx
)
const
{
return
(
*
this
)(
featureIdx
);
}
protected
:
Size
origWinSize
;
Size
origWinSize
,
sumSize0
;
Ptr
<
std
::
vector
<
Feature
>
>
features
;
Feature
*
featuresPtr
;
// optimization
Ptr
<
std
::
vector
<
OptFeature
>
>
optfeatures
;
OptFeature
*
optfeaturesPtr
;
// optimization
Mat
sum0
,
sum
;
Rect
normrect
;
int
offset
;
UMat
usum0
,
usum
,
ufbuf
;
const
int
*
pwin
;
};
inline
LBPEvaluator
::
Feature
::
Feature
()
{
rect
=
Rect
();
}
inline
LBPEvaluator
::
OptFeature
::
OptFeature
()
{
for
(
int
i
=
0
;
i
<
16
;
i
++
)
p
[
i
]
=
0
;
ofs
[
i
]
=
0
;
}
inline
int
LBPEvaluator
::
Feature
::
calc
(
int
_offset
)
const
inline
int
LBPEvaluator
::
OptFeature
::
calc
(
const
int
*
p
)
const
{
int
cval
=
CALC_SUM_
(
p
[
5
],
p
[
6
],
p
[
9
],
p
[
10
],
_offset
);
return
(
CALC_SUM_
(
p
[
0
],
p
[
1
],
p
[
4
],
p
[
5
],
_offset
)
>=
cval
?
128
:
0
)
|
// 0
(
CALC_SUM_
(
p
[
1
],
p
[
2
],
p
[
5
],
p
[
6
],
_offset
)
>=
cval
?
64
:
0
)
|
// 1
(
CALC_SUM_
(
p
[
2
],
p
[
3
],
p
[
6
],
p
[
7
],
_offset
)
>=
cval
?
32
:
0
)
|
// 2
(
CALC_SUM_
(
p
[
6
],
p
[
7
],
p
[
10
],
p
[
11
],
_offset
)
>=
cval
?
16
:
0
)
|
// 5
(
CALC_SUM_
(
p
[
10
],
p
[
11
],
p
[
14
],
p
[
15
],
_offset
)
>=
cval
?
8
:
0
)
|
// 8
(
CALC_SUM_
(
p
[
9
],
p
[
10
],
p
[
13
],
p
[
14
],
_offset
)
>=
cval
?
4
:
0
)
|
// 7
(
CALC_SUM_
(
p
[
8
],
p
[
9
],
p
[
12
],
p
[
13
],
_offset
)
>=
cval
?
2
:
0
)
|
// 6
(
CALC_SUM_
(
p
[
4
],
p
[
5
],
p
[
8
],
p
[
9
],
_offset
)
>=
cval
?
1
:
0
);
int
cval
=
CALC_SUM_
OFS_
(
ofs
[
5
],
ofs
[
6
],
ofs
[
9
],
ofs
[
10
],
p
);
return
(
CALC_SUM_
OFS_
(
ofs
[
0
],
ofs
[
1
],
ofs
[
4
],
ofs
[
5
],
p
)
>=
cval
?
128
:
0
)
|
// 0
(
CALC_SUM_
OFS_
(
ofs
[
1
],
ofs
[
2
],
ofs
[
5
],
ofs
[
6
],
p
)
>=
cval
?
64
:
0
)
|
// 1
(
CALC_SUM_
OFS_
(
ofs
[
2
],
ofs
[
3
],
ofs
[
6
],
ofs
[
7
],
p
)
>=
cval
?
32
:
0
)
|
// 2
(
CALC_SUM_
OFS_
(
ofs
[
6
],
ofs
[
7
],
ofs
[
10
],
ofs
[
11
],
p
)
>=
cval
?
16
:
0
)
|
// 5
(
CALC_SUM_
OFS_
(
ofs
[
10
],
ofs
[
11
],
ofs
[
14
],
ofs
[
15
],
p
)
>=
cval
?
8
:
0
)
|
// 8
(
CALC_SUM_
OFS_
(
ofs
[
9
],
ofs
[
10
],
ofs
[
13
],
ofs
[
14
],
p
)
>=
cval
?
4
:
0
)
|
// 7
(
CALC_SUM_
OFS_
(
ofs
[
8
],
ofs
[
9
],
ofs
[
12
],
ofs
[
13
],
p
)
>=
cval
?
2
:
0
)
|
// 6
(
CALC_SUM_
OFS_
(
ofs
[
4
],
ofs
[
5
],
ofs
[
8
],
ofs
[
9
],
p
)
>=
cval
?
1
:
0
);
}
inline
void
LBPEvaluator
::
Feature
::
updatePtrs
(
const
Mat
&
_sum
)
inline
void
LBPEvaluator
::
OptFeature
::
setOffsets
(
const
Feature
&
_f
,
int
step
)
{
const
int
*
ptr
=
(
const
int
*
)
_sum
.
data
;
size_t
step
=
_sum
.
step
/
sizeof
(
ptr
[
0
]);
Rect
tr
=
rect
;
CV_SUM_PTRS
(
p
[
0
],
p
[
1
],
p
[
4
],
p
[
5
],
ptr
,
tr
,
step
);
tr
.
x
+=
2
*
rect
.
width
;
CV_SUM_PTRS
(
p
[
2
],
p
[
3
],
p
[
6
],
p
[
7
],
ptr
,
tr
,
step
);
tr
.
y
+=
2
*
rect
.
height
;
CV_SUM_PTRS
(
p
[
10
],
p
[
11
],
p
[
14
],
p
[
15
],
ptr
,
tr
,
step
);
tr
.
x
-=
2
*
rect
.
width
;
CV_SUM_PTRS
(
p
[
8
],
p
[
9
],
p
[
12
],
p
[
13
],
ptr
,
tr
,
step
);
Rect
tr
=
_f
.
rect
;
CV_SUM_OFS
(
ofs
[
0
],
ofs
[
1
],
ofs
[
4
],
ofs
[
5
],
0
,
tr
,
step
);
tr
.
x
+=
2
*
_f
.
rect
.
width
;
CV_SUM_OFS
(
ofs
[
2
],
ofs
[
3
],
ofs
[
6
],
ofs
[
7
],
0
,
tr
,
step
);
tr
.
y
+=
2
*
_f
.
rect
.
height
;
CV_SUM_OFS
(
ofs
[
10
],
ofs
[
11
],
ofs
[
14
],
ofs
[
15
],
0
,
tr
,
step
);
tr
.
x
-=
2
*
_f
.
rect
.
width
;
CV_SUM_OFS
(
ofs
[
8
],
ofs
[
9
],
ofs
[
12
],
ofs
[
13
],
0
,
tr
,
step
);
}
//---------------------------------------------- HOGEvaluator -------------------------------------------
...
...
modules/objdetect/src/opencl/cascadedetect.cl
View file @
15409105
/////////////////////////////
OpenCL
kernels
for
face
detection
//////////////////////////////
//////////////////////////////
see
the
opencv/doc/license.txt
///////////////////////////////
typedef
struct
__attribute__
((
aligned
(
4
)))
OptFeature
typedef
struct
__attribute__
((
aligned
(
4
)))
Opt
Haar
Feature
{
int4
ofs[3]
__attribute__
((
aligned
(
4
)))
;
float4
weight
__attribute__
((
aligned
(
4
)))
;
}
OptFeature
;
OptHaarFeature
;
typedef
struct
__attribute__
((
aligned
(
4
)))
OptLBPFeature
{
int16
ofs
__attribute__
((
aligned
(
4
)))
;
}
OptLBPFeature
;
typedef
struct
__attribute__
((
aligned
(
4
)))
Stump
{
int
featureIdx
__attribute__
((
aligned
(
4
)))
;
float
threshold
__attribute__
((
aligned
(
4
)))
; // for ordered features only
float
left
__attribute__
((
aligned
(
4
)))
;
float
right
__attribute__
((
aligned
(
4
)))
;
float4
st
__attribute__
((
aligned
(
4
)))
;
}
Stump
;
...
...
@@ -30,7 +33,7 @@ __kernel void runHaarClassifierStump(
int
sumstep,
int
sumoffset,
__global
const
int*
sqsum,
int
sqsumstep,
int
sqsumoffset,
__global
const
OptFeature*
optfeatures,
__global
const
Opt
Haar
Feature*
optfeatures,
int
nstages,
__global
const
Stage*
stages,
...
...
@@ -47,11 +50,8 @@ __kernel void runHaarClassifierStump(
if
(
ix
<
imgsize.x
&&
iy
<
imgsize.y
)
{
int
ntrees
;
int
stageIdx,
i
;
float
s
=
0.f
;
int
stageIdx
;
__global
const
Stump*
stump
=
stumps
;
__global
const
OptFeature*
f
;
__global
const
int*
psum
=
sum
+
mad24
(
iy,
sumstep,
ix
)
;
__global
const
int*
pnsum
=
psum
+
mad24
(
normrect.y,
sumstep,
normrect.x
)
;
...
...
@@ -61,20 +61,19 @@ __kernel void runHaarClassifierStump(
pnsum[mad24
(
normrect.w,
sumstep,
normrect.z
)
]
)
*invarea
;
float
sqval
=
(
sqsum[mad24
(
iy
+
normrect.y,
sqsumstep,
ix
+
normrect.x
)
]
)
*invarea
;
float
nf
=
(
float
)
normarea
*
sqrt
(
max
(
sqval
-
sval
*
sval,
0.f
))
;
float4
weight,
vsval
;
int4
ofs,
ofs0,
ofs1,
ofs2
;
nf
=
nf
>
0
?
nf
:
1.f
;
for
(
stageIdx
=
0
; stageIdx < nstages; stageIdx++ )
{
ntrees
=
stages[stageIdx].ntrees
;
s
=
0.f
;
int
i,
ntrees
=
stages[stageIdx].ntrees
;
float
s
=
0.f
;
for
(
i
=
0
; i < ntrees; i++, stump++ )
{
f
=
optfeatures
+
stump->featureIdx
;
weight
=
f->weight
;
float4
st
=
stump->st
;
__global
const
OptHaarFeature*
f
=
optfeatures
+
as_int
(
st.x
)
;
float4
weight
=
f->weight
;
ofs
=
f->ofs[0]
;
int4
ofs
=
f->ofs[0]
;
sval
=
(
psum[ofs.x]
-
psum[ofs.y]
-
psum[ofs.z]
+
psum[ofs.w]
)
*weight.x
;
ofs
=
f->ofs[1]
;
sval
+=
(
psum[ofs.x]
-
psum[ofs.y]
-
psum[ofs.z]
+
psum[ofs.w]
)
*weight.y
;
...
...
@@ -84,7 +83,7 @@ __kernel void runHaarClassifierStump(
sval
+=
(
psum[ofs.x]
-
psum[ofs.y]
-
psum[ofs.z]
+
psum[ofs.w]
)
*weight.z
;
}
s
+=
(
sval
<
st
ump->threshold*nf
)
?
stump->left
:
stump->right
;
s
+=
(
sval
<
st
.y*nf
)
?
st.z
:
st.w
;
}
if
(
s
<
stages[stageIdx].threshold
)
...
...
@@ -110,9 +109,7 @@ __kernel void runHaarClassifierStump(
__kernel
void
runLBPClassifierStump
(
__global
const
int*
sum,
int
sumstep,
int
sumoffset,
__global
const
int*
sqsum,
int
sqsumstep,
int
sqsumoffset,
__global
const
OptFeature*
optfeatures,
__global
const
OptLBPFeature*
optfeatures,
int
nstages,
__global
const
Stage*
stages,
...
...
@@ -124,50 +121,45 @@ __kernel void runLBPClassifierStump(
int2
imgsize,
int
xyscale,
float
factor,
int4
normrect,
int2
windowsize,
int
maxFaces
)
{
int
ix
=
get_global_id
(
0
)
*xyscale
*
VECTOR_SIZE
;
int
ix
=
get_global_id
(
0
)
*xyscale
;
int
iy
=
get_global_id
(
1
)
*xyscale
;
sumstep
/=
sizeof
(
int
)
;
sqsumstep
/=
sizeof
(
int
)
;
if
(
ix
<
imgsize.x
&&
iy
<
imgsize.y
)
{
int
ntrees
;
int
stageIdx,
i
;
float
s
=
0.f
;
int
stageIdx
;
__global
const
Stump*
stump
=
stumps
;
__global
const
int*
bitset
=
bitsets
;
__global
const
OptFeature*
f
;
__global
const
int*
psum
=
sum
+
mad24
(
iy,
sumstep,
ix
)
;
__global
const
int*
pnsum
=
psum
+
mad24
(
normrect.y,
sumstep,
normrect.x
)
;
int
normarea
=
normrect.z
*
normrect.w
;
float
invarea
=
1.f/normarea
;
float
sval
=
(
pnsum[0]
-
pnsum[normrect.z]
-
pnsum[mul24
(
normrect.w,
sumstep
)
]
+
pnsum[mad24
(
normrect.w,
sumstep,
normrect.z
)
]
)
*invarea
;
float
sqval
=
(
sqsum[mad24
(
iy
+
normrect.y,
sqsumstep,
ix
+
normrect.x
)
]
)
*invarea
;
float
nf
=
(
float
)
normarea
*
sqrt
(
max
(
sqval
-
sval
*
sval,
0.f
))
;
float4
weight
;
int4
ofs
;
nf
=
nf
>
0
?
nf
:
1.f
;
for
(
stageIdx
=
0
; stageIdx < nstages; stageIdx++ )
{
ntrees
=
stages[stageIdx].ntrees
;
s
=
0.f
;
for
(
i
=
0
; i < ntrees; i++, stump++
, bitset += bitsetSize
)
int
i,
ntrees
=
stages[stageIdx].ntrees
;
float
s
=
0.f
;
for
(
i
=
0
; i < ntrees; i++, stump++ )
{
f
=
optfeatures
+
stump->featureIdx
;
weight
=
f->weight
;
//
compute
LBP
feature
to
val
s
+=
(
bitset[val
>>
5]
&
(
1
<<
(
val
&
31
)))
?
stump->left
:
stump->right
;
float4
st
=
stump->st
;
__global
const
OptLBPFeature*
f
=
optfeatures
+
as_int
(
st.x
)
;
int16
ofs
=
f->ofs
;
int4
ofs
=
f->ofs[0]
;
sval
=
(
psum[ofs.x]
-
psum[ofs.y]
-
psum[ofs.z]
+
psum[ofs.w]
)
*weight.x
;
ofs
=
f->ofs[1]
;
sval
+=
(
psum[ofs.x]
-
psum[ofs.y]
-
psum[ofs.z]
+
psum[ofs.w]
)
*weight.y
;
if
(
weight.z
>
0
)
{
ofs
=
f->ofs[2]
;
sval
+=
(
psum[ofs.x]
-
psum[ofs.y]
-
psum[ofs.z]
+
psum[ofs.w]
)
*weight.z
;
}
s
+=
(
sval
<
st.y*nf
)
?
st.z
:
st.w
;
}
if
(
s
<
stages[stageIdx].threshold
)
break
;
}
if
(
stageIdx
==
nstages
)
{
int
nfaces
=
atomic_inc
(
facepos
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment