Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
060d6751
Commit
060d6751
authored
Dec 22, 2014
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3518 from wangyan42164:ocl_cascade_detect
parents
199f1aec
efa84d82
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
36 deletions
+27
-36
cascadedetect.cpp
modules/objdetect/src/cascadedetect.cpp
+11
-13
cascadedetect.cl
modules/objdetect/src/opencl/cascadedetect.cl
+16
-23
No files found.
modules/objdetect/src/cascadedetect.cpp
View file @
060d6751
...
@@ -1060,6 +1060,7 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
...
@@ -1060,6 +1060,7 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
}
}
int
nstages
=
(
int
)
data
.
stages
.
size
();
int
nstages
=
(
int
)
data
.
stages
.
size
();
int
splitstage_ocl
=
1
;
if
(
featureType
==
FeatureEvaluator
::
HAAR
)
if
(
featureType
==
FeatureEvaluator
::
HAAR
)
{
{
...
@@ -1071,11 +1072,11 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
...
@@ -1071,11 +1072,11 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
{
{
String
opts
;
String
opts
;
if
(
lbufSize
.
area
())
if
(
lbufSize
.
area
())
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d"
,
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d
-D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d
"
,
localsz
.
width
,
localsz
.
height
,
lbufSize
.
area
(),
lbufSize
.
width
,
data
.
maxNodesPerTree
);
localsz
.
width
,
localsz
.
height
,
lbufSize
.
area
(),
lbufSize
.
width
,
data
.
maxNodesPerTree
,
splitstage_ocl
,
nstages
,
MAX_FACES
);
else
else
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d"
,
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d
-D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d
"
,
localsz
.
width
,
localsz
.
height
,
data
.
maxNodesPerTree
);
localsz
.
width
,
localsz
.
height
,
data
.
maxNodesPerTree
,
splitstage_ocl
,
nstages
,
MAX_FACES
);
haarKernel
.
create
(
"runHaarClassifier"
,
ocl
::
objdetect
::
cascadedetect_oclsrc
,
opts
);
haarKernel
.
create
(
"runHaarClassifier"
,
ocl
::
objdetect
::
cascadedetect_oclsrc
,
opts
);
if
(
haarKernel
.
empty
()
)
if
(
haarKernel
.
empty
()
)
return
false
;
return
false
;
...
@@ -1083,7 +1084,6 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
...
@@ -1083,7 +1084,6 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
Rect
normrect
=
haar
->
getNormRect
();
Rect
normrect
=
haar
->
getNormRect
();
int
sqofs
=
haar
->
getSquaresOffset
();
int
sqofs
=
haar
->
getSquaresOffset
();
int
splitstage_ocl
=
1
;
haarKernel
.
args
((
int
)
scales
.
size
(),
haarKernel
.
args
((
int
)
scales
.
size
(),
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
0
]),
// scaleData
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
0
]),
// scaleData
...
@@ -1091,13 +1091,12 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
...
@@ -1091,13 +1091,12 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
2
]),
// optfeatures
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
2
]),
// optfeatures
// cascade classifier
// cascade classifier
splitstage_ocl
,
nstages
,
ocl
::
KernelArg
::
PtrReadOnly
(
ustages
),
ocl
::
KernelArg
::
PtrReadOnly
(
ustages
),
ocl
::
KernelArg
::
PtrReadOnly
(
unodes
),
ocl
::
KernelArg
::
PtrReadOnly
(
unodes
),
ocl
::
KernelArg
::
PtrReadOnly
(
uleaves
),
ocl
::
KernelArg
::
PtrReadOnly
(
uleaves
),
ocl
::
KernelArg
::
PtrWriteOnly
(
ufacepos
),
// positions
ocl
::
KernelArg
::
PtrWriteOnly
(
ufacepos
),
// positions
normrect
,
sqofs
,
data
.
origWinSize
,
(
int
)
MAX_FACES
);
normrect
,
sqofs
,
data
.
origWinSize
);
ok
=
haarKernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
ok
=
haarKernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
}
}
else
if
(
featureType
==
FeatureEvaluator
::
LBP
)
else
if
(
featureType
==
FeatureEvaluator
::
LBP
)
...
@@ -1113,16 +1112,16 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
...
@@ -1113,16 +1112,16 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
{
{
String
opts
;
String
opts
;
if
(
lbufSize
.
area
())
if
(
lbufSize
.
area
())
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d"
,
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d
-D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d
"
,
localsz
.
width
,
localsz
.
height
,
lbufSize
.
area
(),
lbufSize
.
width
);
localsz
.
width
,
localsz
.
height
,
lbufSize
.
area
(),
lbufSize
.
width
,
splitstage_ocl
,
nstages
,
MAX_FACES
);
else
else
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d"
,
localsz
.
width
,
localsz
.
height
);
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d"
,
localsz
.
width
,
localsz
.
height
,
splitstage_ocl
,
nstages
,
MAX_FACES
);
lbpKernel
.
create
(
"runLBPClassifierStumpSimple"
,
ocl
::
objdetect
::
cascadedetect_oclsrc
,
opts
);
lbpKernel
.
create
(
"runLBPClassifierStumpSimple"
,
ocl
::
objdetect
::
cascadedetect_oclsrc
,
opts
);
if
(
lbpKernel
.
empty
()
)
if
(
lbpKernel
.
empty
()
)
return
false
;
return
false
;
}
}
int
splitstage_ocl
=
1
;
int
subsetSize
=
(
data
.
ncategories
+
31
)
/
32
;
int
subsetSize
=
(
data
.
ncategories
+
31
)
/
32
;
lbpKernel
.
args
((
int
)
scales
.
size
(),
lbpKernel
.
args
((
int
)
scales
.
size
(),
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
0
]),
// scaleData
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
0
]),
// scaleData
...
@@ -1130,14 +1129,13 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
...
@@ -1130,14 +1129,13 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
2
]),
// optfeatures
ocl
::
KernelArg
::
PtrReadOnly
(
bufs
[
2
]),
// optfeatures
// cascade classifier
// cascade classifier
splitstage_ocl
,
nstages
,
ocl
::
KernelArg
::
PtrReadOnly
(
ustages
),
ocl
::
KernelArg
::
PtrReadOnly
(
ustages
),
ocl
::
KernelArg
::
PtrReadOnly
(
unodes
),
ocl
::
KernelArg
::
PtrReadOnly
(
unodes
),
ocl
::
KernelArg
::
PtrReadOnly
(
usubsets
),
ocl
::
KernelArg
::
PtrReadOnly
(
usubsets
),
subsetSize
,
subsetSize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
ufacepos
),
// positions
ocl
::
KernelArg
::
PtrWriteOnly
(
ufacepos
),
// positions
data
.
origWinSize
,
(
int
)
MAX_FACES
);
data
.
origWinSize
);
ok
=
lbpKernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
ok
=
lbpKernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
}
}
...
...
modules/objdetect/src/opencl/cascadedetect.cl
View file @
060d6751
...
@@ -70,14 +70,12 @@ void runHaarClassifier(
...
@@ -70,14 +70,12 @@ void runHaarClassifier(
__global const int* sum,
__global const int* sum,
int _sumstep, int sumoffset,
int _sumstep, int sumoffset,
__global const OptHaarFeature* optfeatures,
__global const OptHaarFeature* optfeatures,
int splitstage, int nstages,
__global const Stage* stages,
__global const Stage* stages,
__global const Node* nodes,
__global const Node* nodes,
__global const float* leaves0,
__global const float* leaves0,
volatile __global int* facepos,
volatile __global int* facepos,
int4 normrect, int sqofs, int2 windowsize
, int maxFaces
)
int4 normrect, int sqofs, int2 windowsize)
{
{
int lx = get_local_id(0);
int lx = get_local_id(0);
int ly = get_local_id(1);
int ly = get_local_id(1);
...
@@ -165,7 +163,7 @@ void runHaarClassifier(
...
@@ -165,7 +163,7 @@ void runHaarClassifier(
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
nf = nf > 0 ? nf : 1.f;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx <
splitstage
; stageIdx++ )
for( stageIdx = 0; stageIdx <
SPLIT_STAGE
; stageIdx++ )
{
{
int ntrees = stages[stageIdx].ntrees;
int ntrees = stages[stageIdx].ntrees;
float s = 0.f;
float s = 0.f;
...
@@ -221,7 +219,7 @@ void runHaarClassifier(
...
@@ -221,7 +219,7 @@ void runHaarClassifier(
break;
break;
}
}
if( stageIdx ==
splitstage
&& (ystep == 1 || ((ix | iy) & 1) == 0) )
if( stageIdx ==
SPLIT_STAGE
&& (ystep == 1 || ((ix | iy) & 1) == 0) )
{
{
int count = atomic_inc(lcount);
int count = atomic_inc(lcount);
lbuf[count] = (int)(ix | (iy << 8));
lbuf[count] = (int)(ix | (iy << 8));
...
@@ -229,7 +227,7 @@ void runHaarClassifier(
...
@@ -229,7 +227,7 @@ void runHaarClassifier(
}
}
}
}
for( stageIdx =
splitstage; stageIdx < nstages
; stageIdx++ )
for( stageIdx =
SPLIT_STAGE; stageIdx < N_STAGES
; stageIdx++ )
{
{
int nrects = lcount[0];
int nrects = lcount[0];
...
@@ -335,13 +333,13 @@ void runHaarClassifier(
...
@@ -335,13 +333,13 @@ void runHaarClassifier(
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if( stageIdx ==
nstages
)
if( stageIdx ==
N_STAGES
)
{
{
int nrects = lcount[0];
int nrects = lcount[0];
if( lidx < nrects )
if( lidx < nrects )
{
{
int nfaces = atomic_inc(facepos);
int nfaces = atomic_inc(facepos);
if( nfaces <
maxFaces
)
if( nfaces <
MAX_FACES
)
{
{
volatile __global int* face = facepos + 1 + nfaces*3;
volatile __global int* face = facepos + 1 + nfaces*3;
int val = lbuf[lidx];
int val = lbuf[lidx];
...
@@ -364,15 +362,13 @@ __kernel void runLBPClassifierStumpSimple(
...
@@ -364,15 +362,13 @@ __kernel void runLBPClassifierStumpSimple(
__global const int* sum,
__global const int* sum,
int _sumstep, int sumoffset,
int _sumstep, int sumoffset,
__global const OptLBPFeature* optfeatures,
__global const OptLBPFeature* optfeatures,
int splitstage, int nstages,
__global const Stage* stages,
__global const Stage* stages,
__global const Stump* stumps,
__global const Stump* stumps,
__global const int* bitsets,
__global const int* bitsets,
int bitsetSize,
int bitsetSize,
volatile __global int* facepos,
volatile __global int* facepos,
int2 windowsize
, int maxFaces
)
int2 windowsize)
{
{
int lx = get_local_id(0);
int lx = get_local_id(0);
int ly = get_local_id(1);
int ly = get_local_id(1);
...
@@ -381,7 +377,6 @@ __kernel void runLBPClassifierStumpSimple(
...
@@ -381,7 +377,6 @@ __kernel void runLBPClassifierStumpSimple(
int groupIdx = get_group_id(1)*get_num_groups(0) + get_group_id(0);
int groupIdx = get_group_id(1)*get_num_groups(0) + get_group_id(0);
int ngroups = get_num_groups(0)*get_num_groups(1);
int ngroups = get_num_groups(0)*get_num_groups(1);
int scaleIdx, tileIdx, stageIdx;
int scaleIdx, tileIdx, stageIdx;
int startStage = 0, endStage = nstages;
int sumstep = (int)(_sumstep/sizeof(int));
int sumstep = (int)(_sumstep/sizeof(int));
for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
...
@@ -404,7 +399,7 @@ __kernel void runLBPClassifierStumpSimple(
...
@@ -404,7 +399,7 @@ __kernel void runLBPClassifierStumpSimple(
__global const Stump* stump = stumps;
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
__global const int* bitset = bitsets;
for( stageIdx = 0; stageIdx <
endStage
; stageIdx++ )
for( stageIdx = 0; stageIdx <
N_STAGES
; stageIdx++ )
{
{
int i, ntrees = stages[stageIdx].ntrees;
int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f;
float s = 0.f;
...
@@ -433,10 +428,10 @@ __kernel void runLBPClassifierStumpSimple(
...
@@ -433,10 +428,10 @@ __kernel void runLBPClassifierStumpSimple(
break;
break;
}
}
if( stageIdx ==
nstages
)
if( stageIdx ==
N_STAGES
)
{
{
int nfaces = atomic_inc(facepos);
int nfaces = atomic_inc(facepos);
if( nfaces <
maxFaces
)
if( nfaces <
MAX_FACES
)
{
{
volatile __global int* face = facepos + 1 + nfaces*3;
volatile __global int* face = facepos + 1 + nfaces*3;
face[0] = scaleIdx;
face[0] = scaleIdx;
...
@@ -455,15 +450,13 @@ void runLBPClassifierStump(
...
@@ -455,15 +450,13 @@ void runLBPClassifierStump(
__global const int* sum,
__global const int* sum,
int _sumstep, int sumoffset,
int _sumstep, int sumoffset,
__global const OptLBPFeature* optfeatures,
__global const OptLBPFeature* optfeatures,
int splitstage, int nstages,
__global const Stage* stages,
__global const Stage* stages,
__global const Stump* stumps,
__global const Stump* stumps,
__global const int* bitsets,
__global const int* bitsets,
int bitsetSize,
int bitsetSize,
volatile __global int* facepos,
volatile __global int* facepos,
int2 windowsize
, int maxFaces
)
int2 windowsize)
{
{
int lx = get_local_id(0);
int lx = get_local_id(0);
int ly = get_local_id(1);
int ly = get_local_id(1);
...
@@ -525,7 +518,7 @@ void runLBPClassifierStump(
...
@@ -525,7 +518,7 @@ void runLBPClassifierStump(
__global const int* p = psum0 + mad24(iy, sumstep, ix);
__global const int* p = psum0 + mad24(iy, sumstep, ix);
#endif
#endif
for( stageIdx = 0; stageIdx <
splitstage
; stageIdx++ )
for( stageIdx = 0; stageIdx <
SPLIT_STAGE
; stageIdx++ )
{
{
int ntrees = stages[stageIdx].ntrees;
int ntrees = stages[stageIdx].ntrees;
float s = 0.f;
float s = 0.f;
...
@@ -554,14 +547,14 @@ void runLBPClassifierStump(
...
@@ -554,14 +547,14 @@ void runLBPClassifierStump(
break;
break;
}
}
if( stageIdx ==
splitstage
&& (ystep == 1 || ((ix | iy) & 1) == 0) )
if( stageIdx ==
SPLIT_STAGE
&& (ystep == 1 || ((ix | iy) & 1) == 0) )
{
{
int count = atomic_inc(lcount);
int count = atomic_inc(lcount);
lbuf[count] = (int)(ix | (iy << 8));
lbuf[count] = (int)(ix | (iy << 8));
}
}
}
}
for( stageIdx =
splitstage; stageIdx < nstages
; stageIdx++ )
for( stageIdx =
SPLIT_STAGE; stageIdx < N_STAGES
; stageIdx++ )
{
{
int nrects = lcount[0];
int nrects = lcount[0];
...
@@ -639,13 +632,13 @@ void runLBPClassifierStump(
...
@@ -639,13 +632,13 @@ void runLBPClassifierStump(
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if( stageIdx ==
nstages
)
if( stageIdx ==
N_STAGES
)
{
{
int nrects = lcount[0];
int nrects = lcount[0];
if( lidx < nrects )
if( lidx < nrects )
{
{
int nfaces = atomic_inc(facepos);
int nfaces = atomic_inc(facepos);
if( nfaces <
maxFaces
)
if( nfaces <
MAX_FACES
)
{
{
volatile __global int* face = facepos + 1 + nfaces*3;
volatile __global int* face = facepos + 1 + nfaces*3;
int val = lbuf[lidx];
int val = lbuf[lidx];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment