Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c3f5e737
Commit
c3f5e737
authored
May 30, 2013
by
Roman Donchenko
Committed by
OpenCV Buildbot
May 30, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #930 from pengx17:2.4_haar_ext
parents
0ae40507
fd7ba355
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
291 additions
and
632 deletions
+291
-632
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+1
-1
haar.cpp
modules/ocl/src/haar.cpp
+136
-411
haarobjectdetect.cl
modules/ocl/src/opencl/haarobjectdetect.cl
+71
-181
haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+57
-31
test_haar.cpp
modules/ocl/test/test_haar.cpp
+26
-8
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
c3f5e737
...
...
@@ -817,7 +817,7 @@ namespace cv
OclCascadeClassifierBuf
()
:
m_flags
(
0
),
initialized
(
false
),
m_scaleFactor
(
0
),
buffers
(
NULL
)
{}
~
OclCascadeClassifierBuf
()
{}
~
OclCascadeClassifierBuf
()
{
release
();
}
void
detectMultiScale
(
oclMat
&
image
,
CV_OUT
std
::
vector
<
cv
::
Rect
>&
faces
,
double
scaleFactor
=
1.1
,
int
minNeighbors
=
3
,
int
flags
=
0
,
...
...
modules/ocl/src/haar.cpp
View file @
c3f5e737
...
...
@@ -137,47 +137,22 @@ struct CvHidHaarClassifierCascade
};
typedef
struct
{
//int rows;
//int ystep;
int
width_height
;
//int height;
int
grpnumperline_totalgrp
;
//int totalgrp;
int
imgoff
;
float
factor
;
}
detect_piramid_info
;
#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
#ifdef WIN32
#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
typedef
_ALIGNED_ON
(
128
)
struct
GpuHidHaarFeature
{
_ALIGNED_ON
(
32
)
struct
{
_ALIGNED_ON
(
4
)
int
p0
;
_ALIGNED_ON
(
4
)
int
p1
;
_ALIGNED_ON
(
4
)
int
p2
;
_ALIGNED_ON
(
4
)
int
p3
;
_ALIGNED_ON
(
4
)
float
weight
;
}
/*_ALIGNED_ON(32)*/
rect
[
CV_HAAR_FEATURE_MAX
]
;
}
GpuHidHaarFeature
;
typedef
_ALIGNED_ON
(
128
)
struct
GpuHidHaarTreeNode
{
_ALIGNED_ON
(
64
)
int
p
[
CV_HAAR_FEATURE_MAX
][
4
];
//_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ;
//_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ;
//_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ;
/*_ALIGNED_ON(16)*/
float
weight
[
CV_HAAR_FEATURE_MAX
]
;
/*_ALIGNED_ON(4)*/
float
threshold
;
_ALIGNED_ON
(
8
)
float
alpha
[
2
]
;
_ALIGNED_ON
(
16
)
float
alpha
[
3
]
;
_ALIGNED_ON
(
4
)
int
left
;
_ALIGNED_ON
(
4
)
int
right
;
// GpuHidHaarFeature feature __attribute__((aligned (128)));
}
GpuHidHaarTreeNode
;
...
...
@@ -185,7 +160,6 @@ GpuHidHaarTreeNode;
typedef
_ALIGNED_ON
(
32
)
struct
GpuHidHaarClassifier
{
_ALIGNED_ON
(
4
)
int
count
;
//CvHaarFeature* orig_feature;
_ALIGNED_ON
(
8
)
GpuHidHaarTreeNode
*
node
;
_ALIGNED_ON
(
8
)
float
*
alpha
;
}
...
...
@@ -220,32 +194,16 @@ typedef _ALIGNED_ON(64) struct GpuHidHaarClassifierCascade
_ALIGNED_ON
(
4
)
int
p2
;
_ALIGNED_ON
(
4
)
int
p3
;
_ALIGNED_ON
(
4
)
float
inv_window_area
;
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
}
GpuHidHaarClassifierCascade
;
#else
#define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
typedef
struct
_ALIGNED_ON
(
128
)
GpuHidHaarFeature
{
struct
_ALIGNED_ON
(
32
)
{
int
p0
_ALIGNED_ON
(
4
);
int
p1
_ALIGNED_ON
(
4
);
int
p2
_ALIGNED_ON
(
4
);
int
p3
_ALIGNED_ON
(
4
);
float
weight
_ALIGNED_ON
(
4
);
}
rect
[
CV_HAAR_FEATURE_MAX
]
_ALIGNED_ON
(
32
);
}
GpuHidHaarFeature
;
typedef
struct
_ALIGNED_ON
(
128
)
GpuHidHaarTreeNode
{
int
p
[
CV_HAAR_FEATURE_MAX
][
4
]
_ALIGNED_ON
(
64
);
float
weight
[
CV_HAAR_FEATURE_MAX
];
// _ALIGNED_ON(16);
float
threshold
;
// _ALIGNED_ON(4);
float
alpha
[
2
]
_ALIGNED_ON
(
8
);
float
alpha
[
3
]
_ALIGNED_ON
(
16
);
int
left
_ALIGNED_ON
(
4
);
int
right
_ALIGNED_ON
(
4
);
}
...
...
@@ -288,7 +246,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
int
p2
_ALIGNED_ON
(
4
);
int
p3
_ALIGNED_ON
(
4
);
float
inv_window_area
_ALIGNED_ON
(
4
);
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
}
GpuHidHaarClassifierCascade
;
#endif
...
...
@@ -296,36 +253,6 @@ const int icv_object_win_border = 1;
const
float
icv_stage_threshold_bias
=
0.0001
f
;
double
globaltime
=
0
;
// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count )
// {
// CvHaarClassifierCascade *cascade = 0;
// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier);
// if( stage_count <= 0 )
// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" );
// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size );
// memset( cascade, 0, block_size );
// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1);
// cascade->flags = CV_HAAR_MAGIC_VAL;
// cascade->count = stage_count;
// return cascade;
// }
//static int globalcounter = 0;
// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
// {
// if( _cascade && *_cascade )
// {
// cvFree( _cascade );
// }
// }
/* create more efficient internal representation of haar classifier cascade */
static
GpuHidHaarClassifierCascade
*
gpuCreateHidHaarClassifierCascade
(
CvHaarClassifierCascade
*
cascade
,
int
*
size
,
int
*
totalclassifier
)
{
...
...
@@ -441,24 +368,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
hid_stage_classifier
->
two_rects
=
1
;
haar_classifier_ptr
+=
stage_classifier
->
count
;
/*
hid_stage_classifier->parent = (stage_classifier->parent == -1)
? NULL : stage_classifier_ptr + stage_classifier->parent;
hid_stage_classifier->next = (stage_classifier->next == -1)
? NULL : stage_classifier_ptr + stage_classifier->next;
hid_stage_classifier->child = (stage_classifier->child == -1)
? NULL : stage_classifier_ptr + stage_classifier->child;
out->is_tree |= hid_stage_classifier->next != NULL;
*/
for
(
j
=
0
;
j
<
stage_classifier
->
count
;
j
++
)
{
CvHaarClassifier
*
classifier
=
stage_classifier
->
classifier
+
j
;
GpuHidHaarClassifier
*
hid_classifier
=
hid_stage_classifier
->
classifier
+
j
;
int
node_count
=
classifier
->
count
;
// float* alpha_ptr = (float*)(haar_node_ptr + node_count);
float
*
alpha_ptr
=
&
haar_node_ptr
->
alpha
[
0
];
hid_classifier
->
count
=
node_count
;
...
...
@@ -485,16 +400,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
node
->
p
[
2
][
3
]
=
0
;
node
->
weight
[
2
]
=
0
;
}
// memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) );
else
hid_stage_classifier
->
two_rects
=
0
;
}
memcpy
(
alpha_ptr
,
classifier
->
alpha
,
(
node_count
+
1
)
*
sizeof
(
alpha_ptr
[
0
]));
haar_node_ptr
=
haar_node_ptr
+
1
;
// (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*));
// (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1);
memcpy
(
node
->
alpha
,
classifier
->
alpha
,
(
node_count
+
1
)
*
sizeof
(
alpha_ptr
[
0
]));
haar_node_ptr
=
haar_node_ptr
+
1
;
}
out
->
is_stump_based
&=
node_count
==
1
;
}
}
...
...
@@ -507,25 +418,19 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
#define sum_elem_ptr(sum,row,col) \
((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
#define sqsum_elem_ptr(sqsum,row,col) \
((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
#define calc_sum(rect,offset) \
((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
static
void
gpuSetImagesForHaarClassifierCascade
(
CvHaarClassifierCascade
*
_cascade
,
/* const CvArr* _sum,
const CvArr* _sqsum,
const CvArr* _tilted_sum,*/
double
scale
,
int
step
)
{
// CvMat sum_stub, *sum = (CvMat*)_sum;
// CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
// CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
GpuHidHaarClassifierCascade
*
cascade
;
int
coi0
=
0
,
coi1
=
0
;
int
i
;
...
...
@@ -541,61 +446,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
if
(
scale
<=
0
)
CV_Error
(
CV_StsOutOfRange
,
"Scale must be positive"
);
// sum = cvGetMat( sum, &sum_stub, &coi0 );
// sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
if
(
coi0
||
coi1
)
CV_Error
(
CV_BadCOI
,
"COI is not supported"
);
// if( !CV_ARE_SIZES_EQ( sum, sqsum ))
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
// if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
// CV_MAT_TYPE(sum->type) != CV_32SC1 )
// CV_Error( CV_StsUnsupportedFormat,
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
if
(
!
_cascade
->
hid_cascade
)
gpuCreateHidHaarClassifierCascade
(
_cascade
,
&
datasize
,
&
total
);
cascade
=
(
GpuHidHaarClassifierCascade
*
)
_cascade
->
hid_cascade
;
stage_classifier
=
(
GpuHidHaarStageClassifier
*
)
(
cascade
+
1
);
if
(
cascade
->
has_tilted_features
)
{
// tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
// if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
// CV_Error( CV_StsUnsupportedFormat,
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
// if( sum->step != tilted->step )
// CV_Error( CV_StsUnmatchedSizes,
// "Sum and tilted_sum must have the same stride (step, widthStep)" );
// if( !CV_ARE_SIZES_EQ( sum, tilted ))
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
// cascade->tilted = *tilted;
}
_cascade
->
scale
=
scale
;
_cascade
->
real_window_size
.
width
=
cvRound
(
_cascade
->
orig_window_size
.
width
*
scale
);
_cascade
->
real_window_size
.
height
=
cvRound
(
_cascade
->
orig_window_size
.
height
*
scale
);
//cascade->sum = *sum;
//cascade->sqsum = *sqsum;
equRect
.
x
=
equRect
.
y
=
cvRound
(
scale
);
equRect
.
width
=
cvRound
((
_cascade
->
orig_window_size
.
width
-
2
)
*
scale
);
equRect
.
height
=
cvRound
((
_cascade
->
orig_window_size
.
height
-
2
)
*
scale
);
weight_scale
=
1.
/
(
equRect
.
width
*
equRect
.
height
);
cascade
->
inv_window_area
=
weight_scale
;
// cascade->pq0 = equRect.y * step + equRect.x;
// cascade->pq1 = equRect.y * step + equRect.x + equRect.width ;
// cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x;
// cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ;
cascade
->
pq0
=
equRect
.
x
;
cascade
->
pq1
=
equRect
.
y
;
cascade
->
pq2
=
equRect
.
x
+
equRect
.
width
;
...
...
@@ -618,10 +487,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
{
CvHaarFeature
*
feature
=
&
_cascade
->
stage_classifier
[
i
].
classifier
[
j
].
haar_feature
[
l
];
/* GpuHidHaarClassifier* classifier =
cascade->stage_classifier[i].classifier + j; */
//GpuHidHaarFeature* hidfeature =
// &cascade->stage_classifier[i].classifier[j].node[l].feature;
GpuHidHaarTreeNode
*
hidnode
=
&
stage_classifier
[
i
].
classifier
[
j
].
node
[
l
];
double
sum0
=
0
,
area0
=
0
;
CvRect
r
[
3
];
...
...
@@ -636,8 +501,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
/* align blocks */
for
(
k
=
0
;
k
<
CV_HAAR_FEATURE_MAX
;
k
++
)
{
//if( !hidfeature->rect[k].p0 )
// break;
if
(
!
hidnode
->
p
[
k
][
0
])
break
;
r
[
k
]
=
feature
->
rect
[
k
].
r
;
...
...
@@ -717,15 +580,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
if
(
!
feature
->
tilted
)
{
/* hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x;
hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width;
hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x;
hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width;
*/
/*hidnode->p0[k] = tr.y * step + tr.x;
hidnode->p1[k] = tr.y * step + tr.x + tr.width;
hidnode->p2[k] = (tr.y + tr.height) * step + tr.x;
hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/
hidnode
->
p
[
k
][
0
]
=
tr
.
x
;
hidnode
->
p
[
k
][
1
]
=
tr
.
y
;
hidnode
->
p
[
k
][
2
]
=
tr
.
x
+
tr
.
width
;
...
...
@@ -733,37 +587,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
}
else
{
/* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width;
hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height;
hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x;
hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height;
*/
hidnode
->
p
[
k
][
2
]
=
(
tr
.
y
+
tr
.
width
)
*
step
+
tr
.
x
+
tr
.
width
;
hidnode
->
p
[
k
][
3
]
=
(
tr
.
y
+
tr
.
width
+
tr
.
height
)
*
step
+
tr
.
x
+
tr
.
width
-
tr
.
height
;
hidnode
->
p
[
k
][
0
]
=
tr
.
y
*
step
+
tr
.
x
;
hidnode
->
p
[
k
][
1
]
=
(
tr
.
y
+
tr
.
height
)
*
step
+
tr
.
x
-
tr
.
height
;
}
//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
hidnode
->
weight
[
k
]
=
(
float
)(
feature
->
rect
[
k
].
weight
*
correction_ratio
);
if
(
k
==
0
)
area0
=
tr
.
width
*
tr
.
height
;
else
//sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
sum0
+=
hidnode
->
weight
[
k
]
*
tr
.
width
*
tr
.
height
;
}
// hidfeature->rect[0].weight = (float)(-sum0/area0);
hidnode
->
weight
[
0
]
=
(
float
)(
-
sum0
/
area0
);
}
/* l */
}
/* j */
}
}
static
void
gpuSetHaarClassifierCascade
(
CvHaarClassifierCascade
*
_cascade
/*double scale=0.0,*/
/*int step*/
)
static
void
gpuSetHaarClassifierCascade
(
CvHaarClassifierCascade
*
_cascade
)
{
GpuHidHaarClassifierCascade
*
cascade
;
int
i
;
...
...
@@ -817,11 +658,7 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
if
(
!
hidnode
->
p
[
k
][
0
])
break
;
r
[
k
]
=
feature
->
rect
[
k
].
r
;
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
}
}
nr
=
k
;
for
(
k
=
0
;
k
<
nr
;
k
++
)
...
...
@@ -839,7 +676,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
hidnode
->
p
[
k
][
3
]
=
tr
.
height
;
hidnode
->
weight
[
k
]
=
(
float
)(
feature
->
rect
[
k
].
weight
*
correction_ratio
);
}
//hidnode->weight[0]=(float)(-sum0/area0);
}
/* l */
}
/* j */
}
...
...
@@ -852,7 +688,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
const
double
GROUP_EPS
=
0.2
;
CvSeq
*
result_seq
=
0
;
cv
::
Ptr
<
CvMemStorage
>
temp_storage
;
cv
::
ConcurrentRectVector
allCandidates
;
std
::
vector
<
cv
::
Rect
>
rectList
;
...
...
@@ -910,6 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
if
(
gimg
.
cols
<
minSize
.
width
||
gimg
.
rows
<
minSize
.
height
)
CV_Error
(
CV_StsError
,
"Image too small"
);
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
CvSize
winSize0
=
cascade
->
orig_window_size
;
...
...
@@ -952,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
size_t
blocksize
=
8
;
size_t
localThreads
[
3
]
=
{
blocksize
,
blocksize
,
1
};
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
gsum
.
clCxt
->
computeUnits
(
)
*
localThreads
[
0
],
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
(
gsum
.
clCxt
->
computeUnits
()
)
*
localThreads
[
0
],
localThreads
[
1
],
1
};
int
outputsz
=
256
*
globalThreads
[
0
]
/
localThreads
[
0
];
...
...
@@ -997,7 +833,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
gpuSetImagesForHaarClassifierCascade
(
cascade
,
1.
,
gsum
.
step
/
4
);
stagebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
);
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
nodebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
nodenum
*
sizeof
(
GpuHidHaarTreeNode
));
...
...
@@ -1044,7 +879,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
pq
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
openCLReadBuffer
(
gsum
.
clCxt
,
candidatebuffer
,
candidate
,
4
*
sizeof
(
int
)
*
outputsz
);
...
...
@@ -1059,6 +896,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
openCLSafeCall
(
clReleaseMemObject
(
scaleinfobuffer
));
openCLSafeCall
(
clReleaseMemObject
(
nodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(
candidatebuffer
));
}
else
{
...
...
@@ -1118,7 +956,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
-
sizeof
(
GpuHidHaarClassifier
)
*
totalclassifier
)
/
sizeof
(
GpuHidHaarTreeNode
);
nodebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
nodenum
*
sizeof
(
GpuHidHaarTreeNode
));
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
nodebuffer
,
1
,
0
,
nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
...
...
@@ -1160,7 +997,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args1
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
startnodenum
));
size_t
globalThreads2
[
3
]
=
{
nodenum
,
1
,
1
};
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuscaleclassifier"
,
globalThreads2
,
NULL
/*localThreads2*/
,
args1
,
-
1
,
-
1
);
}
...
...
@@ -1195,8 +1031,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
pbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
correctionbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
nodenum
));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
candidate
=
(
int
*
)
clEnqueueMapBuffer
(
qu
,
candidatebuffer
,
1
,
CL_MAP_READ
,
0
,
4
*
sizeof
(
int
)
*
outputsz
,
0
,
0
,
0
,
&
status
);
...
...
@@ -1284,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
int
blocksize
=
8
;
int
grp_per_CU
=
12
;
size_t
localThreads
[
3
]
=
{
blocksize
,
blocksize
,
1
};
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
Context
::
getContext
()
->
computeUnits
()
*
localThreads
[
0
],
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
cv
::
ocl
::
Context
::
getContext
()
->
computeUnits
()
*
localThreads
[
0
],
localThreads
[
1
],
1
};
int
outputsz
=
256
*
globalThreads
[
0
]
/
localThreads
[
0
];
...
...
@@ -1300,8 +1136,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
CvHaarClassifierCascade
*
cascade
=
oldCascade
;
GpuHidHaarClassifierCascade
*
gcascade
;
GpuHidHaarStageClassifier
*
stage
;
GpuHidHaarClassifier
*
classifier
;
GpuHidHaarTreeNode
*
node
;
if
(
CV_MAT_DEPTH
(
gimg
.
type
())
!=
CV_8U
)
CV_Error
(
CV_StsUnsupportedFormat
,
"Only 8-bit images are supported"
);
...
...
@@ -1314,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
}
int
*
candidate
;
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
int
indexy
=
0
;
...
...
@@ -1340,19 +1174,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
gpuSetImagesForHaarClassifierCascade
(
cascade
,
1.
,
gsum
.
step
/
4
);
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
int
startstage
=
0
;
int
endstage
=
gcascade
->
count
;
...
...
@@ -1389,17 +1210,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
pq
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
candidate
=
(
int
*
)
malloc
(
4
*
sizeof
(
int
)
*
outputsz
);
memset
(
candidate
,
0
,
4
*
sizeof
(
int
)
*
outputsz
);
openCLReadBuffer
(
gsum
.
clCxt
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
candidate
,
4
*
sizeof
(
int
)
*
outputsz
);
for
(
int
i
=
0
;
i
<
outputsz
;
i
++
)
{
if
(
candidate
[
4
*
i
+
2
]
!=
0
)
{
allCandidates
.
push_back
(
Rect
(
candidate
[
4
*
i
],
candidate
[
4
*
i
+
1
],
candidate
[
4
*
i
+
2
],
candidate
[
4
*
i
+
3
]));
}
}
free
((
void
*
)
candidate
);
candidate
=
NULL
;
}
...
...
@@ -1407,56 +1234,14 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
{
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum
);
gpuSetHaarClassifierCascade
(
cascade
);
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
cl_int4
*
p
=
(
cl_int4
*
)
malloc
(
sizeof
(
cl_int4
)
*
m_loopcount
);
float
*
correction
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
m_loopcount
);
int
startstage
=
0
;
int
endstage
=
gcascade
->
count
;
double
factor
;
for
(
int
i
=
0
;
i
<
m_loopcount
;
i
++
)
{
factor
=
scalev
[
i
];
int
equRect_x
=
(
int
)(
factor
*
gcascade
->
p0
+
0.5
);
int
equRect_y
=
(
int
)(
factor
*
gcascade
->
p1
+
0.5
);
int
equRect_w
=
(
int
)(
factor
*
gcascade
->
p3
+
0.5
);
int
equRect_h
=
(
int
)(
factor
*
gcascade
->
p2
+
0.5
);
p
[
i
].
s
[
0
]
=
equRect_x
;
p
[
i
].
s
[
1
]
=
equRect_y
;
p
[
i
].
s
[
2
]
=
equRect_x
+
equRect_w
;
p
[
i
].
s
[
3
]
=
equRect_y
+
equRect_h
;
correction
[
i
]
=
1.
/
(
equRect_w
*
equRect_h
);
int
startnodenum
=
m_nodenum
*
i
;
float
factor2
=
(
float
)
factor
;
vector
<
pair
<
size_t
,
const
void
*>
>
args1
;
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
newnodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
factor2
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
[
i
]
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
startnodenum
));
size_t
globalThreads2
[
3
]
=
{
m_nodenum
,
1
,
1
};
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuscaleclassifier"
,
globalThreads2
,
NULL
/*localThreads2*/
,
args1
,
-
1
,
-
1
);
}
int
step
=
gsum
.
step
/
4
;
int
startnode
=
0
;
int
splitstage
=
3
;
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
pbuffer
,
1
,
0
,
sizeof
(
cl_int4
)
*
m_loopcount
,
p
,
0
,
NULL
,
NULL
))
;
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
correctionbuffer
,
1
,
0
,
sizeof
(
cl_float
)
*
m_loopcount
,
correction
,
0
,
NULL
,
NULL
))
;
int
startstage
=
0
;
int
endstage
=
gcascade
->
count
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
stagebuffer
));
...
...
@@ -1477,7 +1262,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
correctionbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
m_nodenum
));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
candidate
=
(
int
*
)
clEnqueueMapBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
1
,
CL_MAP_READ
,
0
,
4
*
sizeof
(
int
)
*
outputsz
,
0
,
0
,
0
,
NULL
);
...
...
@@ -1487,12 +1273,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
allCandidates
.
push_back
(
Rect
(
candidate
[
4
*
i
],
candidate
[
4
*
i
+
1
],
candidate
[
4
*
i
+
2
],
candidate
[
4
*
i
+
3
]));
}
free
(
p
);
free
(
correction
);
clEnqueueUnmapMemObject
(
qu
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
candidate
,
0
,
0
,
0
);
}
rectList
.
resize
(
allCandidates
.
size
());
if
(
!
allCandidates
.
empty
())
std
::
copy
(
allCandidates
.
begin
(),
allCandidates
.
end
(),
rectList
.
begin
());
...
...
@@ -1510,6 +1292,10 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
const
int
outputsz
,
const
size_t
localThreads
[],
CvSize
minSize
,
CvSize
maxSize
)
{
if
(
initialized
)
{
return
;
// we only allow one time initialization
}
CvHaarClassifierCascade
*
cascade
=
oldCascade
;
if
(
!
CV_IS_HAAR_CLASSIFIER
(
cascade
)
)
...
...
@@ -1525,7 +1311,9 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
int
totalclassifier
=
0
;
if
(
!
cascade
->
hid_cascade
)
{
gpuCreateHidHaarClassifierCascade
(
cascade
,
&
datasize
,
&
totalclassifier
);
}
if
(
maxSize
.
height
==
0
||
maxSize
.
width
==
0
)
{
...
...
@@ -1547,6 +1335,78 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
m_minSize
=
minSize
;
m_maxSize
=
maxSize
;
// initialize nodes
GpuHidHaarClassifierCascade
*
gcascade
;
GpuHidHaarStageClassifier
*
stage
;
GpuHidHaarClassifier
*
classifier
;
GpuHidHaarTreeNode
*
node
;
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
gpuSetImagesForHaarClassifierCascade
(
cascade
,
1.
,
gsum
.
step
/
4
);
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
}
else
{
gpuSetHaarClassifierCascade
(
cascade
);
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
cl_int4
*
p
=
(
cl_int4
*
)
malloc
(
sizeof
(
cl_int4
)
*
m_loopcount
);
float
*
correction
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
m_loopcount
);
double
factor
;
for
(
int
i
=
0
;
i
<
m_loopcount
;
i
++
)
{
factor
=
scalev
[
i
];
int
equRect_x
=
(
int
)(
factor
*
gcascade
->
p0
+
0.5
);
int
equRect_y
=
(
int
)(
factor
*
gcascade
->
p1
+
0.5
);
int
equRect_w
=
(
int
)(
factor
*
gcascade
->
p3
+
0.5
);
int
equRect_h
=
(
int
)(
factor
*
gcascade
->
p2
+
0.5
);
p
[
i
].
s
[
0
]
=
equRect_x
;
p
[
i
].
s
[
1
]
=
equRect_y
;
p
[
i
].
s
[
2
]
=
equRect_x
+
equRect_w
;
p
[
i
].
s
[
3
]
=
equRect_y
+
equRect_h
;
correction
[
i
]
=
1.
/
(
equRect_w
*
equRect_h
);
int
startnodenum
=
m_nodenum
*
i
;
float
factor2
=
(
float
)
factor
;
vector
<
pair
<
size_t
,
const
void
*>
>
args1
;
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
newnodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
factor2
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
[
i
]
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
startnodenum
));
size_t
globalThreads2
[
3
]
=
{
m_nodenum
,
1
,
1
};
openCLExecuteKernel
(
Context
::
getContext
(),
&
haarobjectdetect_scaled2
,
"gpuscaleclassifier"
,
globalThreads2
,
NULL
/*localThreads2*/
,
args1
,
-
1
,
-
1
);
}
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
pbuffer
,
1
,
0
,
sizeof
(
cl_int4
)
*
m_loopcount
,
p
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
correctionbuffer
,
1
,
0
,
sizeof
(
cl_float
)
*
m_loopcount
,
correction
,
0
,
NULL
,
NULL
));
free
(
p
);
free
(
correction
);
}
initialized
=
true
;
}
...
...
@@ -1645,6 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
CvSize
sz
;
CvSize
winSize0
=
oldCascade
->
orig_window_size
;
detect_piramid_info
*
scaleinfo
;
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
flags
&
CV_HAAR_SCALE_IMAGE
)
{
for
(
factor
=
1.
f
;;
factor
*=
scaleFactor
)
...
...
@@ -1746,7 +1607,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
=
openCLCreateBuffer
(
cv
::
ocl
::
Context
::
getContext
(),
CL_MEM_READ_ONLY
,
sizeof
(
detect_piramid_info
)
*
loopcount
);
}
openCLSafeCall
(
clEnqueueWriteBuffer
(
(
cl_command_queue
)
cv
::
ocl
::
Context
::
getContext
()
->
oclCommandQueue
()
,
((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
,
1
,
0
,
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
,
1
,
0
,
sizeof
(
detect_piramid_info
)
*
loopcount
,
scaleinfo
,
0
,
NULL
,
NULL
));
free
(
scaleinfo
);
...
...
@@ -1758,7 +1619,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
const
std
::
vector
<
cv
::
Rect
>
&
rectList
,
const
std
::
vector
<
int
>
&
rweights
)
{
CvSeq
*
result_seq
=
cvCreateSeq
(
0
,
sizeof
(
CvSeq
),
sizeof
(
CvAvgComp
),
cvCreateMemStorage
(
0
)
);
MemStorage
tempStorage
(
cvCreateMemStorage
(
0
));
CvSeq
*
result_seq
=
cvCreateSeq
(
0
,
sizeof
(
CvSeq
),
sizeof
(
CvAvgComp
),
tempStorage
);
if
(
findBiggestObject
&&
rectList
.
size
()
)
{
...
...
@@ -1794,167 +1656,30 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
void
cv
::
ocl
::
OclCascadeClassifierBuf
::
release
()
{
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
stagebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
candidatebuffer
));
if
(
(
m_flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
cvFree
(
&
oldCascade
->
hid_cascade
);
}
else
if
(
initialized
)
{
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
newnodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
correctionbuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
pbuffer
));
}
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
stagebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
candidatebuffer
));
if
(
(
m_flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
cvFree
(
&
oldCascade
->
hid_cascade
);
}
else
{
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
newnodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
correctionbuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
pbuffer
));
}
free
(
buffers
);
buffers
=
NULL
;
free
(
buffers
);
buffers
=
NULL
;
initialized
=
false
;
}
}
#ifndef _MAX_PATH
#define _MAX_PATH 1024
#endif
/****************************************************************************************\
* Persistence functions *
\****************************************************************************************/
/* field names */
#define ICV_HAAR_SIZE_NAME "size"
#define ICV_HAAR_STAGES_NAME "stages"
#define ICV_HAAR_TREES_NAME "trees"
#define ICV_HAAR_FEATURE_NAME "feature"
#define ICV_HAAR_RECTS_NAME "rects"
#define ICV_HAAR_TILTED_NAME "tilted"
#define ICV_HAAR_THRESHOLD_NAME "threshold"
#define ICV_HAAR_LEFT_NODE_NAME "left_node"
#define ICV_HAAR_LEFT_VAL_NAME "left_val"
#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
#define ICV_HAAR_PARENT_NAME "parent"
#define ICV_HAAR_NEXT_NAME "next"
static
int
gpuRunHaarClassifierCascade
(
/*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */
)
{
return
1
;
}
namespace
cv
{
namespace
ocl
{
struct
gpuHaarDetectObjects_ScaleImage_Invoker
{
gpuHaarDetectObjects_ScaleImage_Invoker
(
const
CvHaarClassifierCascade
*
_cascade
,
int
_stripSize
,
double
_factor
,
const
Mat
&
_sum1
,
const
Mat
&
_sqsum1
,
Mat
*
_norm1
,
Mat
*
_mask1
,
Rect
_equRect
,
ConcurrentRectVector
&
_vec
)
{
cascade
=
_cascade
;
stripSize
=
_stripSize
;
factor
=
_factor
;
sum1
=
_sum1
;
sqsum1
=
_sqsum1
;
norm1
=
_norm1
;
mask1
=
_mask1
;
equRect
=
_equRect
;
vec
=
&
_vec
;
}
void
operator
()(
const
BlockedRange
&
range
)
const
{
Size
winSize0
=
cascade
->
orig_window_size
;
Size
winSize
(
cvRound
(
winSize0
.
width
*
factor
),
cvRound
(
winSize0
.
height
*
factor
));
int
y1
=
range
.
begin
()
*
stripSize
,
y2
=
min
(
range
.
end
()
*
stripSize
,
sum1
.
rows
-
1
-
winSize0
.
height
);
Size
ssz
(
sum1
.
cols
-
1
-
winSize0
.
width
,
y2
-
y1
);
int
x
,
y
,
ystep
=
factor
>
2
?
1
:
2
;
for
(
y
=
y1
;
y
<
y2
;
y
+=
ystep
)
for
(
x
=
0
;
x
<
ssz
.
width
;
x
+=
ystep
)
{
if
(
gpuRunHaarClassifierCascade
(
/*cascade, cvPoint(x, y), 0*/
)
>
0
)
vec
->
push_back
(
Rect
(
cvRound
(
x
*
factor
),
cvRound
(
y
*
factor
),
winSize
.
width
,
winSize
.
height
));
}
}
const
CvHaarClassifierCascade
*
cascade
;
int
stripSize
;
double
factor
;
Mat
sum1
,
sqsum1
,
*
norm1
,
*
mask1
;
Rect
equRect
;
ConcurrentRectVector
*
vec
;
};
struct
gpuHaarDetectObjects_ScaleCascade_Invoker
{
gpuHaarDetectObjects_ScaleCascade_Invoker
(
const
CvHaarClassifierCascade
*
_cascade
,
Size
_winsize
,
const
Range
&
_xrange
,
double
_ystep
,
size_t
_sumstep
,
const
int
**
_p
,
const
int
**
_pq
,
ConcurrentRectVector
&
_vec
)
{
cascade
=
_cascade
;
winsize
=
_winsize
;
xrange
=
_xrange
;
ystep
=
_ystep
;
sumstep
=
_sumstep
;
p
=
_p
;
pq
=
_pq
;
vec
=
&
_vec
;
}
void
operator
()(
const
BlockedRange
&
range
)
const
{
int
iy
,
startY
=
range
.
begin
(),
endY
=
range
.
end
();
const
int
*
p0
=
p
[
0
],
*
p1
=
p
[
1
],
*
p2
=
p
[
2
],
*
p3
=
p
[
3
];
const
int
*
pq0
=
pq
[
0
],
*
pq1
=
pq
[
1
],
*
pq2
=
pq
[
2
],
*
pq3
=
pq
[
3
];
bool
doCannyPruning
=
p0
!=
0
;
int
sstep
=
(
int
)(
sumstep
/
sizeof
(
p0
[
0
]));
for
(
iy
=
startY
;
iy
<
endY
;
iy
++
)
{
int
ix
,
y
=
cvRound
(
iy
*
ystep
),
ixstep
=
1
;
for
(
ix
=
xrange
.
start
;
ix
<
xrange
.
end
;
ix
+=
ixstep
)
{
int
x
=
cvRound
(
ix
*
ystep
);
// it should really be ystep, not ixstep
if
(
doCannyPruning
)
{
int
offset
=
y
*
sstep
+
x
;
int
s
=
p0
[
offset
]
-
p1
[
offset
]
-
p2
[
offset
]
+
p3
[
offset
];
int
sq
=
pq0
[
offset
]
-
pq1
[
offset
]
-
pq2
[
offset
]
+
pq3
[
offset
];
if
(
s
<
100
||
sq
<
20
)
{
ixstep
=
2
;
continue
;
}
}
int
result
=
gpuRunHaarClassifierCascade
(
/* cascade, cvPoint(x, y), 0 */
);
if
(
result
>
0
)
vec
->
push_back
(
Rect
(
x
,
y
,
winsize
.
width
,
winsize
.
height
));
ixstep
=
result
!=
0
?
1
:
2
;
}
}
}
const
CvHaarClassifierCascade
*
cascade
;
double
ystep
;
size_t
sumstep
;
Size
winsize
;
Range
xrange
;
const
int
**
p
;
const
int
**
pq
;
ConcurrentRectVector
*
vec
;
};
}
}
modules/ocl/src/opencl/haarobjectdetect.cl
View file @
c3f5e737
...
...
@@ -10,6 +10,7 @@
//
Wang
Weiyan,
wangweiyanster@gmail.com
//
Jia
Haipeng,
jiahaipeng95@gmail.com
//
Nathan,
liujun@multicorewareinc.com
//
Peng
Xiao,
pengxiao@outlook.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
...
...
@@ -45,27 +46,16 @@
typedef
int
sumtype
;
typedef
float
sqsumtype
;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarFeature
{
struct
__attribute__
((
aligned
(
32
)))
{
int
p0
__attribute__
((
aligned
(
4
)))
;
int
p1
__attribute__
((
aligned
(
4
)))
;
int
p2
__attribute__
((
aligned
(
4
)))
;
int
p3
__attribute__
((
aligned
(
4
)))
;
float
weight
__attribute__
((
aligned
(
4
)))
;
}
rect[CV_HAAR_FEATURE_MAX]
__attribute__
((
aligned
(
32
)))
;
}
GpuHidHaarFeature
;
#
ifndef
STUMP_BASED
#
define
STUMP_BASED
1
#
endif
typedef
struct
__attribute__
((
aligned
(
128
)
))
GpuHidHaarTreeNode
{
int
p[CV_HAAR_FEATURE_MAX][4]
__attribute__
((
aligned
(
64
)))
;
float
weight[CV_HAAR_FEATURE_MAX]
/*__attribute__
((
aligned
(
16
)))
*/
;
float
threshold
/*__attribute__
((
aligned
(
4
)))
*/
;
float
alpha[
2]
__attribute__
((
aligned
(
8
)))
;
float
weight[CV_HAAR_FEATURE_MAX]
;
float
threshold
;
float
alpha[
3]
__attribute__
((
aligned
(
16
)))
;
int
left
__attribute__
((
aligned
(
4
)))
;
int
right
__attribute__
((
aligned
(
4
)))
;
}
...
...
@@ -111,7 +101,6 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
float
inv_window_area
__attribute__
((
aligned
(
4
)))
;
}
GpuHidHaarClassifierCascade
;
__kernel
void
__attribute__
((
reqd_work_group_size
(
8
,
8
,
1
)))
gpuRunHaarClassifierCascade
(
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
global
int4
*
info,
...
...
@@ -234,7 +223,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
float
stage_sum
=
0.f
;
int2
stageinfo
=
*
(
global
int2*
)(
stagecascadeptr+stageloop
)
;
float
stagethreshold
=
as_float
(
stageinfo.y
)
;
for
(
int
nodeloop
=
0
; nodeloop < stageinfo.x;
nodeloop++
)
for
(
int
nodeloop
=
0
; nodeloop < stageinfo.x; )
{
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
nodeptr
+
nodecounter
)
;
...
...
@@ -242,7 +231,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float2
alpha2
=
*
(
__global
float2*
)(
&
(
currentnodeptr->alpha[0]
))
;
float3
alpha3
=
*
(
__global
float3*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
info1.x
+=lcl_off
;
...
...
@@ -261,8 +251,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
stage_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
bool
passThres
=
classsum
>=
nodethreshold
;
#
if
STUMP_BASED
stage_sum
+=
passThres
?
alpha3.y
:
alpha3.x
;
nodecounter++
;
nodeloop++
;
#
else
bool
isRootNode
=
(
nodecounter
&
1
)
==
0
;
if
(
isRootNode
)
{
if
(
(
passThres
&&
currentnodeptr->right
)
|
|
(!passThres && currentnodeptr->left))
{
nodecounter ++;
}
else
{
stage_sum += alpha3.x;
nodecounter += 2;
nodeloop ++;
}
}
else
{
stage_sum += passThres ? alpha3.z : alpha3.y;
nodecounter ++;
nodeloop ++;
}
#endif
}
result = (stage_sum >= stagethreshold);
...
...
@@ -301,18 +317,20 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
if
(
lcl_compute_win_id
<
queuecount
)
{
int
tempnodecounter
=
lcl_compute_id
;
float
part_sum
=
0.f
;
for
(
int
lcl_loop=0
; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
const
int
stump_factor
=
STUMP_BASED
?
1
:
2
;
int
root_offset
=
0
;
for
(
int
lcl_loop=0
; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
{
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
nodeptr
+
nodecounter
+
tempnodecounter
)
;
__global
GpuHidHaarTreeNode*
currentnodeptr
=
nodeptr
+
(
nodecounter
+
tempnodecounter
)
*
stump_factor
+
root_offset
;
int4
info1
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float
2
alpha2
=
*
(
__global
float2
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
3
alpha3
=
*
(
__global
float3
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
info1.x
+=queue_pixel
;
...
...
@@ -332,8 +350,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
part_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
tempnodecounter
+=lcl_compute_win
;
bool
passThres
=
classsum
>=
nodethreshold
;
#
if
STUMP_BASED
part_sum
+=
passThres
?
alpha3.y
:
alpha3.x
;
tempnodecounter
+=
lcl_compute_win
;
lcl_loop++
;
#
else
if
(
root_offset
==
0
)
{
if
(
(
passThres
&&
currentnodeptr->right
)
||
(
!passThres
&&
currentnodeptr->left
))
{
root_offset
=
1
;
}
else
{
part_sum
+=
alpha3.x
;
tempnodecounter
+=
lcl_compute_win
;
lcl_loop++
;
}
}
else
{
part_sum
+=
passThres
?
alpha3.z
:
alpha3.y
;
tempnodecounter
+=
lcl_compute_win
;
lcl_loop++
;
root_offset
=
0
;
}
#
endif
}//end
for
(
int
lcl_loop=0
;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum
;
}
...
...
@@ -379,157 +423,3 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
}
/*
if
(
stagecascade->two_rects
)
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,p_offset,0
)
*
t1.weight[0]
;
classsum
+=
calc_sum1
(
t1,
p_offset,1
)
*
t1.weight[1]
;
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]:t1.alpha[0]
;
counter++
;
}
}
else
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t
=
node[counter].threshold*variance_norm_factor
;
classsum
=
calc_sum1
(
node[counter],p_offset,0
)
*
node[counter].weight[0]
;
classsum
+=
calc_sum1
(
node[counter],p_offset,1
)
*
node[counter].weight[1]
;
if
(
node[counter].p0[2]
)
classsum
+=
calc_sum1
(
node[counter],p_offset,2
)
*
node[counter].weight[2]
;
stage_sum
+=
classsum
>=
t
?
node[counter].alpha[1]:node[counter].alpha[0]
;// modify
counter++
;
}
}
*/
/*
__kernel
void
gpuRunHaarClassifierCascade_ScaleWindow
(
constant
GpuHidHaarClassifierCascade
*
_cascade,
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
//global
GpuHidHaarClassifier
*
classifierptr,
global
GpuHidHaarTreeNode
*
nodeptr,
global
int
*
sum,
global
float
*
sqsum,
global
int
*
_candidate,
int
pixel_step,
int
cols,
int
rows,
int
start_stage,
int
end_stage,
//int
counts,
int
nodenum,
int
ystep,
int
detect_width,
//int
detect_height,
int
loopcount,
int
outputstep
)
//float
scalefactor
)
{
unsigned
int
x1
=
get_global_id
(
0
)
;
unsigned
int
y1
=
get_global_id
(
1
)
;
int
p_offset
;
int
m,
n
;
int
result
;
int
counter
;
float
mean,
variance_norm_factor
;
for
(
int
i=0
;i<loopcount;i++)
{
constant
GpuHidHaarClassifierCascade
*
cascade
=
_cascade
+
i
;
global
int
*
candidate
=
_candidate
+
i*outputstep
;
int
window_width
=
cascade->p1
-
cascade->p0
;
int
window_height
=
window_width
;
result
=
1
;
counter
=
0
;
unsigned
int
x
=
mul24
(
x1,ystep
)
;
unsigned
int
y
=
mul24
(
y1,ystep
)
;
if
((
x
<
cols
-
window_width
-
1
)
&&
(
y
<
rows
-
window_height
-1
))
{
global
GpuHidHaarStageClassifier
*stagecascade
=
stagecascadeptr
+cascade->count*i+
start_stage
;
//global
GpuHidHaarClassifier
*classifier
=
classifierptr
;
global
GpuHidHaarTreeNode
*node
=
nodeptr
+
nodenum*i
;
p_offset
=
mad24
(
y,
pixel_step,
x
)
;// modify
mean
=
(
*
(
sum
+
p_offset
+
(
int
)
cascade->p0
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p1
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p2
)
+
*
(
sum
+
p_offset
+
(
int
)
cascade->p3
))
*cascade->inv_window_area
;
variance_norm_factor
=
*
(
sqsum
+
p_offset
+
cascade->p0
)
-
*
(
sqsum
+
cascade->p1
+
p_offset
)
-
*
(
sqsum
+
p_offset
+
cascade->p2
)
+
*
(
sqsum
+
cascade->p3
+
p_offset
)
;
variance_norm_factor
=
variance_norm_factor
*
cascade->inv_window_area
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=0.f
?
sqrt
(
variance_norm_factor
)
:
1
;//modify
//
if
(
cascade->is_stump_based
)
//{
for
(
m
=
start_stage
; m < end_stage; m++ )
{
float
stage_sum
=
0.f
;
float
t
,
classsum
;
GpuHidHaarTreeNode
t1
;
//#pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,
p_offset
,
0
)
*
t1.weight[0]
+
calc_sum1
(
t1,
p_offset
,
1
)
*
t1.weight[1]
;
if
((
t1.p0[2]
)
&&
(
!stagecascade->two_rects
))
classsum
+=
calc_sum1
(
t1,
p_offset,
2
)
*
t1.weight[2]
;
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]
:
t1.alpha[0]
;// modify
counter++
;
}
if
(
stage_sum
<
stagecascade->threshold
)
{
result
=
0
;
break
;
}
stagecascade++
;
}
if
(
result
)
{
candidate[4
*
(
y1
*
detect_width
+
x1
)
]
=
x
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
1]
=
y
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+2]
=
window_width
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
3]
=
window_height
;
}
//}
}
}
}
*/
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
View file @
c3f5e737
...
...
@@ -17,7 +17,7 @@
//
@Authors
//
Wu
Xinglong,
wxl370@126.com
//
Sen
Liu,
swjtuls1987@126.com
//
//
Peng
Xiao,
pengxiao@outlook.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
...
...
@@ -49,25 +49,13 @@
#
define
CV_HAAR_FEATURE_MAX
3
typedef
int
sumtype
;
typedef
float
sqsumtype
;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarFeature
{
struct
__attribute__
((
aligned
(
32
)))
{
int
p0
__attribute__
((
aligned
(
4
)))
;
int
p1
__attribute__
((
aligned
(
4
)))
;
int
p2
__attribute__
((
aligned
(
4
)))
;
int
p3
__attribute__
((
aligned
(
4
)))
;
float
weight
__attribute__
((
aligned
(
4
)))
;
}
rect[CV_HAAR_FEATURE_MAX]
__attribute__
((
aligned
(
32
)))
;
}
GpuHidHaarFeature
;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarTreeNode
{
int
p[CV_HAAR_FEATURE_MAX][4]
__attribute__
((
aligned
(
64
)))
;
float
weight[CV_HAAR_FEATURE_MAX]
/*__attribute__
((
aligned
(
16
)))
*/
;
float
threshold
/*__attribute__
((
aligned
(
4
)))
*/
;
float
alpha[
2]
__attribute__
((
aligned
(
8
)))
;
float
alpha[
3]
__attribute__
((
aligned
(
16
)))
;
int
left
__attribute__
((
aligned
(
4
)))
;
int
right
__attribute__
((
aligned
(
4
)))
;
}
...
...
@@ -174,45 +162,83 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
const
int
p_offset
=
mad24
(
y,
step,
x
)
;
cascadeinfo.x
+=
p_offset
;
cascadeinfo.z
+=
p_offset
;
mean
=
(
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
)
mean
=
(
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
)
*
correction_t
;
variance_norm_factor
=
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
;
variance_norm_factor
=
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
;
variance_norm_factor
=
variance_norm_factor
*
correction_t
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=
0.f
?
sqrt
(
variance_norm_factor
)
:
1.f
;
bool
result
=
true
;
nodecounter
=
startnode
+
nodecount
*
scalei
;
for
(
int
stageloop
=
start_stage
; (stageloop < end_stage) && result; stageloop++)
{
float
stage_sum
=
0.f
;
int
stagecount
=
stagecascadeptr[stageloop].count
;
for
(
int
nodeloop
=
0
; nodeloop < stagecount;
nodeloop++
)
for
(
int
nodeloop
=
0
; nodeloop < stagecount;)
{
__global
GpuHidHaarTreeNode
*currentnodeptr
=
(
nodeptr
+
nodecounter
)
;
int4
info1
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info2
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4
*
)(
&
(
currentnodeptr->weight[0]
))
;
float
2
alpha2
=
*
(
__global
float2
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
3
alpha3
=
*
(
__global
float3
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
info1.x
+=
p_offset
;
info1.z
+=
p_offset
;
info2.x
+=
p_offset
;
info2.z
+=
p_offset
;
float
classsum
=
(
sum[clamp
(
mad24
(
info1.y,
step,
info1.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.y,
step,
info1.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.w,
step,
info1.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info1.w,
step,
info1.z
)
,
0
,
max_idx
)
]
)
*
w.x
;
classsum
+=
(
sum[clamp
(
mad24
(
info2.y,
step,
info2.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.y,
step,
info2.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.w,
step,
info2.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info2.w,
step,
info2.z
)
,
0
,
max_idx
)
]
)
*
w.y
;
info3.x
+=
p_offset
;
info3.z
+=
p_offset
;
classsum
+=
(
sum[clamp
(
mad24
(
info3.y,
step,
info3.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.y,
step,
info3.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.w,
step,
info3.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info3.w,
step,
info3.z
)
,
0
,
max_idx
)
]
)
*
w.z
;
stage_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
float
classsum
=
(
sum[clamp
(
mad24
(
info1.y,
step,
info1.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.y,
step,
info1.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.w,
step,
info1.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info1.w,
step,
info1.z
)
,
0
,
max_idx
)
]
)
*
w.x
;
classsum
+=
(
sum[clamp
(
mad24
(
info2.y,
step,
info2.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.y,
step,
info2.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.w,
step,
info2.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info2.w,
step,
info2.z
)
,
0
,
max_idx
)
]
)
*
w.y
;
classsum
+=
(
sum[clamp
(
mad24
(
info3.y,
step,
info3.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.y,
step,
info3.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.w,
step,
info3.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info3.w,
step,
info3.z
)
,
0
,
max_idx
)
]
)
*
w.z
;
bool
passThres
=
classsum
>=
nodethreshold
;
#
if
STUMP_BASED
stage_sum
+=
passThres
?
alpha3.y
:
alpha3.x
;
nodecounter++
;
nodeloop++
;
#
else
bool
isRootNode
=
(
nodecounter
&
1
)
==
0
;
if
(
isRootNode
)
{
if
(
(
passThres
&&
currentnodeptr->right
)
|
|
(!passThres && currentnodeptr->left))
{
nodecounter ++;
}
else
{
stage_sum += alpha3.x;
nodecounter += 2;
nodeloop ++;
}
}
else
{
stage_sum += (passThres ? alpha3.z : alpha3.y);
nodecounter ++;
nodeloop ++;
}
#endif
}
result
=
(
bool
)(
stage_sum
>=
stagecascadeptr[stageloop].threshold
)
;
result = (
int
)(stage_sum >= stagecascadeptr[stageloop].threshold);
}
barrier(CLK_LOCAL_MEM_FENCE);
...
...
@@ -222,7 +248,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex] = (y << 16) |
x
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
queuecount
=
lclcount[0]
;
...
...
@@ -277,5 +302,6 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
newnode[counter].threshold
=
t1.threshold
;
newnode[counter].alpha[0]
=
t1.alpha[0]
;
newnode[counter].alpha[1]
=
t1.alpha[1]
;
newnode[counter].alpha[2]
=
t1.alpha[2]
;
}
modules/ocl/test/test_haar.cpp
View file @
c3f5e737
...
...
@@ -55,6 +55,12 @@ using namespace testing;
using
namespace
std
;
using
namespace
cv
;
extern
string
workdir
;
namespace
{
IMPLEMENT_PARAM_CLASS
(
CascadeName
,
std
::
string
);
CascadeName
cascade_frontalface_alt
(
std
::
string
(
"haarcascade_frontalface_alt.xml"
));
CascadeName
cascade_frontalface_alt2
(
std
::
string
(
"haarcascade_frontalface_alt2.xml"
));
struct
getRect
{
Rect
operator
()(
const
CvAvgComp
&
e
)
const
...
...
@@ -62,23 +68,24 @@ struct getRect
return
e
.
rect
;
}
};
}
PARAM_TEST_CASE
(
Haar
,
double
,
int
)
PARAM_TEST_CASE
(
Haar
,
double
,
int
,
CascadeName
)
{
cv
::
ocl
::
OclCascadeClassifier
cascade
,
nestedCascade
;
cv
::
ocl
::
OclCascadeClassifierBuf
cascadebuf
;
cv
::
CascadeClassifier
cpucascade
,
cpunestedCascade
;
double
scale
;
int
flags
;
std
::
string
cascadeName
;
virtual
void
SetUp
()
{
scale
=
GET_PARAM
(
0
);
flags
=
GET_PARAM
(
1
);
string
cascadeName
=
workdir
+
"../../data/haarcascades/haarcascade_frontalface_alt.xml"
;
cascadeName
=
(
workdir
+
"../../data/haarcascades/"
).
append
(
GET_PARAM
(
2
))
;
if
(
(
!
cascade
.
load
(
cascadeName
))
||
(
!
cpucascade
.
load
(
cascadeName
))
||
(
!
cascadebuf
.
load
(
cascadeName
))
)
if
(
(
!
cascade
.
load
(
cascadeName
))
||
(
!
cpucascade
.
load
(
cascadeName
))
)
{
cout
<<
"ERROR: Could not load classifier cascade"
<<
endl
;
return
;
...
...
@@ -115,7 +122,7 @@ TEST_P(Haar, FaceDetect)
Seq
<
CvAvgComp
>
(
_objects
).
copyTo
(
vecAvgComp
);
oclfaces
.
resize
(
vecAvgComp
.
size
());
std
::
transform
(
vecAvgComp
.
begin
(),
vecAvgComp
.
end
(),
oclfaces
.
begin
(),
getRect
());
cpucascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
...
...
@@ -136,7 +143,6 @@ TEST_P(Haar, FaceDetectUseBuf)
vector
<
Rect
>
faces
,
oclfaces
;
Mat
gray
,
smallImg
(
cvRound
(
img
.
rows
/
scale
),
cvRound
(
img
.
cols
/
scale
),
CV_8UC1
);
MemStorage
storage
(
cvCreateMemStorage
(
0
));
cvtColor
(
img
,
gray
,
CV_BGR2GRAY
);
resize
(
gray
,
smallImg
,
smallImg
.
size
(),
0
,
0
,
INTER_LINEAR
);
equalizeHist
(
smallImg
,
smallImg
);
...
...
@@ -144,19 +150,31 @@ TEST_P(Haar, FaceDetectUseBuf)
cv
::
ocl
::
oclMat
image
;
image
.
upload
(
smallImg
);
cv
::
ocl
::
OclCascadeClassifierBuf
cascadebuf
;
if
(
!
cascadebuf
.
load
(
cascadeName
)
)
{
cout
<<
"ERROR: Could not load classifier cascade for FaceDetectUseBuf!"
<<
endl
;
return
;
}
cascadebuf
.
detectMultiScale
(
image
,
oclfaces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
cascadebuf
.
release
();
cpucascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
EXPECT_EQ
(
faces
.
size
(),
oclfaces
.
size
());
// intentionally run ocl facedetect again and check if it still works after the first run
cascadebuf
.
detectMultiScale
(
image
,
oclfaces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
));
cascadebuf
.
release
();
EXPECT_EQ
(
faces
.
size
(),
oclfaces
.
size
());
}
INSTANTIATE_TEST_CASE_P
(
FaceDetect
,
Haar
,
Combine
(
Values
(
1.0
),
Values
(
CV_HAAR_SCALE_IMAGE
,
0
)));
Values
(
CV_HAAR_SCALE_IMAGE
,
0
)
,
Values
(
cascade_frontalface_alt
,
cascade_frontalface_alt2
)
));
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment