Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c3f5e737
Commit
c3f5e737
authored
May 30, 2013
by
Roman Donchenko
Committed by
OpenCV Buildbot
May 30, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #930 from pengx17:2.4_haar_ext
parents
0ae40507
fd7ba355
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
269 additions
and
610 deletions
+269
-610
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+1
-1
haar.cpp
modules/ocl/src/haar.cpp
+115
-390
haarobjectdetect.cl
modules/ocl/src/opencl/haarobjectdetect.cl
+71
-181
haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+57
-31
test_haar.cpp
modules/ocl/test/test_haar.cpp
+25
-7
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
c3f5e737
...
@@ -817,7 +817,7 @@ namespace cv
...
@@ -817,7 +817,7 @@ namespace cv
OclCascadeClassifierBuf
()
:
OclCascadeClassifierBuf
()
:
m_flags
(
0
),
initialized
(
false
),
m_scaleFactor
(
0
),
buffers
(
NULL
)
{}
m_flags
(
0
),
initialized
(
false
),
m_scaleFactor
(
0
),
buffers
(
NULL
)
{}
~
OclCascadeClassifierBuf
()
{}
~
OclCascadeClassifierBuf
()
{
release
();
}
void
detectMultiScale
(
oclMat
&
image
,
CV_OUT
std
::
vector
<
cv
::
Rect
>&
faces
,
void
detectMultiScale
(
oclMat
&
image
,
CV_OUT
std
::
vector
<
cv
::
Rect
>&
faces
,
double
scaleFactor
=
1.1
,
int
minNeighbors
=
3
,
int
flags
=
0
,
double
scaleFactor
=
1.1
,
int
minNeighbors
=
3
,
int
flags
=
0
,
...
...
modules/ocl/src/haar.cpp
View file @
c3f5e737
...
@@ -137,47 +137,22 @@ struct CvHidHaarClassifierCascade
...
@@ -137,47 +137,22 @@ struct CvHidHaarClassifierCascade
};
};
typedef
struct
typedef
struct
{
{
//int rows;
//int ystep;
int
width_height
;
int
width_height
;
//int height;
int
grpnumperline_totalgrp
;
int
grpnumperline_totalgrp
;
//int totalgrp;
int
imgoff
;
int
imgoff
;
float
factor
;
float
factor
;
}
detect_piramid_info
;
}
detect_piramid_info
;
#ifdef WIN32
#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
typedef
_ALIGNED_ON
(
128
)
struct
GpuHidHaarFeature
{
_ALIGNED_ON
(
32
)
struct
{
_ALIGNED_ON
(
4
)
int
p0
;
_ALIGNED_ON
(
4
)
int
p1
;
_ALIGNED_ON
(
4
)
int
p2
;
_ALIGNED_ON
(
4
)
int
p3
;
_ALIGNED_ON
(
4
)
float
weight
;
}
/*_ALIGNED_ON(32)*/
rect
[
CV_HAAR_FEATURE_MAX
]
;
}
GpuHidHaarFeature
;
typedef
_ALIGNED_ON
(
128
)
struct
GpuHidHaarTreeNode
typedef
_ALIGNED_ON
(
128
)
struct
GpuHidHaarTreeNode
{
{
_ALIGNED_ON
(
64
)
int
p
[
CV_HAAR_FEATURE_MAX
][
4
];
_ALIGNED_ON
(
64
)
int
p
[
CV_HAAR_FEATURE_MAX
][
4
];
//_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ;
//_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ;
//_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ;
/*_ALIGNED_ON(16)*/
float
weight
[
CV_HAAR_FEATURE_MAX
]
;
float
weight
[
CV_HAAR_FEATURE_MAX
]
;
/*_ALIGNED_ON(4)*/
float
threshold
;
float
threshold
;
_ALIGNED_ON
(
8
)
float
alpha
[
2
]
;
_ALIGNED_ON
(
16
)
float
alpha
[
3
]
;
_ALIGNED_ON
(
4
)
int
left
;
_ALIGNED_ON
(
4
)
int
left
;
_ALIGNED_ON
(
4
)
int
right
;
_ALIGNED_ON
(
4
)
int
right
;
// GpuHidHaarFeature feature __attribute__((aligned (128)));
}
}
GpuHidHaarTreeNode
;
GpuHidHaarTreeNode
;
...
@@ -185,7 +160,6 @@ GpuHidHaarTreeNode;
...
@@ -185,7 +160,6 @@ GpuHidHaarTreeNode;
typedef
_ALIGNED_ON
(
32
)
struct
GpuHidHaarClassifier
typedef
_ALIGNED_ON
(
32
)
struct
GpuHidHaarClassifier
{
{
_ALIGNED_ON
(
4
)
int
count
;
_ALIGNED_ON
(
4
)
int
count
;
//CvHaarFeature* orig_feature;
_ALIGNED_ON
(
8
)
GpuHidHaarTreeNode
*
node
;
_ALIGNED_ON
(
8
)
GpuHidHaarTreeNode
*
node
;
_ALIGNED_ON
(
8
)
float
*
alpha
;
_ALIGNED_ON
(
8
)
float
*
alpha
;
}
}
...
@@ -220,32 +194,16 @@ typedef _ALIGNED_ON(64) struct GpuHidHaarClassifierCascade
...
@@ -220,32 +194,16 @@ typedef _ALIGNED_ON(64) struct GpuHidHaarClassifierCascade
_ALIGNED_ON
(
4
)
int
p2
;
_ALIGNED_ON
(
4
)
int
p2
;
_ALIGNED_ON
(
4
)
int
p3
;
_ALIGNED_ON
(
4
)
int
p3
;
_ALIGNED_ON
(
4
)
float
inv_window_area
;
_ALIGNED_ON
(
4
)
float
inv_window_area
;
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
}
GpuHidHaarClassifierCascade
;
}
GpuHidHaarClassifierCascade
;
#else
#else
#define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
#define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
typedef
struct
_ALIGNED_ON
(
128
)
GpuHidHaarFeature
{
struct
_ALIGNED_ON
(
32
)
{
int
p0
_ALIGNED_ON
(
4
);
int
p1
_ALIGNED_ON
(
4
);
int
p2
_ALIGNED_ON
(
4
);
int
p3
_ALIGNED_ON
(
4
);
float
weight
_ALIGNED_ON
(
4
);
}
rect
[
CV_HAAR_FEATURE_MAX
]
_ALIGNED_ON
(
32
);
}
GpuHidHaarFeature
;
typedef
struct
_ALIGNED_ON
(
128
)
GpuHidHaarTreeNode
typedef
struct
_ALIGNED_ON
(
128
)
GpuHidHaarTreeNode
{
{
int
p
[
CV_HAAR_FEATURE_MAX
][
4
]
_ALIGNED_ON
(
64
);
int
p
[
CV_HAAR_FEATURE_MAX
][
4
]
_ALIGNED_ON
(
64
);
float
weight
[
CV_HAAR_FEATURE_MAX
];
// _ALIGNED_ON(16);
float
weight
[
CV_HAAR_FEATURE_MAX
];
// _ALIGNED_ON(16);
float
threshold
;
// _ALIGNED_ON(4);
float
threshold
;
// _ALIGNED_ON(4);
float
alpha
[
2
]
_ALIGNED_ON
(
8
);
float
alpha
[
3
]
_ALIGNED_ON
(
16
);
int
left
_ALIGNED_ON
(
4
);
int
left
_ALIGNED_ON
(
4
);
int
right
_ALIGNED_ON
(
4
);
int
right
_ALIGNED_ON
(
4
);
}
}
...
@@ -288,7 +246,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
...
@@ -288,7 +246,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
int
p2
_ALIGNED_ON
(
4
);
int
p2
_ALIGNED_ON
(
4
);
int
p3
_ALIGNED_ON
(
4
);
int
p3
_ALIGNED_ON
(
4
);
float
inv_window_area
_ALIGNED_ON
(
4
);
float
inv_window_area
_ALIGNED_ON
(
4
);
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
}
GpuHidHaarClassifierCascade
;
}
GpuHidHaarClassifierCascade
;
#endif
#endif
...
@@ -296,36 +253,6 @@ const int icv_object_win_border = 1;
...
@@ -296,36 +253,6 @@ const int icv_object_win_border = 1;
const
float
icv_stage_threshold_bias
=
0.0001
f
;
const
float
icv_stage_threshold_bias
=
0.0001
f
;
double
globaltime
=
0
;
double
globaltime
=
0
;
// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count )
// {
// CvHaarClassifierCascade *cascade = 0;
// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier);
// if( stage_count <= 0 )
// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" );
// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size );
// memset( cascade, 0, block_size );
// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1);
// cascade->flags = CV_HAAR_MAGIC_VAL;
// cascade->count = stage_count;
// return cascade;
// }
//static int globalcounter = 0;
// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
// {
// if( _cascade && *_cascade )
// {
// cvFree( _cascade );
// }
// }
/* create more efficient internal representation of haar classifier cascade */
/* create more efficient internal representation of haar classifier cascade */
static
GpuHidHaarClassifierCascade
*
gpuCreateHidHaarClassifierCascade
(
CvHaarClassifierCascade
*
cascade
,
int
*
size
,
int
*
totalclassifier
)
static
GpuHidHaarClassifierCascade
*
gpuCreateHidHaarClassifierCascade
(
CvHaarClassifierCascade
*
cascade
,
int
*
size
,
int
*
totalclassifier
)
{
{
...
@@ -441,24 +368,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
...
@@ -441,24 +368,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
hid_stage_classifier
->
two_rects
=
1
;
hid_stage_classifier
->
two_rects
=
1
;
haar_classifier_ptr
+=
stage_classifier
->
count
;
haar_classifier_ptr
+=
stage_classifier
->
count
;
/*
hid_stage_classifier->parent = (stage_classifier->parent == -1)
? NULL : stage_classifier_ptr + stage_classifier->parent;
hid_stage_classifier->next = (stage_classifier->next == -1)
? NULL : stage_classifier_ptr + stage_classifier->next;
hid_stage_classifier->child = (stage_classifier->child == -1)
? NULL : stage_classifier_ptr + stage_classifier->child;
out->is_tree |= hid_stage_classifier->next != NULL;
*/
for
(
j
=
0
;
j
<
stage_classifier
->
count
;
j
++
)
for
(
j
=
0
;
j
<
stage_classifier
->
count
;
j
++
)
{
{
CvHaarClassifier
*
classifier
=
stage_classifier
->
classifier
+
j
;
CvHaarClassifier
*
classifier
=
stage_classifier
->
classifier
+
j
;
GpuHidHaarClassifier
*
hid_classifier
=
hid_stage_classifier
->
classifier
+
j
;
GpuHidHaarClassifier
*
hid_classifier
=
hid_stage_classifier
->
classifier
+
j
;
int
node_count
=
classifier
->
count
;
int
node_count
=
classifier
->
count
;
// float* alpha_ptr = (float*)(haar_node_ptr + node_count);
float
*
alpha_ptr
=
&
haar_node_ptr
->
alpha
[
0
];
float
*
alpha_ptr
=
&
haar_node_ptr
->
alpha
[
0
];
hid_classifier
->
count
=
node_count
;
hid_classifier
->
count
=
node_count
;
...
@@ -485,16 +400,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
...
@@ -485,16 +400,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
node
->
p
[
2
][
3
]
=
0
;
node
->
p
[
2
][
3
]
=
0
;
node
->
weight
[
2
]
=
0
;
node
->
weight
[
2
]
=
0
;
}
}
// memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) );
else
else
hid_stage_classifier
->
two_rects
=
0
;
hid_stage_classifier
->
two_rects
=
0
;
}
memcpy
(
alpha_ptr
,
classifier
->
alpha
,
(
node_count
+
1
)
*
sizeof
(
alpha_ptr
[
0
]));
memcpy
(
node
->
alpha
,
classifier
->
alpha
,
(
node_count
+
1
)
*
sizeof
(
alpha_ptr
[
0
]));
haar_node_ptr
=
haar_node_ptr
+
1
;
haar_node_ptr
=
haar_node_ptr
+
1
;
// (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*));
}
// (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1);
out
->
is_stump_based
&=
node_count
==
1
;
out
->
is_stump_based
&=
node_count
==
1
;
}
}
}
}
...
@@ -517,15 +428,9 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
...
@@ -517,15 +428,9 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
static
void
gpuSetImagesForHaarClassifierCascade
(
CvHaarClassifierCascade
*
_cascade
,
static
void
gpuSetImagesForHaarClassifierCascade
(
CvHaarClassifierCascade
*
_cascade
,
/* const CvArr* _sum,
const CvArr* _sqsum,
const CvArr* _tilted_sum,*/
double
scale
,
double
scale
,
int
step
)
int
step
)
{
{
// CvMat sum_stub, *sum = (CvMat*)_sum;
// CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
// CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
GpuHidHaarClassifierCascade
*
cascade
;
GpuHidHaarClassifierCascade
*
cascade
;
int
coi0
=
0
,
coi1
=
0
;
int
coi0
=
0
,
coi1
=
0
;
int
i
;
int
i
;
...
@@ -541,61 +446,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
...
@@ -541,61 +446,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
if
(
scale
<=
0
)
if
(
scale
<=
0
)
CV_Error
(
CV_StsOutOfRange
,
"Scale must be positive"
);
CV_Error
(
CV_StsOutOfRange
,
"Scale must be positive"
);
// sum = cvGetMat( sum, &sum_stub, &coi0 );
// sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
if
(
coi0
||
coi1
)
if
(
coi0
||
coi1
)
CV_Error
(
CV_BadCOI
,
"COI is not supported"
);
CV_Error
(
CV_BadCOI
,
"COI is not supported"
);
// if( !CV_ARE_SIZES_EQ( sum, sqsum ))
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
// if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
// CV_MAT_TYPE(sum->type) != CV_32SC1 )
// CV_Error( CV_StsUnsupportedFormat,
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
if
(
!
_cascade
->
hid_cascade
)
if
(
!
_cascade
->
hid_cascade
)
gpuCreateHidHaarClassifierCascade
(
_cascade
,
&
datasize
,
&
total
);
gpuCreateHidHaarClassifierCascade
(
_cascade
,
&
datasize
,
&
total
);
cascade
=
(
GpuHidHaarClassifierCascade
*
)
_cascade
->
hid_cascade
;
cascade
=
(
GpuHidHaarClassifierCascade
*
)
_cascade
->
hid_cascade
;
stage_classifier
=
(
GpuHidHaarStageClassifier
*
)
(
cascade
+
1
);
stage_classifier
=
(
GpuHidHaarStageClassifier
*
)
(
cascade
+
1
);
if
(
cascade
->
has_tilted_features
)
{
// tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
// if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
// CV_Error( CV_StsUnsupportedFormat,
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
// if( sum->step != tilted->step )
// CV_Error( CV_StsUnmatchedSizes,
// "Sum and tilted_sum must have the same stride (step, widthStep)" );
// if( !CV_ARE_SIZES_EQ( sum, tilted ))
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
// cascade->tilted = *tilted;
}
_cascade
->
scale
=
scale
;
_cascade
->
scale
=
scale
;
_cascade
->
real_window_size
.
width
=
cvRound
(
_cascade
->
orig_window_size
.
width
*
scale
);
_cascade
->
real_window_size
.
width
=
cvRound
(
_cascade
->
orig_window_size
.
width
*
scale
);
_cascade
->
real_window_size
.
height
=
cvRound
(
_cascade
->
orig_window_size
.
height
*
scale
);
_cascade
->
real_window_size
.
height
=
cvRound
(
_cascade
->
orig_window_size
.
height
*
scale
);
//cascade->sum = *sum;
//cascade->sqsum = *sqsum;
equRect
.
x
=
equRect
.
y
=
cvRound
(
scale
);
equRect
.
x
=
equRect
.
y
=
cvRound
(
scale
);
equRect
.
width
=
cvRound
((
_cascade
->
orig_window_size
.
width
-
2
)
*
scale
);
equRect
.
width
=
cvRound
((
_cascade
->
orig_window_size
.
width
-
2
)
*
scale
);
equRect
.
height
=
cvRound
((
_cascade
->
orig_window_size
.
height
-
2
)
*
scale
);
equRect
.
height
=
cvRound
((
_cascade
->
orig_window_size
.
height
-
2
)
*
scale
);
weight_scale
=
1.
/
(
equRect
.
width
*
equRect
.
height
);
weight_scale
=
1.
/
(
equRect
.
width
*
equRect
.
height
);
cascade
->
inv_window_area
=
weight_scale
;
cascade
->
inv_window_area
=
weight_scale
;
// cascade->pq0 = equRect.y * step + equRect.x;
// cascade->pq1 = equRect.y * step + equRect.x + equRect.width ;
// cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x;
// cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ;
cascade
->
pq0
=
equRect
.
x
;
cascade
->
pq0
=
equRect
.
x
;
cascade
->
pq1
=
equRect
.
y
;
cascade
->
pq1
=
equRect
.
y
;
cascade
->
pq2
=
equRect
.
x
+
equRect
.
width
;
cascade
->
pq2
=
equRect
.
x
+
equRect
.
width
;
...
@@ -618,10 +487,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
...
@@ -618,10 +487,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
{
{
CvHaarFeature
*
feature
=
CvHaarFeature
*
feature
=
&
_cascade
->
stage_classifier
[
i
].
classifier
[
j
].
haar_feature
[
l
];
&
_cascade
->
stage_classifier
[
i
].
classifier
[
j
].
haar_feature
[
l
];
/* GpuHidHaarClassifier* classifier =
cascade->stage_classifier[i].classifier + j; */
//GpuHidHaarFeature* hidfeature =
// &cascade->stage_classifier[i].classifier[j].node[l].feature;
GpuHidHaarTreeNode
*
hidnode
=
&
stage_classifier
[
i
].
classifier
[
j
].
node
[
l
];
GpuHidHaarTreeNode
*
hidnode
=
&
stage_classifier
[
i
].
classifier
[
j
].
node
[
l
];
double
sum0
=
0
,
area0
=
0
;
double
sum0
=
0
,
area0
=
0
;
CvRect
r
[
3
];
CvRect
r
[
3
];
...
@@ -636,8 +501,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
...
@@ -636,8 +501,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
/* align blocks */
/* align blocks */
for
(
k
=
0
;
k
<
CV_HAAR_FEATURE_MAX
;
k
++
)
for
(
k
=
0
;
k
<
CV_HAAR_FEATURE_MAX
;
k
++
)
{
{
//if( !hidfeature->rect[k].p0 )
// break;
if
(
!
hidnode
->
p
[
k
][
0
])
if
(
!
hidnode
->
p
[
k
][
0
])
break
;
break
;
r
[
k
]
=
feature
->
rect
[
k
].
r
;
r
[
k
]
=
feature
->
rect
[
k
].
r
;
...
@@ -717,15 +580,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
...
@@ -717,15 +580,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
if
(
!
feature
->
tilted
)
if
(
!
feature
->
tilted
)
{
{
/* hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x;
hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width;
hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x;
hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width;
*/
/*hidnode->p0[k] = tr.y * step + tr.x;
hidnode->p1[k] = tr.y * step + tr.x + tr.width;
hidnode->p2[k] = (tr.y + tr.height) * step + tr.x;
hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/
hidnode
->
p
[
k
][
0
]
=
tr
.
x
;
hidnode
->
p
[
k
][
0
]
=
tr
.
x
;
hidnode
->
p
[
k
][
1
]
=
tr
.
y
;
hidnode
->
p
[
k
][
1
]
=
tr
.
y
;
hidnode
->
p
[
k
][
2
]
=
tr
.
x
+
tr
.
width
;
hidnode
->
p
[
k
][
2
]
=
tr
.
x
+
tr
.
width
;
...
@@ -733,37 +587,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
...
@@ -733,37 +587,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
}
}
else
else
{
{
/* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width;
hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height;
hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x;
hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height;
*/
hidnode
->
p
[
k
][
2
]
=
(
tr
.
y
+
tr
.
width
)
*
step
+
tr
.
x
+
tr
.
width
;
hidnode
->
p
[
k
][
2
]
=
(
tr
.
y
+
tr
.
width
)
*
step
+
tr
.
x
+
tr
.
width
;
hidnode
->
p
[
k
][
3
]
=
(
tr
.
y
+
tr
.
width
+
tr
.
height
)
*
step
+
tr
.
x
+
tr
.
width
-
tr
.
height
;
hidnode
->
p
[
k
][
3
]
=
(
tr
.
y
+
tr
.
width
+
tr
.
height
)
*
step
+
tr
.
x
+
tr
.
width
-
tr
.
height
;
hidnode
->
p
[
k
][
0
]
=
tr
.
y
*
step
+
tr
.
x
;
hidnode
->
p
[
k
][
0
]
=
tr
.
y
*
step
+
tr
.
x
;
hidnode
->
p
[
k
][
1
]
=
(
tr
.
y
+
tr
.
height
)
*
step
+
tr
.
x
-
tr
.
height
;
hidnode
->
p
[
k
][
1
]
=
(
tr
.
y
+
tr
.
height
)
*
step
+
tr
.
x
-
tr
.
height
;
}
}
//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
hidnode
->
weight
[
k
]
=
(
float
)(
feature
->
rect
[
k
].
weight
*
correction_ratio
);
hidnode
->
weight
[
k
]
=
(
float
)(
feature
->
rect
[
k
].
weight
*
correction_ratio
);
if
(
k
==
0
)
if
(
k
==
0
)
area0
=
tr
.
width
*
tr
.
height
;
area0
=
tr
.
width
*
tr
.
height
;
else
else
//sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
sum0
+=
hidnode
->
weight
[
k
]
*
tr
.
width
*
tr
.
height
;
sum0
+=
hidnode
->
weight
[
k
]
*
tr
.
width
*
tr
.
height
;
}
}
// hidfeature->rect[0].weight = (float)(-sum0/area0);
hidnode
->
weight
[
0
]
=
(
float
)(
-
sum0
/
area0
);
hidnode
->
weight
[
0
]
=
(
float
)(
-
sum0
/
area0
);
}
/* l */
}
/* l */
}
/* j */
}
/* j */
}
}
}
}
static
void
gpuSetHaarClassifierCascade
(
CvHaarClassifierCascade
*
_cascade
static
void
gpuSetHaarClassifierCascade
(
CvHaarClassifierCascade
*
_cascade
)
/*double scale=0.0,*/
/*int step*/
)
{
{
GpuHidHaarClassifierCascade
*
cascade
;
GpuHidHaarClassifierCascade
*
cascade
;
int
i
;
int
i
;
...
@@ -817,10 +658,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
...
@@ -817,10 +658,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
if
(
!
hidnode
->
p
[
k
][
0
])
if
(
!
hidnode
->
p
[
k
][
0
])
break
;
break
;
r
[
k
]
=
feature
->
rect
[
k
].
r
;
r
[
k
]
=
feature
->
rect
[
k
].
r
;
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
}
}
nr
=
k
;
nr
=
k
;
...
@@ -839,7 +676,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
...
@@ -839,7 +676,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
hidnode
->
p
[
k
][
3
]
=
tr
.
height
;
hidnode
->
p
[
k
][
3
]
=
tr
.
height
;
hidnode
->
weight
[
k
]
=
(
float
)(
feature
->
rect
[
k
].
weight
*
correction_ratio
);
hidnode
->
weight
[
k
]
=
(
float
)(
feature
->
rect
[
k
].
weight
*
correction_ratio
);
}
}
//hidnode->weight[0]=(float)(-sum0/area0);
}
/* l */
}
/* l */
}
/* j */
}
/* j */
}
}
...
@@ -852,7 +688,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -852,7 +688,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
const
double
GROUP_EPS
=
0.2
;
const
double
GROUP_EPS
=
0.2
;
CvSeq
*
result_seq
=
0
;
CvSeq
*
result_seq
=
0
;
cv
::
Ptr
<
CvMemStorage
>
temp_storage
;
cv
::
ConcurrentRectVector
allCandidates
;
cv
::
ConcurrentRectVector
allCandidates
;
std
::
vector
<
cv
::
Rect
>
rectList
;
std
::
vector
<
cv
::
Rect
>
rectList
;
...
@@ -910,6 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -910,6 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
if
(
gimg
.
cols
<
minSize
.
width
||
gimg
.
rows
<
minSize
.
height
)
if
(
gimg
.
cols
<
minSize
.
width
||
gimg
.
rows
<
minSize
.
height
)
CV_Error
(
CV_StsError
,
"Image too small"
);
CV_Error
(
CV_StsError
,
"Image too small"
);
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
{
CvSize
winSize0
=
cascade
->
orig_window_size
;
CvSize
winSize0
=
cascade
->
orig_window_size
;
...
@@ -952,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -952,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
size_t
blocksize
=
8
;
size_t
blocksize
=
8
;
size_t
localThreads
[
3
]
=
{
blocksize
,
blocksize
,
1
};
size_t
localThreads
[
3
]
=
{
blocksize
,
blocksize
,
1
};
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
gsum
.
clCxt
->
computeUnits
(
)
*
localThreads
[
0
],
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
(
gsum
.
clCxt
->
computeUnits
()
)
*
localThreads
[
0
],
localThreads
[
1
],
1
localThreads
[
1
],
1
};
};
int
outputsz
=
256
*
globalThreads
[
0
]
/
localThreads
[
0
];
int
outputsz
=
256
*
globalThreads
[
0
]
/
localThreads
[
0
];
...
@@ -997,7 +833,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -997,7 +833,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
gpuSetImagesForHaarClassifierCascade
(
cascade
,
1.
,
gsum
.
step
/
4
);
gpuSetImagesForHaarClassifierCascade
(
cascade
,
1.
,
gsum
.
step
/
4
);
stagebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
);
stagebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
);
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
nodebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
nodenum
*
sizeof
(
GpuHidHaarTreeNode
));
nodebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
nodenum
*
sizeof
(
GpuHidHaarTreeNode
));
...
@@ -1044,7 +879,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -1044,7 +879,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
pq
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
pq
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
openCLReadBuffer
(
gsum
.
clCxt
,
candidatebuffer
,
candidate
,
4
*
sizeof
(
int
)
*
outputsz
);
openCLReadBuffer
(
gsum
.
clCxt
,
candidatebuffer
,
candidate
,
4
*
sizeof
(
int
)
*
outputsz
);
...
@@ -1059,6 +896,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -1059,6 +896,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
openCLSafeCall
(
clReleaseMemObject
(
scaleinfobuffer
));
openCLSafeCall
(
clReleaseMemObject
(
scaleinfobuffer
));
openCLSafeCall
(
clReleaseMemObject
(
nodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(
nodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(
candidatebuffer
));
openCLSafeCall
(
clReleaseMemObject
(
candidatebuffer
));
}
}
else
else
{
{
...
@@ -1118,7 +956,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -1118,7 +956,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
-
sizeof
(
GpuHidHaarClassifier
)
*
totalclassifier
)
/
sizeof
(
GpuHidHaarTreeNode
);
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
-
sizeof
(
GpuHidHaarClassifier
)
*
totalclassifier
)
/
sizeof
(
GpuHidHaarTreeNode
);
nodebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
nodebuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
nodenum
*
sizeof
(
GpuHidHaarTreeNode
));
nodenum
*
sizeof
(
GpuHidHaarTreeNode
));
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
nodebuffer
,
1
,
0
,
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
nodebuffer
,
1
,
0
,
nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
node
,
0
,
NULL
,
NULL
));
...
@@ -1160,7 +997,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -1160,7 +997,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args1
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
startnodenum
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
startnodenum
));
size_t
globalThreads2
[
3
]
=
{
nodenum
,
1
,
1
};
size_t
globalThreads2
[
3
]
=
{
nodenum
,
1
,
1
};
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuscaleclassifier"
,
globalThreads2
,
NULL
/*localThreads2*/
,
args1
,
-
1
,
-
1
);
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuscaleclassifier"
,
globalThreads2
,
NULL
/*localThreads2*/
,
args1
,
-
1
,
-
1
);
}
}
...
@@ -1195,8 +1031,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -1195,8 +1031,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
pbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
pbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
correctionbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
correctionbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
nodenum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
nodenum
));
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
candidate
=
(
int
*
)
clEnqueueMapBuffer
(
qu
,
candidatebuffer
,
1
,
CL_MAP_READ
,
0
,
4
*
sizeof
(
int
)
*
outputsz
,
0
,
0
,
0
,
&
status
);
candidate
=
(
int
*
)
clEnqueueMapBuffer
(
qu
,
candidatebuffer
,
1
,
CL_MAP_READ
,
0
,
4
*
sizeof
(
int
)
*
outputsz
,
0
,
0
,
0
,
&
status
);
...
@@ -1284,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1284,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
int
blocksize
=
8
;
int
blocksize
=
8
;
int
grp_per_CU
=
12
;
int
grp_per_CU
=
12
;
size_t
localThreads
[
3
]
=
{
blocksize
,
blocksize
,
1
};
size_t
localThreads
[
3
]
=
{
blocksize
,
blocksize
,
1
};
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
Context
::
getContext
()
->
computeUnits
()
*
localThreads
[
0
],
size_t
globalThreads
[
3
]
=
{
grp_per_CU
*
cv
::
ocl
::
Context
::
getContext
()
->
computeUnits
()
*
localThreads
[
0
],
localThreads
[
1
],
localThreads
[
1
],
1
};
1
};
int
outputsz
=
256
*
globalThreads
[
0
]
/
localThreads
[
0
];
int
outputsz
=
256
*
globalThreads
[
0
]
/
localThreads
[
0
];
...
@@ -1300,8 +1136,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1300,8 +1136,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
CvHaarClassifierCascade
*
cascade
=
oldCascade
;
CvHaarClassifierCascade
*
cascade
=
oldCascade
;
GpuHidHaarClassifierCascade
*
gcascade
;
GpuHidHaarClassifierCascade
*
gcascade
;
GpuHidHaarStageClassifier
*
stage
;
GpuHidHaarStageClassifier
*
stage
;
GpuHidHaarClassifier
*
classifier
;
GpuHidHaarTreeNode
*
node
;
if
(
CV_MAT_DEPTH
(
gimg
.
type
())
!=
CV_8U
)
if
(
CV_MAT_DEPTH
(
gimg
.
type
())
!=
CV_8U
)
CV_Error
(
CV_StsUnsupportedFormat
,
"Only 8-bit images are supported"
);
CV_Error
(
CV_StsUnsupportedFormat
,
"Only 8-bit images are supported"
);
...
@@ -1314,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1314,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
}
}
int
*
candidate
;
int
*
candidate
;
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
{
int
indexy
=
0
;
int
indexy
=
0
;
...
@@ -1340,19 +1174,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1340,19 +1174,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
gpuSetImagesForHaarClassifierCascade
(
cascade
,
1.
,
gsum
.
step
/
4
);
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
int
startstage
=
0
;
int
startstage
=
0
;
int
endstage
=
gcascade
->
count
;
int
endstage
=
gcascade
->
count
;
...
@@ -1389,17 +1210,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1389,17 +1210,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
pq
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
pq
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
candidate
=
(
int
*
)
malloc
(
4
*
sizeof
(
int
)
*
outputsz
);
candidate
=
(
int
*
)
malloc
(
4
*
sizeof
(
int
)
*
outputsz
);
memset
(
candidate
,
0
,
4
*
sizeof
(
int
)
*
outputsz
);
memset
(
candidate
,
0
,
4
*
sizeof
(
int
)
*
outputsz
);
openCLReadBuffer
(
gsum
.
clCxt
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
candidate
,
4
*
sizeof
(
int
)
*
outputsz
);
openCLReadBuffer
(
gsum
.
clCxt
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
candidate
,
4
*
sizeof
(
int
)
*
outputsz
);
for
(
int
i
=
0
;
i
<
outputsz
;
i
++
)
for
(
int
i
=
0
;
i
<
outputsz
;
i
++
)
{
if
(
candidate
[
4
*
i
+
2
]
!=
0
)
if
(
candidate
[
4
*
i
+
2
]
!=
0
)
{
allCandidates
.
push_back
(
Rect
(
candidate
[
4
*
i
],
candidate
[
4
*
i
+
1
],
allCandidates
.
push_back
(
Rect
(
candidate
[
4
*
i
],
candidate
[
4
*
i
+
1
],
candidate
[
4
*
i
+
2
],
candidate
[
4
*
i
+
3
]));
candidate
[
4
*
i
+
2
],
candidate
[
4
*
i
+
3
]));
}
}
free
((
void
*
)
candidate
);
free
((
void
*
)
candidate
);
candidate
=
NULL
;
candidate
=
NULL
;
}
}
...
@@ -1407,56 +1234,14 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1407,56 +1234,14 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
{
{
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum
);
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum
);
gpuSetHaarClassifierCascade
(
cascade
);
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
cl_command_queue
qu
=
(
cl_command_queue
)
gsum
.
clCxt
->
oclCommandQueue
();
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
cl_int4
*
p
=
(
cl_int4
*
)
malloc
(
sizeof
(
cl_int4
)
*
m_loopcount
);
float
*
correction
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
m_loopcount
);
int
startstage
=
0
;
int
endstage
=
gcascade
->
count
;
double
factor
;
for
(
int
i
=
0
;
i
<
m_loopcount
;
i
++
)
{
factor
=
scalev
[
i
];
int
equRect_x
=
(
int
)(
factor
*
gcascade
->
p0
+
0.5
);
int
equRect_y
=
(
int
)(
factor
*
gcascade
->
p1
+
0.5
);
int
equRect_w
=
(
int
)(
factor
*
gcascade
->
p3
+
0.5
);
int
equRect_h
=
(
int
)(
factor
*
gcascade
->
p2
+
0.5
);
p
[
i
].
s
[
0
]
=
equRect_x
;
p
[
i
].
s
[
1
]
=
equRect_y
;
p
[
i
].
s
[
2
]
=
equRect_x
+
equRect_w
;
p
[
i
].
s
[
3
]
=
equRect_y
+
equRect_h
;
correction
[
i
]
=
1.
/
(
equRect_w
*
equRect_h
);
int
startnodenum
=
m_nodenum
*
i
;
float
factor2
=
(
float
)
factor
;
vector
<
pair
<
size_t
,
const
void
*>
>
args1
;
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
newnodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
factor2
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
[
i
]
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
startnodenum
));
size_t
globalThreads2
[
3
]
=
{
m_nodenum
,
1
,
1
};
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuscaleclassifier"
,
globalThreads2
,
NULL
/*localThreads2*/
,
args1
,
-
1
,
-
1
);
}
int
step
=
gsum
.
step
/
4
;
int
step
=
gsum
.
step
/
4
;
int
startnode
=
0
;
int
startnode
=
0
;
int
splitstage
=
3
;
int
splitstage
=
3
;
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
pbuffer
,
1
,
0
,
sizeof
(
cl_int4
)
*
m_loopcount
,
p
,
0
,
NULL
,
NULL
))
;
int
startstage
=
0
;
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
correctionbuffer
,
1
,
0
,
sizeof
(
cl_float
)
*
m_loopcount
,
correction
,
0
,
NULL
,
NULL
))
;
int
endstage
=
gcascade
->
count
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
stagebuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
stagebuffer
));
...
@@ -1477,7 +1262,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1477,7 +1262,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
correctionbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
correctionbuffer
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
m_nodenum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
m_nodenum
));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
const
char
*
build_options
=
gcascade
->
is_stump_based
?
"-D STUMP_BASED=1"
:
"-D STUMP_BASED=0"
;
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
candidate
=
(
int
*
)
clEnqueueMapBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
1
,
CL_MAP_READ
,
0
,
4
*
sizeof
(
int
)
*
outputsz
,
0
,
0
,
0
,
NULL
);
candidate
=
(
int
*
)
clEnqueueMapBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
1
,
CL_MAP_READ
,
0
,
4
*
sizeof
(
int
)
*
outputsz
,
0
,
0
,
0
,
NULL
);
...
@@ -1487,12 +1273,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1487,12 +1273,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
allCandidates
.
push_back
(
Rect
(
candidate
[
4
*
i
],
candidate
[
4
*
i
+
1
],
allCandidates
.
push_back
(
Rect
(
candidate
[
4
*
i
],
candidate
[
4
*
i
+
1
],
candidate
[
4
*
i
+
2
],
candidate
[
4
*
i
+
3
]));
candidate
[
4
*
i
+
2
],
candidate
[
4
*
i
+
3
]));
}
}
free
(
p
);
free
(
correction
);
clEnqueueUnmapMemObject
(
qu
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
candidate
,
0
,
0
,
0
);
clEnqueueUnmapMemObject
(
qu
,
((
OclBuffers
*
)
buffers
)
->
candidatebuffer
,
candidate
,
0
,
0
,
0
);
}
}
rectList
.
resize
(
allCandidates
.
size
());
rectList
.
resize
(
allCandidates
.
size
());
if
(
!
allCandidates
.
empty
())
if
(
!
allCandidates
.
empty
())
std
::
copy
(
allCandidates
.
begin
(),
allCandidates
.
end
(),
rectList
.
begin
());
std
::
copy
(
allCandidates
.
begin
(),
allCandidates
.
end
(),
rectList
.
begin
());
...
@@ -1510,6 +1292,10 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
...
@@ -1510,6 +1292,10 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
const
int
outputsz
,
const
size_t
localThreads
[],
const
int
outputsz
,
const
size_t
localThreads
[],
CvSize
minSize
,
CvSize
maxSize
)
CvSize
minSize
,
CvSize
maxSize
)
{
{
if
(
initialized
)
{
return
;
// we only allow one time initialization
}
CvHaarClassifierCascade
*
cascade
=
oldCascade
;
CvHaarClassifierCascade
*
cascade
=
oldCascade
;
if
(
!
CV_IS_HAAR_CLASSIFIER
(
cascade
)
)
if
(
!
CV_IS_HAAR_CLASSIFIER
(
cascade
)
)
...
@@ -1525,7 +1311,9 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
...
@@ -1525,7 +1311,9 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
int
totalclassifier
=
0
;
int
totalclassifier
=
0
;
if
(
!
cascade
->
hid_cascade
)
if
(
!
cascade
->
hid_cascade
)
{
gpuCreateHidHaarClassifierCascade
(
cascade
,
&
datasize
,
&
totalclassifier
);
gpuCreateHidHaarClassifierCascade
(
cascade
,
&
datasize
,
&
totalclassifier
);
}
if
(
maxSize
.
height
==
0
||
maxSize
.
width
==
0
)
if
(
maxSize
.
height
==
0
||
maxSize
.
width
==
0
)
{
{
...
@@ -1547,6 +1335,78 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
...
@@ -1547,6 +1335,78 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
m_minSize
=
minSize
;
m_minSize
=
minSize
;
m_maxSize
=
maxSize
;
m_maxSize
=
maxSize
;
// initialize nodes
GpuHidHaarClassifierCascade
*
gcascade
;
GpuHidHaarStageClassifier
*
stage
;
GpuHidHaarClassifier
*
classifier
;
GpuHidHaarTreeNode
*
node
;
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
gpuSetImagesForHaarClassifierCascade
(
cascade
,
1.
,
gsum
.
step
/
4
);
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
}
else
{
gpuSetHaarClassifierCascade
(
cascade
);
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
classifier
=
(
GpuHidHaarClassifier
*
)(
stage
+
gcascade
->
count
);
node
=
(
GpuHidHaarTreeNode
*
)(
classifier
->
node
);
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
nodebuffer
,
1
,
0
,
m_nodenum
*
sizeof
(
GpuHidHaarTreeNode
),
node
,
0
,
NULL
,
NULL
));
cl_int4
*
p
=
(
cl_int4
*
)
malloc
(
sizeof
(
cl_int4
)
*
m_loopcount
);
float
*
correction
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
m_loopcount
);
double
factor
;
for
(
int
i
=
0
;
i
<
m_loopcount
;
i
++
)
{
factor
=
scalev
[
i
];
int
equRect_x
=
(
int
)(
factor
*
gcascade
->
p0
+
0.5
);
int
equRect_y
=
(
int
)(
factor
*
gcascade
->
p1
+
0.5
);
int
equRect_w
=
(
int
)(
factor
*
gcascade
->
p3
+
0.5
);
int
equRect_h
=
(
int
)(
factor
*
gcascade
->
p2
+
0.5
);
p
[
i
].
s
[
0
]
=
equRect_x
;
p
[
i
].
s
[
1
]
=
equRect_y
;
p
[
i
].
s
[
2
]
=
equRect_x
+
equRect_w
;
p
[
i
].
s
[
3
]
=
equRect_y
+
equRect_h
;
correction
[
i
]
=
1.
/
(
equRect_w
*
equRect_h
);
int
startnodenum
=
m_nodenum
*
i
;
float
factor2
=
(
float
)
factor
;
vector
<
pair
<
size_t
,
const
void
*>
>
args1
;
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
((
OclBuffers
*
)
buffers
)
->
newnodebuffer
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
factor2
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
[
i
]
));
args1
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
startnodenum
));
size_t
globalThreads2
[
3
]
=
{
m_nodenum
,
1
,
1
};
openCLExecuteKernel
(
Context
::
getContext
(),
&
haarobjectdetect_scaled2
,
"gpuscaleclassifier"
,
globalThreads2
,
NULL
/*localThreads2*/
,
args1
,
-
1
,
-
1
);
}
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
stagebuffer
,
1
,
0
,
sizeof
(
GpuHidHaarStageClassifier
)
*
gcascade
->
count
,
stage
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
pbuffer
,
1
,
0
,
sizeof
(
cl_int4
)
*
m_loopcount
,
p
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
correctionbuffer
,
1
,
0
,
sizeof
(
cl_float
)
*
m_loopcount
,
correction
,
0
,
NULL
,
NULL
));
free
(
p
);
free
(
correction
);
}
initialized
=
true
;
initialized
=
true
;
}
}
...
@@ -1645,6 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
...
@@ -1645,6 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
CvSize
sz
;
CvSize
sz
;
CvSize
winSize0
=
oldCascade
->
orig_window_size
;
CvSize
winSize0
=
oldCascade
->
orig_window_size
;
detect_piramid_info
*
scaleinfo
;
detect_piramid_info
*
scaleinfo
;
cl_command_queue
qu
=
reinterpret_cast
<
cl_command_queue
>
(
Context
::
getContext
()
->
oclCommandQueue
());
if
(
flags
&
CV_HAAR_SCALE_IMAGE
)
if
(
flags
&
CV_HAAR_SCALE_IMAGE
)
{
{
for
(
factor
=
1.
f
;;
factor
*=
scaleFactor
)
for
(
factor
=
1.
f
;;
factor
*=
scaleFactor
)
...
@@ -1746,7 +1607,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
...
@@ -1746,7 +1607,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
=
openCLCreateBuffer
(
cv
::
ocl
::
Context
::
getContext
(),
CL_MEM_READ_ONLY
,
sizeof
(
detect_piramid_info
)
*
loopcount
);
((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
=
openCLCreateBuffer
(
cv
::
ocl
::
Context
::
getContext
(),
CL_MEM_READ_ONLY
,
sizeof
(
detect_piramid_info
)
*
loopcount
);
}
}
openCLSafeCall
(
clEnqueueWriteBuffer
(
(
cl_command_queue
)
cv
::
ocl
::
Context
::
getContext
()
->
oclCommandQueue
()
,
((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
,
1
,
0
,
openCLSafeCall
(
clEnqueueWriteBuffer
(
qu
,
((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
,
1
,
0
,
sizeof
(
detect_piramid_info
)
*
loopcount
,
sizeof
(
detect_piramid_info
)
*
loopcount
,
scaleinfo
,
0
,
NULL
,
NULL
));
scaleinfo
,
0
,
NULL
,
NULL
));
free
(
scaleinfo
);
free
(
scaleinfo
);
...
@@ -1758,7 +1619,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
...
@@ -1758,7 +1619,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
const
std
::
vector
<
cv
::
Rect
>
&
rectList
,
const
std
::
vector
<
cv
::
Rect
>
&
rectList
,
const
std
::
vector
<
int
>
&
rweights
)
const
std
::
vector
<
int
>
&
rweights
)
{
{
CvSeq
*
result_seq
=
cvCreateSeq
(
0
,
sizeof
(
CvSeq
),
sizeof
(
CvAvgComp
),
cvCreateMemStorage
(
0
)
);
MemStorage
tempStorage
(
cvCreateMemStorage
(
0
));
CvSeq
*
result_seq
=
cvCreateSeq
(
0
,
sizeof
(
CvSeq
),
sizeof
(
CvAvgComp
),
tempStorage
);
if
(
findBiggestObject
&&
rectList
.
size
()
)
if
(
findBiggestObject
&&
rectList
.
size
()
)
{
{
...
@@ -1794,6 +1656,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
...
@@ -1794,6 +1656,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
void
cv
::
ocl
::
OclCascadeClassifierBuf
::
release
()
void
cv
::
ocl
::
OclCascadeClassifierBuf
::
release
()
{
{
if
(
initialized
)
{
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
stagebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
stagebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
scaleinfobuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
openCLSafeCall
(
clReleaseMemObject
(((
OclBuffers
*
)
buffers
)
->
nodebuffer
));
...
@@ -1812,149 +1676,10 @@ void cv::ocl::OclCascadeClassifierBuf::release()
...
@@ -1812,149 +1676,10 @@ void cv::ocl::OclCascadeClassifierBuf::release()
free
(
buffers
);
free
(
buffers
);
buffers
=
NULL
;
buffers
=
NULL
;
initialized
=
false
;
}
}
}
#ifndef _MAX_PATH
#ifndef _MAX_PATH
#define _MAX_PATH 1024
#define _MAX_PATH 1024
#endif
#endif
/****************************************************************************************\
* Persistence functions *
\****************************************************************************************/
/* field names */
#define ICV_HAAR_SIZE_NAME "size"
#define ICV_HAAR_STAGES_NAME "stages"
#define ICV_HAAR_TREES_NAME "trees"
#define ICV_HAAR_FEATURE_NAME "feature"
#define ICV_HAAR_RECTS_NAME "rects"
#define ICV_HAAR_TILTED_NAME "tilted"
#define ICV_HAAR_THRESHOLD_NAME "threshold"
#define ICV_HAAR_LEFT_NODE_NAME "left_node"
#define ICV_HAAR_LEFT_VAL_NAME "left_val"
#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
#define ICV_HAAR_PARENT_NAME "parent"
#define ICV_HAAR_NEXT_NAME "next"
static
int
gpuRunHaarClassifierCascade
(
/*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */
)
{
return
1
;
}
namespace
cv
{
namespace
ocl
{
struct
gpuHaarDetectObjects_ScaleImage_Invoker
{
gpuHaarDetectObjects_ScaleImage_Invoker
(
const
CvHaarClassifierCascade
*
_cascade
,
int
_stripSize
,
double
_factor
,
const
Mat
&
_sum1
,
const
Mat
&
_sqsum1
,
Mat
*
_norm1
,
Mat
*
_mask1
,
Rect
_equRect
,
ConcurrentRectVector
&
_vec
)
{
cascade
=
_cascade
;
stripSize
=
_stripSize
;
factor
=
_factor
;
sum1
=
_sum1
;
sqsum1
=
_sqsum1
;
norm1
=
_norm1
;
mask1
=
_mask1
;
equRect
=
_equRect
;
vec
=
&
_vec
;
}
void
operator
()(
const
BlockedRange
&
range
)
const
{
Size
winSize0
=
cascade
->
orig_window_size
;
Size
winSize
(
cvRound
(
winSize0
.
width
*
factor
),
cvRound
(
winSize0
.
height
*
factor
));
int
y1
=
range
.
begin
()
*
stripSize
,
y2
=
min
(
range
.
end
()
*
stripSize
,
sum1
.
rows
-
1
-
winSize0
.
height
);
Size
ssz
(
sum1
.
cols
-
1
-
winSize0
.
width
,
y2
-
y1
);
int
x
,
y
,
ystep
=
factor
>
2
?
1
:
2
;
for
(
y
=
y1
;
y
<
y2
;
y
+=
ystep
)
for
(
x
=
0
;
x
<
ssz
.
width
;
x
+=
ystep
)
{
if
(
gpuRunHaarClassifierCascade
(
/*cascade, cvPoint(x, y), 0*/
)
>
0
)
vec
->
push_back
(
Rect
(
cvRound
(
x
*
factor
),
cvRound
(
y
*
factor
),
winSize
.
width
,
winSize
.
height
));
}
}
const
CvHaarClassifierCascade
*
cascade
;
int
stripSize
;
double
factor
;
Mat
sum1
,
sqsum1
,
*
norm1
,
*
mask1
;
Rect
equRect
;
ConcurrentRectVector
*
vec
;
};
struct
gpuHaarDetectObjects_ScaleCascade_Invoker
{
gpuHaarDetectObjects_ScaleCascade_Invoker
(
const
CvHaarClassifierCascade
*
_cascade
,
Size
_winsize
,
const
Range
&
_xrange
,
double
_ystep
,
size_t
_sumstep
,
const
int
**
_p
,
const
int
**
_pq
,
ConcurrentRectVector
&
_vec
)
{
cascade
=
_cascade
;
winsize
=
_winsize
;
xrange
=
_xrange
;
ystep
=
_ystep
;
sumstep
=
_sumstep
;
p
=
_p
;
pq
=
_pq
;
vec
=
&
_vec
;
}
void
operator
()(
const
BlockedRange
&
range
)
const
{
int
iy
,
startY
=
range
.
begin
(),
endY
=
range
.
end
();
const
int
*
p0
=
p
[
0
],
*
p1
=
p
[
1
],
*
p2
=
p
[
2
],
*
p3
=
p
[
3
];
const
int
*
pq0
=
pq
[
0
],
*
pq1
=
pq
[
1
],
*
pq2
=
pq
[
2
],
*
pq3
=
pq
[
3
];
bool
doCannyPruning
=
p0
!=
0
;
int
sstep
=
(
int
)(
sumstep
/
sizeof
(
p0
[
0
]));
for
(
iy
=
startY
;
iy
<
endY
;
iy
++
)
{
int
ix
,
y
=
cvRound
(
iy
*
ystep
),
ixstep
=
1
;
for
(
ix
=
xrange
.
start
;
ix
<
xrange
.
end
;
ix
+=
ixstep
)
{
int
x
=
cvRound
(
ix
*
ystep
);
// it should really be ystep, not ixstep
if
(
doCannyPruning
)
{
int
offset
=
y
*
sstep
+
x
;
int
s
=
p0
[
offset
]
-
p1
[
offset
]
-
p2
[
offset
]
+
p3
[
offset
];
int
sq
=
pq0
[
offset
]
-
pq1
[
offset
]
-
pq2
[
offset
]
+
pq3
[
offset
];
if
(
s
<
100
||
sq
<
20
)
{
ixstep
=
2
;
continue
;
}
}
int
result
=
gpuRunHaarClassifierCascade
(
/* cascade, cvPoint(x, y), 0 */
);
if
(
result
>
0
)
vec
->
push_back
(
Rect
(
x
,
y
,
winsize
.
width
,
winsize
.
height
));
ixstep
=
result
!=
0
?
1
:
2
;
}
}
}
const
CvHaarClassifierCascade
*
cascade
;
double
ystep
;
size_t
sumstep
;
Size
winsize
;
Range
xrange
;
const
int
**
p
;
const
int
**
pq
;
ConcurrentRectVector
*
vec
;
};
}
}
modules/ocl/src/opencl/haarobjectdetect.cl
View file @
c3f5e737
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
//
Wang
Weiyan,
wangweiyanster@gmail.com
//
Wang
Weiyan,
wangweiyanster@gmail.com
//
Jia
Haipeng,
jiahaipeng95@gmail.com
//
Jia
Haipeng,
jiahaipeng95@gmail.com
//
Nathan,
liujun@multicorewareinc.com
//
Nathan,
liujun@multicorewareinc.com
//
Peng
Xiao,
pengxiao@outlook.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
...
@@ -45,27 +46,16 @@
...
@@ -45,27 +46,16 @@
typedef
int
sumtype
;
typedef
int
sumtype
;
typedef
float
sqsumtype
;
typedef
float
sqsumtype
;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarFeature
#
ifndef
STUMP_BASED
{
#
define
STUMP_BASED
1
struct
__attribute__
((
aligned
(
32
)))
#
endif
{
int
p0
__attribute__
((
aligned
(
4
)))
;
int
p1
__attribute__
((
aligned
(
4
)))
;
int
p2
__attribute__
((
aligned
(
4
)))
;
int
p3
__attribute__
((
aligned
(
4
)))
;
float
weight
__attribute__
((
aligned
(
4
)))
;
}
rect[CV_HAAR_FEATURE_MAX]
__attribute__
((
aligned
(
32
)))
;
}
GpuHidHaarFeature
;
typedef
struct
__attribute__
((
aligned
(
128
)
))
GpuHidHaarTreeNode
typedef
struct
__attribute__
((
aligned
(
128
)
))
GpuHidHaarTreeNode
{
{
int
p[CV_HAAR_FEATURE_MAX][4]
__attribute__
((
aligned
(
64
)))
;
int
p[CV_HAAR_FEATURE_MAX][4]
__attribute__
((
aligned
(
64
)))
;
float
weight[CV_HAAR_FEATURE_MAX]
/*__attribute__
((
aligned
(
16
)))
*/
;
float
weight[CV_HAAR_FEATURE_MAX]
;
float
threshold
/*__attribute__
((
aligned
(
4
)))
*/
;
float
threshold
;
float
alpha[
2]
__attribute__
((
aligned
(
8
)))
;
float
alpha[
3]
__attribute__
((
aligned
(
16
)))
;
int
left
__attribute__
((
aligned
(
4
)))
;
int
left
__attribute__
((
aligned
(
4
)))
;
int
right
__attribute__
((
aligned
(
4
)))
;
int
right
__attribute__
((
aligned
(
4
)))
;
}
}
...
@@ -111,7 +101,6 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
...
@@ -111,7 +101,6 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
float
inv_window_area
__attribute__
((
aligned
(
4
)))
;
float
inv_window_area
__attribute__
((
aligned
(
4
)))
;
}
GpuHidHaarClassifierCascade
;
}
GpuHidHaarClassifierCascade
;
__kernel
void
__attribute__
((
reqd_work_group_size
(
8
,
8
,
1
)))
gpuRunHaarClassifierCascade
(
__kernel
void
__attribute__
((
reqd_work_group_size
(
8
,
8
,
1
)))
gpuRunHaarClassifierCascade
(
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
global
int4
*
info,
global
int4
*
info,
...
@@ -234,7 +223,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
...
@@ -234,7 +223,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
float
stage_sum
=
0.f
;
float
stage_sum
=
0.f
;
int2
stageinfo
=
*
(
global
int2*
)(
stagecascadeptr+stageloop
)
;
int2
stageinfo
=
*
(
global
int2*
)(
stagecascadeptr+stageloop
)
;
float
stagethreshold
=
as_float
(
stageinfo.y
)
;
float
stagethreshold
=
as_float
(
stageinfo.y
)
;
for
(
int
nodeloop
=
0
; nodeloop < stageinfo.x;
nodeloop++
)
for
(
int
nodeloop
=
0
; nodeloop < stageinfo.x; )
{
{
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
nodeptr
+
nodecounter
)
;
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
nodeptr
+
nodecounter
)
;
...
@@ -242,7 +231,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
...
@@ -242,7 +231,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float2
alpha2
=
*
(
__global
float2*
)(
&
(
currentnodeptr->alpha[0]
))
;
float3
alpha3
=
*
(
__global
float3*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
info1.x
+=lcl_off
;
info1.x
+=lcl_off
;
...
@@ -261,8 +251,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
...
@@ -261,8 +251,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
stage_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
bool
passThres
=
classsum
>=
nodethreshold
;
#
if
STUMP_BASED
stage_sum
+=
passThres
?
alpha3.y
:
alpha3.x
;
nodecounter++
;
nodecounter++
;
nodeloop++
;
#
else
bool
isRootNode
=
(
nodecounter
&
1
)
==
0
;
if
(
isRootNode
)
{
if
(
(
passThres
&&
currentnodeptr->right
)
|
|
(!passThres && currentnodeptr->left))
{
nodecounter ++;
}
else
{
stage_sum += alpha3.x;
nodecounter += 2;
nodeloop ++;
}
}
else
{
stage_sum += passThres ? alpha3.z : alpha3.y;
nodecounter ++;
nodeloop ++;
}
#endif
}
}
result = (stage_sum >= stagethreshold);
result = (stage_sum >= stagethreshold);
...
@@ -301,18 +317,20 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
...
@@ -301,18 +317,20 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
if
(
lcl_compute_win_id
<
queuecount
)
if
(
lcl_compute_win_id
<
queuecount
)
{
{
int
tempnodecounter
=
lcl_compute_id
;
int
tempnodecounter
=
lcl_compute_id
;
float
part_sum
=
0.f
;
float
part_sum
=
0.f
;
for
(
int
lcl_loop=0
; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
const
int
stump_factor
=
STUMP_BASED
?
1
:
2
;
int
root_offset
=
0
;
for
(
int
lcl_loop=0
; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
{
{
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
nodeptr
+
nodecounter
+
tempnodecounter
)
;
__global
GpuHidHaarTreeNode*
currentnodeptr
=
nodeptr
+
(
nodecounter
+
tempnodecounter
)
*
stump_factor
+
root_offset
;
int4
info1
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info1
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float
2
alpha2
=
*
(
__global
float2
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
3
alpha3
=
*
(
__global
float3
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
info1.x
+=queue_pixel
;
info1.x
+=queue_pixel
;
...
@@ -332,8 +350,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
...
@@ -332,8 +350,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
part_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
bool
passThres
=
classsum
>=
nodethreshold
;
tempnodecounter
+=lcl_compute_win
;
#
if
STUMP_BASED
part_sum
+=
passThres
?
alpha3.y
:
alpha3.x
;
tempnodecounter
+=
lcl_compute_win
;
lcl_loop++
;
#
else
if
(
root_offset
==
0
)
{
if
(
(
passThres
&&
currentnodeptr->right
)
||
(
!passThres
&&
currentnodeptr->left
))
{
root_offset
=
1
;
}
else
{
part_sum
+=
alpha3.x
;
tempnodecounter
+=
lcl_compute_win
;
lcl_loop++
;
}
}
else
{
part_sum
+=
passThres
?
alpha3.z
:
alpha3.y
;
tempnodecounter
+=
lcl_compute_win
;
lcl_loop++
;
root_offset
=
0
;
}
#
endif
}//end
for
(
int
lcl_loop=0
;lcl_loop<lcl_loops;lcl_loop++)
}//end
for
(
int
lcl_loop=0
;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum
;
partialsum[lcl_id]=part_sum
;
}
}
...
@@ -379,157 +423,3 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
...
@@ -379,157 +423,3 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
}
}
/*
if
(
stagecascade->two_rects
)
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,p_offset,0
)
*
t1.weight[0]
;
classsum
+=
calc_sum1
(
t1,
p_offset,1
)
*
t1.weight[1]
;
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]:t1.alpha[0]
;
counter++
;
}
}
else
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t
=
node[counter].threshold*variance_norm_factor
;
classsum
=
calc_sum1
(
node[counter],p_offset,0
)
*
node[counter].weight[0]
;
classsum
+=
calc_sum1
(
node[counter],p_offset,1
)
*
node[counter].weight[1]
;
if
(
node[counter].p0[2]
)
classsum
+=
calc_sum1
(
node[counter],p_offset,2
)
*
node[counter].weight[2]
;
stage_sum
+=
classsum
>=
t
?
node[counter].alpha[1]:node[counter].alpha[0]
;// modify
counter++
;
}
}
*/
/*
__kernel
void
gpuRunHaarClassifierCascade_ScaleWindow
(
constant
GpuHidHaarClassifierCascade
*
_cascade,
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
//global
GpuHidHaarClassifier
*
classifierptr,
global
GpuHidHaarTreeNode
*
nodeptr,
global
int
*
sum,
global
float
*
sqsum,
global
int
*
_candidate,
int
pixel_step,
int
cols,
int
rows,
int
start_stage,
int
end_stage,
//int
counts,
int
nodenum,
int
ystep,
int
detect_width,
//int
detect_height,
int
loopcount,
int
outputstep
)
//float
scalefactor
)
{
unsigned
int
x1
=
get_global_id
(
0
)
;
unsigned
int
y1
=
get_global_id
(
1
)
;
int
p_offset
;
int
m,
n
;
int
result
;
int
counter
;
float
mean,
variance_norm_factor
;
for
(
int
i=0
;i<loopcount;i++)
{
constant
GpuHidHaarClassifierCascade
*
cascade
=
_cascade
+
i
;
global
int
*
candidate
=
_candidate
+
i*outputstep
;
int
window_width
=
cascade->p1
-
cascade->p0
;
int
window_height
=
window_width
;
result
=
1
;
counter
=
0
;
unsigned
int
x
=
mul24
(
x1,ystep
)
;
unsigned
int
y
=
mul24
(
y1,ystep
)
;
if
((
x
<
cols
-
window_width
-
1
)
&&
(
y
<
rows
-
window_height
-1
))
{
global
GpuHidHaarStageClassifier
*stagecascade
=
stagecascadeptr
+cascade->count*i+
start_stage
;
//global
GpuHidHaarClassifier
*classifier
=
classifierptr
;
global
GpuHidHaarTreeNode
*node
=
nodeptr
+
nodenum*i
;
p_offset
=
mad24
(
y,
pixel_step,
x
)
;// modify
mean
=
(
*
(
sum
+
p_offset
+
(
int
)
cascade->p0
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p1
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p2
)
+
*
(
sum
+
p_offset
+
(
int
)
cascade->p3
))
*cascade->inv_window_area
;
variance_norm_factor
=
*
(
sqsum
+
p_offset
+
cascade->p0
)
-
*
(
sqsum
+
cascade->p1
+
p_offset
)
-
*
(
sqsum
+
p_offset
+
cascade->p2
)
+
*
(
sqsum
+
cascade->p3
+
p_offset
)
;
variance_norm_factor
=
variance_norm_factor
*
cascade->inv_window_area
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=0.f
?
sqrt
(
variance_norm_factor
)
:
1
;//modify
//
if
(
cascade->is_stump_based
)
//{
for
(
m
=
start_stage
; m < end_stage; m++ )
{
float
stage_sum
=
0.f
;
float
t
,
classsum
;
GpuHidHaarTreeNode
t1
;
//#pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,
p_offset
,
0
)
*
t1.weight[0]
+
calc_sum1
(
t1,
p_offset
,
1
)
*
t1.weight[1]
;
if
((
t1.p0[2]
)
&&
(
!stagecascade->two_rects
))
classsum
+=
calc_sum1
(
t1,
p_offset,
2
)
*
t1.weight[2]
;
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]
:
t1.alpha[0]
;// modify
counter++
;
}
if
(
stage_sum
<
stagecascade->threshold
)
{
result
=
0
;
break
;
}
stagecascade++
;
}
if
(
result
)
{
candidate[4
*
(
y1
*
detect_width
+
x1
)
]
=
x
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
1]
=
y
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+2]
=
window_width
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
3]
=
window_height
;
}
//}
}
}
}
*/
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
View file @
c3f5e737
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
//
@Authors
//
@Authors
//
Wu
Xinglong,
wxl370@126.com
//
Wu
Xinglong,
wxl370@126.com
//
Sen
Liu,
swjtuls1987@126.com
//
Sen
Liu,
swjtuls1987@126.com
//
//
Peng
Xiao,
pengxiao@outlook.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
...
@@ -49,25 +49,13 @@
...
@@ -49,25 +49,13 @@
#
define
CV_HAAR_FEATURE_MAX
3
#
define
CV_HAAR_FEATURE_MAX
3
typedef
int
sumtype
;
typedef
int
sumtype
;
typedef
float
sqsumtype
;
typedef
float
sqsumtype
;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarFeature
{
struct
__attribute__
((
aligned
(
32
)))
{
int
p0
__attribute__
((
aligned
(
4
)))
;
int
p1
__attribute__
((
aligned
(
4
)))
;
int
p2
__attribute__
((
aligned
(
4
)))
;
int
p3
__attribute__
((
aligned
(
4
)))
;
float
weight
__attribute__
((
aligned
(
4
)))
;
}
rect[CV_HAAR_FEATURE_MAX]
__attribute__
((
aligned
(
32
)))
;
}
GpuHidHaarFeature
;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarTreeNode
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarTreeNode
{
{
int
p[CV_HAAR_FEATURE_MAX][4]
__attribute__
((
aligned
(
64
)))
;
int
p[CV_HAAR_FEATURE_MAX][4]
__attribute__
((
aligned
(
64
)))
;
float
weight[CV_HAAR_FEATURE_MAX]
/*__attribute__
((
aligned
(
16
)))
*/
;
float
weight[CV_HAAR_FEATURE_MAX]
/*__attribute__
((
aligned
(
16
)))
*/
;
float
threshold
/*__attribute__
((
aligned
(
4
)))
*/
;
float
threshold
/*__attribute__
((
aligned
(
4
)))
*/
;
float
alpha[
2]
__attribute__
((
aligned
(
8
)))
;
float
alpha[
3]
__attribute__
((
aligned
(
16
)))
;
int
left
__attribute__
((
aligned
(
4
)))
;
int
left
__attribute__
((
aligned
(
4
)))
;
int
right
__attribute__
((
aligned
(
4
)))
;
int
right
__attribute__
((
aligned
(
4
)))
;
}
}
...
@@ -174,45 +162,83 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
...
@@ -174,45 +162,83 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
const
int
p_offset
=
mad24
(
y,
step,
x
)
;
const
int
p_offset
=
mad24
(
y,
step,
x
)
;
cascadeinfo.x
+=
p_offset
;
cascadeinfo.x
+=
p_offset
;
cascadeinfo.z
+=
p_offset
;
cascadeinfo.z
+=
p_offset
;
mean
=
(
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
mean
=
(
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
)
-
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
)
*
correction_t
;
*
correction_t
;
variance_norm_factor
=
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
variance_norm_factor
=
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
;
-
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
;
variance_norm_factor
=
variance_norm_factor
*
correction_t
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
*
correction_t
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=
0.f
?
sqrt
(
variance_norm_factor
)
:
1.f
;
variance_norm_factor
=
variance_norm_factor
>=
0.f
?
sqrt
(
variance_norm_factor
)
:
1.f
;
bool
result
=
true
;
bool
result
=
true
;
nodecounter
=
startnode
+
nodecount
*
scalei
;
nodecounter
=
startnode
+
nodecount
*
scalei
;
for
(
int
stageloop
=
start_stage
; (stageloop < end_stage) && result; stageloop++)
for
(
int
stageloop
=
start_stage
; (stageloop < end_stage) && result; stageloop++)
{
{
float
stage_sum
=
0.f
;
float
stage_sum
=
0.f
;
int
stagecount
=
stagecascadeptr[stageloop].count
;
int
stagecount
=
stagecascadeptr[stageloop].count
;
for
(
int
nodeloop
=
0
; nodeloop < stagecount;
nodeloop++
)
for
(
int
nodeloop
=
0
; nodeloop < stagecount;)
{
{
__global
GpuHidHaarTreeNode
*currentnodeptr
=
(
nodeptr
+
nodecounter
)
;
__global
GpuHidHaarTreeNode
*currentnodeptr
=
(
nodeptr
+
nodecounter
)
;
int4
info1
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info1
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info2
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info2
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[2][0]
))
;
int4
info3
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4
*
)(
&
(
currentnodeptr->weight[0]
))
;
float4
w
=
*
(
__global
float4
*
)(
&
(
currentnodeptr->weight[0]
))
;
float
2
alpha2
=
*
(
__global
float2
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
3
alpha3
=
*
(
__global
float3
*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
info1.x
+=
p_offset
;
info1.x
+=
p_offset
;
info1.z
+=
p_offset
;
info1.z
+=
p_offset
;
info2.x
+=
p_offset
;
info2.x
+=
p_offset
;
info2.z
+=
p_offset
;
info2.z
+=
p_offset
;
float
classsum
=
(
sum[clamp
(
mad24
(
info1.y,
step,
info1.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.y,
step,
info1.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.w,
step,
info1.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info1.w,
step,
info1.z
)
,
0
,
max_idx
)
]
)
*
w.x
;
classsum
+=
(
sum[clamp
(
mad24
(
info2.y,
step,
info2.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.y,
step,
info2.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.w,
step,
info2.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info2.w,
step,
info2.z
)
,
0
,
max_idx
)
]
)
*
w.y
;
info3.x
+=
p_offset
;
info3.x
+=
p_offset
;
info3.z
+=
p_offset
;
info3.z
+=
p_offset
;
classsum
+=
(
sum[clamp
(
mad24
(
info3.y,
step,
info3.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.y,
step,
info3.z
)
,
0
,
max_idx
)
]
-
float
classsum
=
(
sum[clamp
(
mad24
(
info1.y,
step,
info1.x
)
,
0
,
max_idx
)
]
sum[clamp
(
mad24
(
info3.w,
step,
info3.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info3.w,
step,
info3.z
)
,
0
,
max_idx
)
]
)
*
w.z
;
-
sum[clamp
(
mad24
(
info1.y,
step,
info1.z
)
,
0
,
max_idx
)
]
-
stage_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
sum[clamp
(
mad24
(
info1.w,
step,
info1.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info1.w,
step,
info1.z
)
,
0
,
max_idx
)
]
)
*
w.x
;
classsum
+=
(
sum[clamp
(
mad24
(
info2.y,
step,
info2.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.y,
step,
info2.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.w,
step,
info2.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info2.w,
step,
info2.z
)
,
0
,
max_idx
)
]
)
*
w.y
;
classsum
+=
(
sum[clamp
(
mad24
(
info3.y,
step,
info3.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.y,
step,
info3.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.w,
step,
info3.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info3.w,
step,
info3.z
)
,
0
,
max_idx
)
]
)
*
w.z
;
bool
passThres
=
classsum
>=
nodethreshold
;
#
if
STUMP_BASED
stage_sum
+=
passThres
?
alpha3.y
:
alpha3.x
;
nodecounter++
;
nodecounter++
;
nodeloop++
;
#
else
bool
isRootNode
=
(
nodecounter
&
1
)
==
0
;
if
(
isRootNode
)
{
if
(
(
passThres
&&
currentnodeptr->right
)
|
|
(!passThres && currentnodeptr->left))
{
nodecounter ++;
}
else
{
stage_sum += alpha3.x;
nodecounter += 2;
nodeloop ++;
}
}
result
=
(
bool
)(
stage_sum
>=
stagecascadeptr[stageloop].threshold
)
;
}
else
{
stage_sum += (passThres ? alpha3.z : alpha3.y);
nodecounter ++;
nodeloop ++;
}
#endif
}
result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
...
@@ -222,7 +248,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
...
@@ -222,7 +248,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
int queueindex = atomic_inc(lclcount);
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex] = (y << 16) |
x
;
lcloutindex[queueindex] = (y << 16) |
x
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
queuecount
=
lclcount[0]
;
int
queuecount
=
lclcount[0]
;
...
@@ -277,5 +302,6 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
...
@@ -277,5 +302,6 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
newnode[counter].threshold
=
t1.threshold
;
newnode[counter].threshold
=
t1.threshold
;
newnode[counter].alpha[0]
=
t1.alpha[0]
;
newnode[counter].alpha[0]
=
t1.alpha[0]
;
newnode[counter].alpha[1]
=
t1.alpha[1]
;
newnode[counter].alpha[1]
=
t1.alpha[1]
;
newnode[counter].alpha[2]
=
t1.alpha[2]
;
}
}
modules/ocl/test/test_haar.cpp
View file @
c3f5e737
...
@@ -55,6 +55,12 @@ using namespace testing;
...
@@ -55,6 +55,12 @@ using namespace testing;
using
namespace
std
;
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
;
extern
string
workdir
;
extern
string
workdir
;
namespace
{
IMPLEMENT_PARAM_CLASS
(
CascadeName
,
std
::
string
);
CascadeName
cascade_frontalface_alt
(
std
::
string
(
"haarcascade_frontalface_alt.xml"
));
CascadeName
cascade_frontalface_alt2
(
std
::
string
(
"haarcascade_frontalface_alt2.xml"
));
struct
getRect
struct
getRect
{
{
Rect
operator
()(
const
CvAvgComp
&
e
)
const
Rect
operator
()(
const
CvAvgComp
&
e
)
const
...
@@ -62,23 +68,24 @@ struct getRect
...
@@ -62,23 +68,24 @@ struct getRect
return
e
.
rect
;
return
e
.
rect
;
}
}
};
};
}
PARAM_TEST_CASE
(
Haar
,
double
,
int
)
PARAM_TEST_CASE
(
Haar
,
double
,
int
,
CascadeName
)
{
{
cv
::
ocl
::
OclCascadeClassifier
cascade
,
nestedCascade
;
cv
::
ocl
::
OclCascadeClassifier
cascade
,
nestedCascade
;
cv
::
ocl
::
OclCascadeClassifierBuf
cascadebuf
;
cv
::
CascadeClassifier
cpucascade
,
cpunestedCascade
;
cv
::
CascadeClassifier
cpucascade
,
cpunestedCascade
;
double
scale
;
double
scale
;
int
flags
;
int
flags
;
std
::
string
cascadeName
;
virtual
void
SetUp
()
virtual
void
SetUp
()
{
{
scale
=
GET_PARAM
(
0
);
scale
=
GET_PARAM
(
0
);
flags
=
GET_PARAM
(
1
);
flags
=
GET_PARAM
(
1
);
string
cascadeName
=
workdir
+
"../../data/haarcascades/haarcascade_frontalface_alt.xml"
;
cascadeName
=
(
workdir
+
"../../data/haarcascades/"
).
append
(
GET_PARAM
(
2
))
;
if
(
(
!
cascade
.
load
(
cascadeName
))
||
(
!
cpucascade
.
load
(
cascadeName
))
||
(
!
cascadebuf
.
load
(
cascadeName
))
)
if
(
(
!
cascade
.
load
(
cascadeName
))
||
(
!
cpucascade
.
load
(
cascadeName
))
)
{
{
cout
<<
"ERROR: Could not load classifier cascade"
<<
endl
;
cout
<<
"ERROR: Could not load classifier cascade"
<<
endl
;
return
;
return
;
...
@@ -136,7 +143,6 @@ TEST_P(Haar, FaceDetectUseBuf)
...
@@ -136,7 +143,6 @@ TEST_P(Haar, FaceDetectUseBuf)
vector
<
Rect
>
faces
,
oclfaces
;
vector
<
Rect
>
faces
,
oclfaces
;
Mat
gray
,
smallImg
(
cvRound
(
img
.
rows
/
scale
),
cvRound
(
img
.
cols
/
scale
),
CV_8UC1
);
Mat
gray
,
smallImg
(
cvRound
(
img
.
rows
/
scale
),
cvRound
(
img
.
cols
/
scale
),
CV_8UC1
);
MemStorage
storage
(
cvCreateMemStorage
(
0
));
cvtColor
(
img
,
gray
,
CV_BGR2GRAY
);
cvtColor
(
img
,
gray
,
CV_BGR2GRAY
);
resize
(
gray
,
smallImg
,
smallImg
.
size
(),
0
,
0
,
INTER_LINEAR
);
resize
(
gray
,
smallImg
,
smallImg
.
size
(),
0
,
0
,
INTER_LINEAR
);
equalizeHist
(
smallImg
,
smallImg
);
equalizeHist
(
smallImg
,
smallImg
);
...
@@ -144,19 +150,31 @@ TEST_P(Haar, FaceDetectUseBuf)
...
@@ -144,19 +150,31 @@ TEST_P(Haar, FaceDetectUseBuf)
cv
::
ocl
::
oclMat
image
;
cv
::
ocl
::
oclMat
image
;
image
.
upload
(
smallImg
);
image
.
upload
(
smallImg
);
cv
::
ocl
::
OclCascadeClassifierBuf
cascadebuf
;
if
(
!
cascadebuf
.
load
(
cascadeName
)
)
{
cout
<<
"ERROR: Could not load classifier cascade for FaceDetectUseBuf!"
<<
endl
;
return
;
}
cascadebuf
.
detectMultiScale
(
image
,
oclfaces
,
1.1
,
3
,
cascadebuf
.
detectMultiScale
(
image
,
oclfaces
,
1.1
,
3
,
flags
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
Size
(
30
,
30
),
Size
(
0
,
0
)
);
cascadebuf
.
release
();
cpucascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
3
,
cpucascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
3
,
flags
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
Size
(
30
,
30
),
Size
(
0
,
0
)
);
EXPECT_EQ
(
faces
.
size
(),
oclfaces
.
size
());
EXPECT_EQ
(
faces
.
size
(),
oclfaces
.
size
());
// intentionally run ocl facedetect again and check if it still works after the first run
cascadebuf
.
detectMultiScale
(
image
,
oclfaces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
));
cascadebuf
.
release
();
EXPECT_EQ
(
faces
.
size
(),
oclfaces
.
size
());
}
}
INSTANTIATE_TEST_CASE_P
(
FaceDetect
,
Haar
,
INSTANTIATE_TEST_CASE_P
(
FaceDetect
,
Haar
,
Combine
(
Values
(
1.0
),
Combine
(
Values
(
1.0
),
Values
(
CV_HAAR_SCALE_IMAGE
,
0
)));
Values
(
CV_HAAR_SCALE_IMAGE
,
0
)
,
Values
(
cascade_frontalface_alt
,
cascade_frontalface_alt2
)
));
#endif // HAVE_OPENCL
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment