Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
07d92d9e
Commit
07d92d9e
authored
Sep 04, 2012
by
Andrey Kamaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix android build warnings
parent
8325a28d
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
467 additions
and
467 deletions
+467
-467
bowmsctrainer.cpp
modules/contrib/src/bowmsctrainer.cpp
+11
-11
openfabmap.cpp
modules/contrib/src/openfabmap.cpp
+14
-14
haar.cpp
modules/objdetect/src/haar.cpp
+410
-410
pca.cpp
samples/cpp/pca.cpp
+32
-32
No files found.
modules/contrib/src/bowmsctrainer.cpp
View file @
07d92d9e
...
...
@@ -81,46 +81,46 @@ Mat BOWMSCTrainer::cluster() const {
return
cluster
(
mergedDescriptors
);
}
Mat
BOWMSCTrainer
::
cluster
(
const
Mat
&
descriptors
)
const
{
Mat
BOWMSCTrainer
::
cluster
(
const
Mat
&
_
descriptors
)
const
{
CV_Assert
(
!
descriptors
.
empty
());
CV_Assert
(
!
_
descriptors
.
empty
());
// TODO: sort the descriptors before clustering.
Mat
icovar
=
Mat
::
eye
(
descriptors
.
cols
,
descriptors
.
cols
,
descriptors
.
type
());
Mat
icovar
=
Mat
::
eye
(
_descriptors
.
cols
,
_descriptors
.
cols
,
_
descriptors
.
type
());
vector
<
Mat
>
initialCentres
;
initialCentres
.
push_back
(
descriptors
.
row
(
0
));
for
(
int
i
=
1
;
i
<
descriptors
.
rows
;
i
++
)
{
initialCentres
.
push_back
(
_
descriptors
.
row
(
0
));
for
(
int
i
=
1
;
i
<
_
descriptors
.
rows
;
i
++
)
{
double
minDist
=
DBL_MAX
;
for
(
size_t
j
=
0
;
j
<
initialCentres
.
size
();
j
++
)
{
minDist
=
std
::
min
(
minDist
,
cv
::
Mahalanobis
(
descriptors
.
row
(
i
),
initialCentres
[
j
],
cv
::
Mahalanobis
(
_
descriptors
.
row
(
i
),
initialCentres
[
j
],
icovar
));
}
if
(
minDist
>
clusterSize
)
initialCentres
.
push_back
(
descriptors
.
row
(
i
));
initialCentres
.
push_back
(
_
descriptors
.
row
(
i
));
}
std
::
vector
<
std
::
list
<
cv
::
Mat
>
>
clusters
;
clusters
.
resize
(
initialCentres
.
size
());
for
(
int
i
=
0
;
i
<
descriptors
.
rows
;
i
++
)
{
for
(
int
i
=
0
;
i
<
_
descriptors
.
rows
;
i
++
)
{
int
index
=
0
;
double
dist
=
0
,
minDist
=
DBL_MAX
;
for
(
size_t
j
=
0
;
j
<
initialCentres
.
size
();
j
++
)
{
dist
=
cv
::
Mahalanobis
(
descriptors
.
row
(
i
),
initialCentres
[
j
],
icovar
);
dist
=
cv
::
Mahalanobis
(
_
descriptors
.
row
(
i
),
initialCentres
[
j
],
icovar
);
if
(
dist
<
minDist
)
{
minDist
=
dist
;
index
=
(
int
)
j
;
}
}
clusters
[
index
].
push_back
(
descriptors
.
row
(
i
));
clusters
[
index
].
push_back
(
_
descriptors
.
row
(
i
));
}
// TODO: throw away small clusters.
Mat
vocabulary
;
Mat
centre
=
Mat
::
zeros
(
1
,
descriptors
.
cols
,
descriptors
.
type
());
Mat
centre
=
Mat
::
zeros
(
1
,
_descriptors
.
cols
,
_
descriptors
.
type
());
for
(
size_t
i
=
0
;
i
<
clusters
.
size
();
i
++
)
{
centre
.
setTo
(
0
);
for
(
std
::
list
<
cv
::
Mat
>::
iterator
Ci
=
clusters
[
i
].
begin
();
Ci
!=
clusters
[
i
].
end
();
Ci
++
)
{
...
...
modules/contrib/src/openfabmap.cpp
View file @
07d92d9e
...
...
@@ -63,7 +63,7 @@ namespace of2 {
static
double
logsumexp
(
double
a
,
double
b
)
{
return
a
>
b
?
log
(
1
+
exp
(
b
-
a
))
+
a
:
log
(
1
+
exp
(
a
-
b
))
+
b
;
}
FabMap
::
FabMap
(
const
Mat
&
_clTree
,
double
_PzGe
,
double
_PzGNe
,
int
_flags
,
int
_numSamples
)
:
clTree
(
_clTree
),
PzGe
(
_PzGe
),
PzGNe
(
_PzGNe
),
flags
(
...
...
@@ -445,16 +445,16 @@ FabMap1::~FabMap1() {
}
void
FabMap1
::
getLikelihoods
(
const
Mat
&
queryImgDescriptor
,
const
vector
<
Mat
>&
testIm
g
Descriptors
,
vector
<
IMatch
>&
matches
)
{
const
vector
<
Mat
>&
testIm
age
Descriptors
,
vector
<
IMatch
>&
matches
)
{
for
(
size_t
i
=
0
;
i
<
testIm
g
Descriptors
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
testIm
age
Descriptors
.
size
();
i
++
)
{
bool
zq
,
zpq
,
Lzq
;
double
logP
=
0
;
for
(
int
q
=
0
;
q
<
clTree
.
cols
;
q
++
)
{
zq
=
queryImgDescriptor
.
at
<
float
>
(
0
,
q
)
>
0
;
zpq
=
queryImgDescriptor
.
at
<
float
>
(
0
,
pq
(
q
))
>
0
;
Lzq
=
testIm
g
Descriptors
[
i
].
at
<
float
>
(
0
,
q
)
>
0
;
Lzq
=
testIm
age
Descriptors
[
i
].
at
<
float
>
(
0
,
q
)
>
0
;
logP
+=
log
((
this
->*
PzGL
)(
q
,
zq
,
zpq
,
Lzq
));
...
...
@@ -490,16 +490,16 @@ FabMapLUT::~FabMapLUT() {
}
void
FabMapLUT
::
getLikelihoods
(
const
Mat
&
queryImgDescriptor
,
const
vector
<
Mat
>&
testIm
g
Descriptors
,
vector
<
IMatch
>&
matches
)
{
const
vector
<
Mat
>&
testIm
age
Descriptors
,
vector
<
IMatch
>&
matches
)
{
double
precFactor
=
(
double
)
pow
(
10.0
,
-
precision
);
for
(
size_t
i
=
0
;
i
<
testIm
g
Descriptors
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
testIm
age
Descriptors
.
size
();
i
++
)
{
unsigned
long
long
int
logP
=
0
;
for
(
int
q
=
0
;
q
<
clTree
.
cols
;
q
++
)
{
logP
+=
table
[
q
][(
queryImgDescriptor
.
at
<
float
>
(
0
,
pq
(
q
))
>
0
)
+
((
queryImgDescriptor
.
at
<
float
>
(
0
,
q
)
>
0
)
<<
1
)
+
((
testIm
g
Descriptors
[
i
].
at
<
float
>
(
0
,
q
)
>
0
)
<<
2
)];
((
testIm
age
Descriptors
[
i
].
at
<
float
>
(
0
,
q
)
>
0
)
<<
2
)];
}
matches
.
push_back
(
IMatch
(
0
,(
int
)
i
,
-
precFactor
*
(
double
)
logP
,
0
));
}
...
...
@@ -518,7 +518,7 @@ FabMapFBO::~FabMapFBO() {
}
void
FabMapFBO
::
getLikelihoods
(
const
Mat
&
queryImgDescriptor
,
const
vector
<
Mat
>&
testIm
g
Descriptors
,
vector
<
IMatch
>&
matches
)
{
const
vector
<
Mat
>&
testIm
age
Descriptors
,
vector
<
IMatch
>&
matches
)
{
std
::
multiset
<
WordStats
>
wordData
;
setWordStatistics
(
queryImgDescriptor
,
wordData
);
...
...
@@ -526,7 +526,7 @@ void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
vector
<
int
>
matchIndices
;
vector
<
IMatch
>
queryMatches
;
for
(
size_t
i
=
0
;
i
<
testIm
g
Descriptors
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
testIm
age
Descriptors
.
size
();
i
++
)
{
queryMatches
.
push_back
(
IMatch
(
0
,(
int
)
i
,
0
,
0
));
matchIndices
.
push_back
((
int
)
i
);
}
...
...
@@ -543,7 +543,7 @@ void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
for
(
size_t
i
=
0
;
i
<
matchIndices
.
size
();
i
++
)
{
bool
Lzq
=
testIm
g
Descriptors
[
matchIndices
[
i
]].
at
<
float
>
(
0
,
wordIter
->
q
)
>
0
;
testIm
age
Descriptors
[
matchIndices
[
i
]].
at
<
float
>
(
0
,
wordIter
->
q
)
>
0
;
queryMatches
[
matchIndices
[
i
]].
likelihood
+=
log
((
this
->*
PzGL
)(
wordIter
->
q
,
zq
,
zpq
,
Lzq
));
currBest
=
...
...
@@ -689,17 +689,17 @@ void FabMap2::add(const vector<Mat>& queryImgDescriptors) {
}
void
FabMap2
::
getLikelihoods
(
const
Mat
&
queryImgDescriptor
,
const
vector
<
Mat
>&
testIm
g
Descriptors
,
vector
<
IMatch
>&
matches
)
{
const
vector
<
Mat
>&
testIm
age
Descriptors
,
vector
<
IMatch
>&
matches
)
{
if
(
&
testIm
gDescriptors
==
&
(
this
->
testImgDescriptors
)
)
{
if
(
&
testIm
ageDescriptors
==
&
testImgDescriptors
)
{
getIndexLikelihoods
(
queryImgDescriptor
,
testDefaults
,
testInvertedMap
,
matches
);
}
else
{
CV_Assert
(
!
(
flags
&
MOTION_MODEL
));
vector
<
double
>
defaults
;
std
::
map
<
int
,
vector
<
int
>
>
invertedMap
;
for
(
size_t
i
=
0
;
i
<
testIm
g
Descriptors
.
size
();
i
++
)
{
addToIndex
(
testIm
g
Descriptors
[
i
],
defaults
,
invertedMap
);
for
(
size_t
i
=
0
;
i
<
testIm
age
Descriptors
.
size
();
i
++
)
{
addToIndex
(
testIm
age
Descriptors
[
i
],
defaults
,
invertedMap
);
}
getIndexLikelihoods
(
queryImgDescriptor
,
defaults
,
invertedMap
,
matches
);
}
...
...
modules/objdetect/src/haar.cpp
View file @
07d92d9e
...
...
@@ -47,18 +47,18 @@
#if CV_SSE2 || CV_SSE3
# if !CV_SSE4_1 && !CV_SSE4_2
#
define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
#
define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
# endif
#endif
#
if CV_AVX
#
define CV_HAAR_USE_AVX 1
#
else
#
if CV_SSE2 || CV_SSE3
#
define CV_HAAR_USE_SSE 1
#
endif
#
endif
#
if CV_AVX
#
define CV_HAAR_USE_AVX 1
#
else
#
if CV_SSE2 || CV_SSE3
#
define CV_HAAR_USE_SSE 1
#
endif
#
endif
/* these settings affect the quality of detection: change with care */
#define CV_ADJUST_FEATURES 1
...
...
@@ -634,86 +634,86 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
//AVX version icvEvalHidHaarClassifier. Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!!
#ifdef CV_HAAR_USE_AVX
#ifdef CV_HAAR_USE_AVX
CV_INLINE
double
icvEvalHidHaarClassifierAVX
(
CvHidHaarClassifier
*
classifier
,
double
variance_norm_factor
,
size_t
p_offset
)
double
variance_norm_factor
,
size_t
p_offset
)
{
int
CV_DECL_ALIGNED
(
32
)
idxV
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
char
flags
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
CvHidHaarTreeNode
*
nodes
[
8
];
double
res
=
0
;
char
exitConditionFlag
=
0
;
for
(;;)
{
float
CV_DECL_ALIGNED
(
32
)
tmp
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
nodes
[
0
]
=
classifier
->
node
+
idxV
[
0
];
nodes
[
1
]
=
(
classifier
+
1
)
->
node
+
idxV
[
1
];
nodes
[
2
]
=
(
classifier
+
2
)
->
node
+
idxV
[
2
];
nodes
[
3
]
=
(
classifier
+
3
)
->
node
+
idxV
[
3
];
nodes
[
4
]
=
(
classifier
+
4
)
->
node
+
idxV
[
4
];
nodes
[
5
]
=
(
classifier
+
5
)
->
node
+
idxV
[
5
];
nodes
[
6
]
=
(
classifier
+
6
)
->
node
+
idxV
[
6
];
nodes
[
7
]
=
(
classifier
+
7
)
->
node
+
idxV
[
7
];
__m256
t
=
_mm256_set1_ps
(
variance_norm_factor
);
t
=
_mm256_mul_ps
(
t
,
_mm256_set_ps
(
nodes
[
7
]
->
threshold
,
nodes
[
6
]
->
threshold
,
nodes
[
5
]
->
threshold
,
nodes
[
4
]
->
threshold
,
nodes
[
3
]
->
threshold
,
nodes
[
2
]
->
threshold
,
nodes
[
1
]
->
threshold
,
nodes
[
0
]
->
threshold
));
__m256
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
0
],
p_offset
));
__m256
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
0
].
weight
);
__m256
sum
=
_mm256_mul_ps
(
offset
,
weight
);
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
1
],
p_offset
));
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
1
].
weight
);
sum
=
_mm256_add_ps
(
sum
,
_mm256_mul_ps
(
offset
,
weight
));
if
(
nodes
[
0
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
0
]
=
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
0
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
1
]
->
feature
.
rect
[
2
].
p0
)
int
CV_DECL_ALIGNED
(
32
)
idxV
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
char
flags
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
CvHidHaarTreeNode
*
nodes
[
8
];
double
res
=
0
;
char
exitConditionFlag
=
0
;
for
(;;)
{
float
CV_DECL_ALIGNED
(
32
)
tmp
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
nodes
[
0
]
=
classifier
->
node
+
idxV
[
0
];
nodes
[
1
]
=
(
classifier
+
1
)
->
node
+
idxV
[
1
];
nodes
[
2
]
=
(
classifier
+
2
)
->
node
+
idxV
[
2
];
nodes
[
3
]
=
(
classifier
+
3
)
->
node
+
idxV
[
3
];
nodes
[
4
]
=
(
classifier
+
4
)
->
node
+
idxV
[
4
];
nodes
[
5
]
=
(
classifier
+
5
)
->
node
+
idxV
[
5
];
nodes
[
6
]
=
(
classifier
+
6
)
->
node
+
idxV
[
6
];
nodes
[
7
]
=
(
classifier
+
7
)
->
node
+
idxV
[
7
];
__m256
t
=
_mm256_set1_ps
(
variance_norm_factor
);
t
=
_mm256_mul_ps
(
t
,
_mm256_set_ps
(
nodes
[
7
]
->
threshold
,
nodes
[
6
]
->
threshold
,
nodes
[
5
]
->
threshold
,
nodes
[
4
]
->
threshold
,
nodes
[
3
]
->
threshold
,
nodes
[
2
]
->
threshold
,
nodes
[
1
]
->
threshold
,
nodes
[
0
]
->
threshold
));
__m256
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
0
],
p_offset
));
__m256
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
0
].
weight
);
__m256
sum
=
_mm256_mul_ps
(
offset
,
weight
);
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
1
],
p_offset
));
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
1
].
weight
);
sum
=
_mm256_add_ps
(
sum
,
_mm256_mul_ps
(
offset
,
weight
));
if
(
nodes
[
0
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
0
]
=
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
0
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
1
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
1
]
=
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
1
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
2
]
->
feature
.
rect
[
2
].
p0
)
if
(
nodes
[
2
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
2
]
=
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
2
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
3
]
->
feature
.
rect
[
2
].
p0
)
if
(
nodes
[
3
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
3
]
=
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
3
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
4
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
4
]
=
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
4
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
5
]
->
feature
.
rect
[
2
].
p0
)
if
(
nodes
[
4
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
4
]
=
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
4
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
5
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
5
]
=
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
5
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
6
]
->
feature
.
rect
[
2
].
p0
)
if
(
nodes
[
6
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
6
]
=
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
6
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
7
]
->
feature
.
rect
[
2
].
p0
)
if
(
nodes
[
7
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
7
]
=
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
7
]
->
feature
.
rect
[
2
].
weight
;
sum
=
_mm256_add_ps
(
sum
,
_mm256_load_ps
(
tmp
));
__m256
left
=
_mm256_set_ps
(
nodes
[
7
]
->
left
,
nodes
[
6
]
->
left
,
nodes
[
5
]
->
left
,
nodes
[
4
]
->
left
,
nodes
[
3
]
->
left
,
nodes
[
2
]
->
left
,
nodes
[
1
]
->
left
,
nodes
[
0
]
->
left
);
__m256
right
=
_mm256_set_ps
(
nodes
[
7
]
->
right
,
nodes
[
6
]
->
right
,
nodes
[
5
]
->
right
,
nodes
[
4
]
->
right
,
nodes
[
3
]
->
right
,
nodes
[
2
]
->
right
,
nodes
[
1
]
->
right
,
nodes
[
0
]
->
right
);
_mm256_store_si256
((
__m256i
*
)
idxV
,
_mm256_cvttps_epi32
(
_mm256_blendv_ps
(
right
,
left
,
_mm256_cmp_ps
(
sum
,
t
,
_CMP_LT_OQ
))));
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
if
(
idxV
[
i
]
<=
0
)
{
if
(
!
flags
[
i
])
{
exitConditionFlag
++
;
flags
[
i
]
=
1
;
res
+=
((
classifier
+
i
)
->
alpha
[
-
idxV
[
i
]]);
}
idxV
[
i
]
=
0
;
}
}
if
(
exitConditionFlag
==
8
)
return
res
;
}
sum
=
_mm256_add_ps
(
sum
,
_mm256_load_ps
(
tmp
));
__m256
left
=
_mm256_set_ps
(
nodes
[
7
]
->
left
,
nodes
[
6
]
->
left
,
nodes
[
5
]
->
left
,
nodes
[
4
]
->
left
,
nodes
[
3
]
->
left
,
nodes
[
2
]
->
left
,
nodes
[
1
]
->
left
,
nodes
[
0
]
->
left
);
__m256
right
=
_mm256_set_ps
(
nodes
[
7
]
->
right
,
nodes
[
6
]
->
right
,
nodes
[
5
]
->
right
,
nodes
[
4
]
->
right
,
nodes
[
3
]
->
right
,
nodes
[
2
]
->
right
,
nodes
[
1
]
->
right
,
nodes
[
0
]
->
right
);
_mm256_store_si256
((
__m256i
*
)
idxV
,
_mm256_cvttps_epi32
(
_mm256_blendv_ps
(
right
,
left
,
_mm256_cmp_ps
(
sum
,
t
,
_CMP_LT_OQ
))));
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
if
(
idxV
[
i
]
<=
0
)
{
if
(
!
flags
[
i
])
{
exitConditionFlag
++
;
flags
[
i
]
=
1
;
res
+=
((
classifier
+
i
)
->
alpha
[
-
idxV
[
i
]]);
}
idxV
[
i
]
=
0
;
}
}
if
(
exitConditionFlag
==
8
)
return
res
;
}
}
#endif
...
...
@@ -723,50 +723,50 @@ double icvEvalHidHaarClassifier( CvHidHaarClassifier* classifier,
size_t
p_offset
)
{
int
idx
=
0
;
/*#if CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX
if(cv::checkHardwareSupport(CV_CPU_SSE2))//based on old SSE variant. Works slow
{
double CV_DECL_ALIGNED(16) temp[2];
__m128d zero = _mm_setzero_pd();
do
{
CvHidHaarTreeNode* node = classifier->node + idx;
__m128d t = _mm_set1_pd((node->threshold)*variance_norm_factor);
__m128d left = _mm_set1_pd(node->left);
__m128d right = _mm_set1_pd(node->right);
double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
_sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
_sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
__m128d sum = _mm_set1_pd(_sum);
t = _mm_cmplt_sd(sum, t);
sum = _mm_blendv_pd(right, left, t);
_mm_store_pd(temp, sum);
idx = (int)temp[0];
}
while(idx > 0 );
}
else
#endif*/
/*#if CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX
if(cv::checkHardwareSupport(CV_CPU_SSE2))//based on old SSE variant. Works slow
{
double CV_DECL_ALIGNED(16) temp[2];
__m128d zero = _mm_setzero_pd();
do
{
CvHidHaarTreeNode* node = classifier->node + idx;
__m128d t = _mm_set1_pd((node->threshold)*variance_norm_factor);
__m128d left = _mm_set1_pd(node->left);
__m128d right = _mm_set1_pd(node->right);
double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
_sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
_sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
__m128d sum = _mm_set1_pd(_sum);
t = _mm_cmplt_sd(sum, t);
sum = _mm_blendv_pd(right, left, t);
_mm_store_pd(temp, sum);
idx = (int)temp[0];
}
while(idx > 0 );
}
else
#endif*/
{
do
{
do
{
CvHidHaarTreeNode
*
node
=
classifier
->
node
+
idx
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
idx
=
sum
<
t
?
node
->
left
:
node
->
right
;
}
while
(
idx
>
0
);
idx
=
sum
<
t
?
node
->
left
:
node
->
right
;
}
while
(
idx
>
0
);
}
return
classifier
->
alpha
[
-
idx
];
}
...
...
@@ -777,18 +777,18 @@ static int
cvRunHaarClassifierCascadeSum
(
const
CvHaarClassifierCascade
*
_cascade
,
CvPoint
pt
,
double
&
stage_sum
,
int
start_stage
)
{
#ifdef CV_HAAR_USE_AVX
bool
haveAVX
=
false
;
if
(
cv
::
checkHardwareSupport
(
CV_CPU_AVX
))
if
(
_xgetbv
(
_XCR_XFEATURE_ENABLED_MASK
)
&
0x6
)
// Check if the OS will save the YMM registers
{
haveAVX
=
true
;
}
#else
#ifdef CV_HAAR_USE_SSE
bool
haveSSE2
=
cv
::
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
#endif
#ifdef CV_HAAR_USE_AVX
bool
haveAVX
=
false
;
if
(
cv
::
checkHardwareSupport
(
CV_CPU_AVX
))
if
(
_xgetbv
(
_XCR_XFEATURE_ENABLED_MASK
)
&
0x6
)
// Check if the OS will save the YMM registers
{
haveAVX
=
true
;
}
#else
#ifdef CV_HAAR_USE_SSE
bool
haveSSE2
=
cv
::
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
#endif
int
p_offset
,
pq_offset
;
int
i
,
j
;
...
...
@@ -828,17 +828,17 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
{
stage_sum
=
0.0
;
#ifdef CV_HAAR_USE_AVX
if
(
haveAVX
)
{
for
(
;
j
<
cascade
->
stage_classifier
[
i
].
count
-
8
;
j
+=
8
)
{
stage_sum
+=
icvEvalHidHaarClassifierAVX
(
cascade
->
stage_classifier
[
i
].
classifier
+
j
,
variance_norm_factor
,
p_offset
);
}
}
#endif
#ifdef CV_HAAR_USE_AVX
if
(
haveAVX
)
{
for
(
;
j
<
cascade
->
stage_classifier
[
i
].
count
-
8
;
j
+=
8
)
{
stage_sum
+=
icvEvalHidHaarClassifierAVX
(
cascade
->
stage_classifier
[
i
].
classifier
+
j
,
variance_norm_factor
,
p_offset
);
}
}
#endif
for
(
j
=
0
;
j
<
ptr
->
count
;
j
++
)
{
stage_sum
+=
icvEvalHidHaarClassifier
(
ptr
->
classifier
+
j
,
variance_norm_factor
,
p_offset
);
...
...
@@ -859,283 +859,283 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
}
else
if
(
cascade
->
isStumpBased
)
{
#ifdef CV_HAAR_USE_AVX
if
(
haveAVX
)
{
CvHidHaarClassifier
*
classifiers
[
8
];
CvHidHaarTreeNode
*
nodes
[
8
];
for
(
i
=
start_stage
;
i
<
cascade
->
count
;
i
++
)
{
stage_sum
=
0.0
;
int
j
=
0
;
float
CV_DECL_ALIGNED
(
32
)
buf
[
8
];
if
(
cascade
->
stage_classifier
[
i
].
two_rects
)
{
for
(
;
j
<=
cascade
->
stage_classifier
[
i
].
count
-
8
;
j
+=
8
)
{
//__m256 stage_sumPart = _mm256_setzero_ps();
classifiers
[
0
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
nodes
[
0
]
=
classifiers
[
0
]
->
node
;
classifiers
[
1
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
1
;
nodes
[
1
]
=
classifiers
[
1
]
->
node
;
classifiers
[
2
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
2
;
nodes
[
2
]
=
classifiers
[
2
]
->
node
;
classifiers
[
3
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
3
;
nodes
[
3
]
=
classifiers
[
3
]
->
node
;
classifiers
[
4
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
4
;
nodes
[
4
]
=
classifiers
[
4
]
->
node
;
classifiers
[
5
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
5
;
nodes
[
5
]
=
classifiers
[
5
]
->
node
;
classifiers
[
6
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
6
;
nodes
[
6
]
=
classifiers
[
6
]
->
node
;
classifiers
[
7
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
7
;
nodes
[
7
]
=
classifiers
[
7
]
->
node
;
__m256
t
=
_mm256_set1_ps
(
variance_norm_factor
);
t
=
_mm256_mul_ps
(
t
,
_mm256_set_ps
(
nodes
[
7
]
->
threshold
,
nodes
[
6
]
->
threshold
,
nodes
[
5
]
->
threshold
,
nodes
[
4
]
->
threshold
,
nodes
[
3
]
->
threshold
,
nodes
[
2
]
->
threshold
,
nodes
[
1
]
->
threshold
,
nodes
[
0
]
->
threshold
));
__m256
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
0
],
p_offset
));
__m256
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
0
].
weight
);
__m256
sum
=
_mm256_mul_ps
(
offset
,
weight
);
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
1
],
p_offset
));
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
1
].
weight
);
sum
=
_mm256_add_ps
(
sum
,
_mm256_mul_ps
(
offset
,
weight
));
__m256
alpha0
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
0
],
classifiers
[
6
]
->
alpha
[
0
],
classifiers
[
5
]
->
alpha
[
0
],
classifiers
[
4
]
->
alpha
[
0
],
classifiers
[
3
]
->
alpha
[
0
],
classifiers
[
2
]
->
alpha
[
0
],
classifiers
[
1
]
->
alpha
[
0
],
classifiers
[
0
]
->
alpha
[
0
]);
__m256
alpha1
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
1
],
classifiers
[
6
]
->
alpha
[
1
],
classifiers
[
5
]
->
alpha
[
1
],
classifiers
[
4
]
->
alpha
[
1
],
classifiers
[
3
]
->
alpha
[
1
],
classifiers
[
2
]
->
alpha
[
1
],
classifiers
[
1
]
->
alpha
[
1
],
classifiers
[
0
]
->
alpha
[
1
]);
_mm256_store_ps
(
buf
,
_mm256_blendv_ps
(
alpha0
,
alpha1
,
_mm256_cmp_ps
(
t
,
sum
,
_CMP_LE_OQ
)));
stage_sum
+=
(
buf
[
0
]
+
buf
[
1
]
+
buf
[
2
]
+
buf
[
3
]
+
buf
[
4
]
+
buf
[
5
]
+
buf
[
6
]
+
buf
[
7
]);
}
for
(
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
else
{
for
(
;
j
<=
(
cascade
->
stage_classifier
[
i
].
count
)
-
8
;
j
+=
8
)
{
float
CV_DECL_ALIGNED
(
32
)
tmp
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
classifiers
[
0
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
nodes
[
0
]
=
classifiers
[
0
]
->
node
;
classifiers
[
1
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
1
;
nodes
[
1
]
=
classifiers
[
1
]
->
node
;
classifiers
[
2
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
2
;
nodes
[
2
]
=
classifiers
[
2
]
->
node
;
classifiers
[
3
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
3
;
nodes
[
3
]
=
classifiers
[
3
]
->
node
;
classifiers
[
4
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
4
;
nodes
[
4
]
=
classifiers
[
4
]
->
node
;
classifiers
[
5
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
5
;
nodes
[
5
]
=
classifiers
[
5
]
->
node
;
classifiers
[
6
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
6
;
nodes
[
6
]
=
classifiers
[
6
]
->
node
;
classifiers
[
7
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
7
;
nodes
[
7
]
=
classifiers
[
7
]
->
node
;
__m256
t
=
_mm256_set1_ps
(
variance_norm_factor
);
t
=
_mm256_mul_ps
(
t
,
_mm256_set_ps
(
nodes
[
7
]
->
threshold
,
nodes
[
6
]
->
threshold
,
nodes
[
5
]
->
threshold
,
nodes
[
4
]
->
threshold
,
nodes
[
3
]
->
threshold
,
nodes
[
2
]
->
threshold
,
nodes
[
1
]
->
threshold
,
nodes
[
0
]
->
threshold
));
__m256
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
0
],
p_offset
));
__m256
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
0
].
weight
);
__m256
sum
=
_mm256_mul_ps
(
offset
,
weight
);
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
1
],
p_offset
));
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
1
].
weight
);
sum
=
_mm256_add_ps
(
sum
,
_mm256_mul_ps
(
offset
,
weight
));
if
(
nodes
[
0
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
0
]
=
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
0
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
1
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
1
]
=
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
1
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
2
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
2
]
=
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
2
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
3
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
3
]
=
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
3
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
4
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
4
]
=
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
4
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
5
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
5
]
=
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
5
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
6
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
6
]
=
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
6
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
7
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
7
]
=
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
7
]
->
feature
.
rect
[
2
].
weight
;
sum
=
_mm256_add_ps
(
sum
,
_mm256_load_ps
(
tmp
));
__m256
alpha0
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
0
],
classifiers
[
6
]
->
alpha
[
0
],
classifiers
[
5
]
->
alpha
[
0
],
classifiers
[
4
]
->
alpha
[
0
],
classifiers
[
3
]
->
alpha
[
0
],
classifiers
[
2
]
->
alpha
[
0
],
classifiers
[
1
]
->
alpha
[
0
],
classifiers
[
0
]
->
alpha
[
0
]);
__m256
alpha1
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
1
],
classifiers
[
6
]
->
alpha
[
1
],
classifiers
[
5
]
->
alpha
[
1
],
classifiers
[
4
]
->
alpha
[
1
],
classifiers
[
3
]
->
alpha
[
1
],
classifiers
[
2
]
->
alpha
[
1
],
classifiers
[
1
]
->
alpha
[
1
],
classifiers
[
0
]
->
alpha
[
1
]);
__m256
outBuf
=
_mm256_blendv_ps
(
alpha0
,
alpha1
,
_mm256_cmp_ps
(
t
,
sum
,
_CMP_LE_OQ
));
outBuf
=
_mm256_hadd_ps
(
outBuf
,
outBuf
);
outBuf
=
_mm256_hadd_ps
(
outBuf
,
outBuf
);
_mm256_store_ps
(
buf
,
outBuf
);
stage_sum
+=
(
buf
[
0
]
+
buf
[
4
]);
//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
}
for
(
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
if
(
stage_sum
<
cascade
->
stage_classifier
[
i
].
threshold
)
return
-
i
;
}
}
else
#endif
#ifdef
CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX //old SSE optimization
if
(
haveSSE2
)
{
for
(
i
=
start_stage
;
i
<
cascade
->
count
;
i
++
)
{
__m128d
stage_sum
=
_mm_setzero_pd
();
if
(
cascade
->
stage_classifier
[
i
].
two_rects
)
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d
t
=
_mm_set_sd
(
node
->
threshold
*
variance_norm_factor
);
__m128d
a
=
_mm_set_sd
(
classifier
->
alpha
[
0
]);
__m128d
b
=
_mm_set_sd
(
classifier
->
alpha
[
1
]);
__m128d
sum
=
_mm_set_sd
(
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
+
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
);
t
=
_mm_cmpgt_sd
(
t
,
sum
);
stage_sum
=
_mm_add_sd
(
stage_sum
,
_mm_blendv_pd
(
b
,
a
,
t
));
}
}
else
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d
t
=
_mm_set_sd
(
node
->
threshold
*
variance_norm_factor
);
__m128d
a
=
_mm_set_sd
(
classifier
->
alpha
[
0
]);
__m128d
b
=
_mm_set_sd
(
classifier
->
alpha
[
1
]);
double
_sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
_sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
_sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
__m128d
sum
=
_mm_set_sd
(
_sum
);
t
=
_mm_cmpgt_sd
(
t
,
sum
);
stage_sum
=
_mm_add_sd
(
stage_sum
,
_mm_blendv_pd
(
b
,
a
,
t
));
}
}
__m128d
i_threshold
=
_mm_set1_pd
(
cascade
->
stage_classifier
[
i
].
threshold
);
if
(
_mm_comilt_sd
(
stage_sum
,
i_threshold
)
)
return
-
i
;
}
}
else
#endif
{
for
(
i
=
start_stage
;
i
<
cascade
->
count
;
i
++
)
{
stage_sum
=
0.0
;
if
(
cascade
->
stage_classifier
[
i
].
two_rects
)
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
else
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
if
(
stage_sum
<
cascade
->
stage_classifier
[
i
].
threshold
)
return
-
i
;
}
}
}
else
#ifdef CV_HAAR_USE_AVX
if
(
haveAVX
)
{
CvHidHaarClassifier
*
classifiers
[
8
];
CvHidHaarTreeNode
*
nodes
[
8
];
for
(
i
=
start_stage
;
i
<
cascade
->
count
;
i
++
)
{
stage_sum
=
0.0
;
int
j
=
0
;
float
CV_DECL_ALIGNED
(
32
)
buf
[
8
];
if
(
cascade
->
stage_classifier
[
i
].
two_rects
)
{
for
(
;
j
<=
cascade
->
stage_classifier
[
i
].
count
-
8
;
j
+=
8
)
{
//__m256 stage_sumPart = _mm256_setzero_ps();
classifiers
[
0
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
nodes
[
0
]
=
classifiers
[
0
]
->
node
;
classifiers
[
1
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
1
;
nodes
[
1
]
=
classifiers
[
1
]
->
node
;
classifiers
[
2
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
2
;
nodes
[
2
]
=
classifiers
[
2
]
->
node
;
classifiers
[
3
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
3
;
nodes
[
3
]
=
classifiers
[
3
]
->
node
;
classifiers
[
4
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
4
;
nodes
[
4
]
=
classifiers
[
4
]
->
node
;
classifiers
[
5
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
5
;
nodes
[
5
]
=
classifiers
[
5
]
->
node
;
classifiers
[
6
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
6
;
nodes
[
6
]
=
classifiers
[
6
]
->
node
;
classifiers
[
7
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
7
;
nodes
[
7
]
=
classifiers
[
7
]
->
node
;
__m256
t
=
_mm256_set1_ps
(
variance_norm_factor
);
t
=
_mm256_mul_ps
(
t
,
_mm256_set_ps
(
nodes
[
7
]
->
threshold
,
nodes
[
6
]
->
threshold
,
nodes
[
5
]
->
threshold
,
nodes
[
4
]
->
threshold
,
nodes
[
3
]
->
threshold
,
nodes
[
2
]
->
threshold
,
nodes
[
1
]
->
threshold
,
nodes
[
0
]
->
threshold
));
__m256
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
0
],
p_offset
));
__m256
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
0
].
weight
);
__m256
sum
=
_mm256_mul_ps
(
offset
,
weight
);
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
1
],
p_offset
));
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
1
].
weight
);
sum
=
_mm256_add_ps
(
sum
,
_mm256_mul_ps
(
offset
,
weight
));
__m256
alpha0
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
0
],
classifiers
[
6
]
->
alpha
[
0
],
classifiers
[
5
]
->
alpha
[
0
],
classifiers
[
4
]
->
alpha
[
0
],
classifiers
[
3
]
->
alpha
[
0
],
classifiers
[
2
]
->
alpha
[
0
],
classifiers
[
1
]
->
alpha
[
0
],
classifiers
[
0
]
->
alpha
[
0
]);
__m256
alpha1
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
1
],
classifiers
[
6
]
->
alpha
[
1
],
classifiers
[
5
]
->
alpha
[
1
],
classifiers
[
4
]
->
alpha
[
1
],
classifiers
[
3
]
->
alpha
[
1
],
classifiers
[
2
]
->
alpha
[
1
],
classifiers
[
1
]
->
alpha
[
1
],
classifiers
[
0
]
->
alpha
[
1
]);
_mm256_store_ps
(
buf
,
_mm256_blendv_ps
(
alpha0
,
alpha1
,
_mm256_cmp_ps
(
t
,
sum
,
_CMP_LE_OQ
)));
stage_sum
+=
(
buf
[
0
]
+
buf
[
1
]
+
buf
[
2
]
+
buf
[
3
]
+
buf
[
4
]
+
buf
[
5
]
+
buf
[
6
]
+
buf
[
7
]);
}
for
(
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
else
{
for
(
;
j
<=
(
cascade
->
stage_classifier
[
i
].
count
)
-
8
;
j
+=
8
)
{
float
CV_DECL_ALIGNED
(
32
)
tmp
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
classifiers
[
0
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
nodes
[
0
]
=
classifiers
[
0
]
->
node
;
classifiers
[
1
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
1
;
nodes
[
1
]
=
classifiers
[
1
]
->
node
;
classifiers
[
2
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
2
;
nodes
[
2
]
=
classifiers
[
2
]
->
node
;
classifiers
[
3
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
3
;
nodes
[
3
]
=
classifiers
[
3
]
->
node
;
classifiers
[
4
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
4
;
nodes
[
4
]
=
classifiers
[
4
]
->
node
;
classifiers
[
5
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
5
;
nodes
[
5
]
=
classifiers
[
5
]
->
node
;
classifiers
[
6
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
6
;
nodes
[
6
]
=
classifiers
[
6
]
->
node
;
classifiers
[
7
]
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
+
7
;
nodes
[
7
]
=
classifiers
[
7
]
->
node
;
__m256
t
=
_mm256_set1_ps
(
variance_norm_factor
);
t
=
_mm256_mul_ps
(
t
,
_mm256_set_ps
(
nodes
[
7
]
->
threshold
,
nodes
[
6
]
->
threshold
,
nodes
[
5
]
->
threshold
,
nodes
[
4
]
->
threshold
,
nodes
[
3
]
->
threshold
,
nodes
[
2
]
->
threshold
,
nodes
[
1
]
->
threshold
,
nodes
[
0
]
->
threshold
));
__m256
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
0
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
0
],
p_offset
));
__m256
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
0
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
0
].
weight
);
__m256
sum
=
_mm256_mul_ps
(
offset
,
weight
);
offset
=
_mm256_set_ps
(
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
1
],
p_offset
),
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
1
],
p_offset
));
weight
=
_mm256_set_ps
(
nodes
[
7
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
6
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
5
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
4
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
3
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
2
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
1
]
->
feature
.
rect
[
1
].
weight
,
nodes
[
0
]
->
feature
.
rect
[
1
].
weight
);
sum
=
_mm256_add_ps
(
sum
,
_mm256_mul_ps
(
offset
,
weight
));
if
(
nodes
[
0
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
0
]
=
calc_sum
(
nodes
[
0
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
0
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
1
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
1
]
=
calc_sum
(
nodes
[
1
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
1
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
2
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
2
]
=
calc_sum
(
nodes
[
2
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
2
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
3
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
3
]
=
calc_sum
(
nodes
[
3
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
3
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
4
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
4
]
=
calc_sum
(
nodes
[
4
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
4
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
5
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
5
]
=
calc_sum
(
nodes
[
5
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
5
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
6
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
6
]
=
calc_sum
(
nodes
[
6
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
6
]
->
feature
.
rect
[
2
].
weight
;
if
(
nodes
[
7
]
->
feature
.
rect
[
2
].
p0
)
tmp
[
7
]
=
calc_sum
(
nodes
[
7
]
->
feature
.
rect
[
2
],
p_offset
)
*
nodes
[
7
]
->
feature
.
rect
[
2
].
weight
;
sum
=
_mm256_add_ps
(
sum
,
_mm256_load_ps
(
tmp
));
__m256
alpha0
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
0
],
classifiers
[
6
]
->
alpha
[
0
],
classifiers
[
5
]
->
alpha
[
0
],
classifiers
[
4
]
->
alpha
[
0
],
classifiers
[
3
]
->
alpha
[
0
],
classifiers
[
2
]
->
alpha
[
0
],
classifiers
[
1
]
->
alpha
[
0
],
classifiers
[
0
]
->
alpha
[
0
]);
__m256
alpha1
=
_mm256_set_ps
(
classifiers
[
7
]
->
alpha
[
1
],
classifiers
[
6
]
->
alpha
[
1
],
classifiers
[
5
]
->
alpha
[
1
],
classifiers
[
4
]
->
alpha
[
1
],
classifiers
[
3
]
->
alpha
[
1
],
classifiers
[
2
]
->
alpha
[
1
],
classifiers
[
1
]
->
alpha
[
1
],
classifiers
[
0
]
->
alpha
[
1
]);
__m256
outBuf
=
_mm256_blendv_ps
(
alpha0
,
alpha1
,
_mm256_cmp_ps
(
t
,
sum
,
_CMP_LE_OQ
));
outBuf
=
_mm256_hadd_ps
(
outBuf
,
outBuf
);
outBuf
=
_mm256_hadd_ps
(
outBuf
,
outBuf
);
_mm256_store_ps
(
buf
,
outBuf
);
stage_sum
+=
(
buf
[
0
]
+
buf
[
4
]);
//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
}
for
(
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
if
(
stage_sum
<
cascade
->
stage_classifier
[
i
].
threshold
)
return
-
i
;
}
}
else
#endif
#if defined CV_HAAR_USE_SSE &&
CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX //old SSE optimization
if
(
haveSSE2
)
{
for
(
i
=
start_stage
;
i
<
cascade
->
count
;
i
++
)
{
__m128d
stage_sum
=
_mm_setzero_pd
();
if
(
cascade
->
stage_classifier
[
i
].
two_rects
)
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d
t
=
_mm_set_sd
(
node
->
threshold
*
variance_norm_factor
);
__m128d
a
=
_mm_set_sd
(
classifier
->
alpha
[
0
]);
__m128d
b
=
_mm_set_sd
(
classifier
->
alpha
[
1
]);
__m128d
sum
=
_mm_set_sd
(
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
+
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
);
t
=
_mm_cmpgt_sd
(
t
,
sum
);
stage_sum
=
_mm_add_sd
(
stage_sum
,
_mm_blendv_pd
(
b
,
a
,
t
));
}
}
else
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d
t
=
_mm_set_sd
(
node
->
threshold
*
variance_norm_factor
);
__m128d
a
=
_mm_set_sd
(
classifier
->
alpha
[
0
]);
__m128d
b
=
_mm_set_sd
(
classifier
->
alpha
[
1
]);
double
_sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
_sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
_sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
__m128d
sum
=
_mm_set_sd
(
_sum
);
t
=
_mm_cmpgt_sd
(
t
,
sum
);
stage_sum
=
_mm_add_sd
(
stage_sum
,
_mm_blendv_pd
(
b
,
a
,
t
));
}
}
__m128d
i_threshold
=
_mm_set1_pd
(
cascade
->
stage_classifier
[
i
].
threshold
);
if
(
_mm_comilt_sd
(
stage_sum
,
i_threshold
)
)
return
-
i
;
}
}
else
#endif
{
for
(
i
=
start_stage
;
i
<
cascade
->
count
;
i
++
)
{
stage_sum
=
0.0
;
if
(
cascade
->
stage_classifier
[
i
].
two_rects
)
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
else
{
for
(
j
=
0
;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
sum
+=
calc_sum
(
node
->
feature
.
rect
[
1
],
p_offset
)
*
node
->
feature
.
rect
[
1
].
weight
;
if
(
node
->
feature
.
rect
[
2
].
p0
)
sum
+=
calc_sum
(
node
->
feature
.
rect
[
2
],
p_offset
)
*
node
->
feature
.
rect
[
2
].
weight
;
stage_sum
+=
classifier
->
alpha
[
sum
>=
t
];
}
}
if
(
stage_sum
<
cascade
->
stage_classifier
[
i
].
threshold
)
return
-
i
;
}
}
}
else
{
for
(
i
=
start_stage
;
i
<
cascade
->
count
;
i
++
)
{
stage_sum
=
0.0
;
int
j
=
0
;
#ifdef CV_HAAR_USE_AVX
if
(
haveAVX
)
{
for
(
;
j
<
cascade
->
stage_classifier
[
i
].
count
-
8
;
j
+=
8
)
{
stage_sum
+=
icvEvalHidHaarClassifierAVX
(
cascade
->
stage_classifier
[
i
].
classifier
+
j
,
variance_norm_factor
,
p_offset
);
}
}
#endif
for
(;
j
<
cascade
->
stage_classifier
[
i
].
count
;
j
++
)
{
stage_sum
+=
icvEvalHidHaarClassifier
(
cascade
->
stage_classifier
[
i
].
classifier
+
j
,
variance_norm_factor
,
p_offset
);
}
int
k
=
0
;
#ifdef CV_HAAR_USE_AVX
if
(
haveAVX
)
{
for
(
;
k
<
cascade
->
stage_classifier
[
i
].
count
-
8
;
k
+=
8
)
{
stage_sum
+=
icvEvalHidHaarClassifierAVX
(
cascade
->
stage_classifier
[
i
].
classifier
+
k
,
variance_norm_factor
,
p_offset
);
}
}
#endif
for
(;
k
<
cascade
->
stage_classifier
[
i
].
count
;
k
++
)
{
stage_sum
+=
icvEvalHidHaarClassifier
(
cascade
->
stage_classifier
[
i
].
classifier
+
k
,
variance_norm_factor
,
p_offset
);
}
if
(
stage_sum
<
cascade
->
stage_classifier
[
i
].
threshold
)
return
-
i
;
}
}
//_mm256_zeroupper();
//_mm256_zeroupper();
return
1
;
}
...
...
samples/cpp/pca.cpp
View file @
07d92d9e
/*
* pca.cpp
*
* Author:
* Author:
* Kevin Hughes <kevinhughes27[at]gmail[dot]com>
*
* Special Thanks to:
* Philipp Wagner <bytefish[at]gmx[dot]de>
*
* This program demonstrates how to use OpenCV PCA with a
* This program demonstrates how to use OpenCV PCA with a
* specified amount of variance to retain. The effect
* is illustrated further by using a trackbar to
* change the value for retained variance.
...
...
@@ -17,9 +17,9 @@
* on this list of images. The author recommends using
* the first 15 faces of the AT&T face data set:
* http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
*
*
* so for example your input text file would look like this:
*
*
* <path_to_at&t_faces>/orl_faces/s1/1.pgm
* <path_to_at&t_faces>/orl_faces/s2/1.pgm
* <path_to_at&t_faces>/orl_faces/s3/1.pgm
...
...
@@ -50,7 +50,7 @@ using namespace std;
///////////////////////
// Functions
void
read_imgList
(
const
string
&
filename
,
vector
<
Mat
>&
images
)
{
static
void
read_imgList
(
const
string
&
filename
,
vector
<
Mat
>&
images
)
{
std
::
ifstream
file
(
filename
.
c_str
(),
ifstream
::
in
);
if
(
!
file
)
{
string
error_message
=
"No valid input file was given, please check the given filename."
;
...
...
@@ -62,19 +62,19 @@ void read_imgList(const string& filename, vector<Mat>& images) {
}
}
Mat
formatImagesForPCA
(
const
vector
<
Mat
>
&
data
)
static
Mat
formatImagesForPCA
(
const
vector
<
Mat
>
&
data
)
{
Mat
dst
(
data
.
size
(),
data
[
0
].
rows
*
data
[
0
].
cols
,
CV_32F
);
for
(
unsigned
int
i
=
0
;
i
<
data
.
size
();
i
++
)
{
Mat
image_row
=
data
[
i
].
clone
().
reshape
(
1
,
1
);
Mat
row_i
=
dst
.
row
(
i
);
image_row
.
convertTo
(
row_i
,
CV_32F
);
image_row
.
convertTo
(
row_i
,
CV_32F
);
}
return
dst
;
}
Mat
toGrayscale
(
InputArray
_src
)
{
static
Mat
toGrayscale
(
InputArray
_src
)
{
Mat
src
=
_src
.
getMat
();
// only allow one channel
if
(
src
.
channels
()
!=
1
)
{
...
...
@@ -95,22 +95,22 @@ struct params
string
winName
;
};
void
onTrackbar
(
int
pos
,
void
*
ptr
)
{
static
void
onTrackbar
(
int
pos
,
void
*
ptr
)
{
cout
<<
"Retained Variance = "
<<
pos
<<
"% "
;
cout
<<
"re-calculating PCA..."
<<
std
::
flush
;
double
var
=
pos
/
100.0
;
struct
params
*
p
=
(
struct
params
*
)
ptr
;
p
->
pca
=
PCA
(
p
->
data
,
cv
::
Mat
(),
CV_PCA_DATA_AS_ROW
,
var
);
Mat
point
=
p
->
pca
.
project
(
p
->
data
.
row
(
0
));
Mat
reconstruction
=
p
->
pca
.
backProject
(
point
);
reconstruction
=
reconstruction
.
reshape
(
p
->
ch
,
p
->
rows
);
reconstruction
=
toGrayscale
(
reconstruction
);
imshow
(
p
->
winName
,
reconstruction
);
cout
<<
"done! # of principal components: "
<<
p
->
pca
.
eigenvectors
.
rows
<<
endl
;
}
...
...
@@ -118,19 +118,19 @@ void onTrackbar(int pos, void* ptr)
///////////////////////
// Main
int
main
(
int
argc
,
char
**
argv
)
int
main
(
int
argc
,
char
**
argv
)
{
if
(
argc
!=
2
)
{
cout
<<
"usage: "
<<
argv
[
0
]
<<
" <image_list.txt>"
<<
endl
;
exit
(
1
);
}
// Get the path to your CSV.
string
imgList
=
string
(
argv
[
1
]);
// vector to hold the images
vector
<
Mat
>
images
;
// Read in the data. This can fail if not valid
try
{
read_imgList
(
imgList
,
images
);
...
...
@@ -138,29 +138,29 @@ int main(int argc, char** argv)
cerr
<<
"Error opening file
\"
"
<<
imgList
<<
"
\"
. Reason: "
<<
e
.
msg
<<
endl
;
exit
(
1
);
}
// Quit if there are not enough images for this demo.
if
(
images
.
size
()
<=
1
)
{
string
error_message
=
"This demo needs at least 2 images to work. Please add more images to your data set!"
;
CV_Error
(
CV_StsError
,
error_message
);
}
// Reshape and stack images into a rowMatrix
Mat
data
=
formatImagesForPCA
(
images
);
// perform PCA
PCA
pca
(
data
,
cv
::
Mat
(),
CV_PCA_DATA_AS_ROW
,
0.95
);
// trackbar is initially set here, also this is a common value for retainedVariance
// Demonstration of the effect of retainedVariance on the first image
// Demonstration of the effect of retainedVariance on the first image
Mat
point
=
pca
.
project
(
data
.
row
(
0
));
// project into the eigenspace, thus the image becomes a "point"
Mat
reconstruction
=
pca
.
backProject
(
point
);
// re-create the image from the "point"
reconstruction
=
reconstruction
.
reshape
(
images
[
0
].
channels
(),
images
[
0
].
rows
);
// reshape from a row vector into image shape
reconstruction
=
toGrayscale
(
reconstruction
);
// re-scale for displaying purposes
// init highgui window
string
winName
=
"Reconstruction | press 'q' to quit"
;
namedWindow
(
winName
,
CV_WINDOW_NORMAL
);
// params struct to pass to the trackbar handler
params
p
;
p
.
data
=
data
;
...
...
@@ -168,17 +168,17 @@ int main(int argc, char** argv)
p
.
rows
=
images
[
0
].
rows
;
p
.
pca
=
pca
;
p
.
winName
=
winName
;
// create the trackbar
int
pos
=
95
;
createTrackbar
(
"Retained Variance (%)"
,
winName
,
&
pos
,
100
,
onTrackbar
,
(
void
*
)
&
p
);
createTrackbar
(
"Retained Variance (%)"
,
winName
,
&
pos
,
100
,
onTrackbar
,
(
void
*
)
&
p
);
// display until user presses q
imshow
(
winName
,
reconstruction
);
char
key
=
0
;
while
(
key
!=
'q'
)
key
=
waitKey
();
return
0
;
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment