opencv / Commits / 77264dcc

Commit 77264dcc, authored Jun 29, 2017 by Vitaly Tuzov

    AVX optimized implementation of haar migrated to separate file

Parent: 20f603a2

Showing 3 changed files with 492 additions and 372 deletions (+492 / -372):

    modules/objdetect/src/haar.avx.cpp   +369   -0
    modules/objdetect/src/haar.cpp        +22   -372
    modules/objdetect/src/haar.hpp       +101   -0
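For orientation before the per-file diffs: the kernels in the new haar.avx.cpp vectorize the per-node evaluation of a hidden Haar classifier, in which a CvHidHaarTreeNode accumulates two (optionally three) weighted rectangle sums taken from the integral image and compares the result against threshold * variance_norm_factor. A minimal scalar sketch of that computation follows; the evalNodeScalar helper is illustrative only and is not part of this commit (the real scalar path is icvEvalHidHaarClassifier in haar.cpp).

// Illustrative sketch: what one tree-node evaluation computes, using the
// CvHidHaarTreeNode/CvHidHaarFeature types and the calc_sumf macro introduced below.
static float evalNodeScalar(const CvHidHaarTreeNode* node, size_t p_offset)
{
    float sum = calc_sumf(node->feature.rect[0], p_offset) * node->feature.rect[0].weight
              + calc_sumf(node->feature.rect[1], p_offset) * node->feature.rect[1].weight;
    if (node->feature.rect[2].p0)   // optional third rectangle
        sum += calc_sumf(node->feature.rect[2], p_offset) * node->feature.rect[2].weight;
    // The caller compares sum against node->threshold * variance_norm_factor
    // and descends to node->left or node->right accordingly.
    return sum;
}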
modules/objdetect/src/haar.avx.cpp  (new file, mode 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/* Haar features calculation */
#include "precomp.hpp"
#include "haar.hpp"
namespace cv_haar_avx
{
// AVX version of icvEvalHidHaarClassifier. Processes 8 CvHidHaarClassifiers per call. Check AVX support before invocation!
#if CV_HAAR_USE_AVX
double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier,
                                    double variance_norm_factor, size_t p_offset )
{
    int CV_DECL_ALIGNED(32) idxV[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
    uchar flags[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
    CvHidHaarTreeNode* nodes[8];
    double res = 0;
    uchar exitConditionFlag = 0;
    for( ;; )
    {
        float CV_DECL_ALIGNED(32) tmp[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
        nodes[0] = (classifier + 0)->node + idxV[0];
        nodes[1] = (classifier + 1)->node + idxV[1];
        nodes[2] = (classifier + 2)->node + idxV[2];
        nodes[3] = (classifier + 3)->node + idxV[3];
        nodes[4] = (classifier + 4)->node + idxV[4];
        nodes[5] = (classifier + 5)->node + idxV[5];
        nodes[6] = (classifier + 6)->node + idxV[6];
        nodes[7] = (classifier + 7)->node + idxV[7];

        __m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor));

        t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, nodes[6]->threshold, nodes[5]->threshold, nodes[4]->threshold,
                                           nodes[3]->threshold, nodes[2]->threshold, nodes[1]->threshold, nodes[0]->threshold));

        __m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), calc_sumf(nodes[6]->feature.rect[0], p_offset),
                                      calc_sumf(nodes[5]->feature.rect[0], p_offset), calc_sumf(nodes[4]->feature.rect[0], p_offset),
                                      calc_sumf(nodes[3]->feature.rect[0], p_offset), calc_sumf(nodes[2]->feature.rect[0], p_offset),
                                      calc_sumf(nodes[1]->feature.rect[0], p_offset), calc_sumf(nodes[0]->feature.rect[0], p_offset));

        __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, nodes[4]->feature.rect[0].weight,
                                      nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);

        __m256 sum = _mm256_mul_ps(offset, weight);

        offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), calc_sumf(nodes[6]->feature.rect[1], p_offset),
                               calc_sumf(nodes[5]->feature.rect[1], p_offset), calc_sumf(nodes[4]->feature.rect[1], p_offset),
                               calc_sumf(nodes[3]->feature.rect[1], p_offset), calc_sumf(nodes[2]->feature.rect[1], p_offset),
                               calc_sumf(nodes[1]->feature.rect[1], p_offset), calc_sumf(nodes[0]->feature.rect[1], p_offset));

        weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
                               nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);

        sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight));

        if( nodes[0]->feature.rect[2].p0 )
            tmp[0] = calc_sumf(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight;
        if( nodes[1]->feature.rect[2].p0 )
            tmp[1] = calc_sumf(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight;
        if( nodes[2]->feature.rect[2].p0 )
            tmp[2] = calc_sumf(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight;
        if( nodes[3]->feature.rect[2].p0 )
            tmp[3] = calc_sumf(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight;
        if( nodes[4]->feature.rect[2].p0 )
            tmp[4] = calc_sumf(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight;
        if( nodes[5]->feature.rect[2].p0 )
            tmp[5] = calc_sumf(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight;
        if( nodes[6]->feature.rect[2].p0 )
            tmp[6] = calc_sumf(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight;
        if( nodes[7]->feature.rect[2].p0 )
            tmp[7] = calc_sumf(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight;

        sum = _mm256_add_ps(sum, _mm256_load_ps(tmp));

        __m256 left = _mm256_set_ps(static_cast<float>(nodes[7]->left), static_cast<float>(nodes[6]->left),
                                    static_cast<float>(nodes[5]->left), static_cast<float>(nodes[4]->left),
                                    static_cast<float>(nodes[3]->left), static_cast<float>(nodes[2]->left),
                                    static_cast<float>(nodes[1]->left), static_cast<float>(nodes[0]->left));
        __m256 right = _mm256_set_ps(static_cast<float>(nodes[7]->right), static_cast<float>(nodes[6]->right),
                                     static_cast<float>(nodes[5]->right), static_cast<float>(nodes[4]->right),
                                     static_cast<float>(nodes[3]->right), static_cast<float>(nodes[2]->right),
                                     static_cast<float>(nodes[1]->right), static_cast<float>(nodes[0]->right));

        _mm256_store_si256((__m256i*)idxV, _mm256_cvttps_epi32(_mm256_blendv_ps(right, left, _mm256_cmp_ps(sum, t, _CMP_LT_OQ))));

        for( int i = 0; i < 8; i++ )
        {
            if( idxV[i] <= 0 )
            {
                if( !flags[i] )
                {
                    exitConditionFlag++;
                    flags[i] = 1;
                    res += (classifier + i)->alpha[-idxV[i]];
                }
                idxV[i] = 0;
            }
        }
        if( exitConditionFlag == 8 )
            return res;
    }
}
double icvEvalHidHaarStumpClassifierAVX( CvHidHaarClassifier* classifier,
                                         double variance_norm_factor, size_t p_offset )
{
    float CV_DECL_ALIGNED(32) tmp[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
    CvHidHaarTreeNode* nodes[8];

    nodes[0] = classifier[0].node;
    nodes[1] = classifier[1].node;
    nodes[2] = classifier[2].node;
    nodes[3] = classifier[3].node;
    nodes[4] = classifier[4].node;
    nodes[5] = classifier[5].node;
    nodes[6] = classifier[6].node;
    nodes[7] = classifier[7].node;

    __m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor));

    t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, nodes[6]->threshold, nodes[5]->threshold, nodes[4]->threshold,
                                       nodes[3]->threshold, nodes[2]->threshold, nodes[1]->threshold, nodes[0]->threshold));

    __m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), calc_sumf(nodes[6]->feature.rect[0], p_offset),
                                  calc_sumf(nodes[5]->feature.rect[0], p_offset), calc_sumf(nodes[4]->feature.rect[0], p_offset),
                                  calc_sumf(nodes[3]->feature.rect[0], p_offset), calc_sumf(nodes[2]->feature.rect[0], p_offset),
                                  calc_sumf(nodes[1]->feature.rect[0], p_offset), calc_sumf(nodes[0]->feature.rect[0], p_offset));

    __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, nodes[4]->feature.rect[0].weight,
                                  nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);

    __m256 sum = _mm256_mul_ps(offset, weight);

    offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), calc_sumf(nodes[6]->feature.rect[1], p_offset),
                           calc_sumf(nodes[5]->feature.rect[1], p_offset), calc_sumf(nodes[4]->feature.rect[1], p_offset),
                           calc_sumf(nodes[3]->feature.rect[1], p_offset), calc_sumf(nodes[2]->feature.rect[1], p_offset),
                           calc_sumf(nodes[1]->feature.rect[1], p_offset), calc_sumf(nodes[0]->feature.rect[1], p_offset));

    weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
                           nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);

    sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight));

    if( nodes[0]->feature.rect[2].p0 )
        tmp[0] = calc_sumf(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight;
    if( nodes[1]->feature.rect[2].p0 )
        tmp[1] = calc_sumf(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight;
    if( nodes[2]->feature.rect[2].p0 )
        tmp[2] = calc_sumf(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight;
    if( nodes[3]->feature.rect[2].p0 )
        tmp[3] = calc_sumf(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight;
    if( nodes[4]->feature.rect[2].p0 )
        tmp[4] = calc_sumf(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight;
    if( nodes[5]->feature.rect[2].p0 )
        tmp[5] = calc_sumf(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight;
    if( nodes[6]->feature.rect[2].p0 )
        tmp[6] = calc_sumf(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight;
    if( nodes[7]->feature.rect[2].p0 )
        tmp[7] = calc_sumf(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight;

    sum = _mm256_add_ps(sum, _mm256_load_ps(tmp));

    __m256 alpha0 = _mm256_set_ps(classifier[7].alpha[0], classifier[6].alpha[0], classifier[5].alpha[0], classifier[4].alpha[0],
                                  classifier[3].alpha[0], classifier[2].alpha[0], classifier[1].alpha[0], classifier[0].alpha[0]);
    __m256 alpha1 = _mm256_set_ps(classifier[7].alpha[1], classifier[6].alpha[1], classifier[5].alpha[1], classifier[4].alpha[1],
                                  classifier[3].alpha[1], classifier[2].alpha[1], classifier[1].alpha[1], classifier[0].alpha[1]);

    __m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ));
    outBuf = _mm256_hadd_ps(outBuf, outBuf);
    outBuf = _mm256_hadd_ps(outBuf, outBuf);
    _mm256_store_ps(tmp, outBuf);
    return (tmp[0] + tmp[4]);
}
double icvEvalHidHaarStumpClassifierTwoRectAVX( CvHidHaarClassifier* classifier,
                                                double variance_norm_factor, size_t p_offset )
{
    float CV_DECL_ALIGNED(32) buf[8];
    CvHidHaarTreeNode* nodes[8];

    nodes[0] = classifier[0].node;
    nodes[1] = classifier[1].node;
    nodes[2] = classifier[2].node;
    nodes[3] = classifier[3].node;
    nodes[4] = classifier[4].node;
    nodes[5] = classifier[5].node;
    nodes[6] = classifier[6].node;
    nodes[7] = classifier[7].node;

    __m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor));
    t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, nodes[6]->threshold, nodes[5]->threshold, nodes[4]->threshold,
                                       nodes[3]->threshold, nodes[2]->threshold, nodes[1]->threshold, nodes[0]->threshold));

    __m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), calc_sumf(nodes[6]->feature.rect[0], p_offset),
                                  calc_sumf(nodes[5]->feature.rect[0], p_offset), calc_sumf(nodes[4]->feature.rect[0], p_offset),
                                  calc_sumf(nodes[3]->feature.rect[0], p_offset), calc_sumf(nodes[2]->feature.rect[0], p_offset),
                                  calc_sumf(nodes[1]->feature.rect[0], p_offset), calc_sumf(nodes[0]->feature.rect[0], p_offset));

    __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, nodes[4]->feature.rect[0].weight,
                                  nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);

    __m256 sum = _mm256_mul_ps(offset, weight);

    offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), calc_sumf(nodes[6]->feature.rect[1], p_offset),
                           calc_sumf(nodes[5]->feature.rect[1], p_offset), calc_sumf(nodes[4]->feature.rect[1], p_offset),
                           calc_sumf(nodes[3]->feature.rect[1], p_offset), calc_sumf(nodes[2]->feature.rect[1], p_offset),
                           calc_sumf(nodes[1]->feature.rect[1], p_offset), calc_sumf(nodes[0]->feature.rect[1], p_offset));

    weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
                           nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);

    sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight));

    __m256 alpha0 = _mm256_set_ps(classifier[7].alpha[0], classifier[6].alpha[0], classifier[5].alpha[0], classifier[4].alpha[0],
                                  classifier[3].alpha[0], classifier[2].alpha[0], classifier[1].alpha[0], classifier[0].alpha[0]);
    __m256 alpha1 = _mm256_set_ps(classifier[7].alpha[1], classifier[6].alpha[1], classifier[5].alpha[1], classifier[4].alpha[1],
                                  classifier[3].alpha[1], classifier[2].alpha[1], classifier[1].alpha[1], classifier[0].alpha[1]);

    _mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ)));
    return (buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + buf[7]);
}
#endif //CV_HAAR_USE_AVX
}
/* End of file. */
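Each of the functions above consumes exactly eight CvHidHaarClassifiers per call, so a caller batches a stage's classifiers in groups of eight and finishes the remainder with the scalar evaluator, which is what the updated loops in haar.cpp below do. A rough sketch of that calling pattern follows; evalStageSum is a hypothetical helper for illustration only, and it assumes the scalar icvEvalHidHaarClassifier keeps the same (classifier, variance_norm_factor, p_offset) parameters as the AVX variants.

// Sketch only: run 8 classifiers at a time through the AVX kernel, then a scalar tail.
static double evalStageSum(CvHidHaarStageClassifier* stage,
                           double variance_norm_factor, size_t p_offset)
{
    double stage_sum = 0.0;
    int j = 0;
#if CV_HAAR_USE_AVX
    if (CV_CPU_HAS_SUPPORT_AVX)
        for (; j <= stage->count - 8; j += 8)   // full groups of eight classifiers
            stage_sum += cv_haar_avx::icvEvalHidHaarClassifierAVX(stage->classifier + j,
                                                                  variance_norm_factor, p_offset);
#endif
    for (; j < stage->count; j++)               // remaining classifiers, scalar path
        stage_sum += icvEvalHidHaarClassifier(stage->classifier + j,
                                              variance_norm_factor, p_offset);
    return stage_sum;
}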
modules/objdetect/src/haar.cpp
...
...
@@ -45,6 +45,10 @@
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/objdetect/objdetect_c.h"
#include <stdio.h>
#include "haar.hpp"
#if CV_HAAR_FEATURE_MAX_LOCAL != CV_HAAR_FEATURE_MAX
#error CV_HAAR_FEATURE_MAX definition changed. Adjust CV_HAAR_FEATURE_MAX_LOCAL value please.
#endif
#if CV_SSE2
#  if 1 /*!CV_SSE4_1 && !CV_SSE4_2*/
...
...
@@ -53,8 +57,7 @@
# endif
#endif
#if 0 /*CV_AVX*/
# define CV_HAAR_USE_AVX 1
#if CV_HAAR_USE_AVX
# if defined _MSC_VER
# pragma warning( disable : 4752 )
# endif
...
...
@@ -68,38 +71,6 @@
#define CV_ADJUST_FEATURES 1
#define CV_ADJUST_WEIGHTS 0
typedef int sumtype;
typedef double sqsumtype;

typedef struct CvHidHaarFeature
{
    struct
    {
        sumtype *p0, *p1, *p2, *p3;
        float weight;
    } rect[CV_HAAR_FEATURE_MAX];
} CvHidHaarFeature;

typedef struct CvHidHaarTreeNode
{
    CvHidHaarFeature feature;
    float threshold;
    int left;
    int right;
} CvHidHaarTreeNode;

typedef struct CvHidHaarClassifier
{
    int count;
    //CvHaarFeature* orig_feature;
    CvHidHaarTreeNode* node;
    float* alpha;
} CvHidHaarClassifier;

typedef struct CvHidHaarStageClassifier
{
    int count;
...
...
@@ -420,10 +391,6 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
#define calc_sum(rect,offset) \
((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
#define calc_sumf(rect,offset) \
static_cast<float>((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
CV_IMPL void
cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
                                     const CvArr* _sum,
...
...
@@ -640,129 +607,6 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
}
// AVX version icvEvalHidHaarClassifier. Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!!
#ifdef CV_HAAR_USE_AVX
CV_INLINE
double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier,
                                    double variance_norm_factor, size_t p_offset )
{
    /* (removed) body identical to cv_haar_avx::icvEvalHidHaarClassifierAVX, now in haar.avx.cpp above */
}
#endif //CV_HAAR_USE_AVX
CV_INLINE
double icvEvalHidHaarClassifier( CvHidHaarClassifier* classifier,
                                 double variance_norm_factor,
...
...
@@ -823,8 +667,8 @@ static int
cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
                               CvPoint pt, double& stage_sum, int start_stage )
{
#ifdef CV_HAAR_USE_AVX
    bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX);
#if CV_HAAR_USE_AVX
    bool haveAVX = CV_CPU_HAS_SUPPORT_AVX;
#else
#  ifdef CV_HAAR_USE_SSE
    bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
...
...
@@ -870,14 +714,14 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
        stage_sum = 0.0;
        j = 0;
#ifdef CV_HAAR_USE_AVX
#if CV_HAAR_USE_AVX
        if( haveAVX )
        {
            for( ; j <= ptr->count - 8; j += 8 )
            {
                stage_sum += icvEvalHidHaarClassifierAVX(ptr->classifier + j, variance_norm_factor, p_offset);
                stage_sum += cv_haar_avx::icvEvalHidHaarClassifierAVX(ptr->classifier + j, variance_norm_factor, p_offset);
            }
        }
#endif
...
...
@@ -901,106 +745,20 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
    }
    else if( cascade->isStumpBased )
    {
#ifdef CV_HAAR_USE_AVX
#if CV_HAAR_USE_AVX
        if( haveAVX )
        {
            CvHidHaarClassifier* classifiers[8];
            CvHidHaarTreeNode* nodes[8];
            for( i = start_stage; i < cascade->count; i++ )
            {
                stage_sum = 0.0;
                j = 0;
                float CV_DECL_ALIGNED(32) buf[8];
                if( cascade->stage_classifier[i].two_rects )
                {
                    for( ; j <= cascade->stage_classifier[i].count - 8; j += 8 )
                    {
                        /* (removed) classifiers[0..7]/nodes[0..7] setup and the inlined two-rect
                           AVX evaluation, identical to cv_haar_avx::icvEvalHidHaarStumpClassifierTwoRectAVX */
                        stage_sum += cv_haar_avx::icvEvalHidHaarStumpClassifierTwoRectAVX(cascade->stage_classifier[i].classifier + j, variance_norm_factor, p_offset);
                    }
                    for( ; j < cascade->stage_classifier[i].count; j++ )
...
...
@@ -1018,117 +776,9 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
                {
                    for( ; j <= (cascade->stage_classifier[i].count) - 8; j += 8 )
                    {
                        /* (removed) float CV_DECL_ALIGNED(32) tmp[8], classifiers[0..7]/nodes[0..7]
                           setup and the inlined stump AVX evaluation, identical to
                           cv_haar_avx::icvEvalHidHaarStumpClassifierAVX */
                        stage_sum += cv_haar_avx::icvEvalHidHaarStumpClassifierAVX(cascade->stage_classifier[i].classifier + j, variance_norm_factor, p_offset);
                    }
                    for( ; j < cascade->stage_classifier[i].count; j++ )
...
...
@@ -1241,14 +891,14 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
            stage_sum = 0.0;
            int k = 0;
#ifdef CV_HAAR_USE_AVX
#if CV_HAAR_USE_AVX
            if( haveAVX )
            {
                for( ; k < cascade->stage_classifier[i].count - 8; k += 8 )
                {
                    stage_sum += icvEvalHidHaarClassifierAVX(cascade->stage_classifier[i].classifier + k, variance_norm_factor, p_offset);
                    stage_sum += cv_haar_avx::icvEvalHidHaarClassifierAVX(cascade->stage_classifier[i].classifier + k, variance_norm_factor, p_offset);
                }
            }
#endif
...
...
modules/objdetect/src/haar.hpp  (new file, mode 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/* Haar features calculation */
#ifndef OPENCV_OBJDETECT_HAAR_HPP
#define OPENCV_OBJDETECT_HAAR_HPP
#define CV_HAAR_FEATURE_MAX_LOCAL 3
typedef int sumtype;
typedef double sqsumtype;

typedef struct CvHidHaarFeature
{
    struct
    {
        sumtype *p0, *p1, *p2, *p3;
        float weight;
    } rect[CV_HAAR_FEATURE_MAX_LOCAL];
} CvHidHaarFeature;

typedef struct CvHidHaarTreeNode
{
    CvHidHaarFeature feature;
    float threshold;
    int left;
    int right;
} CvHidHaarTreeNode;

typedef struct CvHidHaarClassifier
{
    int count;
    //CvHaarFeature* orig_feature;
    CvHidHaarTreeNode* node;
    float* alpha;
} CvHidHaarClassifier;
#define calc_sumf(rect,offset) \
static_cast<float>((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
namespace cv_haar_avx
{
#if 0 /*CV_TRY_AVX*/
#define CV_HAAR_USE_AVX 1
#else
#define CV_HAAR_USE_AVX 0
#endif
#if CV_HAAR_USE_AVX
// AVX version of icvEvalHidHaarClassifier. Processes 8 CvHidHaarClassifiers per call. Check AVX support before invocation!
double icvEvalHidHaarClassifierAVX(CvHidHaarClassifier* classifier, double variance_norm_factor, size_t p_offset);
double icvEvalHidHaarStumpClassifierAVX(CvHidHaarClassifier* classifier, double variance_norm_factor, size_t p_offset);
double icvEvalHidHaarStumpClassifierTwoRectAVX(CvHidHaarClassifier* classifier, double variance_norm_factor, size_t p_offset);
#endif
}
#endif
/* End of file. */
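The calc_sumf macro above is the usual integral-image (summed-area table) rectangle sum: p0, p1, p2 and p3 point at the four corners of a feature rectangle inside the integral image, with p0 and p3 on one diagonal and p1, p2 on the other, so p0[offset] - p1[offset] - p2[offset] + p3[offset] gives the pixel sum over the rectangle in constant time. A self-contained sketch of the same identity on a plain 2-D integral image; the rectSum helper is hypothetical and for illustration only.

#include <vector>

// ii has (h+1) x (w+1) entries with ii[y][x] = sum of src over rows [0, y) and columns [0, x).
// Returns the sum of src over rows [y0, y1) and columns [x0, x1).
static double rectSum(const std::vector<std::vector<double> >& ii,
                      int y0, int x0, int y1, int x1)
{
    // Same corner combination that calc_sum / calc_sumf encode through the
    // precomputed p0..p3 pointers plus a per-window offset.
    return ii[y1][x1] - ii[y0][x1] - ii[y1][x0] + ii[y0][x0];
}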