Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fac3d999
Commit
fac3d999
authored
Jul 31, 2012
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
integrated another portion of SSE optimizations from Grigory Frolov
parent
5f2ce22f
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
108 additions
and
57 deletions
+108
-57
OpenCVCompilerOptions.cmake
cmake/OpenCVCompilerOptions.cmake
+1
-1
internal.hpp
modules/core/include/opencv2/core/internal.hpp
+9
-3
lapack.cpp
modules/core/src/lapack.cpp
+31
-29
shapedescr.cpp
modules/imgproc/src/shapedescr.cpp
+51
-17
haar.cpp
modules/objdetect/src/haar.cpp
+16
-7
No files found.
cmake/OpenCVCompilerOptions.cmake
View file @
fac3d999
...
...
@@ -139,7 +139,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
if
(
ENABLE_SSSE3
)
add_extra_compiler_option
(
-mssse3
)
endif
()
if
(
HAVE_GCC43_OR_NEWER
)
if
(
HAVE_GCC43_OR_NEWER
OR APPLE
)
if
(
ENABLE_SSE41
)
add_extra_compiler_option
(
-msse4.1
)
endif
()
...
...
modules/core/include/opencv2/core/internal.hpp
View file @
fac3d999
...
...
@@ -120,17 +120,23 @@ CV_INLINE IppiSize ippiSize(int width, int height)
# else
# define CV_SSSE3 0
# endif
# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1
6
00)
# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1
5
00)
# include <smmintrin.h>
# define CV_SSE4_1 1
# else
# define CV_SSE4_1 0
# endif
# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1
6
00)
# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1
5
00)
# include <nmmintrin.h>
# define CV_SSE4_2 1
# else
# define CV_SSE4_2 0
# endif
# if defined __AVX__ || (defined _MSC_
VER && _MSC_VER >= 1600
)
# if defined __AVX__ || (defined _MSC_
FULL_VER && _MSC_FULL_VER >= 160040219
)
# include <immintrin.h>
# define CV_AVX 1
# else
# define CV_AVX 0
# endif
# else
# define CV_SSE 0
...
...
modules/core/src/lapack.cpp
View file @
fac3d999
...
...
@@ -1010,15 +1010,18 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
if
(
type
==
CV_32FC1
)
{
double
d
=
det2
(
Sf
);
#if CV_SSE4_2
if
(
USE_SSE4_2
)
if
(
d
!=
0.
)
{
result
=
true
;
d
=
1.
/
d
;
#if CV_SSE2
if
(
USE_SSE2
)
{
__m128
zero
=
_mm_setzero_ps
();
__m128
t0
=
_mm_loadl_pi
(
zero
,
(
const
__m64
*
)
srcdata
);
//t0 = sf(0,0) sf(0,1)
__m128
t1
=
_mm_loadh_pi
(
zero
,(
const
__m64
*
)((
const
float
*
)(
srcdata
+
srcstep
)));
//t1 = sf(1,0) sf(1,1)
__m128
s0
=
_mm_blend_ps
(
t0
,
t1
,
12
);
d
=
1.
/
d
;
result
=
true
;
__m128
t1
=
_mm_loadh_pi
(
zero
,
(
const
__m64
*
)(
srcdata
+
srcstep
));
//t1 = sf(1,0) sf(1,1)
__m128
s0
=
_mm_or_ps
(
t0
,
t1
);
__m128
det
=
_mm_set1_ps
((
float
)
d
);
s0
=
_mm_mul_ps
(
s0
,
det
);
const
uchar
CV_DECL_ALIGNED
(
16
)
inv
[
16
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0x80
,
0
,
0
,
0
,
0x80
,
0
,
0
,
0
,
0
};
...
...
@@ -1028,12 +1031,10 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
_mm_storel_pi
((
__m64
*
)
dstdata
,
s0
);
_mm_storeh_pi
((
__m64
*
)((
float
*
)(
dstdata
+
dststep
)),
s0
);
}
#
else
if
(
d
!=
0.
)
else
#endif
{
double
t0
,
t1
;
result
=
true
;
d
=
1.
/
d
;
t0
=
Sf
(
0
,
0
)
*
d
;
t1
=
Sf
(
1
,
1
)
*
d
;
Df
(
1
,
1
)
=
(
float
)
t0
;
...
...
@@ -1043,39 +1044,40 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
Df
(
0
,
1
)
=
(
float
)
t0
;
Df
(
1
,
0
)
=
(
float
)
t1
;
}
#endif
}
}
else
{
double
d
=
det2
(
Sd
);
if
(
d
!=
0.
)
{
result
=
true
;
d
=
1.
/
d
;
#if CV_SSE2
if
(
USE_SSE2
)
{
__m128d
s0
=
_mm_loadu_pd
((
const
double
*
)
srcdata
);
//s0 = sf(0,0) sf(0,1)
__m128d
s1
=
_mm_loadu_pd
((
const
double
*
)(
srcdata
+
srcstep
));
//s1 = sf(1,0) sf(1,1)
__m128d
sm
=
_mm_shuffle_pd
(
s0
,
s1
,
_MM_SHUFFLE2
(
1
,
0
));
//sm = sf(0,0) sf(1,1) - main diagonal
__m128d
ss
=
_mm_shuffle_pd
(
s0
,
s1
,
_MM_SHUFFLE2
(
0
,
1
));
//sm = sf(0,1) sf(1,0) - secondary diagonal
result
=
true
;
d
=
1.
/
d
;
__m128d
sm
=
_mm_unpacklo_pd
(
s0
,
_mm_load_sd
((
const
double
*
)(
srcdata
+
srcstep
)
+
1
));
//sm = sf(0,0) sf(1,1) - main diagonal
__m128d
ss
=
_mm_shuffle_pd
(
s0
,
s1
,
_MM_SHUFFLE2
(
0
,
1
));
//ss = sf(0,1) sf(1,0) - secondary diagonal
__m128d
det
=
_mm_load1_pd
((
const
double
*
)
&
d
);
sm
=
_mm_mul_pd
(
sm
,
det
);
//__m128d pattern = _mm_set1_pd(-1.);
static
const
uchar
CV_DECL_ALIGNED
(
16
)
inv
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0x80
};
uchar
CV_DECL_ALIGNED
(
16
)
inv
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0x80
};
__m128d
pattern
=
_mm_load1_pd
((
double
*
)
inv
);
ss
=
_mm_mul_pd
(
ss
,
det
);
ss
=
_mm_xor_pd
(
ss
,
pattern
);
//==-1*ss
//ss = _mm_mul_pd(ss,pattern);
s0
=
_mm_shuffle_pd
(
sm
,
ss
,
_MM_SHUFFLE2
(
0
,
1
));
s1
=
_mm_shuffle_pd
(
ss
,
sm
,
_MM_SHUFFLE2
(
0
,
1
));
_mm_store
_pd
((
double
*
)
dstdata
,
s0
);
_mm_store
_pd
((
double
*
)(
dstdata
+
dststep
),
s1
);
_mm_storeu
_pd
((
double
*
)
dstdata
,
s0
);
_mm_storeu
_pd
((
double
*
)(
dstdata
+
dststep
),
s1
);
}
#
else
if
(
d
!=
0.
)
else
#endif
{
double
t0
,
t1
;
result
=
true
;
d
=
1.
/
d
;
t0
=
Sd
(
0
,
0
)
*
d
;
t1
=
Sd
(
1
,
1
)
*
d
;
Dd
(
1
,
1
)
=
t0
;
...
...
@@ -1085,7 +1087,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
Dd
(
0
,
1
)
=
t0
;
Dd
(
1
,
0
)
=
t1
;
}
#endif
}
}
}
else
if
(
n
==
3
)
...
...
@@ -1095,10 +1097,9 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
double
d
=
det3
(
Sf
);
if
(
d
!=
0.
)
{
float
t
[
9
];
result
=
true
;
d
=
1.
/
d
;
float
t
[
9
];
t
[
0
]
=
(
float
)(((
double
)
Sf
(
1
,
1
)
*
Sf
(
2
,
2
)
-
(
double
)
Sf
(
1
,
2
)
*
Sf
(
2
,
1
))
*
d
);
t
[
1
]
=
(
float
)(((
double
)
Sf
(
0
,
2
)
*
Sf
(
2
,
1
)
-
(
double
)
Sf
(
0
,
1
)
*
Sf
(
2
,
2
))
*
d
);
t
[
2
]
=
(
float
)(((
double
)
Sf
(
0
,
1
)
*
Sf
(
1
,
2
)
-
(
double
)
Sf
(
0
,
2
)
*
Sf
(
1
,
1
))
*
d
);
...
...
@@ -1121,9 +1122,9 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
double
d
=
det3
(
Sd
);
if
(
d
!=
0.
)
{
double
t
[
9
];
result
=
true
;
d
=
1.
/
d
;
double
t
[
9
];
t
[
0
]
=
(
Sd
(
1
,
1
)
*
Sd
(
2
,
2
)
-
Sd
(
1
,
2
)
*
Sd
(
2
,
1
))
*
d
;
t
[
1
]
=
(
Sd
(
0
,
2
)
*
Sd
(
2
,
1
)
-
Sd
(
0
,
1
)
*
Sd
(
2
,
2
))
*
d
;
...
...
@@ -1193,6 +1194,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
}
/****************************************************************************************\
* Solving a linear system *
\****************************************************************************************/
...
...
@@ -1603,7 +1605,7 @@ void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
Mat
w
=
_w
.
getMat
(),
u
=
_u
.
getMat
(),
vt
=
_vt
.
getMat
(),
rhs
=
_rhs
.
getMat
();
int
type
=
w
.
type
(),
esz
=
(
int
)
w
.
elemSize
();
int
m
=
u
.
rows
,
n
=
vt
.
cols
,
nb
=
rhs
.
data
?
rhs
.
cols
:
m
,
nm
=
std
::
min
(
m
,
n
);
size_t
wstep
=
w
.
rows
==
1
?
esz
:
w
.
cols
==
1
?
(
size_t
)
w
.
step
:
(
size_t
)
w
.
step
+
esz
;
size_t
wstep
=
w
.
rows
==
1
?
(
size_t
)
esz
:
w
.
cols
==
1
?
(
size_t
)
w
.
step
:
(
size_t
)
w
.
step
+
esz
;
AutoBuffer
<
uchar
>
buffer
(
nb
*
sizeof
(
double
)
+
16
);
CV_Assert
(
w
.
type
()
==
u
.
type
()
&&
u
.
type
()
==
vt
.
type
()
&&
u
.
data
&&
vt
.
data
&&
w
.
data
);
CV_Assert
(
u
.
cols
>=
nm
&&
vt
.
rows
>=
nm
&&
...
...
modules/imgproc/src/shapedescr.cpp
View file @
fac3d999
...
...
@@ -951,9 +951,6 @@ cvBoundingRect( CvArr* array, int update )
if
(
ptseq
->
header_size
<
(
int
)
sizeof
(
CvContour
))
{
/*if( update == 1 )
CV_Error( CV_StsBadArg, "The header is too small to fit the rectangle, "
"so it could not be updated" );*/
update
=
0
;
calculate
=
1
;
}
...
...
@@ -1072,12 +1069,56 @@ cvBoundingRect( CvArr* array, int update )
{
int
is_float
=
CV_SEQ_ELTYPE
(
ptseq
)
==
CV_32FC2
;
cvStartReadSeq
(
ptseq
,
&
reader
,
0
);
CvPoint
pt
;
CV_READ_SEQ_ELEM
(
pt
,
reader
);
#if CV_SSE4_2
if
(
cv
::
checkHardwareSupport
(
CV_CPU_SSE4_2
))
{
if
(
!
is_float
)
{
__m128i
minval
,
maxval
;
minval
=
maxval
=
_mm_loadl_epi64
((
const
__m128i
*
)(
&
pt
));
//min[0]=pt.x, min[1]=pt.y
for
(
i
=
1
;
i
<
ptseq
->
total
;
i
++
)
{
__m128i
ptXY
=
_mm_loadl_epi64
((
const
__m128i
*
)(
reader
.
ptr
));
CV_NEXT_SEQ_ELEM
(
sizeof
(
pt
),
reader
);
minval
=
_mm_min_epi32
(
ptXY
,
minval
);
maxval
=
_mm_max_epi32
(
ptXY
,
maxval
);
}
xmin
=
_mm_cvtsi128_si32
(
minval
);
ymin
=
_mm_cvtsi128_si32
(
_mm_srli_si128
(
minval
,
4
));
xmax
=
_mm_cvtsi128_si32
(
maxval
);
ymax
=
_mm_cvtsi128_si32
(
_mm_srli_si128
(
maxval
,
4
));
}
else
{
__m128
minvalf
,
maxvalf
,
z
=
_mm_setzero_ps
(),
ptXY
=
_mm_setzero_ps
();
minvalf
=
maxvalf
=
_mm_loadl_pi
(
z
,
(
const
__m64
*
)(
&
pt
));
for
(
i
=
1
;
i
<
ptseq
->
total
;
i
++
)
{
ptXY
=
_mm_loadl_pi
(
ptXY
,
(
const
__m64
*
)
reader
.
ptr
);
CV_NEXT_SEQ_ELEM
(
sizeof
(
pt
),
reader
);
minvalf
=
_mm_min_ps
(
minvalf
,
ptXY
);
maxvalf
=
_mm_max_ps
(
maxvalf
,
ptXY
);
}
float
xyminf
[
2
],
xymaxf
[
2
];
_mm_storel_pi
((
__m64
*
)
xyminf
,
minvalf
);
_mm_storel_pi
((
__m64
*
)
xymaxf
,
maxvalf
);
xmin
=
cvFloor
(
xyminf
[
0
]);
ymin
=
cvFloor
(
xyminf
[
1
]);
xmax
=
cvFloor
(
xymaxf
[
0
]);
ymax
=
cvFloor
(
xymaxf
[
1
]);
}
}
else
#endif
{
if
(
!
is_float
)
{
CvPoint
pt
;
/* init values */
CV_READ_SEQ_ELEM
(
pt
,
reader
);
xmin
=
xmax
=
pt
.
x
;
ymin
=
ymax
=
pt
.
y
;
...
...
@@ -1100,10 +1141,8 @@ cvBoundingRect( CvArr* array, int update )
}
else
{
CvPoint
pt
;
Cv32suf
v
;
/* init values */
CV_READ_SEQ_ELEM
(
pt
,
reader
);
// init values
xmin
=
xmax
=
CV_TOGGLE_FLT
(
pt
.
x
);
ymin
=
ymax
=
CV_TOGGLE_FLT
(
pt
.
y
);
...
...
@@ -1128,25 +1167,20 @@ cvBoundingRect( CvArr* array, int update )
v
.
i
=
CV_TOGGLE_FLT
(
xmin
);
xmin
=
cvFloor
(
v
.
f
);
v
.
i
=
CV_TOGGLE_FLT
(
ymin
);
ymin
=
cvFloor
(
v
.
f
);
/* because right and bottom sides of
the bounding rectangle are not inclusive
(note +1 in width and height calculation below),
cvFloor is used here instead of cvCeil */
// because right and bottom sides of the bounding rectangle are not inclusive
// (note +1 in width and height calculation below), cvFloor is used here instead of cvCeil
v
.
i
=
CV_TOGGLE_FLT
(
xmax
);
xmax
=
cvFloor
(
v
.
f
);
v
.
i
=
CV_TOGGLE_FLT
(
ymax
);
ymax
=
cvFloor
(
v
.
f
);
}
}
rect
.
x
=
xmin
;
rect
.
y
=
ymin
;
rect
.
width
=
xmax
-
xmin
+
1
;
rect
.
height
=
ymax
-
ymin
+
1
;
}
if
(
update
)
((
CvContour
*
)
ptseq
)
->
rect
=
rect
;
return
rect
;
}
/* End of file. */
modules/objdetect/src/haar.cpp
View file @
fac3d999
...
...
@@ -43,19 +43,26 @@
#include "precomp.hpp"
#include <stdio.h>
/*#if CV_SSE2
# if CV_SSE4 || defined __SSE4__
# include <smmintrin.h>
# else
/*
#if CV_SSE2
# if !CV_SSE4_1 && !CV_SSE4_2
# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
# endif
#endif
#if defined CV_ICC
# if defined CV_AVX
# define CV_HAAR_USE_AVX 1
# else
# if defined CV_SSE2 || defined CV_SSE4_1 || defined CV_SSE4_2
# define CV_HAAR_USE_SSE 1
# else
# define CV_HAAR_NO_SIMD 1
# endif
# endif
#endif
#endif*/
*/
/* these settings affect the quality of detection: change with care */
#define CV_ADJUST_FEATURES 1
#define CV_ADJUST_WEIGHTS 0
...
...
@@ -730,6 +737,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
{
CvHidHaarClassifier
*
classifier
=
cascade
->
stage_classifier
[
i
].
classifier
+
j
;
CvHidHaarTreeNode
*
node
=
classifier
->
node
;
#ifndef CV_HAAR_USE_SSE
double
t
=
node
->
threshold
*
variance_norm_factor
;
double
sum
=
calc_sum
(
node
->
feature
.
rect
[
0
],
p_offset
)
*
node
->
feature
.
rect
[
0
].
weight
;
...
...
@@ -745,6 +753,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
t
=
_mm_cmpgt_sd
(
t
,
sum
);
stage_sum
=
_mm_add_sd
(
stage_sum
,
_mm_blendv_pd
(
b
,
a
,
t
));
#endif
}
}
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment