Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6bce6ee3
Commit
6bce6ee3
authored
Jan 12, 2015
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
checks
parent
1d3c8604
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
144 additions
and
63 deletions
+144
-63
arithm.cpp
modules/core/src/arithm.cpp
+76
-13
convert.cpp
modules/core/src/convert.cpp
+0
-0
mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+58
-44
stat.cpp
modules/core/src/stat.cpp
+5
-1
color.cpp
modules/imgproc/src/color.cpp
+0
-0
imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+0
-0
pyramids.cpp
modules/imgproc/src/pyramids.cpp
+5
-5
No files found.
modules/core/src/arithm.cpp
View file @
6bce6ee3
...
...
@@ -64,7 +64,7 @@ FUNCTOR_TEMPLATE(VLoadStore128);
#if CV_SSE2
FUNCTOR_TEMPLATE
(
VLoadStore64
);
FUNCTOR_TEMPLATE
(
VLoadStore128Aligned
);
#if CV_AVX
#if CV_AVX
2
FUNCTOR_TEMPLATE
(
VLoadStore256
);
FUNCTOR_TEMPLATE
(
VLoadStore256Aligned
);
#endif
...
...
@@ -2626,10 +2626,16 @@ struct Div_SIMD
template
<>
struct
Div_SIMD
<
uchar
>
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
uchar
*
src1
,
const
uchar
*
src2
,
uchar
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -2672,10 +2678,16 @@ struct Div_SIMD<uchar>
template
<>
struct
Div_SIMD
<
schar
>
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
schar
*
src1
,
const
schar
*
src2
,
schar
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -2718,10 +2730,16 @@ struct Div_SIMD<schar>
template
<>
struct
Div_SIMD
<
ushort
>
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
ushort
*
src1
,
const
ushort
*
src2
,
ushort
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -2763,10 +2781,16 @@ struct Div_SIMD<ushort>
template
<>
struct
Div_SIMD
<
short
>
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
short
*
src1
,
const
short
*
src2
,
short
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -2806,10 +2830,16 @@ struct Div_SIMD<short>
template
<>
struct
Div_SIMD
<
int
>
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
int
*
src1
,
const
int
*
src2
,
int
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -2902,10 +2932,16 @@ struct Recip_SIMD
template
<>
struct
Recip_SIMD
<
uchar
>
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
uchar
*
src2
,
uchar
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -2941,10 +2977,16 @@ struct Recip_SIMD<uchar>
template
<>
struct
Recip_SIMD
<
schar
>
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
schar
*
src2
,
schar
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -2980,10 +3022,16 @@ struct Recip_SIMD<schar>
template
<>
struct
Recip_SIMD
<
ushort
>
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
ushort
*
src2
,
ushort
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -3018,10 +3066,16 @@ struct Recip_SIMD<ushort>
template
<>
struct
Recip_SIMD
<
short
>
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
short
*
src2
,
short
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -3054,10 +3108,16 @@ struct Recip_SIMD<short>
template
<>
struct
Recip_SIMD
<
int
>
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
int
*
src2
,
int
*
dst
,
int
width
,
double
scale
)
const
{
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
...
...
@@ -4126,7 +4186,8 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
{
int
x
=
0
;
#if CV_SSE2
if
(
USE_SSE2
){
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_GT
?
_mm_setzero_si128
()
:
_mm_set1_epi8
(
-
1
);
__m128i
c128
=
_mm_set1_epi8
(
-
128
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
...
@@ -4142,7 +4203,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
}
}
#elif CV_NEON
#elif CV_NEON
uint8x16_t
mask
=
code
==
CMP_GT
?
vdupq_n_u8
(
0
)
:
vdupq_n_u8
(
255
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
...
@@ -4164,7 +4225,8 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
{
int
x
=
0
;
#if CV_SSE2
if
(
USE_SSE2
){
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_EQ
?
_mm_setzero_si128
()
:
_mm_set1_epi8
(
-
1
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
...
...
@@ -4174,7 +4236,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
),
r00
);
}
}
#elif CV_NEON
#elif CV_NEON
uint8x16_t
mask
=
code
==
CMP_EQ
?
vdupq_n_u8
(
0
)
:
vdupq_n_u8
(
255
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
...
@@ -4254,7 +4316,8 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
{
int
x
=
0
;
#if CV_SSE2
if
(
USE_SSE2
){
//
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_GT
?
_mm_setzero_si128
()
:
_mm_set1_epi16
(
-
1
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
...
...
@@ -4278,7 +4341,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
x
+=
8
;
}
}
#elif CV_NEON
#elif CV_NEON
uint8x16_t
mask
=
code
==
CMP_GT
?
vdupq_n_u8
(
0
)
:
vdupq_n_u8
(
255
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
...
@@ -4293,8 +4356,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
vst1q_u8
(
dst
+
x
,
veorq_u8
(
vcombine_u8
(
t1
,
t2
),
mask
));
}
#endif
#endif
for
(
;
x
<
size
.
width
;
x
++
){
dst
[
x
]
=
(
uchar
)(
-
(
src1
[
x
]
>
src2
[
x
])
^
m
);
...
...
@@ -4308,7 +4370,8 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
{
int
x
=
0
;
#if CV_SSE2
if
(
USE_SSE2
){
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_EQ
?
_mm_setzero_si128
()
:
_mm_set1_epi16
(
-
1
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
...
...
@@ -4332,7 +4395,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
x
+=
8
;
}
}
#elif CV_NEON
#elif CV_NEON
uint8x16_t
mask
=
code
==
CMP_EQ
?
vdupq_n_u8
(
0
)
:
vdupq_n_u8
(
255
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
...
@@ -4347,8 +4410,8 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
vst1q_u8
(
dst
+
x
,
veorq_u8
(
vcombine_u8
(
t1
,
t2
),
mask
));
}
#endif
for
(
;
x
<
size
.
width
;
x
++
)
#endif
for
(
;
x
<
size
.
width
;
x
++
)
dst
[
x
]
=
(
uchar
)(
-
(
src1
[
x
]
==
src2
[
x
])
^
m
);
}
}
...
...
modules/core/src/convert.cpp
View file @
6bce6ee3
This diff is collapsed.
Click to expand it.
modules/core/src/mathfuncs.cpp
View file @
6bce6ee3
...
...
@@ -597,15 +597,18 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
k
=
0
;
#if CV_SSE2
for
(
;
k
<=
len
-
4
;
k
+=
4
)
if
(
USE_SSE2
)
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
+
2
)));
__m128
v_dst1
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
+
2
)));
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
+
2
)));
__m128
v_dst1
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
+
2
)));
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
}
}
#endif
...
...
@@ -619,11 +622,14 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
k
=
0
;
#if CV_SSE2
for
(
;
k
<=
len
-
4
;
k
+=
4
)
if
(
USE_SSE2
)
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
}
}
#endif
...
...
@@ -728,15 +734,18 @@ void cartToPolar( InputArray src1, InputArray src2,
k
=
0
;
#if CV_SSE2
for
(
;
k
<=
len
-
4
;
k
+=
4
)
if
(
USE_SSE2
)
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
+
2
)));
__m128
v_dst1
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
+
2
)));
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
+
2
)));
__m128
v_dst1
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
+
2
)));
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
}
}
#endif
...
...
@@ -750,11 +759,14 @@ void cartToPolar( InputArray src1, InputArray src2,
k
=
0
;
#if CV_SSE2
for
(
;
k
<=
len
-
4
;
k
+=
4
)
if
(
USE_SSE2
)
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
}
}
#endif
...
...
@@ -832,17 +844,16 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
k1
=
N
/
360.
;
#if CV_AVX2
__m128d
v_i
=
_mm_set_pd
(
1
,
0
);
__m128d
v_k1
=
_mm_set1_pd
(
k1
);
__m128d
v_1
=
_mm_set1_pd
(
1
);
__m128i
v_N1
=
_mm_set1_epi32
(
N
-
1
);
__m128i
v_N4
=
_mm_set1_epi32
(
N
>>
2
);
__m128d
v_sin_a0
=
_mm_set1_pd
(
sin_a0
);
__m128d
v_sin_a2
=
_mm_set1_pd
(
sin_a2
);
__m128d
v_cos_a0
=
_mm_set1_pd
(
cos_a0
);
if
(
USE_AVX2
)
{
__m128d
v_k1
=
_mm_set1_pd
(
k1
);
__m128d
v_1
=
_mm_set1_pd
(
1
);
__m128i
v_N1
=
_mm_set1_epi32
(
N
-
1
);
__m128i
v_N4
=
_mm_set1_epi32
(
N
>>
2
);
__m128d
v_sin_a0
=
_mm_set1_pd
(
sin_a0
);
__m128d
v_sin_a2
=
_mm_set1_pd
(
sin_a2
);
__m128d
v_cos_a0
=
_mm_set1_pd
(
cos_a0
);
for
(
;
i
<=
len
-
4
;
i
+=
4
)
{
__m128
v_angle
=
_mm_loadu_ps
(
angle
+
i
);
...
...
@@ -859,8 +870,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
__m128d
v_sin_b
=
_mm_mul_pd
(
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a0
,
v_t2
),
v_sin_a2
),
v_t
);
__m128d
v_cos_b
=
_mm_add_pd
(
_mm_mul_pd
(
v_cos_a0
,
v_t2
),
v_1
);
__m128d
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
1
);
__m128d
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
1
);
__m128d
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
8
);
__m128d
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
8
);
__m128d
v_sin_val_0
=
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a
,
v_cos_b
),
_mm_mul_pd
(
v_cos_a
,
v_sin_b
));
...
...
@@ -868,7 +879,7 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
_mm_mul_pd
(
v_sin_a
,
v_sin_b
));
// 2-3
v_t
=
_mm_mul_pd
(
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_s
l
li_si128
(
_mm_castps_si128
(
v_angle
),
8
))),
v_k1
);
v_t
=
_mm_mul_pd
(
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_s
r
li_si128
(
_mm_castps_si128
(
v_angle
),
8
))),
v_k1
);
v_it
=
_mm_cvtpd_epi32
(
v_t
);
v_t
=
_mm_sub_pd
(
v_t
,
_mm_cvtepi32_pd
(
v_it
));
...
...
@@ -879,8 +890,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
v_sin_b
=
_mm_mul_pd
(
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a0
,
v_t2
),
v_sin_a2
),
v_t
);
v_cos_b
=
_mm_add_pd
(
_mm_mul_pd
(
v_cos_a0
,
v_t2
),
v_1
);
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
1
);
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
1
);
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
8
);
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
8
);
__m128d
v_sin_val_1
=
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a
,
v_cos_b
),
_mm_mul_pd
(
v_cos_a
,
v_sin_b
));
...
...
@@ -1032,11 +1043,14 @@ void polarToCart( InputArray src1, InputArray src2,
vst1q_f32
(
y
+
k
,
vmulq_f32
(
vld1q_f32
(
y
+
k
),
v_m
));
}
#elif CV_SSE2
for
(
;
k
<=
len
-
4
;
k
+=
4
)
if
(
USE_SSE2
)
{
__m128
v_m
=
_mm_loadu_ps
(
mag
+
k
);
_mm_storeu_ps
(
x
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
x
+
k
),
v_m
));
_mm_storeu_ps
(
y
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
y
+
k
),
v_m
));
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_m
=
_mm_loadu_ps
(
mag
+
k
);
_mm_storeu_ps
(
x
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
x
+
k
),
v_m
));
_mm_storeu_ps
(
y
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
y
+
k
),
v_m
));
}
}
#endif
...
...
@@ -1063,10 +1077,10 @@ void polarToCart( InputArray src1, InputArray src2,
x
[
k
]
=
buf
[
0
][
k
]
*
m
;
y
[
k
]
=
buf
[
1
][
k
]
*
m
;
}
else
for
(
k
=
0
;
k
<
len
;
k
++
)
{
x
[
k
]
=
buf
[
0
][
k
];
y
[
k
]
=
buf
[
1
][
k
]
;
}
{
std
::
memcpy
(
x
,
buf
[
0
],
sizeof
(
float
)
*
len
);
std
::
memcpy
(
y
,
buf
[
1
],
sizeof
(
float
)
*
len
)
;
}
}
if
(
ptrs
[
0
]
)
...
...
modules/core/src/stat.cpp
View file @
6bce6ee3
...
...
@@ -397,6 +397,8 @@ static int countNonZero_(const T* src, int len )
return
nz
;
}
#if CV_SSE2
static
const
uchar
*
initPopcountTable
()
{
static
uchar
tab
[
256
];
...
...
@@ -425,6 +427,8 @@ static const uchar * initPopcountTable()
return
tab
;
}
#endif
static
int
countNonZero8u
(
const
uchar
*
src
,
int
len
)
{
int
i
=
0
,
nz
=
0
;
...
...
@@ -645,7 +649,7 @@ static int countNonZero32f( const float* src, int len )
}
static
int
countNonZero64f
(
const
double
*
src
,
int
len
)
{
{
int
i
=
0
,
nz
=
0
;
#if CV_SSE2
if
(
USE_SSE2
)
...
...
modules/imgproc/src/color.cpp
View file @
6bce6ee3
This diff is collapsed.
Click to expand it.
modules/imgproc/src/imgwarp.cpp
View file @
6bce6ee3
This diff is collapsed.
Click to expand it.
modules/imgproc/src/pyramids.cpp
View file @
6bce6ee3
...
...
@@ -386,10 +386,10 @@ struct PyrUpVec_32s16s
__m128i
v_dst01
=
_mm_add_epi32
(
_mm_add_epi32
(
v_r0
,
v_r2
),
_mm_add_epi32
(
v_2r1
,
v_4r1
));
__m128i
v_dst11
=
_mm_slli_epi32
(
_mm_add_epi32
(
v_r1
,
v_r2
),
2
);
_mm_storeu_si128
((
__m128i
*
)(
dst0
+
x
),
_mm_storeu_si128
((
__m128i
*
)(
dst0
+
x
),
_mm_packs_epi32
(
_mm_srai_epi32
(
_mm_add_epi32
(
v_dst00
,
v_delta
),
6
),
_mm_srai_epi32
(
_mm_add_epi32
(
v_dst01
,
v_delta
),
6
)));
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
),
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
),
_mm_packs_epi32
(
_mm_srai_epi32
(
_mm_add_epi32
(
v_dst10
,
v_delta
),
6
),
_mm_srai_epi32
(
_mm_add_epi32
(
v_dst11
,
v_delta
),
6
)));
}
...
...
@@ -446,10 +446,10 @@ struct PyrUpVec_32s16u
__m128i
v_dst01
=
_mm_add_epi32
(
_mm_add_epi32
(
v_r0
,
v_r2
),
_mm_add_epi32
(
v_2r1
,
v_4r1
));
__m128i
v_dst11
=
_mm_slli_epi32
(
_mm_add_epi32
(
v_r1
,
v_r2
),
2
);
_mm_storeu_si128
((
__m128i
*
)(
dst0
+
x
),
_mm_storeu_si128
((
__m128i
*
)(
dst0
+
x
),
_mm_packus_epi32
(
_mm_srli_epi32
(
_mm_add_epi32
(
v_dst00
,
v_delta
),
6
),
_mm_srli_epi32
(
_mm_add_epi32
(
v_dst01
,
v_delta
),
6
)));
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
),
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
),
_mm_packus_epi32
(
_mm_srli_epi32
(
_mm_add_epi32
(
v_dst10
,
v_delta
),
6
),
_mm_srli_epi32
(
_mm_add_epi32
(
v_dst11
,
v_delta
),
6
)));
}
...
...
@@ -491,7 +491,7 @@ struct PyrUpVec_32f
const
float
*
row0
=
src
[
0
],
*
row1
=
src
[
1
],
*
row2
=
src
[
2
];
float
*
dst0
=
dst
[
0
],
*
dst1
=
dst
[
1
];
__m128
v_6
=
_mm_set1_ps
(
6.0
f
),
v_scale
=
_mm_set1_ps
(
1.
f
/
64.0
f
),
__m128
v_6
=
_mm_set1_ps
(
6.0
f
),
v_scale
=
_mm_set1_ps
(
1.
f
/
64.0
f
),
v_scale4
=
_mm_mul_ps
(
v_scale
,
_mm_set1_ps
(
4.0
f
));
for
(
;
x
<=
width
-
8
;
x
+=
8
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment