Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6bce6ee3
Commit
6bce6ee3
authored
Jan 12, 2015
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
checks
parent
1d3c8604
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
136 additions
and
24 deletions
+136
-24
arithm.cpp
modules/core/src/arithm.cpp
+69
-6
convert.cpp
modules/core/src/convert.cpp
+0
-0
mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+24
-10
stat.cpp
modules/core/src/stat.cpp
+4
-0
color.cpp
modules/imgproc/src/color.cpp
+0
-0
imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+39
-8
No files found.
modules/core/src/arithm.cpp
View file @
6bce6ee3
...
@@ -64,7 +64,7 @@ FUNCTOR_TEMPLATE(VLoadStore128);
...
@@ -64,7 +64,7 @@ FUNCTOR_TEMPLATE(VLoadStore128);
#if CV_SSE2
#if CV_SSE2
FUNCTOR_TEMPLATE
(
VLoadStore64
);
FUNCTOR_TEMPLATE
(
VLoadStore64
);
FUNCTOR_TEMPLATE
(
VLoadStore128Aligned
);
FUNCTOR_TEMPLATE
(
VLoadStore128Aligned
);
#if CV_AVX
#if CV_AVX
2
FUNCTOR_TEMPLATE
(
VLoadStore256
);
FUNCTOR_TEMPLATE
(
VLoadStore256
);
FUNCTOR_TEMPLATE
(
VLoadStore256Aligned
);
FUNCTOR_TEMPLATE
(
VLoadStore256Aligned
);
#endif
#endif
...
@@ -2626,10 +2626,16 @@ struct Div_SIMD
...
@@ -2626,10 +2626,16 @@ struct Div_SIMD
template
<>
template
<>
struct
Div_SIMD
<
uchar
>
struct
Div_SIMD
<
uchar
>
{
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
uchar
*
src1
,
const
uchar
*
src2
,
uchar
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
uchar
*
src1
,
const
uchar
*
src2
,
uchar
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -2672,10 +2678,16 @@ struct Div_SIMD<uchar>
...
@@ -2672,10 +2678,16 @@ struct Div_SIMD<uchar>
template
<>
template
<>
struct
Div_SIMD
<
schar
>
struct
Div_SIMD
<
schar
>
{
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
schar
*
src1
,
const
schar
*
src2
,
schar
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
schar
*
src1
,
const
schar
*
src2
,
schar
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -2718,10 +2730,16 @@ struct Div_SIMD<schar>
...
@@ -2718,10 +2730,16 @@ struct Div_SIMD<schar>
template
<>
template
<>
struct
Div_SIMD
<
ushort
>
struct
Div_SIMD
<
ushort
>
{
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
ushort
*
src1
,
const
ushort
*
src2
,
ushort
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
ushort
*
src1
,
const
ushort
*
src2
,
ushort
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -2763,10 +2781,16 @@ struct Div_SIMD<ushort>
...
@@ -2763,10 +2781,16 @@ struct Div_SIMD<ushort>
template
<>
template
<>
struct
Div_SIMD
<
short
>
struct
Div_SIMD
<
short
>
{
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
short
*
src1
,
const
short
*
src2
,
short
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
short
*
src1
,
const
short
*
src2
,
short
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -2806,10 +2830,16 @@ struct Div_SIMD<short>
...
@@ -2806,10 +2830,16 @@ struct Div_SIMD<short>
template
<>
template
<>
struct
Div_SIMD
<
int
>
struct
Div_SIMD
<
int
>
{
{
bool
haveSIMD
;
Div_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
int
*
src1
,
const
int
*
src2
,
int
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
int
*
src1
,
const
int
*
src2
,
int
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -2902,10 +2932,16 @@ struct Recip_SIMD
...
@@ -2902,10 +2932,16 @@ struct Recip_SIMD
template
<>
template
<>
struct
Recip_SIMD
<
uchar
>
struct
Recip_SIMD
<
uchar
>
{
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
uchar
*
src2
,
uchar
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
uchar
*
src2
,
uchar
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -2941,10 +2977,16 @@ struct Recip_SIMD<uchar>
...
@@ -2941,10 +2977,16 @@ struct Recip_SIMD<uchar>
template
<>
template
<>
struct
Recip_SIMD
<
schar
>
struct
Recip_SIMD
<
schar
>
{
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
schar
*
src2
,
schar
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
schar
*
src2
,
schar
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -2980,10 +3022,16 @@ struct Recip_SIMD<schar>
...
@@ -2980,10 +3022,16 @@ struct Recip_SIMD<schar>
template
<>
template
<>
struct
Recip_SIMD
<
ushort
>
struct
Recip_SIMD
<
ushort
>
{
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
}
int
operator
()
(
const
ushort
*
src2
,
ushort
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
ushort
*
src2
,
ushort
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -3018,10 +3066,16 @@ struct Recip_SIMD<ushort>
...
@@ -3018,10 +3066,16 @@ struct Recip_SIMD<ushort>
template
<>
template
<>
struct
Recip_SIMD
<
short
>
struct
Recip_SIMD
<
short
>
{
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
short
*
src2
,
short
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
short
*
src2
,
short
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -3054,10 +3108,16 @@ struct Recip_SIMD<short>
...
@@ -3054,10 +3108,16 @@ struct Recip_SIMD<short>
template
<>
template
<>
struct
Recip_SIMD
<
int
>
struct
Recip_SIMD
<
int
>
{
{
bool
haveSIMD
;
Recip_SIMD
()
{
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
int
*
src2
,
int
*
dst
,
int
width
,
double
scale
)
const
int
operator
()
(
const
int
*
src2
,
int
*
dst
,
int
width
,
double
scale
)
const
{
{
int
x
=
0
;
int
x
=
0
;
if
(
!
haveSIMD
)
return
x
;
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128d
v_scale
=
_mm_set1_pd
(
scale
);
__m128i
v_zero
=
_mm_setzero_si128
();
__m128i
v_zero
=
_mm_setzero_si128
();
...
@@ -4126,7 +4186,8 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
...
@@ -4126,7 +4186,8 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
{
{
int
x
=
0
;
int
x
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
){
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_GT
?
_mm_setzero_si128
()
:
_mm_set1_epi8
(
-
1
);
__m128i
m128
=
code
==
CMP_GT
?
_mm_setzero_si128
()
:
_mm_set1_epi8
(
-
1
);
__m128i
c128
=
_mm_set1_epi8
(
-
128
);
__m128i
c128
=
_mm_set1_epi8
(
-
128
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
@@ -4164,7 +4225,8 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
...
@@ -4164,7 +4225,8 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
{
{
int
x
=
0
;
int
x
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
){
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_EQ
?
_mm_setzero_si128
()
:
_mm_set1_epi8
(
-
1
);
__m128i
m128
=
code
==
CMP_EQ
?
_mm_setzero_si128
()
:
_mm_set1_epi8
(
-
1
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
{
...
@@ -4254,7 +4316,8 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
...
@@ -4254,7 +4316,8 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
{
{
int
x
=
0
;
int
x
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
){
//
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_GT
?
_mm_setzero_si128
()
:
_mm_set1_epi16
(
-
1
);
__m128i
m128
=
code
==
CMP_GT
?
_mm_setzero_si128
()
:
_mm_set1_epi16
(
-
1
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
{
...
@@ -4293,7 +4356,6 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
...
@@ -4293,7 +4356,6 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
vst1q_u8
(
dst
+
x
,
veorq_u8
(
vcombine_u8
(
t1
,
t2
),
mask
));
vst1q_u8
(
dst
+
x
,
veorq_u8
(
vcombine_u8
(
t1
,
t2
),
mask
));
}
}
#endif
#endif
for
(
;
x
<
size
.
width
;
x
++
){
for
(
;
x
<
size
.
width
;
x
++
){
...
@@ -4308,7 +4370,8 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
...
@@ -4308,7 +4370,8 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
{
{
int
x
=
0
;
int
x
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
){
if
(
USE_SSE2
)
{
__m128i
m128
=
code
==
CMP_EQ
?
_mm_setzero_si128
()
:
_mm_set1_epi16
(
-
1
);
__m128i
m128
=
code
==
CMP_EQ
?
_mm_setzero_si128
()
:
_mm_set1_epi16
(
-
1
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
{
...
...
modules/core/src/convert.cpp
View file @
6bce6ee3
This diff is collapsed.
Click to expand it.
modules/core/src/mathfuncs.cpp
View file @
6bce6ee3
...
@@ -597,6 +597,8 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
...
@@ -597,6 +597,8 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
k
=
0
;
k
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
...
@@ -607,6 +609,7 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
...
@@ -607,6 +609,7 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
}
}
}
#endif
#endif
for
(
;
k
<
len
;
k
++
)
for
(
;
k
<
len
;
k
++
)
...
@@ -619,12 +622,15 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
...
@@ -619,12 +622,15 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
k
=
0
;
k
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
}
}
}
#endif
#endif
for
(
;
k
<
len
;
k
++
)
for
(
;
k
<
len
;
k
++
)
...
@@ -728,6 +734,8 @@ void cartToPolar( InputArray src1, InputArray src2,
...
@@ -728,6 +734,8 @@ void cartToPolar( InputArray src1, InputArray src2,
k
=
0
;
k
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
...
@@ -738,6 +746,7 @@ void cartToPolar( InputArray src1, InputArray src2,
...
@@ -738,6 +746,7 @@ void cartToPolar( InputArray src1, InputArray src2,
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
}
}
}
#endif
#endif
for
(
;
k
<
len
;
k
++
)
for
(
;
k
<
len
;
k
++
)
...
@@ -750,12 +759,15 @@ void cartToPolar( InputArray src1, InputArray src2,
...
@@ -750,12 +759,15 @@ void cartToPolar( InputArray src1, InputArray src2,
k
=
0
;
k
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
}
}
}
#endif
#endif
for
(
;
k
<
len
;
k
++
)
for
(
;
k
<
len
;
k
++
)
...
@@ -832,7 +844,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
...
@@ -832,7 +844,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
k1
=
N
/
360.
;
k1
=
N
/
360.
;
#if CV_AVX2
#if CV_AVX2
__m128d
v_i
=
_mm_set_pd
(
1
,
0
);
if
(
USE_AVX2
)
{
__m128d
v_k1
=
_mm_set1_pd
(
k1
);
__m128d
v_k1
=
_mm_set1_pd
(
k1
);
__m128d
v_1
=
_mm_set1_pd
(
1
);
__m128d
v_1
=
_mm_set1_pd
(
1
);
__m128i
v_N1
=
_mm_set1_epi32
(
N
-
1
);
__m128i
v_N1
=
_mm_set1_epi32
(
N
-
1
);
...
@@ -841,8 +854,6 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
...
@@ -841,8 +854,6 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
__m128d
v_sin_a2
=
_mm_set1_pd
(
sin_a2
);
__m128d
v_sin_a2
=
_mm_set1_pd
(
sin_a2
);
__m128d
v_cos_a0
=
_mm_set1_pd
(
cos_a0
);
__m128d
v_cos_a0
=
_mm_set1_pd
(
cos_a0
);
if
(
USE_AVX2
)
{
for
(
;
i
<=
len
-
4
;
i
+=
4
)
for
(
;
i
<=
len
-
4
;
i
+=
4
)
{
{
__m128
v_angle
=
_mm_loadu_ps
(
angle
+
i
);
__m128
v_angle
=
_mm_loadu_ps
(
angle
+
i
);
...
@@ -859,8 +870,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
...
@@ -859,8 +870,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
__m128d
v_sin_b
=
_mm_mul_pd
(
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a0
,
v_t2
),
v_sin_a2
),
v_t
);
__m128d
v_sin_b
=
_mm_mul_pd
(
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a0
,
v_t2
),
v_sin_a2
),
v_t
);
__m128d
v_cos_b
=
_mm_add_pd
(
_mm_mul_pd
(
v_cos_a0
,
v_t2
),
v_1
);
__m128d
v_cos_b
=
_mm_add_pd
(
_mm_mul_pd
(
v_cos_a0
,
v_t2
),
v_1
);
__m128d
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
1
);
__m128d
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
8
);
__m128d
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
1
);
__m128d
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
8
);
__m128d
v_sin_val_0
=
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a
,
v_cos_b
),
__m128d
v_sin_val_0
=
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a
,
v_cos_b
),
_mm_mul_pd
(
v_cos_a
,
v_sin_b
));
_mm_mul_pd
(
v_cos_a
,
v_sin_b
));
...
@@ -868,7 +879,7 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
...
@@ -868,7 +879,7 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
_mm_mul_pd
(
v_sin_a
,
v_sin_b
));
_mm_mul_pd
(
v_sin_a
,
v_sin_b
));
// 2-3
// 2-3
v_t
=
_mm_mul_pd
(
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_s
l
li_si128
(
_mm_castps_si128
(
v_angle
),
8
))),
v_k1
);
v_t
=
_mm_mul_pd
(
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_s
r
li_si128
(
_mm_castps_si128
(
v_angle
),
8
))),
v_k1
);
v_it
=
_mm_cvtpd_epi32
(
v_t
);
v_it
=
_mm_cvtpd_epi32
(
v_t
);
v_t
=
_mm_sub_pd
(
v_t
,
_mm_cvtepi32_pd
(
v_it
));
v_t
=
_mm_sub_pd
(
v_t
,
_mm_cvtepi32_pd
(
v_it
));
...
@@ -879,8 +890,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
...
@@ -879,8 +890,8 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
v_sin_b
=
_mm_mul_pd
(
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a0
,
v_t2
),
v_sin_a2
),
v_t
);
v_sin_b
=
_mm_mul_pd
(
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a0
,
v_t2
),
v_sin_a2
),
v_t
);
v_cos_b
=
_mm_add_pd
(
_mm_mul_pd
(
v_cos_a0
,
v_t2
),
v_1
);
v_cos_b
=
_mm_add_pd
(
_mm_mul_pd
(
v_cos_a0
,
v_t2
),
v_1
);
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
1
);
v_sin_a
=
_mm_i32gather_pd
(
sin_table
,
v_sin_idx
,
8
);
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
1
);
v_cos_a
=
_mm_i32gather_pd
(
sin_table
,
v_cos_idx
,
8
);
__m128d
v_sin_val_1
=
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a
,
v_cos_b
),
__m128d
v_sin_val_1
=
_mm_add_pd
(
_mm_mul_pd
(
v_sin_a
,
v_cos_b
),
_mm_mul_pd
(
v_cos_a
,
v_sin_b
));
_mm_mul_pd
(
v_cos_a
,
v_sin_b
));
...
@@ -1032,12 +1043,15 @@ void polarToCart( InputArray src1, InputArray src2,
...
@@ -1032,12 +1043,15 @@ void polarToCart( InputArray src1, InputArray src2,
vst1q_f32
(
y
+
k
,
vmulq_f32
(
vld1q_f32
(
y
+
k
),
v_m
));
vst1q_f32
(
y
+
k
,
vmulq_f32
(
vld1q_f32
(
y
+
k
),
v_m
));
}
}
#elif CV_SSE2
#elif CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
{
__m128
v_m
=
_mm_loadu_ps
(
mag
+
k
);
__m128
v_m
=
_mm_loadu_ps
(
mag
+
k
);
_mm_storeu_ps
(
x
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
x
+
k
),
v_m
));
_mm_storeu_ps
(
x
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
x
+
k
),
v_m
));
_mm_storeu_ps
(
y
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
y
+
k
),
v_m
));
_mm_storeu_ps
(
y
+
k
,
_mm_mul_ps
(
_mm_loadu_ps
(
y
+
k
),
v_m
));
}
}
}
#endif
#endif
for
(
;
k
<
len
;
k
++
)
for
(
;
k
<
len
;
k
++
)
...
@@ -1063,9 +1077,9 @@ void polarToCart( InputArray src1, InputArray src2,
...
@@ -1063,9 +1077,9 @@ void polarToCart( InputArray src1, InputArray src2,
x
[
k
]
=
buf
[
0
][
k
]
*
m
;
y
[
k
]
=
buf
[
1
][
k
]
*
m
;
x
[
k
]
=
buf
[
0
][
k
]
*
m
;
y
[
k
]
=
buf
[
1
][
k
]
*
m
;
}
}
else
else
for
(
k
=
0
;
k
<
len
;
k
++
)
{
{
x
[
k
]
=
buf
[
0
][
k
];
y
[
k
]
=
buf
[
1
][
k
];
std
::
memcpy
(
x
,
buf
[
0
],
sizeof
(
float
)
*
len
);
std
::
memcpy
(
y
,
buf
[
1
],
sizeof
(
float
)
*
len
);
}
}
}
}
...
...
modules/core/src/stat.cpp
View file @
6bce6ee3
...
@@ -397,6 +397,8 @@ static int countNonZero_(const T* src, int len )
...
@@ -397,6 +397,8 @@ static int countNonZero_(const T* src, int len )
return
nz
;
return
nz
;
}
}
#if CV_SSE2
static
const
uchar
*
initPopcountTable
()
static
const
uchar
*
initPopcountTable
()
{
{
static
uchar
tab
[
256
];
static
uchar
tab
[
256
];
...
@@ -425,6 +427,8 @@ static const uchar * initPopcountTable()
...
@@ -425,6 +427,8 @@ static const uchar * initPopcountTable()
return
tab
;
return
tab
;
}
}
#endif
static
int
countNonZero8u
(
const
uchar
*
src
,
int
len
)
static
int
countNonZero8u
(
const
uchar
*
src
,
int
len
)
{
{
int
i
=
0
,
nz
=
0
;
int
i
=
0
,
nz
=
0
;
...
...
modules/imgproc/src/color.cpp
View file @
6bce6ee3
This diff is collapsed.
Click to expand it.
modules/imgproc/src/imgwarp.cpp
View file @
6bce6ee3
...
@@ -1963,9 +1963,9 @@ private:
...
@@ -1963,9 +1963,9 @@ private:
struct
ResizeAreaFastVec_SIMD_32f
struct
ResizeAreaFastVec_SIMD_32f
{
{
ResizeAreaFastVec_SIMD_32f
(
int
_scale_x
,
int
_scale_y
,
int
_cn
,
int
_step
)
:
ResizeAreaFastVec_SIMD_32f
(
int
_scale_x
,
int
_scale_y
,
int
_cn
,
int
_step
)
:
scale_x
(
_scale_x
),
scale_y
(
_scale_y
),
cn
(
_cn
),
step
(
_step
)
cn
(
_cn
),
step
(
_step
)
{
{
fast_mode
=
scale_x
==
2
&&
scale_y
==
2
&&
(
cn
==
1
||
cn
==
3
||
cn
==
4
);
fast_mode
=
_scale_x
==
2
&&
_scale_y
==
2
&&
(
cn
==
1
||
cn
==
4
);
}
}
int
operator
()
(
const
float
*
S
,
float
*
D
,
int
w
)
const
int
operator
()
(
const
float
*
S
,
float
*
D
,
int
w
)
const
...
@@ -2005,7 +2005,6 @@ struct ResizeAreaFastVec_SIMD_32f
...
@@ -2005,7 +2005,6 @@ struct ResizeAreaFastVec_SIMD_32f
}
}
private
:
private
:
int
scale_x
,
scale_y
;
int
cn
;
int
cn
;
bool
fast_mode
;
bool
fast_mode
;
int
step
;
int
step
;
...
@@ -2289,9 +2288,10 @@ private:
...
@@ -2289,9 +2288,10 @@ private:
struct
ResizeAreaFastVec_SIMD_32f
struct
ResizeAreaFastVec_SIMD_32f
{
{
ResizeAreaFastVec_SIMD_32f
(
int
_scale_x
,
int
_scale_y
,
int
_cn
,
int
_step
)
:
ResizeAreaFastVec_SIMD_32f
(
int
_scale_x
,
int
_scale_y
,
int
_cn
,
int
_step
)
:
scale_x
(
_scale_x
),
scale_y
(
_scale_y
),
cn
(
_cn
),
step
(
_step
)
cn
(
_cn
),
step
(
_step
)
{
{
fast_mode
=
scale_x
==
2
&&
scale_y
==
2
&&
(
cn
==
1
||
cn
==
3
||
cn
==
4
);
fast_mode
=
_scale_x
==
2
&&
_scale_y
==
2
&&
(
cn
==
1
||
cn
==
4
);
fast_mode
=
fast_mode
&&
checkHardwareSupport
(
CV_CPU_SSE2
);
}
}
int
operator
()
(
const
float
*
S
,
float
*
D
,
int
w
)
const
int
operator
()
(
const
float
*
S
,
float
*
D
,
int
w
)
const
...
@@ -2335,7 +2335,6 @@ struct ResizeAreaFastVec_SIMD_32f
...
@@ -2335,7 +2335,6 @@ struct ResizeAreaFastVec_SIMD_32f
}
}
private
:
private
:
int
scale_x
,
scale_y
;
int
cn
;
int
cn
;
bool
fast_mode
;
bool
fast_mode
;
int
step
;
int
step
;
...
@@ -4817,6 +4816,13 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -4817,6 +4816,13 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
size
.
height
=
1
;
size
.
height
=
1
;
}
}
#if CV_SSE2
bool
useSSE2
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
#if CV_SSE4_1
bool
useSSE4_1
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
#endif
const
float
scale
=
1.
f
/
INTER_TAB_SIZE
;
const
float
scale
=
1.
f
/
INTER_TAB_SIZE
;
int
x
,
y
;
int
x
,
y
;
for
(
y
=
0
;
y
<
size
.
height
;
y
++
)
for
(
y
=
0
;
y
<
size
.
height
;
y
++
)
...
@@ -4848,6 +4854,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -4848,6 +4854,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
vst2q_s16
(
dst1
+
(
x
<<
1
),
v_dst
);
vst2q_s16
(
dst1
+
(
x
<<
1
),
v_dst
);
}
}
#elif CV_SSE4_1
#elif CV_SSE4_1
if
(
useSSE4_1
)
{
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
{
__m128i
v_dst0
=
_mm_packs_epi32
(
_mm_cvtps_epi32
(
_mm_loadu_ps
(
src1f
+
x
)),
__m128i
v_dst0
=
_mm_packs_epi32
(
_mm_cvtps_epi32
(
_mm_loadu_ps
(
src1f
+
x
)),
...
@@ -4867,6 +4875,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -4867,6 +4875,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
16
),
v_dst2
);
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
16
),
v_dst2
);
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
24
),
v_dst3
);
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
24
),
v_dst3
);
}
}
}
#endif
#endif
for
(
;
x
<
size
.
width
;
x
++
)
for
(
;
x
<
size
.
width
;
x
++
)
{
{
...
@@ -4902,6 +4911,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -4902,6 +4911,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
vst1q_u16
(
dst2
+
x
,
vcombine_u16
(
v_dst0
,
v_dst1
));
vst1q_u16
(
dst2
+
x
,
vcombine_u16
(
v_dst0
,
v_dst1
));
}
}
#elif CV_SSE4_1
#elif CV_SSE4_1
if
(
useSSE4_1
)
{
__m128
v_its
=
_mm_set1_ps
(
INTER_TAB_SIZE
);
__m128
v_its
=
_mm_set1_ps
(
INTER_TAB_SIZE
);
__m128i
v_its1
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
__m128i
v_its1
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
...
@@ -4944,6 +4955,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -4944,6 +4955,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
16
),
v_dst12
);
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
16
),
v_dst12
);
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
24
),
v_dst13
);
_mm_storeu_si128
((
__m128i
*
)(
dst1
+
x
*
2
+
24
),
v_dst13
);
}
}
}
#endif
#endif
for
(
;
x
<
size
.
width
;
x
++
)
for
(
;
x
<
size
.
width
;
x
++
)
{
{
...
@@ -5005,6 +5017,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -5005,6 +5017,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
vst1q_u16
(
dst2
+
x
,
vcombine_u16
(
v_dst0
,
v_dst1
));
vst1q_u16
(
dst2
+
x
,
vcombine_u16
(
v_dst0
,
v_dst1
));
}
}
#elif CV_SSE2
#elif CV_SSE2
if
(
useSSE2
)
{
__m128
v_its
=
_mm_set1_ps
(
INTER_TAB_SIZE
);
__m128
v_its
=
_mm_set1_ps
(
INTER_TAB_SIZE
);
__m128i
v_its1
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
__m128i
v_its1
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
__m128i
v_y_mask
=
_mm_set1_epi32
((
INTER_TAB_SIZE
-
1
)
<<
16
);
__m128i
v_y_mask
=
_mm_set1_epi32
((
INTER_TAB_SIZE
-
1
)
<<
16
);
...
@@ -5025,6 +5039,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -5025,6 +5039,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
_mm_and_si128
(
v_src0
,
v_its1
));
// 0 x0 0 x1 . . .
_mm_and_si128
(
v_src0
,
v_its1
));
// 0 x0 0 x1 . . .
_mm_storel_epi64
((
__m128i
*
)(
dst2
+
x
),
_mm_packus_epi32
(
v_dst2
,
v_dst2
));
_mm_storel_epi64
((
__m128i
*
)(
dst2
+
x
),
_mm_packus_epi32
(
v_dst2
,
v_dst2
));
}
}
}
#endif
#endif
for
(
;
x
<
size
.
width
;
x
++
)
for
(
;
x
<
size
.
width
;
x
++
)
{
{
...
@@ -5150,6 +5165,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -5150,6 +5165,8 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
vst2q_f32
(
dst1f
+
(
x
<<
1
)
+
8
,
v_dst
);
vst2q_f32
(
dst1f
+
(
x
<<
1
)
+
8
,
v_dst
);
}
}
#elif CV_SSE2
#elif CV_SSE2
if
(
useSSE2
)
{
__m128i
v_mask2
=
_mm_set1_epi16
(
INTER_TAB_SIZE2
-
1
);
__m128i
v_mask2
=
_mm_set1_epi16
(
INTER_TAB_SIZE2
-
1
);
__m128i
v_zero
=
_mm_set1_epi32
(
0
),
v_mask
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
__m128i
v_zero
=
_mm_set1_epi32
(
0
),
v_mask
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
__m128
v_scale
=
_mm_set1_ps
(
scale
);
__m128
v_scale
=
_mm_set1_ps
(
scale
);
...
@@ -5167,6 +5184,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
...
@@ -5167,6 +5184,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
v_add
=
_mm_mul_ps
(
_mm_cvtepi32_ps
(
_mm_unpackhi_epi16
(
v_fxy1
,
v_fxy2
)),
v_scale
);
v_add
=
_mm_mul_ps
(
_mm_cvtepi32_ps
(
_mm_unpackhi_epi16
(
v_fxy1
,
v_fxy2
)),
v_scale
);
_mm_storeu_ps
(
dst1f
+
x
*
2
,
_mm_add_ps
(
_mm_cvtepi32_ps
(
_mm_unpackhi_epi16
(
v_src
,
v_zero
)),
v_add
));
_mm_storeu_ps
(
dst1f
+
x
*
2
,
_mm_add_ps
(
_mm_cvtepi32_ps
(
_mm_unpackhi_epi16
(
v_src
,
v_zero
)),
v_add
));
}
}
}
#endif
#endif
for
(
;
x
<
size
.
width
;
x
++
)
for
(
;
x
<
size
.
width
;
x
++
)
{
{
...
@@ -5204,7 +5222,10 @@ public:
...
@@ -5204,7 +5222,10 @@ public:
const
int
AB_SCALE
=
1
<<
AB_BITS
;
const
int
AB_SCALE
=
1
<<
AB_BITS
;
int
round_delta
=
interpolation
==
INTER_NEAREST
?
AB_SCALE
/
2
:
AB_SCALE
/
INTER_TAB_SIZE
/
2
,
x
,
y
,
x1
,
y1
;
int
round_delta
=
interpolation
==
INTER_NEAREST
?
AB_SCALE
/
2
:
AB_SCALE
/
INTER_TAB_SIZE
/
2
,
x
,
y
,
x1
,
y1
;
#if CV_SSE2
#if CV_SSE2
bool
useSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
bool
useSSE2
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
#if CV_SSE4_1
bool
useSSE4_1
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
#endif
#endif
int
bh0
=
std
::
min
(
BLOCK_SZ
/
2
,
dst
.
rows
);
int
bh0
=
std
::
min
(
BLOCK_SZ
/
2
,
dst
.
rows
);
...
@@ -5243,6 +5264,8 @@ public:
...
@@ -5243,6 +5264,8 @@ public:
vst2q_s16
(
xy
+
(
x1
<<
1
),
v_dst
);
vst2q_s16
(
xy
+
(
x1
<<
1
),
v_dst
);
}
}
#elif CV_SSE4_1
#elif CV_SSE4_1
if
(
useSSE4_1
)
{
__m128i
v_X0
=
_mm_set1_epi32
(
X0
);
__m128i
v_X0
=
_mm_set1_epi32
(
X0
);
__m128i
v_Y0
=
_mm_set1_epi32
(
Y0
);
__m128i
v_Y0
=
_mm_set1_epi32
(
Y0
);
for
(
;
x1
<=
bw
-
16
;
x1
+=
16
)
for
(
;
x1
<=
bw
-
16
;
x1
+=
16
)
...
@@ -5264,6 +5287,7 @@ public:
...
@@ -5264,6 +5287,7 @@ public:
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
16
),
v_y0
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
16
),
v_y0
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
24
),
v_y1
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
24
),
v_y1
);
}
}
}
#endif
#endif
for
(
;
x1
<
bw
;
x1
++
)
for
(
;
x1
<
bw
;
x1
++
)
{
{
...
@@ -5278,7 +5302,7 @@ public:
...
@@ -5278,7 +5302,7 @@ public:
short
*
alpha
=
A
+
y1
*
bw
;
short
*
alpha
=
A
+
y1
*
bw
;
x1
=
0
;
x1
=
0
;
#if CV_SSE2
#if CV_SSE2
if
(
useS
IMD
)
if
(
useS
SE2
)
{
{
__m128i
fxy_mask
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
__m128i
fxy_mask
=
_mm_set1_epi32
(
INTER_TAB_SIZE
-
1
);
__m128i
XX
=
_mm_set1_epi32
(
X0
),
YY
=
_mm_set1_epi32
(
Y0
);
__m128i
XX
=
_mm_set1_epi32
(
X0
),
YY
=
_mm_set1_epi32
(
Y0
);
...
@@ -5672,6 +5696,7 @@ public:
...
@@ -5672,6 +5696,7 @@ public:
bh0
=
std
::
min
(
BLOCK_SZ
*
BLOCK_SZ
/
bw0
,
height
);
bh0
=
std
::
min
(
BLOCK_SZ
*
BLOCK_SZ
/
bw0
,
height
);
#if CV_SSE4_1
#if CV_SSE4_1
bool
haveSSE4_1
=
checkHardwareSupport
(
CV_CPU_SSE4_1
);
__m128d
v_M0
=
_mm_set1_pd
(
M
[
0
]);
__m128d
v_M0
=
_mm_set1_pd
(
M
[
0
]);
__m128d
v_M3
=
_mm_set1_pd
(
M
[
3
]);
__m128d
v_M3
=
_mm_set1_pd
(
M
[
3
]);
__m128d
v_M6
=
_mm_set1_pd
(
M
[
6
]);
__m128d
v_M6
=
_mm_set1_pd
(
M
[
6
]);
...
@@ -5706,6 +5731,8 @@ public:
...
@@ -5706,6 +5731,8 @@ public:
x1
=
0
;
x1
=
0
;
#if CV_SSE4_1
#if CV_SSE4_1
if
(
haveSSE4_1
)
{
__m128d
v_X0d
=
_mm_set1_pd
(
X0
);
__m128d
v_X0d
=
_mm_set1_pd
(
X0
);
__m128d
v_Y0d
=
_mm_set1_pd
(
Y0
);
__m128d
v_Y0d
=
_mm_set1_pd
(
Y0
);
__m128d
v_W0
=
_mm_set1_pd
(
W0
);
__m128d
v_W0
=
_mm_set1_pd
(
W0
);
...
@@ -5810,6 +5837,7 @@ public:
...
@@ -5810,6 +5837,7 @@ public:
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
16
),
v_Y0
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
16
),
v_Y0
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
24
),
v_Y1
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
24
),
v_Y1
);
}
}
}
#endif
#endif
for
(
;
x1
<
bw
;
x1
++
)
for
(
;
x1
<
bw
;
x1
++
)
...
@@ -5831,6 +5859,8 @@ public:
...
@@ -5831,6 +5859,8 @@ public:
x1
=
0
;
x1
=
0
;
#if CV_SSE4_1
#if CV_SSE4_1
if
(
haveSSE4_1
)
{
__m128d
v_X0d
=
_mm_set1_pd
(
X0
);
__m128d
v_X0d
=
_mm_set1_pd
(
X0
);
__m128d
v_Y0d
=
_mm_set1_pd
(
Y0
);
__m128d
v_Y0d
=
_mm_set1_pd
(
Y0
);
__m128d
v_W0
=
_mm_set1_pd
(
W0
);
__m128d
v_W0
=
_mm_set1_pd
(
W0
);
...
@@ -5948,6 +5978,7 @@ public:
...
@@ -5948,6 +5978,7 @@ public:
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
16
),
v_Y0
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
16
),
v_Y0
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
24
),
v_Y1
);
_mm_storeu_si128
((
__m128i
*
)(
xy
+
x1
*
2
+
24
),
v_Y1
);
}
}
}
#endif
#endif
for
(
;
x1
<
bw
;
x1
++
)
for
(
;
x1
<
bw
;
x1
++
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment