Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
30b45d9c
Commit
30b45d9c
authored
Jul 26, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86inc: automatically insert vzeroupper for YMM functions.
parent
8ea1459b
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
12 additions
and
53 deletions
+12
-53
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+0
-14
fft_mmx.asm
libavcodec/x86/fft_mmx.asm
+0
-6
audio_convert.asm
libavresample/x86/audio_convert.asm
+0
-10
audio_mix.asm
libavresample/x86/audio_mix.asm
+0
-10
float_dsp.asm
libavutil/x86/float_dsp.asm
+0
-10
x86inc.asm
libavutil/x86/x86inc.asm
+12
-3
No files found.
libavcodec/x86/dsputil_yasm.asm
View file @
30b45d9c
...
@@ -1158,12 +1158,7 @@ ALIGN 16
...
@@ -1158,12 +1158,7 @@ ALIGN 16
add
src1q
,
2
*
mmsize
add
src1q
,
2
*
mmsize
sub
lenq
,
2
*
mmsize
sub
lenq
,
2
*
mmsize
jge
.
loop
jge
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse
INIT_XMM
sse
...
@@ -1193,12 +1188,7 @@ ALIGN 16
...
@@ -1193,12 +1188,7 @@ ALIGN 16
sub
lenq
,
2
*
mmsize
sub
lenq
,
2
*
mmsize
jge
.
loop
jge
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse
INIT_XMM
sse
...
@@ -1243,10 +1233,6 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
...
@@ -1243,10 +1233,6 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
%endif
%endif
add
lenq
,
mmsize
add
lenq
,
mmsize
jl
.
loop
jl
.
loop
%if
mmsize
==
32
vzeroupper
RET
%endif
.
end
:
.
end
:
REP_RET
REP_RET
%endmacro
%endmacro
...
...
libavcodec/x86/fft_mmx.asm
View file @
30b45d9c
...
@@ -749,9 +749,6 @@ section .text
...
@@ -749,9 +749,6 @@ section .text
; The others pass args in registers and don't spill anything.
; The others pass args in registers and don't spill anything.
cglobal
fft_dispatch
%2
,
2
,
5
,
8
,
z
,
nbits
cglobal
fft_dispatch
%2
,
2
,
5
,
8
,
z
,
nbits
FFT_DISPATCH
fullsuffix
,
nbits
FFT_DISPATCH
fullsuffix
,
nbits
%if
mmsize
==
32
vzeroupper
%endif
RET
RET
%endmacro
; DECL_FFT
%endmacro
; DECL_FFT
...
@@ -956,9 +953,6 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
...
@@ -956,9 +953,6 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
%1
r0
,
r1
,
r6
,
rtcos
,
rtsin
%1
r0
,
r1
,
r6
,
rtcos
,
rtsin
%if
ARCH_X86_64
==
0
%if
ARCH_X86_64
==
0
add
esp
,
12
add
esp
,
12
%endif
%if
mmsize
==
32
vzeroupper
%endif
%endif
RET
RET
%endmacro
%endmacro
...
...
libavresample/x86/audio_convert.asm
View file @
30b45d9c
...
@@ -145,12 +145,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len
...
@@ -145,12 +145,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len
mova
[
dstq
+
lenq
+
mmsize
]
,
m2
mova
[
dstq
+
lenq
+
mmsize
]
,
m2
add
lenq
,
mmsize
*
2
add
lenq
,
mmsize
*
2
jl
.
loop
jl
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse2
INIT_XMM
sse2
...
@@ -218,12 +213,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len
...
@@ -218,12 +213,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len
mova
[
dstq
+
lenq
+
3
*
mmsize
]
,
m3
mova
[
dstq
+
lenq
+
3
*
mmsize
]
,
m3
add
lenq
,
mmsize
*
4
add
lenq
,
mmsize
*
4
jl
.
loop
jl
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse2
INIT_XMM
sse2
...
...
libavresample/x86/audio_mix.asm
View file @
30b45d9c
...
@@ -51,12 +51,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
...
@@ -51,12 +51,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
add
srcq
,
mmsize
*
2
add
srcq
,
mmsize
*
2
sub
lend
,
mmsize
*
2
/
4
sub
lend
,
mmsize
*
2
/
4
jg
.
loop
jg
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse
INIT_XMM
sse
...
@@ -175,12 +170,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
...
@@ -175,12 +170,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
add
src0q
,
mmsize
add
src0q
,
mmsize
sub
lend
,
mmsize
/
4
sub
lend
,
mmsize
/
4
jg
.
loop
jg
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse
INIT_XMM
sse
...
...
libavutil/x86/float_dsp.asm
View file @
30b45d9c
...
@@ -40,12 +40,7 @@ ALIGN 16
...
@@ -40,12 +40,7 @@ ALIGN 16
sub
lenq
,
2
*
mmsize
sub
lenq
,
2
*
mmsize
jge
.
loop
jge
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse
INIT_XMM
sse
...
@@ -86,12 +81,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
...
@@ -86,12 +81,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
mova
[
dstq
+
lenq
+
mmsize
]
,
m2
mova
[
dstq
+
lenq
+
mmsize
]
,
m2
sub
lenq
,
2
*
mmsize
sub
lenq
,
2
*
mmsize
jge
.
loop
jge
.
loop
%if
mmsize
==
32
vzeroupper
RET
%else
REP_RET
REP_RET
%endif
%endmacro
%endmacro
INIT_XMM
sse
INIT_XMM
sse
...
...
libavutil/x86/x86inc.asm
View file @
30b45d9c
...
@@ -369,11 +369,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
...
@@ -369,11 +369,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
%macro
RET
0
%macro
RET
0
WIN64_RESTORE_XMM_INTERNAL
rsp
WIN64_RESTORE_XMM_INTERNAL
rsp
POP_IF_USED
14
,
13
,
12
,
11
,
10
,
9
,
8
,
7
POP_IF_USED
14
,
13
,
12
,
11
,
10
,
9
,
8
,
7
%if
mmsize
==
32
vzeroupper
%endif
ret
ret
%endmacro
%endmacro
%macro
REP_RET
0
%macro
REP_RET
0
%
if
regs_used
>
7
||
xmm_regs_used
>
6
%
if
regs_used
>
7
||
xmm_regs_used
>
6
||
mmsize
==
32
RET
RET
%
else
%
else
rep
ret
rep
ret
...
@@ -410,11 +413,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
...
@@ -410,11 +413,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
%macro
RET
0
%macro
RET
0
POP_IF_USED
14
,
13
,
12
,
11
,
10
,
9
POP_IF_USED
14
,
13
,
12
,
11
,
10
,
9
%if
mmsize
==
32
vzeroupper
%endif
ret
ret
%endmacro
%endmacro
%macro
REP_RET
0
%macro
REP_RET
0
%
if
regs_used
>
9
%
if
regs_used
>
9
||
mmsize
==
32
RET
RET
%
else
%
else
rep
ret
rep
ret
...
@@ -456,11 +462,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
...
@@ -456,11 +462,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%macro
RET
0
%macro
RET
0
POP_IF_USED
6
,
5
,
4
,
3
POP_IF_USED
6
,
5
,
4
,
3
%if
mmsize
==
32
vzeroupper
%endif
ret
ret
%endmacro
%endmacro
%macro
REP_RET
0
%macro
REP_RET
0
%
if
regs_used
>
3
%
if
regs_used
>
3
||
mmsize
==
32
RET
RET
%
else
%
else
rep
ret
rep
ret
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment