Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
681a86ab
Commit
681a86ab
authored
Jul 15, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: fft: Port to cpuflags
parent
e9bb77fb
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
51 deletions
+43
-51
fft.asm
libavcodec/x86/fft.asm
+43
-51
No files found.
libavcodec/x86/fft.asm
View file @
681a86ab
...
@@ -190,6 +190,23 @@ SECTION .text
...
@@ -190,6 +190,23 @@ SECTION .text
addps
%2
,
%2
,
%5
; {i0,i1,i2,i3}
addps
%2
,
%2
,
%5
; {i0,i1,i2,i3}
%endmacro
%endmacro
%macro
INTERL
5
%if
cpuflag
(
avx
)
vunpckhps
%3
,
%2
,
%1
vunpcklps
%2
,
%2
,
%1
vextractf128
%4
(
%5
),
%2
,
0
vextractf128
%4
%
+
H
(
%5
),
%3
,
0
vextractf128
%4
(
%5
+
1
),
%2
,
1
vextractf128
%4
%
+
H
(
%5
+
1
),
%3
,
1
%elif
cpuflag
(
sse
)
mova
%3
,
%2
unpcklps
%2
,
%1
unpckhps
%3
,
%1
mova
%4
(
%5
),
%2
mova
%4
(
%5
+
1
),
%3
%endif
%endmacro
; scheduled for cpu-bound sizes
; scheduled for cpu-bound sizes
%macro
PASS_SMALL
3
; (to load m4-m7), wre, wim
%macro
PASS_SMALL
3
; (to load m4-m7), wre, wim
IF%1
mova
m4
,
Z
(
4
)
IF%1
mova
m4
,
Z
(
4
)
...
@@ -536,17 +553,6 @@ DEFINE_ARGS zc, w, n, o1, o3
...
@@ -536,17 +553,6 @@ DEFINE_ARGS zc, w, n, o1, o3
INIT_YMM
avx
INIT_YMM
avx
%macro
INTERL_AVX
5
vunpckhps
%3
,
%2
,
%1
vunpcklps
%2
,
%2
,
%1
vextractf128
%4
(
%5
),
%2
,
0
vextractf128
%4
%
+
H
(
%5
),
%3
,
0
vextractf128
%4
(
%5
+
1
),
%2
,
1
vextractf128
%4
%
+
H
(
%5
+
1
),
%3
,
1
%endmacro
%define
INTERL
INTERL_AVX
DECL_PASS
pass_avx
,
PASS_BIG
1
DECL_PASS
pass_avx
,
PASS_BIG
1
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
...
@@ -560,16 +566,6 @@ cglobal fft_calc, 2,5,8
...
@@ -560,16 +566,6 @@ cglobal fft_calc, 2,5,8
INIT_XMM
sse
INIT_XMM
sse
%macro
INTERL_SSE
5
mova
%3
,
%2
unpcklps
%2
,
%1
unpckhps
%3
,
%1
mova
%4
(
%5
),
%2
mova
%4
(
%5
+
1
),
%3
%endmacro
%define
INTERL
INTERL_SSE
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
...
@@ -855,16 +851,30 @@ INIT_XMM sse
...
@@ -855,16 +851,30 @@ INIT_XMM sse
%endmacro
%endmacro
%macro
CMUL
6
;j, xmm0, xmm1, 3, 4, 5
%macro
CMUL
6
;j, xmm0, xmm1, 3, 4, 5
%if
cpuflag
(
sse
)
mulps
m6
,
%3
,
[
%5
+
%1
]
mulps
m6
,
%3
,
[
%5
+
%1
]
mulps
m7
,
%2
,
[
%5
+
%1
]
mulps
m7
,
%2
,
[
%5
+
%1
]
mulps
%2
,
%2
,
[
%6
+
%1
]
mulps
%2
,
%2
,
[
%6
+
%1
]
mulps
%3
,
%3
,
[
%6
+
%1
]
mulps
%3
,
%3
,
[
%6
+
%1
]
subps
%2
,
%2
,
m6
subps
%2
,
%2
,
m6
addps
%3
,
%3
,
m7
addps
%3
,
%3
,
m7
%elif
cpuflag
(
3
dnow
)
mova
m6
,
[
%1
+
%2
*
2
]
mova
%3
,
[
%1
+
%2
*
2
+
8
]
mova
%4
,
m6
mova
m7
,
%3
pfmul
m6
,
[
%5
+
%2
]
pfmul
%3
,
[
%6
+
%2
]
pfmul
%4
,
[
%6
+
%2
]
pfmul
m7
,
[
%5
+
%2
]
pfsub
%3
,
m6
pfadd
%4
,
m7
%endif
%endmacro
%endmacro
%macro
POSROTATESHUF
_AVX
5
;j, k, z+n8, tcos+n8, tsin+n8
%macro
POSROTATESHUF
5
;j, k, z+n8, tcos+n8, tsin+n8
.
post
:
.
post
:
%if
cpuflag
(
avx
)
vmovaps
ymm1
,
[
%3
+
%1
*
2
]
vmovaps
ymm1
,
[
%3
+
%1
*
2
]
vmovaps
ymm0
,
[
%3
+
%1
*
2
+
0x20
]
vmovaps
ymm0
,
[
%3
+
%1
*
2
+
0x20
]
vmovaps
ymm3
,
[
%3
+
%2
*
2
]
vmovaps
ymm3
,
[
%3
+
%2
*
2
]
...
@@ -893,10 +903,7 @@ INIT_XMM sse
...
@@ -893,10 +903,7 @@ INIT_XMM sse
sub
%2
,
0x20
sub
%2
,
0x20
add
%1
,
0x20
add
%1
,
0x20
jl
.
post
jl
.
post
%endmacro
%elif
cpuflag
(
sse
)
%macro
POSROTATESHUF
5
;j, k, z+n8, tcos+n8, tsin+n8
.
post
:
movaps
xmm1
,
[
%3
+
%1
*
2
]
movaps
xmm1
,
[
%3
+
%1
*
2
]
movaps
xmm0
,
[
%3
+
%1
*
2
+
0x10
]
movaps
xmm0
,
[
%3
+
%1
*
2
+
0x10
]
CMUL
%1
,
xmm0
,
xmm1
,
%3
,
%4
,
%5
CMUL
%1
,
xmm0
,
xmm1
,
%3
,
%4
,
%5
...
@@ -918,25 +925,9 @@ INIT_XMM sse
...
@@ -918,25 +925,9 @@ INIT_XMM sse
sub
%2
,
0x10
sub
%2
,
0x10
add
%1
,
0x10
add
%1
,
0x10
jl
.
post
jl
.
post
%endmacro
%elif
cpuflag
(
3
dnow
)
CMUL
%3
,
%1
,
m0
,
m1
,
%4
,
%5
%macro
CMUL_3DNOW
6
CMUL
%3
,
%2
,
m2
,
m3
,
%4
,
%5
mova
m6
,
[
%1
+
%2
*
2
]
mova
%3
,
[
%1
+
%2
*
2
+
8
]
mova
%4
,
m6
mova
m7
,
%3
pfmul
m6
,
[
%5
+
%2
]
pfmul
%3
,
[
%6
+
%2
]
pfmul
%4
,
[
%6
+
%2
]
pfmul
m7
,
[
%5
+
%2
]
pfsub
%3
,
m6
pfadd
%4
,
m7
%endmacro
%macro
POSROTATESHUF_3DNOW
5
;j, k, z+n8, tcos+n8, tsin+n8
.
post
:
CMUL_3DNOW
%3
,
%1
,
m0
,
m1
,
%4
,
%5
CMUL_3DNOW
%3
,
%2
,
m2
,
m3
,
%4
,
%5
movd
[
%3
+
%1
*
2
+
0
]
,
m0
movd
[
%3
+
%1
*
2
+
0
]
,
m0
movd
[
%3
+
%2
*
2
+
12
]
,
m1
movd
[
%3
+
%2
*
2
+
12
]
,
m1
movd
[
%3
+
%2
*
2
+
0
]
,
m2
movd
[
%3
+
%2
*
2
+
0
]
,
m2
...
@@ -952,9 +943,10 @@ INIT_XMM sse
...
@@ -952,9 +943,10 @@ INIT_XMM sse
sub
%2
,
8
sub
%2
,
8
add
%1
,
8
add
%1
,
8
jl
.
post
jl
.
post
%endif
%endmacro
%endmacro
%macro
DECL_IMDCT
1
%macro
DECL_IMDCT
0
cglobal
imdct_half
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
cglobal
imdct_half
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
%if
ARCH_X86_64
%if
ARCH_X86_64
%define
rrevtab
r7
%define
rrevtab
r7
...
@@ -1060,7 +1052,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
...
@@ -1060,7 +1052,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
neg
r0
neg
r0
mov
r1
,
-
mmsize
mov
r1
,
-
mmsize
sub
r1
,
r0
sub
r1
,
r0
%1
r0
,
r1
,
r6
,
rtcos
,
rtsin
POSROTATESHUF
r0
,
r1
,
r6
,
rtcos
,
rtsin
%if
ARCH_X86_64
==
0
%if
ARCH_X86_64
==
0
add
esp
,
12
add
esp
,
12
%endif
%endif
...
@@ -1070,17 +1062,17 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
...
@@ -1070,17 +1062,17 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
RET
RET
%endmacro
%endmacro
DECL_IMDCT
POSROTATESHUF
DECL_IMDCT
%if
ARCH_X86_32
%if
ARCH_X86_32
INIT_MMX
3
dnow
INIT_MMX
3
dnow
DECL_IMDCT
POSROTATESHUF_3DNOW
DECL_IMDCT
INIT_MMX
3
dnowext
INIT_MMX
3
dnowext
DECL_IMDCT
POSROTATESHUF_3DNOW
DECL_IMDCT
%endif
%endif
INIT_YMM
avx
INIT_YMM
avx
DECL_IMDCT
POSROTATESHUF_AVX
DECL_IMDCT
%endif
; CONFIG_MDCT
%endif
; CONFIG_MDCT
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment