Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
2fd5e708
Commit
2fd5e708
authored
May 27, 2012
by
Vitor Sessak
Committed by
Michael Niedermayer
May 27, 2012
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: use new schema for ASM macros
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
65212e3e
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
79 additions
and
76 deletions
+79
-76
fft.c
libavcodec/x86/fft.c
+6
-6
fft.h
libavcodec/x86/fft.h
+6
-6
fft_3dn2.c
libavcodec/x86/fft_3dn2.c
+13
-13
fft_mmx.asm
libavcodec/x86/fft_mmx.asm
+54
-51
No files found.
libavcodec/x86/fft.c
View file @
2fd5e708
...
...
@@ -27,15 +27,15 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
int
has_vectors
=
av_get_cpu_flags
();
if
(
has_vectors
&
AV_CPU_FLAG_3DNOW
&&
HAVE_AMD3DNOW
)
{
/* 3DNow! for K6-2/3 */
s
->
imdct_calc
=
ff_imdct_calc_3dn
;
s
->
imdct_half
=
ff_imdct_half_3dn
;
s
->
fft_calc
=
ff_fft_calc_3dn
;
s
->
imdct_calc
=
ff_imdct_calc_3dn
ow
;
s
->
imdct_half
=
ff_imdct_half_3dn
ow
;
s
->
fft_calc
=
ff_fft_calc_3dn
ow
;
}
if
(
has_vectors
&
AV_CPU_FLAG_3DNOWEXT
&&
HAVE_AMD3DNOWEXT
)
{
/* 3DNowEx for K7 */
s
->
imdct_calc
=
ff_imdct_calc_3dn2
;
s
->
imdct_half
=
ff_imdct_half_3dn2
;
s
->
fft_calc
=
ff_fft_calc_3dn2
;
s
->
imdct_calc
=
ff_imdct_calc_3dn
ow
2
;
s
->
imdct_half
=
ff_imdct_half_3dn
ow
2
;
s
->
fft_calc
=
ff_fft_calc_3dn
ow
2
;
}
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
/* SSE for P3/P4/K8 */
...
...
libavcodec/x86/fft.h
View file @
2fd5e708
...
...
@@ -24,13 +24,13 @@
void
ff_fft_permute_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_avx
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn2
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
ow
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_calc_3dn
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn
ow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
ow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
...
...
libavcodec/x86/fft_3dn2.c
View file @
2fd5e708
...
...
@@ -30,30 +30,30 @@ DECLARE_ALIGNED(8, static const unsigned int, m1m1)[2] = { 1U<<31, 1U<<31 };
"movq "#s","#d"\n"\
"psrlq $32,"#d"\n"\
"punpckldq "#s","#d"\n"
#define ff_fft_calc_3dn
2 ff_fft_calc_3dn
#define ff_fft_dispatch_3dn
2 ff_fft_dispatch_3dn
#define ff_fft_dispatch_interleave_3dn
2 ff_fft_dispatch_interleave_3dn
#define ff_imdct_calc_3dn
2 ff_imdct_calc_3dn
#define ff_imdct_half_3dn
2 ff_imdct_half_3dn
#define ff_fft_calc_3dn
ow2 ff_fft_calc_3dnow
#define ff_fft_dispatch_3dn
ow2 ff_fft_dispatch_3dnow
#define ff_fft_dispatch_interleave_3dn
ow2 ff_fft_dispatch_interleave_3dnow
#define ff_imdct_calc_3dn
ow2 ff_imdct_calc_3dnow
#define ff_imdct_half_3dn
ow2 ff_imdct_half_3dnow
#else
#define PSWAPD(s,d) "pswapd "#s","#d"\n"
#endif
void
ff_fft_dispatch_3dn2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_3dn2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_3dn
ow
2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_3dn
ow
2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_calc_3dn2
(
FFTContext
*
s
,
FFTComplex
*
z
)
void
ff_fft_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTComplex
*
z
)
{
int
n
=
1
<<
s
->
nbits
;
int
i
;
ff_fft_dispatch_interleave_3dn2
(
z
,
s
->
nbits
);
ff_fft_dispatch_interleave_3dn
ow
2
(
z
,
s
->
nbits
);
__asm__
volatile
(
"femms"
);
if
(
n
<=
8
)
for
(
i
=
0
;
i
<
n
;
i
+=
2
)
FFSWAP
(
FFTSample
,
z
[
i
].
im
,
z
[
i
+
1
].
re
);
}
void
ff_imdct_half_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
void
ff_imdct_half_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
x86_reg
j
,
k
;
long
n
=
s
->
mdct_size
;
...
...
@@ -101,7 +101,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
);
}
ff_fft_dispatch_3dn2
(
z
,
s
->
nbits
);
ff_fft_dispatch_3dn
ow
2
(
z
,
s
->
nbits
);
#define CMUL(j,mm0,mm1)\
"movq (%2,"#j",2), %%mm6 \n"\
...
...
@@ -144,13 +144,13 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
__asm__
volatile
(
"femms"
);
}
void
ff_imdct_calc_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
void
ff_imdct_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
x86_reg
j
,
k
;
long
n
=
s
->
mdct_size
;
long
n4
=
n
>>
2
;
ff_imdct_half_3dn2
(
s
,
output
+
n4
,
input
);
ff_imdct_half_3dn
ow
2
(
s
,
output
+
n4
,
input
);
j
=
-
n
;
k
=
n
-
8
;
...
...
libavcodec/x86/fft_mmx.asm
View file @
2fd5e708
...
...
@@ -297,7 +297,7 @@ IF%1 mova Z(1), m5
%define
Z2
(
x
)
[
r0
+
mmsize
*
x
]
%define
ZH
(
x
)
[
r0
+
mmsize
*
x
+
mmsize
/
2
]
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
align
16
...
...
@@ -391,7 +391,7 @@ fft32_interleave_avx:
%endif
INIT_XMM
INIT_XMM
sse
%define
movdqa
movaps
align
16
...
...
@@ -440,11 +440,9 @@ fft16_sse:
ret
INIT_MMX
%macro
FFT48_3DN
1
%macro
FFT48_3DN
0
align
16
fft4
%1
:
fft4
_
%
+
cpuname
:
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
mova
m2
,
Z
(
2
)
mova
m3
,
Z
(
3
)
...
...
@@ -458,7 +456,7 @@ fft4%1:
ret
align
16
fft8
%1
:
fft8
_
%
+
cpuname
:
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
mova
m2
,
Z
(
2
)
mova
m3
,
Z
(
3
)
...
...
@@ -496,7 +494,8 @@ fft8%1:
ret
%endmacro
FFT48_3DN
_3dn2
INIT_MMX
3
dnow2
FFT48_3DN
%macro
pswapd
2
%ifidn
%1
,
%2
...
...
@@ -509,7 +508,8 @@ FFT48_3DN _3dn2
%endif
%endmacro
FFT48_3DN
_3dn
INIT_MMX
3
dnow
FFT48_3DN
%define
Z
(
x
)
[
zq
+
o1q
*
(
x
&
6
)
+
mmsize
*
(
x
&
1
)
]
...
...
@@ -533,7 +533,7 @@ DEFINE_ARGS z, w, n, o1, o3
rep
ret
%endmacro
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
%macro
INTERL_AVX
5
...
...
@@ -551,7 +551,7 @@ DECL_PASS pass_avx, PASS_BIG 1
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
%endif
INIT_XMM
INIT_XMM
sse
%macro
INTERL_SSE
5
mova
%3
,
%2
...
...
@@ -566,16 +566,16 @@ INIT_XMM
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
INIT_MMX
INIT_MMX
3
dnow
%define
mulps
pfmul
%define
addps
pfadd
%define
subps
pfsub
%define
unpcklps
punpckldq
%define
unpckhps
punpckhdq
DECL_PASS
pass_3dn
,
PASS_SMALL
1
,
[wq],
[
wq
+
o1q
]
DECL_PASS
pass_interleave_3dn
,
PASS_BIG
0
%define
pass_3dn
2
pass_3dn
%define
pass_interleave_3dn
2
pass_interleave_3dn
DECL_PASS
pass_3dn
ow
,
PASS_SMALL
1
,
[wq],
[
wq
+
o1q
]
DECL_PASS
pass_interleave_3dn
ow
,
PASS_BIG
0
%define
pass_3dn
ow2
pass_3dnow
%define
pass_interleave_3dn
ow2
pass_interleave_3dnow
%ifdef
PIC
%define
SECTION_REL
-
$$
...
...
@@ -593,67 +593,70 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
call
r2
%endmacro
; FFT_DISPATCH
%macro
DECL_FFT
2
-
3
; nbits, cpu, suffix
%xdefine
list_of_fft
fft4
%2
SECTION_REL
,
fft8
%2
SECTION_REL
%macro
DECL_FFT
1
-
2
; nbits, cpu, suffix
%xdefine
cpusuffix
_
%
+
cpuname
%xdefine
fullsuffix
%2
_
%
+
cpuname
%xdefine
list_of_fft
fft4
%
+
cpusuffix
SECTION_REL
,
fft8
%
+
cpusuffix
SECTION_REL
%if
%1
>=
5
%xdefine
list_of_fft
list_of_fft
,
fft16
%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft16
%
+
cpusuffix
SECTION_REL
%endif
%if
%1
>=
6
%xdefine
list_of_fft
list_of_fft
,
fft32
%3%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft32
%
+
fullsuffix
SECTION_REL
%endif
%assign
n
1
<<
%1
%rep
17
-
%1
%assign
n2
n
/
2
%assign
n4
n
/
4
%xdefine
list_of_fft
list_of_fft
,
fft
%
+
n
%
+
%3%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft
%
+
n
%
+
fullsuffix
SECTION_REL
align
16
fft
%
+
n
%
+
%3%2
:
call
fft
%
+
n2
%
+
%2
fft
%
+
n
%
+
fullsuffix
:
call
fft
%
+
n2
%
+
cpusuffix
add
r0
,
n
*
4
-
(
n
&
(
-
2
<<
%1
))
call
fft
%
+
n4
%
+
%2
call
fft
%
+
n4
%
+
cpusuffix
add
r0
,
n
*
2
-
(
n2
&
(
-
2
<<
%1
))
call
fft
%
+
n4
%
+
%2
call
fft
%
+
n4
%
+
cpusuffix
sub
r0
,
n
*
6
+
(
n2
&
(
-
2
<<
%1
))
lea
r1
,
[
cos_
%
+
n
]
mov
r2d
,
n4
/
2
jmp
pass
%3
%2
jmp
pass
%
+
fullsuffix
%assign
n
n
*
2
%endrep
%undef
n
align
8
dispatch_tab
%3%2
:
pointer
list_of_fft
dispatch_tab
%
+
fullsuffix
:
pointer
list_of_fft
section
.
text
; On x86_32, this function does the register saving and restoring for all of fft.
; The others pass args in registers and don't spill anything.
cglobal
fft_dispatch
%
3%
2
,
2
,
5
,
8
,
z
,
nbits
FFT_DISPATCH
%3%2
,
nbits
%if
idn
%2
,
_avx
cglobal
fft_dispatch
%2
,
2
,
5
,
8
,
z
,
nbits
FFT_DISPATCH
fullsuffix
,
nbits
%if
mmsize
==
32
vzeroupper
%endif
RET
%endmacro
; DECL_FFT
%if
HAVE_AVX
INIT_YMM
DECL_FFT
6
,
_avx
DECL_FFT
6
,
_
avx
,
_
interleave
INIT_YMM
avx
DECL_FFT
6
DECL_FFT
6
,
_interleave
%endif
INIT_XMM
DECL_FFT
5
,
_sse
DECL_FFT
5
,
_sse
,
_interleave
INIT_MMX
DECL_FFT
4
,
_3dn
DECL_FFT
4
,
_3dn
,
_interleave
DECL_FFT
4
,
_3dn2
DECL_FFT
4
,
_3dn2
,
_interleave
INIT_XMM
INIT_XMM
sse
DECL_FFT
5
DECL_FFT
5
,
_interleave
INIT_MMX
3
dnow
DECL_FFT
4
DECL_FFT
4
,
_interleave
INIT_MMX
3
dnow2
DECL_FFT
4
DECL_FFT
4
,
_interleave
INIT_XMM
sse
%undef
mulps
%undef
addps
%undef
subps
...
...
@@ -749,8 +752,8 @@ INIT_XMM
jl
.
post
%endmacro
%macro
DECL_IMDCT
2
cglobal
imdct_half
%1
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
%macro
DECL_IMDCT
1
cglobal
imdct_half
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
%if
ARCH_X86_64
%define
rrevtab
r7
%define
rtcos
r8
...
...
@@ -822,7 +825,7 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
mov
r0
,
r1
mov
r1d
,
[
r5
+
FFTContext
.
nbits
]
FFT_DISPATCH
%1
,
r1
FFT_DISPATCH
_
%
+
cpuname
,
r1
mov
r0d
,
[
r5
+
FFTContext
.
mdctsize
]
add
r6
,
r0
...
...
@@ -836,20 +839,20 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
neg
r0
mov
r1
,
-
mmsize
sub
r1
,
r0
%
2
r0
,
r1
,
r6
,
rtcos
,
rtsin
%
1
r0
,
r1
,
r6
,
rtcos
,
rtsin
%if
ARCH_X86_64
==
0
add
esp
,
12
%endif
%if
idn
avx_enabled
,
1
%if
mmsize
==
32
vzeroupper
%endif
RET
%endmacro
DECL_IMDCT
_sse
,
POSROTATESHUF
DECL_IMDCT
POSROTATESHUF
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
DECL_IMDCT
_avx
,
POSROTATESHUF_AVX
DECL_IMDCT
POSROTATESHUF_AVX
%endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment