Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
2fd5e708
Commit
2fd5e708
authored
May 27, 2012
by
Vitor Sessak
Committed by
Michael Niedermayer
May 27, 2012
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: use new schema for ASM macros
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
65212e3e
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
79 additions
and
76 deletions
+79
-76
fft.c
libavcodec/x86/fft.c
+6
-6
fft.h
libavcodec/x86/fft.h
+6
-6
fft_3dn2.c
libavcodec/x86/fft_3dn2.c
+13
-13
fft_mmx.asm
libavcodec/x86/fft_mmx.asm
+54
-51
No files found.
libavcodec/x86/fft.c
View file @
2fd5e708
...
@@ -27,15 +27,15 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
...
@@ -27,15 +27,15 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
int
has_vectors
=
av_get_cpu_flags
();
int
has_vectors
=
av_get_cpu_flags
();
if
(
has_vectors
&
AV_CPU_FLAG_3DNOW
&&
HAVE_AMD3DNOW
)
{
if
(
has_vectors
&
AV_CPU_FLAG_3DNOW
&&
HAVE_AMD3DNOW
)
{
/* 3DNow! for K6-2/3 */
/* 3DNow! for K6-2/3 */
s
->
imdct_calc
=
ff_imdct_calc_3dn
;
s
->
imdct_calc
=
ff_imdct_calc_3dn
ow
;
s
->
imdct_half
=
ff_imdct_half_3dn
;
s
->
imdct_half
=
ff_imdct_half_3dn
ow
;
s
->
fft_calc
=
ff_fft_calc_3dn
;
s
->
fft_calc
=
ff_fft_calc_3dn
ow
;
}
}
if
(
has_vectors
&
AV_CPU_FLAG_3DNOWEXT
&&
HAVE_AMD3DNOWEXT
)
{
if
(
has_vectors
&
AV_CPU_FLAG_3DNOWEXT
&&
HAVE_AMD3DNOWEXT
)
{
/* 3DNowEx for K7 */
/* 3DNowEx for K7 */
s
->
imdct_calc
=
ff_imdct_calc_3dn2
;
s
->
imdct_calc
=
ff_imdct_calc_3dn
ow
2
;
s
->
imdct_half
=
ff_imdct_half_3dn2
;
s
->
imdct_half
=
ff_imdct_half_3dn
ow
2
;
s
->
fft_calc
=
ff_fft_calc_3dn2
;
s
->
fft_calc
=
ff_fft_calc_3dn
ow
2
;
}
}
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
/* SSE for P3/P4/K8 */
/* SSE for P3/P4/K8 */
...
...
libavcodec/x86/fft.h
View file @
2fd5e708
...
@@ -24,13 +24,13 @@
...
@@ -24,13 +24,13 @@
void
ff_fft_permute_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_permute_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_avx
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_avx
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
ow
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn2
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_calc_3dn
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn
ow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
ow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
...
...
libavcodec/x86/fft_3dn2.c
View file @
2fd5e708
...
@@ -30,30 +30,30 @@ DECLARE_ALIGNED(8, static const unsigned int, m1m1)[2] = { 1U<<31, 1U<<31 };
...
@@ -30,30 +30,30 @@ DECLARE_ALIGNED(8, static const unsigned int, m1m1)[2] = { 1U<<31, 1U<<31 };
"movq "#s","#d"\n"\
"movq "#s","#d"\n"\
"psrlq $32,"#d"\n"\
"psrlq $32,"#d"\n"\
"punpckldq "#s","#d"\n"
"punpckldq "#s","#d"\n"
#define ff_fft_calc_3dn
2 ff_fft_calc_3dn
#define ff_fft_calc_3dn
ow2 ff_fft_calc_3dnow
#define ff_fft_dispatch_3dn
2 ff_fft_dispatch_3dn
#define ff_fft_dispatch_3dn
ow2 ff_fft_dispatch_3dnow
#define ff_fft_dispatch_interleave_3dn
2 ff_fft_dispatch_interleave_3dn
#define ff_fft_dispatch_interleave_3dn
ow2 ff_fft_dispatch_interleave_3dnow
#define ff_imdct_calc_3dn
2 ff_imdct_calc_3dn
#define ff_imdct_calc_3dn
ow2 ff_imdct_calc_3dnow
#define ff_imdct_half_3dn
2 ff_imdct_half_3dn
#define ff_imdct_half_3dn
ow2 ff_imdct_half_3dnow
#else
#else
#define PSWAPD(s,d) "pswapd "#s","#d"\n"
#define PSWAPD(s,d) "pswapd "#s","#d"\n"
#endif
#endif
void
ff_fft_dispatch_3dn2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_3dn
ow
2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_3dn2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_3dn
ow
2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_calc_3dn2
(
FFTContext
*
s
,
FFTComplex
*
z
)
void
ff_fft_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTComplex
*
z
)
{
{
int
n
=
1
<<
s
->
nbits
;
int
n
=
1
<<
s
->
nbits
;
int
i
;
int
i
;
ff_fft_dispatch_interleave_3dn2
(
z
,
s
->
nbits
);
ff_fft_dispatch_interleave_3dn
ow
2
(
z
,
s
->
nbits
);
__asm__
volatile
(
"femms"
);
__asm__
volatile
(
"femms"
);
if
(
n
<=
8
)
if
(
n
<=
8
)
for
(
i
=
0
;
i
<
n
;
i
+=
2
)
for
(
i
=
0
;
i
<
n
;
i
+=
2
)
FFSWAP
(
FFTSample
,
z
[
i
].
im
,
z
[
i
+
1
].
re
);
FFSWAP
(
FFTSample
,
z
[
i
].
im
,
z
[
i
+
1
].
re
);
}
}
void
ff_imdct_half_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
void
ff_imdct_half_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
{
x86_reg
j
,
k
;
x86_reg
j
,
k
;
long
n
=
s
->
mdct_size
;
long
n
=
s
->
mdct_size
;
...
@@ -101,7 +101,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
...
@@ -101,7 +101,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
);
);
}
}
ff_fft_dispatch_3dn2
(
z
,
s
->
nbits
);
ff_fft_dispatch_3dn
ow
2
(
z
,
s
->
nbits
);
#define CMUL(j,mm0,mm1)\
#define CMUL(j,mm0,mm1)\
"movq (%2,"#j",2), %%mm6 \n"\
"movq (%2,"#j",2), %%mm6 \n"\
...
@@ -144,13 +144,13 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
...
@@ -144,13 +144,13 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
__asm__
volatile
(
"femms"
);
__asm__
volatile
(
"femms"
);
}
}
void
ff_imdct_calc_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
void
ff_imdct_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
{
x86_reg
j
,
k
;
x86_reg
j
,
k
;
long
n
=
s
->
mdct_size
;
long
n
=
s
->
mdct_size
;
long
n4
=
n
>>
2
;
long
n4
=
n
>>
2
;
ff_imdct_half_3dn2
(
s
,
output
+
n4
,
input
);
ff_imdct_half_3dn
ow
2
(
s
,
output
+
n4
,
input
);
j
=
-
n
;
j
=
-
n
;
k
=
n
-
8
;
k
=
n
-
8
;
...
...
libavcodec/x86/fft_mmx.asm
View file @
2fd5e708
...
@@ -297,7 +297,7 @@ IF%1 mova Z(1), m5
...
@@ -297,7 +297,7 @@ IF%1 mova Z(1), m5
%define
Z2
(
x
)
[
r0
+
mmsize
*
x
]
%define
Z2
(
x
)
[
r0
+
mmsize
*
x
]
%define
ZH
(
x
)
[
r0
+
mmsize
*
x
+
mmsize
/
2
]
%define
ZH
(
x
)
[
r0
+
mmsize
*
x
+
mmsize
/
2
]
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
%if
HAVE_AVX
align
16
align
16
...
@@ -391,7 +391,7 @@ fft32_interleave_avx:
...
@@ -391,7 +391,7 @@ fft32_interleave_avx:
%endif
%endif
INIT_XMM
INIT_XMM
sse
%define
movdqa
movaps
%define
movdqa
movaps
align
16
align
16
...
@@ -440,11 +440,9 @@ fft16_sse:
...
@@ -440,11 +440,9 @@ fft16_sse:
ret
ret
INIT_MMX
%macro
FFT48_3DN
0
%macro
FFT48_3DN
1
align
16
align
16
fft4
%1
:
fft4
_
%
+
cpuname
:
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
mova
m2
,
Z
(
2
)
mova
m2
,
Z
(
2
)
mova
m3
,
Z
(
3
)
mova
m3
,
Z
(
3
)
...
@@ -458,7 +456,7 @@ fft4%1:
...
@@ -458,7 +456,7 @@ fft4%1:
ret
ret
align
16
align
16
fft8
%1
:
fft8
_
%
+
cpuname
:
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
mova
m2
,
Z
(
2
)
mova
m2
,
Z
(
2
)
mova
m3
,
Z
(
3
)
mova
m3
,
Z
(
3
)
...
@@ -496,7 +494,8 @@ fft8%1:
...
@@ -496,7 +494,8 @@ fft8%1:
ret
ret
%endmacro
%endmacro
FFT48_3DN
_3dn2
INIT_MMX
3
dnow2
FFT48_3DN
%macro
pswapd
2
%macro
pswapd
2
%ifidn
%1
,
%2
%ifidn
%1
,
%2
...
@@ -509,7 +508,8 @@ FFT48_3DN _3dn2
...
@@ -509,7 +508,8 @@ FFT48_3DN _3dn2
%endif
%endif
%endmacro
%endmacro
FFT48_3DN
_3dn
INIT_MMX
3
dnow
FFT48_3DN
%define
Z
(
x
)
[
zq
+
o1q
*
(
x
&
6
)
+
mmsize
*
(
x
&
1
)
]
%define
Z
(
x
)
[
zq
+
o1q
*
(
x
&
6
)
+
mmsize
*
(
x
&
1
)
]
...
@@ -533,7 +533,7 @@ DEFINE_ARGS z, w, n, o1, o3
...
@@ -533,7 +533,7 @@ DEFINE_ARGS z, w, n, o1, o3
rep
ret
rep
ret
%endmacro
%endmacro
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
%if
HAVE_AVX
%macro
INTERL_AVX
5
%macro
INTERL_AVX
5
...
@@ -551,7 +551,7 @@ DECL_PASS pass_avx, PASS_BIG 1
...
@@ -551,7 +551,7 @@ DECL_PASS pass_avx, PASS_BIG 1
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
%endif
%endif
INIT_XMM
INIT_XMM
sse
%macro
INTERL_SSE
5
%macro
INTERL_SSE
5
mova
%3
,
%2
mova
%3
,
%2
...
@@ -566,16 +566,16 @@ INIT_XMM
...
@@ -566,16 +566,16 @@ INIT_XMM
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
INIT_MMX
INIT_MMX
3
dnow
%define
mulps
pfmul
%define
mulps
pfmul
%define
addps
pfadd
%define
addps
pfadd
%define
subps
pfsub
%define
subps
pfsub
%define
unpcklps
punpckldq
%define
unpcklps
punpckldq
%define
unpckhps
punpckhdq
%define
unpckhps
punpckhdq
DECL_PASS
pass_3dn
,
PASS_SMALL
1
,
[wq],
[
wq
+
o1q
]
DECL_PASS
pass_3dn
ow
,
PASS_SMALL
1
,
[wq],
[
wq
+
o1q
]
DECL_PASS
pass_interleave_3dn
,
PASS_BIG
0
DECL_PASS
pass_interleave_3dn
ow
,
PASS_BIG
0
%define
pass_3dn
2
pass_3dn
%define
pass_3dn
ow2
pass_3dnow
%define
pass_interleave_3dn
2
pass_interleave_3dn
%define
pass_interleave_3dn
ow2
pass_interleave_3dnow
%ifdef
PIC
%ifdef
PIC
%define
SECTION_REL
-
$$
%define
SECTION_REL
-
$$
...
@@ -593,67 +593,70 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
...
@@ -593,67 +593,70 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
call
r2
call
r2
%endmacro
; FFT_DISPATCH
%endmacro
; FFT_DISPATCH
%macro
DECL_FFT
2
-
3
; nbits, cpu, suffix
%macro
DECL_FFT
1
-
2
; nbits, cpu, suffix
%xdefine
list_of_fft
fft4
%2
SECTION_REL
,
fft8
%2
SECTION_REL
%xdefine
cpusuffix
_
%
+
cpuname
%xdefine
fullsuffix
%2
_
%
+
cpuname
%xdefine
list_of_fft
fft4
%
+
cpusuffix
SECTION_REL
,
fft8
%
+
cpusuffix
SECTION_REL
%if
%1
>=
5
%if
%1
>=
5
%xdefine
list_of_fft
list_of_fft
,
fft16
%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft16
%
+
cpusuffix
SECTION_REL
%endif
%endif
%if
%1
>=
6
%if
%1
>=
6
%xdefine
list_of_fft
list_of_fft
,
fft32
%3%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft32
%
+
fullsuffix
SECTION_REL
%endif
%endif
%assign
n
1
<<
%1
%assign
n
1
<<
%1
%rep
17
-
%1
%rep
17
-
%1
%assign
n2
n
/
2
%assign
n2
n
/
2
%assign
n4
n
/
4
%assign
n4
n
/
4
%xdefine
list_of_fft
list_of_fft
,
fft
%
+
n
%
+
%3%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft
%
+
n
%
+
fullsuffix
SECTION_REL
align
16
align
16
fft
%
+
n
%
+
%3%2
:
fft
%
+
n
%
+
fullsuffix
:
call
fft
%
+
n2
%
+
%2
call
fft
%
+
n2
%
+
cpusuffix
add
r0
,
n
*
4
-
(
n
&
(
-
2
<<
%1
))
add
r0
,
n
*
4
-
(
n
&
(
-
2
<<
%1
))
call
fft
%
+
n4
%
+
%2
call
fft
%
+
n4
%
+
cpusuffix
add
r0
,
n
*
2
-
(
n2
&
(
-
2
<<
%1
))
add
r0
,
n
*
2
-
(
n2
&
(
-
2
<<
%1
))
call
fft
%
+
n4
%
+
%2
call
fft
%
+
n4
%
+
cpusuffix
sub
r0
,
n
*
6
+
(
n2
&
(
-
2
<<
%1
))
sub
r0
,
n
*
6
+
(
n2
&
(
-
2
<<
%1
))
lea
r1
,
[
cos_
%
+
n
]
lea
r1
,
[
cos_
%
+
n
]
mov
r2d
,
n4
/
2
mov
r2d
,
n4
/
2
jmp
pass
%3
%2
jmp
pass
%
+
fullsuffix
%assign
n
n
*
2
%assign
n
n
*
2
%endrep
%endrep
%undef
n
%undef
n
align
8
align
8
dispatch_tab
%3%2
:
pointer
list_of_fft
dispatch_tab
%
+
fullsuffix
:
pointer
list_of_fft
section
.
text
section
.
text
; On x86_32, this function does the register saving and restoring for all of fft.
; On x86_32, this function does the register saving and restoring for all of fft.
; The others pass args in registers and don't spill anything.
; The others pass args in registers and don't spill anything.
cglobal
fft_dispatch
%
3%
2
,
2
,
5
,
8
,
z
,
nbits
cglobal
fft_dispatch
%2
,
2
,
5
,
8
,
z
,
nbits
FFT_DISPATCH
%3%2
,
nbits
FFT_DISPATCH
fullsuffix
,
nbits
%if
idn
%2
,
_avx
%if
mmsize
==
32
vzeroupper
vzeroupper
%endif
%endif
RET
RET
%endmacro
; DECL_FFT
%endmacro
; DECL_FFT
%if
HAVE_AVX
%if
HAVE_AVX
INIT_YMM
INIT_YMM
avx
DECL_FFT
6
,
_avx
DECL_FFT
6
DECL_FFT
6
,
_
avx
,
_
interleave
DECL_FFT
6
,
_interleave
%endif
%endif
INIT_XMM
INIT_XMM
sse
DECL_FFT
5
,
_sse
DECL_FFT
5
DECL_FFT
5
,
_sse
,
_interleave
DECL_FFT
5
,
_interleave
INIT_MMX
INIT_MMX
3
dnow
DECL_FFT
4
,
_3dn
DECL_FFT
4
DECL_FFT
4
,
_3dn
,
_interleave
DECL_FFT
4
,
_interleave
DECL_FFT
4
,
_3dn2
INIT_MMX
3
dnow2
DECL_FFT
4
,
_3dn2
,
_interleave
DECL_FFT
4
DECL_FFT
4
,
_interleave
INIT_XMM
INIT_XMM
sse
%undef
mulps
%undef
mulps
%undef
addps
%undef
addps
%undef
subps
%undef
subps
...
@@ -749,8 +752,8 @@ INIT_XMM
...
@@ -749,8 +752,8 @@ INIT_XMM
jl
.
post
jl
.
post
%endmacro
%endmacro
%macro
DECL_IMDCT
2
%macro
DECL_IMDCT
1
cglobal
imdct_half
%1
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
cglobal
imdct_half
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
%if
ARCH_X86_64
%if
ARCH_X86_64
%define
rrevtab
r7
%define
rrevtab
r7
%define
rtcos
r8
%define
rtcos
r8
...
@@ -822,7 +825,7 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
...
@@ -822,7 +825,7 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
mov
r0
,
r1
mov
r0
,
r1
mov
r1d
,
[
r5
+
FFTContext
.
nbits
]
mov
r1d
,
[
r5
+
FFTContext
.
nbits
]
FFT_DISPATCH
%1
,
r1
FFT_DISPATCH
_
%
+
cpuname
,
r1
mov
r0d
,
[
r5
+
FFTContext
.
mdctsize
]
mov
r0d
,
[
r5
+
FFTContext
.
mdctsize
]
add
r6
,
r0
add
r6
,
r0
...
@@ -836,20 +839,20 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
...
@@ -836,20 +839,20 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
neg
r0
neg
r0
mov
r1
,
-
mmsize
mov
r1
,
-
mmsize
sub
r1
,
r0
sub
r1
,
r0
%
2
r0
,
r1
,
r6
,
rtcos
,
rtsin
%
1
r0
,
r1
,
r6
,
rtcos
,
rtsin
%if
ARCH_X86_64
==
0
%if
ARCH_X86_64
==
0
add
esp
,
12
add
esp
,
12
%endif
%endif
%if
idn
avx_enabled
,
1
%if
mmsize
==
32
vzeroupper
vzeroupper
%endif
%endif
RET
RET
%endmacro
%endmacro
DECL_IMDCT
_sse
,
POSROTATESHUF
DECL_IMDCT
POSROTATESHUF
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
%if
HAVE_AVX
DECL_IMDCT
_avx
,
POSROTATESHUF_AVX
DECL_IMDCT
POSROTATESHUF_AVX
%endif
%endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment