Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
1a094af6
Commit
1a094af6
authored
Jan 30, 2016
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fft: Split MDCT bits off from FFT
parent
4d13bcce
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
463 additions
and
243 deletions
+463
-243
Makefile
libavcodec/aarch64/Makefile
+1
-0
fft_init_aarch64.c
libavcodec/aarch64/fft_init_aarch64.c
+0
-12
mdct_init.c
libavcodec/aarch64/mdct_init.c
+39
-0
Makefile
libavcodec/arm/Makefile
+2
-0
fft_fixed_init_arm.c
libavcodec/arm/fft_fixed_init_arm.c
+0
-10
fft_init_arm.c
libavcodec/arm/fft_init_arm.c
+0
-15
mdct_fixed_init_arm.c
libavcodec/arm/mdct_fixed_init_arm.c
+40
-0
mdct_init_arm.c
libavcodec/arm/mdct_init_arm.c
+47
-0
fft.h
libavcodec/fft.h
+7
-0
fft_template.c
libavcodec/fft_template.c
+0
-7
mdct_template.c
libavcodec/mdct_template.c
+20
-0
Makefile
libavcodec/ppc/Makefile
+1
-0
fft_init.c
libavcodec/ppc/fft_init.c
+2
-122
mdct_init.c
libavcodec/ppc/mdct_init.c
+154
-0
Makefile
libavcodec/x86/Makefile
+1
-0
fft.asm
libavcodec/x86/fft.asm
+66
-62
fft.h
libavcodec/x86/fft.h
+0
-8
fft_init.c
libavcodec/x86/fft_init.c
+0
-7
mdct.h
libavcodec/x86/mdct.h
+32
-0
mdct_init.c
libavcodec/x86/mdct_init.c
+51
-0
No files found.
libavcodec/aarch64/Makefile
View file @
1a094af6
...
...
@@ -7,6 +7,7 @@ OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
OBJS-$(CONFIG_H264QPEL)
+=
aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_HPELDSP)
+=
aarch64/hpeldsp_init_aarch64.o
OBJS-$(CONFIG_IMDCT15)
+=
aarch64/imdct15_init.o
OBJS-$(CONFIG_MDCT)
+=
aarch64/mdct_init.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
aarch64/mpegaudiodsp_init.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST)
+=
aarch64/neontest.o
OBJS-$(CONFIG_VIDEODSP)
+=
aarch64/videodsp_init.o
...
...
libavcodec/aarch64/fft_init_aarch64.c
View file @
1a094af6
...
...
@@ -18,8 +18,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
...
...
@@ -29,10 +27,6 @@
void
ff_fft_permute_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_mdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
av_cold
void
ff_fft_init_aarch64
(
FFTContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -40,11 +34,5 @@ av_cold void ff_fft_init_aarch64(FFTContext *s)
if
(
have_neon
(
cpu_flags
))
{
s
->
fft_permute
=
ff_fft_permute_neon
;
s
->
fft_calc
=
ff_fft_calc_neon
;
#if CONFIG_MDCT
s
->
imdct_calc
=
ff_imdct_calc_neon
;
s
->
imdct_half
=
ff_imdct_half_neon
;
s
->
mdct_calc
=
ff_mdct_calc_neon
;
s
->
mdct_permutation
=
FF_MDCT_PERM_INTERLEAVE
;
#endif
}
}
libavcodec/aarch64/mdct_init.c
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/fft.h"
void
ff_imdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_mdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
/**
 * Hook the AArch64 NEON MDCT/IMDCT routines into an FFT context.
 *
 * The context is left untouched unless have_neon() accepts the flags
 * returned by av_get_cpu_flags().
 *
 * @param s context whose imdct_calc, imdct_half, mdct_calc and
 *          mdct_permutation fields are overwritten
 */
av_cold void ff_mdct_init_aarch64(FFTContext *s)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to install without NEON. */
    if (!have_neon(flags)) {
        return;
    }

    /* The NEON routines are selected together with the interleaved
     * MDCT permutation. */
    s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
    s->mdct_calc        = ff_mdct_calc_neon;
    s->imdct_half       = ff_imdct_half_neon;
    s->imdct_calc       = ff_imdct_calc_neon;
}
libavcodec/arm/Makefile
View file @
1a094af6
...
...
@@ -21,6 +21,8 @@ OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \
OBJS-$(CONFIG_FLACDSP)
+=
arm/flacdsp_init_arm.o
\
arm/flacdsp_arm.o
OBJS-$(CONFIG_G722DSP)
+=
arm/g722dsp_init_arm.o
OBJS-$(CONFIG_MDCT)
+=
arm/mdct_init_arm.o
\
arm/mdct_fixed_init_arm.o
OBJS-$(CONFIG_ME_CMP)
+=
arm/me_cmp_init_arm.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
arm/mpegaudiodsp_init_arm.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
arm/mpegvideo_arm.o
...
...
libavcodec/arm/fft_fixed_init_arm.c
View file @
1a094af6
...
...
@@ -26,8 +26,6 @@
#include "libavcodec/fft.h"
void
ff_fft_fixed_calc_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_mdct_fixed_calc_neon
(
FFTContext
*
s
,
FFTSample
*
o
,
const
FFTSample
*
i
);
void
ff_mdct_fixed_calcw_neon
(
FFTContext
*
s
,
FFTDouble
*
o
,
const
FFTSample
*
i
);
av_cold
void
ff_fft_fixed_init_arm
(
FFTContext
*
s
)
{
...
...
@@ -36,13 +34,5 @@ av_cold void ff_fft_fixed_init_arm(FFTContext *s)
if
(
have_neon
(
cpu_flags
))
{
s
->
fft_permutation
=
FF_FFT_PERM_SWAP_LSBS
;
s
->
fft_calc
=
ff_fft_fixed_calc_neon
;
#if CONFIG_MDCT
if
(
!
s
->
inverse
&&
s
->
nbits
>=
3
)
{
s
->
mdct_permutation
=
FF_MDCT_PERM_INTERLEAVE
;
s
->
mdct_calc
=
ff_mdct_fixed_calc_neon
;
s
->
mdct_calcw
=
ff_mdct_fixed_calcw_neon
;
}
#endif
}
}
libavcodec/arm/fft_init_arm.c
View file @
1a094af6
...
...
@@ -29,31 +29,16 @@ void ff_fft_calc_vfp(FFTContext *s, FFTComplex *z);
void
ff_fft_permute_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_half_vfp
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_mdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
av_cold
void
ff_fft_init_arm
(
FFTContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
if
(
have_vfp_vm
(
cpu_flags
))
{
s
->
fft_calc
=
ff_fft_calc_vfp
;
#if CONFIG_MDCT
s
->
imdct_half
=
ff_imdct_half_vfp
;
#endif
}
if
(
have_neon
(
cpu_flags
))
{
s
->
fft_permute
=
ff_fft_permute_neon
;
s
->
fft_calc
=
ff_fft_calc_neon
;
#if CONFIG_MDCT
s
->
imdct_calc
=
ff_imdct_calc_neon
;
s
->
imdct_half
=
ff_imdct_half_neon
;
s
->
mdct_calc
=
ff_mdct_calc_neon
;
s
->
mdct_permutation
=
FF_MDCT_PERM_INTERLEAVE
;
#endif
}
}
libavcodec/arm/mdct_fixed_init_arm.c
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#define FFT_FLOAT 0
#include "libavcodec/fft.h"
void
ff_mdct_fixed_calc_neon
(
FFTContext
*
s
,
FFTSample
*
o
,
const
FFTSample
*
i
);
void
ff_mdct_fixed_calcw_neon
(
FFTContext
*
s
,
FFTDouble
*
o
,
const
FFTSample
*
i
);
/**
 * Hook the ARM NEON fixed-point MDCT routines into an FFT context.
 *
 * The NEON functions are installed only when all of the following hold:
 * have_neon() accepts the current CPU flags, the context is not set up
 * for the inverse transform, and s->nbits is at least 3. Otherwise the
 * context is returned unchanged.
 *
 * @param s context whose mdct_calc, mdct_calcw and mdct_permutation
 *          fields may be overwritten
 */
av_cold void ff_mdct_fixed_init_arm(FFTContext *s)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags)) {
        return;
    }

    /* Only the forward transform of sufficient size has a NEON version. */
    if (s->inverse || s->nbits < 3) {
        return;
    }

    s->mdct_calcw       = ff_mdct_fixed_calcw_neon;
    s->mdct_calc        = ff_mdct_fixed_calc_neon;
    s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
}
libavcodec/arm/mdct_init_arm.c
0 → 100644
View file @
1a094af6
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/fft.h"
void
ff_imdct_half_vfp
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_mdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
/**
 * Hook the ARM VFP/NEON floating-point MDCT routines into an FFT context.
 *
 * The VFP check runs first and only replaces imdct_half. The NEON check
 * runs afterwards and, when it succeeds, replaces imdct_calc, imdct_half
 * and mdct_calc and selects the interleaved MDCT permutation, so NEON
 * takes precedence over VFP for imdct_half.
 *
 * @param s context whose MDCT function pointers may be overwritten
 */
av_cold void ff_mdct_init_arm(FFTContext *s)
{
    const int flags = av_get_cpu_flags();

    if (have_vfp_vm(flags)) {
        s->imdct_half = ff_imdct_half_vfp;
    }

    /* Without NEON, keep whatever has been chosen so far. */
    if (!have_neon(flags)) {
        return;
    }

    s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
    s->mdct_calc        = ff_mdct_calc_neon;
    s->imdct_half       = ff_imdct_half_neon;
    s->imdct_calc       = ff_imdct_calc_neon;
}
libavcodec/fft.h
View file @
1a094af6
...
...
@@ -154,4 +154,11 @@ void ff_fft_end(FFTContext *s);
int
ff_mdct_init
(
FFTContext
*
s
,
int
nbits
,
int
inverse
,
double
scale
);
void
ff_mdct_end
(
FFTContext
*
s
);
void
ff_mdct_init_aarch64
(
FFTContext
*
s
);
void
ff_mdct_init_arm
(
FFTContext
*
s
);
void
ff_mdct_init_ppc
(
FFTContext
*
s
);
void
ff_mdct_init_x86
(
FFTContext
*
s
);
void
ff_mdct_fixed_init_arm
(
FFTContext
*
s
);
#endif
/* AVCODEC_FFT_H */
libavcodec/fft_template.c
View file @
1a094af6
...
...
@@ -151,20 +151,13 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
s
->
fft_permute
=
fft_permute_c
;
s
->
fft_calc
=
fft_calc_c
;
#if CONFIG_MDCT
s
->
imdct_calc
=
ff_imdct_calc_c
;
s
->
imdct_half
=
ff_imdct_half_c
;
s
->
mdct_calc
=
ff_mdct_calc_c
;
#endif
#if FFT_FLOAT
if
(
ARCH_AARCH64
)
ff_fft_init_aarch64
(
s
);
if
(
ARCH_ARM
)
ff_fft_init_arm
(
s
);
if
(
ARCH_PPC
)
ff_fft_init_ppc
(
s
);
if
(
ARCH_X86
)
ff_fft_init_x86
(
s
);
if
(
CONFIG_MDCT
)
s
->
mdct_calcw
=
s
->
mdct_calc
;
#else
if
(
CONFIG_MDCT
)
s
->
mdct_calcw
=
ff_mdct_calcw_c
;
if
(
ARCH_ARM
)
ff_fft_fixed_init_arm
(
s
);
#endif
...
...
libavcodec/mdct_template.c
View file @
1a094af6
...
...
@@ -56,6 +56,26 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
if
(
ff_fft_init
(
s
,
s
->
mdct_bits
-
2
,
inverse
)
<
0
)
goto
fail
;
s
->
imdct_calc
=
ff_imdct_calc_c
;
s
->
imdct_half
=
ff_imdct_half_c
;
s
->
mdct_calc
=
ff_mdct_calc_c
;
#if FFT_FLOAT
if
(
ARCH_AARCH64
)
ff_mdct_init_aarch64
(
s
);
if
(
ARCH_ARM
)
ff_mdct_init_arm
(
s
);
if
(
ARCH_PPC
)
ff_mdct_init_ppc
(
s
);
if
(
ARCH_X86
)
ff_mdct_init_x86
(
s
);
s
->
mdct_calcw
=
s
->
mdct_calc
;
#else
s
->
mdct_calcw
=
ff_mdct_calcw_c
;
if
(
ARCH_ARM
)
ff_mdct_fixed_init_arm
(
s
);
#endif
s
->
tcos
=
av_malloc
(
n
/
2
*
sizeof
(
FFTSample
));
if
(
!
s
->
tcos
)
goto
fail
;
...
...
libavcodec/ppc/Makefile
View file @
1a094af6
...
...
@@ -11,6 +11,7 @@ OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o
OBJS-$(CONFIG_HUFFYUVDSP)
+=
ppc/huffyuvdsp_altivec.o
OBJS-$(CONFIG_FDCTDSP)
+=
ppc/fdctdsp.o
OBJS-$(CONFIG_IDCTDSP)
+=
ppc/idctdsp.o
OBJS-$(CONFIG_MDCT)
+=
ppc/mdct_init.o
OBJS-$(CONFIG_ME_CMP)
+=
ppc/me_cmp.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
ppc/mpegvideo_altivec.o
\
...
...
libavcodec/ppc/fft_init.c
View file @
1a094af6
/*
* FFT/IFFT transforms
* AltiVec-enabled
* Copyright (c) 2009 Loren Merritt
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
...
...
@@ -21,126 +17,14 @@
*/
#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/fft.h"
/**
* Do a complex FFT with the parameters defined in ff_fft_init().
* The input data must be permuted before with s->revtab table.
* No 1.0 / sqrt(n) normalization is done.
* AltiVec-enabled:
* This code assumes that the 'z' pointer is 16 bytes-aligned.
* It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
*/
#include "libavcodec/fft.h"
void
ff_fft_calc_altivec
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_interleave_altivec
(
FFTContext
*
s
,
FFTComplex
*
z
);
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
static
void
imdct_half_altivec
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
int
j
,
k
;
int
n
=
1
<<
s
->
mdct_bits
;
int
n4
=
n
>>
2
;
int
n8
=
n
>>
3
;
int
n32
=
n
>>
5
;
const
uint16_t
*
revtabj
=
s
->
revtab
;
const
uint16_t
*
revtabk
=
s
->
revtab
+
n4
;
const
vec_f
*
tcos
=
(
const
vec_f
*
)(
s
->
tcos
+
n8
);
const
vec_f
*
tsin
=
(
const
vec_f
*
)(
s
->
tsin
+
n8
);
const
vec_f
*
pin
=
(
const
vec_f
*
)(
input
+
n4
);
vec_f
*
pout
=
(
vec_f
*
)(
output
+
n4
);
/* pre rotation */
k
=
n32
-
1
;
do
{
vec_f
cos
,
sin
,
cos0
,
sin0
,
cos1
,
sin1
,
re
,
im
,
r0
,
i0
,
r1
,
i1
,
a
,
b
,
c
,
d
;
#define CMULA(p,o0,o1,o2,o3)\
a = pin[ k*2+p];
/* { z[k].re, z[k].im, z[k+1].re, z[k+1].im } */
\
b = pin[-k*2-p-1];
/* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */
\
re = vec_perm(a, b, vcprm(0,2,s0,s2));
/* { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } */
\
im = vec_perm(a, b, vcprm(s3,s1,3,1));
/* { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } */
\
cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3));
/* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */
\
sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
r##p = im*cos - re*sin;\
i##p = re*cos + im*sin;
#define STORE2(v,dst)\
j = dst;\
vec_ste(v, 0, output+j*2);\
vec_ste(v, 4, output+j*2);
#define STORE8(p)\
a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
STORE2(a, revtabk[ p*2-4]);\
STORE2(b, revtabk[ p*2-3]);\
STORE2(c, revtabj[-p*2+2]);\
STORE2(d, revtabj[-p*2+3]);
cos0
=
tcos
[
k
];
sin0
=
tsin
[
k
];
cos1
=
tcos
[
-
k
-
1
];
sin1
=
tsin
[
-
k
-
1
];
CMULA
(
0
,
0
,
1
,
2
,
3
);
CMULA
(
1
,
2
,
3
,
0
,
1
);
STORE8
(
0
);
STORE8
(
1
);
revtabj
+=
4
;
revtabk
-=
4
;
k
--
;
}
while
(
k
>=
0
);
ff_fft_calc_altivec
(
s
,
(
FFTComplex
*
)
output
);
/* post rotation + reordering */
j
=
-
n32
;
k
=
n32
-
1
;
do
{
vec_f
cos
,
sin
,
re
,
im
,
a
,
b
,
c
,
d
;
#define CMULB(d0,d1,o)\
re = pout[o*2];\
im = pout[o*2+1];\
cos = tcos[o];\
sin = tsin[o];\
d0 = im*sin - re*cos;\
d1 = re*sin + im*cos;
CMULB
(
a
,
b
,
j
);
CMULB
(
c
,
d
,
k
);
pout
[
2
*
j
]
=
vec_perm
(
a
,
d
,
vcprm
(
0
,
s3
,
1
,
s2
));
pout
[
2
*
j
+
1
]
=
vec_perm
(
a
,
d
,
vcprm
(
2
,
s1
,
3
,
s0
));
pout
[
2
*
k
]
=
vec_perm
(
c
,
b
,
vcprm
(
0
,
s3
,
1
,
s2
));
pout
[
2
*
k
+
1
]
=
vec_perm
(
c
,
b
,
vcprm
(
2
,
s1
,
3
,
s0
));
j
++
;
k
--
;
}
while
(
k
>=
0
);
}
static
void
imdct_calc_altivec
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
int
k
;
int
n
=
1
<<
s
->
mdct_bits
;
int
n4
=
n
>>
2
;
int
n16
=
n
>>
4
;
vec_u32
sign
=
{
1U
<<
31
,
1U
<<
31
,
1U
<<
31
,
1U
<<
31
};
vec_u32
*
p0
=
(
vec_u32
*
)(
output
+
n4
);
vec_u32
*
p1
=
(
vec_u32
*
)(
output
+
n4
*
3
);
imdct_half_altivec
(
s
,
output
+
n4
,
input
);
for
(
k
=
0
;
k
<
n16
;
k
++
)
{
vec_u32
a
=
p0
[
k
]
^
sign
;
vec_u32
b
=
p1
[
-
k
-
1
];
p0
[
-
k
-
1
]
=
vec_perm
(
a
,
a
,
vcprm
(
3
,
2
,
1
,
0
));
p1
[
k
]
=
vec_perm
(
b
,
b
,
vcprm
(
3
,
2
,
1
,
0
));
}
}
#endif
/* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
av_cold
void
ff_fft_init_ppc
(
FFTContext
*
s
)
{
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
...
...
@@ -148,9 +32,5 @@ av_cold void ff_fft_init_ppc(FFTContext *s)
return
;
s
->
fft_calc
=
ff_fft_calc_interleave_altivec
;
if
(
s
->
mdct_bits
>=
5
)
{
s
->
imdct_calc
=
imdct_calc_altivec
;
s
->
imdct_half
=
imdct_half_altivec
;
}
#endif
/* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
}
libavcodec/ppc/mdct_init.c
0 → 100644
View file @
1a094af6
/*
* IMDCT transforms
* AltiVec-enabled
* Copyright (c) 2009 Loren Merritt
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/fft.h"
/**
* Do a complex FFT with the parameters defined in ff_fft_init().
* The input data must be permuted before with s->revtab table.
* No 1.0 / sqrt(n) normalization is done.
* AltiVec-enabled:
* This code assumes that the 'z' pointer is 16 bytes-aligned.
* It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
*/
void
ff_fft_calc_altivec
(
FFTContext
*
s
,
FFTComplex
*
z
);
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
/**
 * AltiVec half-IMDCT.
 *
 * Works in three stages: a pre-rotation pass that reads `input` and
 * scatters rotated values into `output` at positions taken from
 * s->revtab, an in-place call to ff_fft_calc_altivec() on `output`,
 * and a post-rotation/reordering pass that rewrites `output`.
 *
 * The transform size n is 1 << s->mdct_bits. Both do/while loops start
 * at k = (n >> 5) - 1 and run at least once, so n >> 5 must be >= 1;
 * ff_mdct_init_ppc() only installs this function when mdct_bits >= 5.
 *
 * @param s      context providing mdct_bits, revtab, tcos and tsin
 * @param output destination buffer; accessed through vector pointers
 * @param input  source coefficients; accessed through vector pointers
 */
static
void
imdct_half_altivec
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
int
j
,
k
;
int
n
=
1
<<
s
->
mdct_bits
;
int
n4
=
n
>>
2
;
int
n8
=
n
>>
3
;
int
n32
=
n
>>
5
;
const
uint16_t
*
revtabj
=
s
->
revtab
;
const
uint16_t
*
revtabk
=
s
->
revtab
+
n4
;
/* tcos/tsin/pin/pout are biased into the middle of their arrays so the
 * loops below can index them with both k (>= 0) and negative offsets. */
const
vec_f
*
tcos
=
(
const
vec_f
*
)(
s
->
tcos
+
n8
);
const
vec_f
*
tsin
=
(
const
vec_f
*
)(
s
->
tsin
+
n8
);
const
vec_f
*
pin
=
(
const
vec_f
*
)(
input
+
n4
);
vec_f
*
pout
=
(
vec_f
*
)(
output
+
n4
);
/* pre rotation */
k
=
n32
-
1
;
do
{
vec_f
cos
,
sin
,
cos0
,
sin0
,
cos1
,
sin1
,
re
,
im
,
r0
,
i0
,
r1
,
i1
,
a
,
b
,
c
,
d
;
#define CMULA(p,o0,o1,o2,o3)\
a = pin[ k*2+p];
/* { z[k].re, z[k].im, z[k+1].re, z[k+1].im } */
\
b = pin[-k*2-p-1];
/* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */
\
re = vec_perm(a, b, vcprm(0,2,s0,s2));
/* { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } */
\
im = vec_perm(a, b, vcprm(s3,s1,3,1));
/* { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } */
\
cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3));
/* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */
\
sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
r##p = im*cos - re*sin;\
i##p = re*cos + im*sin;
#define STORE2(v,dst)\
j = dst;\
vec_ste(v, 0, output+j*2);\
vec_ste(v, 4, output+j*2);
#define STORE8(p)\
a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
STORE2(a, revtabk[ p*2-4]);\
STORE2(b, revtabk[ p*2-3]);\
STORE2(c, revtabj[-p*2+2]);\
STORE2(d, revtabj[-p*2+3]);
cos0
=
tcos
[
k
];
sin0
=
tsin
[
k
];
cos1
=
tcos
[
-
k
-
1
];
sin1
=
tsin
[
-
k
-
1
];
CMULA
(
0
,
0
,
1
,
2
,
3
);
CMULA
(
1
,
2
,
3
,
0
,
1
);
STORE8
(
0
);
STORE8
(
1
);
/* revtabj walks forward and revtabk backward, four entries per pass. */
revtabj
+=
4
;
revtabk
-=
4
;
k
--
;
}
while
(
k
>=
0
);
/* in-place FFT over the pre-rotated data stored in output */
ff_fft_calc_altivec
(
s
,
(
FFTComplex
*
)
output
);
/* post rotation + reordering */
j
=
-
n32
;
k
=
n32
-
1
;
do
{
vec_f
cos
,
sin
,
re
,
im
,
a
,
b
,
c
,
d
;
#define CMULB(d0,d1,o)\
re = pout[o*2];\
im = pout[o*2+1];\
cos = tcos[o];\
sin = tsin[o];\
d0 = im*sin - re*cos;\
d1 = re*sin + im*cos;
CMULB
(
a
,
b
,
j
);
CMULB
(
c
,
d
,
k
);
pout
[
2
*
j
]
=
vec_perm
(
a
,
d
,
vcprm
(
0
,
s3
,
1
,
s2
));
pout
[
2
*
j
+
1
]
=
vec_perm
(
a
,
d
,
vcprm
(
2
,
s1
,
3
,
s0
));
pout
[
2
*
k
]
=
vec_perm
(
c
,
b
,
vcprm
(
0
,
s3
,
1
,
s2
));
pout
[
2
*
k
+
1
]
=
vec_perm
(
c
,
b
,
vcprm
(
2
,
s1
,
3
,
s0
));
/* j climbs from -n32 while k descends from n32 - 1. */
j
++
;
k
--
;
}
while
(
k
>=
0
);
}
/**
 * AltiVec full IMDCT built on top of imdct_half_altivec().
 *
 * The half transform is written to the region starting at
 * output + n/4 (n = 1 << s->mdct_bits). The loop then fills the rest of
 * the buffer from that result, one 4-element vector per step:
 * the vectors below output + n/4 receive copies taken from just above it
 * with bit 31 of every 32-bit lane toggled, and the vectors from
 * output + 3n/4 upwards receive copies taken from just below it.
 * Both copies go through vec_perm with vcprm(3,2,1,0) — presumably a
 * lane reversal; confirm against util_altivec.h.
 *
 * @param s      context providing mdct_bits (and the tables used by
 *               imdct_half_altivec())
 * @param output destination buffer, accessed as vec_u32
 * @param input  source coefficients, forwarded to imdct_half_altivec()
 */
static void imdct_calc_altivec(FFTContext *s, FFTSample *output,
                               const FFTSample *input)
{
    int len       = 1 << s->mdct_bits;
    int quarter   = len >> 2;
    int vec_count = len >> 4;
    vec_u32 sign_bits = { 1U << 31, 1U << 31, 1U << 31, 1U << 31 };
    vec_u32 *mid  = (vec_u32 *)(output + quarter);
    vec_u32 *tail = (vec_u32 *)(output + quarter * 3);
    int idx;

    imdct_half_altivec(s, output + quarter, input);

    for (idx = 0; idx < vec_count; idx++) {
        /* Load both sources before storing, as the original does. */
        vec_u32 toggled = mid[idx] ^ sign_bits;
        vec_u32 below   = tail[-idx - 1];

        mid[-idx - 1] = vec_perm(toggled, toggled, vcprm(3, 2, 1, 0));
        tail[idx]     = vec_perm(below, below, vcprm(3, 2, 1, 0));
    }
}
#endif
/* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
/**
 * Hook the AltiVec IMDCT routines into an FFT context.
 *
 * Does nothing unless the build enables HAVE_GNU_AS, HAVE_ALTIVEC and
 * HAVE_BIGENDIAN. At run time the routines are installed only when
 * PPC_ALTIVEC() accepts the CPU flags and s->mdct_bits is at least 5.
 *
 * @param s context whose imdct_calc / imdct_half pointers may be
 *          overwritten
 */
av_cold void ff_mdct_init_ppc(FFTContext *s)
{
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
    /* The CPU flags are queried first, exactly as before; the size check
     * is only reached when AltiVec is available. */
    if (PPC_ALTIVEC(av_get_cpu_flags()) && s->mdct_bits >= 5) {
        s->imdct_half = imdct_half_altivec;
        s->imdct_calc = imdct_calc_altivec;
    }
#endif /* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
}
libavcodec/x86/Makefile
View file @
1a094af6
...
...
@@ -19,6 +19,7 @@ OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o
OBJS-$(CONFIG_HUFFYUVENCDSP)
+=
x86/huffyuvencdsp_mmx.o
OBJS-$(CONFIG_IDCTDSP)
+=
x86/idctdsp_init.o
OBJS-$(CONFIG_LPC)
+=
x86/lpc.o
OBJS-$(CONFIG_MDCT)
+=
x86/mdct_init.o
OBJS-$(CONFIG_ME_CMP)
+=
x86/me_cmp_init.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
x86/mpegvideo.o
\
...
...
libavcodec/x86/fft.asm
View file @
1a094af6
...
...
@@ -655,68 +655,6 @@ cglobal fft_permute, 2,7,1
jl
.
loopcopy
REP_RET
%macro
IMDCT_CALC_FUNC
0
cglobal
imdct_calc
,
3
,
5
,
3
mov
r3d
,
[
r0
+
FFTContext
.
mdctsize
]
mov
r4
,
[
r0
+
FFTContext
.
imdcthalf
]
add
r1
,
r3
PUSH
r3
PUSH
r1
%if
ARCH_X86_32
push
r2
push
r1
push
r0
%else
sub
rsp
,
8
+
32
*
WIN64
; allocate win64 shadow space
%endif
call
r4
%if
ARCH_X86_32
add
esp
,
12
%else
add
rsp
,
8
+
32
*
WIN64
%endif
POP
r1
POP
r3
lea
r0
,
[
r1
+
2
*
r3
]
mov
r2
,
r3
sub
r3
,
mmsize
neg
r2
mova
m2
,
[
ps_m1m1m1m1
]
.
loop
:
%if
mmsize
==
8
PSWAPD
m0
,
[
r1
+
r3
]
PSWAPD
m1
,
[
r0
+
r2
]
pxor
m0
,
m2
%else
mova
m0
,
[
r1
+
r3
]
mova
m1
,
[
r0
+
r2
]
shufps
m0
,
m0
,
0x1b
shufps
m1
,
m1
,
0x1b
xorps
m0
,
m2
%endif
mova
[
r0
+
r3
]
,
m1
mova
[
r1
+
r2
]
,
m0
sub
r3
,
mmsize
add
r2
,
mmsize
jl
.
loop
%if
cpuflag
(
3
dnow
)
femms
RET
%else
REP_RET
%endif
%endmacro
%if
ARCH_X86_32
INIT_MMX
3
dnow
IMDCT_CALC_FUNC
INIT_MMX
3
dnowext
IMDCT_CALC_FUNC
%endif
INIT_XMM
sse
IMDCT_CALC_FUNC
%if
ARCH_X86_32
INIT_MMX
3
dnow
%define
mulps
pfmul
...
...
@@ -791,6 +729,70 @@ DECL_FFT 4
DECL_FFT
4
,
_interleave
%endif
%if
CONFIG_MDCT
%macro
IMDCT_CALC_FUNC
0
cglobal
imdct_calc
,
3
,
5
,
3
mov
r3d
,
[
r0
+
FFTContext
.
mdctsize
]
mov
r4
,
[
r0
+
FFTContext
.
imdcthalf
]
add
r1
,
r3
PUSH
r3
PUSH
r1
%if
ARCH_X86_32
push
r2
push
r1
push
r0
%else
sub
rsp
,
8
+
32
*
WIN64
; allocate win64 shadow space
%endif
call
r4
%if
ARCH_X86_32
add
esp
,
12
%else
add
rsp
,
8
+
32
*
WIN64
%endif
POP
r1
POP
r3
lea
r0
,
[
r1
+
2
*
r3
]
mov
r2
,
r3
sub
r3
,
mmsize
neg
r2
mova
m2
,
[
ps_m1m1m1m1
]
.
loop
:
%if
mmsize
==
8
PSWAPD
m0
,
[
r1
+
r3
]
PSWAPD
m1
,
[
r0
+
r2
]
pxor
m0
,
m2
%else
mova
m0
,
[
r1
+
r3
]
mova
m1
,
[
r0
+
r2
]
shufps
m0
,
m0
,
0x1b
shufps
m1
,
m1
,
0x1b
xorps
m0
,
m2
%endif
mova
[
r0
+
r3
]
,
m1
mova
[
r1
+
r2
]
,
m0
sub
r3
,
mmsize
add
r2
,
mmsize
jl
.
loop
%if
cpuflag
(
3
dnow
)
femms
RET
%else
REP_RET
%endif
%endmacro
%if
ARCH_X86_32
INIT_MMX
3
dnow
IMDCT_CALC_FUNC
INIT_MMX
3
dnowext
IMDCT_CALC_FUNC
%endif
INIT_XMM
sse
IMDCT_CALC_FUNC
INIT_XMM
sse
%undef
mulps
%undef
addps
...
...
@@ -1081,3 +1083,5 @@ DECL_IMDCT POSROTATESHUF_3DNOW
INIT_YMM
avx
DECL_IMDCT
POSROTATESHUF_AVX
%endif
; CONFIG_MDCT
libavcodec/x86/fft.h
View file @
1a094af6
...
...
@@ -27,12 +27,4 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void
ff_fft_calc_3dnow
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dnowext
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_calc_3dnow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dnow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dnowext
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dnowext
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
#endif
/* AVCODEC_X86_FFT_H */
libavcodec/x86/fft_init.c
View file @
1a094af6
...
...
@@ -30,28 +30,21 @@ av_cold void ff_fft_init_x86(FFTContext *s)
#if ARCH_X86_32
if
(
EXTERNAL_AMD3DNOW
(
cpu_flags
))
{
s
->
imdct_calc
=
ff_imdct_calc_3dnow
;
s
->
imdct_half
=
ff_imdct_half_3dnow
;
s
->
fft_calc
=
ff_fft_calc_3dnow
;
}
if
(
EXTERNAL_AMD3DNOWEXT
(
cpu_flags
))
{
s
->
imdct_calc
=
ff_imdct_calc_3dnowext
;
s
->
imdct_half
=
ff_imdct_half_3dnowext
;
s
->
fft_calc
=
ff_fft_calc_3dnowext
;
}
#endif
/* ARCH_X86_32 */
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
s
->
imdct_calc
=
ff_imdct_calc_sse
;
s
->
imdct_half
=
ff_imdct_half_sse
;
s
->
fft_permute
=
ff_fft_permute_sse
;
s
->
fft_calc
=
ff_fft_calc_sse
;
s
->
fft_permutation
=
FF_FFT_PERM_SWAP_LSBS
;
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
)
&&
s
->
nbits
>=
5
)
{
s
->
imdct_half
=
ff_imdct_half_avx
;
s
->
fft_calc
=
ff_fft_calc_avx
;
s
->
fft_permutation
=
FF_FFT_PERM_AVX
;
}
...
...
libavcodec/x86/mdct.h
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_MDCT_H
#define AVCODEC_X86_MDCT_H
#include "libavcodec/fft.h"
void
ff_imdct_calc_3dnow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dnow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dnowext
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dnowext
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
#endif
/* AVCODEC_X86_MDCT_H */
libavcodec/x86/mdct_init.c
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "mdct.h"
/**
 * Select x86 SIMD implementations of the IMDCT for an FFT context.
 *
 * The checks run in a fixed sequence — 3DNow! and 3DNowExt (compiled in
 * only when ARCH_X86_32 is set), then SSE, then AVX — and every check
 * that succeeds overwrites the pointers chosen by the earlier ones, so
 * the last successful check determines the final selection.
 *
 * @param s context whose imdct_calc / imdct_half pointers may be
 *          overwritten; s->nbits is read for the AVX check
 */
av_cold void ff_mdct_init_x86(FFTContext *s)
{
    const int flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_AMD3DNOW(flags)) {
        s->imdct_half = ff_imdct_half_3dnow;
        s->imdct_calc = ff_imdct_calc_3dnow;
    }

    if (EXTERNAL_AMD3DNOWEXT(flags)) {
        s->imdct_half = ff_imdct_half_3dnowext;
        s->imdct_calc = ff_imdct_calc_3dnowext;
    }
#endif /* ARCH_X86_32 */

    if (EXTERNAL_SSE(flags)) {
        s->imdct_half = ff_imdct_half_sse;
        s->imdct_calc = ff_imdct_calc_sse;
    }

    /* Only imdct_half is replaced here, and only for nbits >= 5;
     * imdct_calc keeps whatever was selected above. */
    if (EXTERNAL_AVX_FAST(flags) && s->nbits >= 5) {
        s->imdct_half = ff_imdct_half_avx;
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment