Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
209f50e1
Commit
209f50e1
authored
Jan 25, 2016
by
James Almer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/synth_filter: split off remaining code from dcadec files
Signed-off-by:
James Almer
<
jamrial@gmail.com
>
parent
5dc37a5d
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
424 additions
and
320 deletions
+424
-320
Makefile
libavcodec/aarch64/Makefile
+2
-1
dcadsp_init.c
libavcodec/aarch64/dcadsp_init.c
+0
-21
synth_filter_init.c
libavcodec/aarch64/synth_filter_init.c
+47
-0
Makefile
libavcodec/arm/Makefile
+2
-1
dcadsp_init_arm.c
libavcodec/arm/dcadsp_init_arm.c
+0
-22
synth_filter_init_arm.c
libavcodec/arm/synth_filter_init_arm.c
+49
-0
Makefile
libavcodec/x86/Makefile
+4
-2
dcadsp.asm
libavcodec/x86/dcadsp.asm
+0
-222
dcadsp_init.c
libavcodec/x86/dcadsp_init.c
+0
-51
synth_filter.asm
libavcodec/x86/synth_filter.asm
+246
-0
synth_filter_init.c
libavcodec/x86/synth_filter_init.c
+74
-0
No files found.
libavcodec/aarch64/Makefile
View file @
209f50e1
OBJS-$(CONFIG_DCA_DECODER)
+=
aarch64/dcadsp_init.o
OBJS-$(CONFIG_DCA_DECODER)
+=
aarch64/dcadsp_init.o
\
aarch64/synth_filter_init.o
OBJS-$(CONFIG_FFT)
+=
aarch64/fft_init_aarch64.o
OBJS-$(CONFIG_FMTCONVERT)
+=
aarch64/fmtconvert_init.o
OBJS-$(CONFIG_H264CHROMA)
+=
aarch64/h264chroma_init_aarch64.o
...
...
libavcodec/aarch64/dcadsp_init.c
View file @
209f50e1
...
...
@@ -24,23 +24,10 @@
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/dcadsp.h"
#include "libavcodec/fft.h"
#include "asm-offsets.h"
#if HAVE_NEON || HAVE_VFP
AV_CHECK_OFFSET
(
FFTContext
,
imdct_half
,
IMDCT_HALF
);
#endif
void
ff_dca_lfe_fir0_neon
(
float
*
out
,
const
float
*
in
,
const
float
*
coefs
);
void
ff_dca_lfe_fir1_neon
(
float
*
out
,
const
float
*
in
,
const
float
*
coefs
);
void
ff_synth_filter_float_neon
(
FFTContext
*
imdct
,
float
*
synth_buf_ptr
,
int
*
synth_buf_offset
,
float
synth_buf2
[
32
],
const
float
window
[
512
],
float
out
[
32
],
const
float
in
[
32
],
float
scale
);
av_cold
void
ff_dcadsp_init_aarch64
(
DCADSPContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -50,11 +37,3 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
s
->
lfe_fir
[
1
]
=
ff_dca_lfe_fir1_neon
;
}
}
av_cold
void
ff_synth_filter_init_aarch64
(
SynthFilterContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
if
(
have_neon
(
cpu_flags
))
s
->
synth_filter_float
=
ff_synth_filter_float_neon
;
}
libavcodec/aarch64/synth_filter_init.c
0 → 100644
View file @
209f50e1
/*
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/aarch64/cpu.h"
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/fft.h"
#include "libavcodec/synth_filter.h"
#include "asm-offsets.h"
#if HAVE_NEON || HAVE_VFP
/* Ties the IMDCT_HALF constant (asm-offsets.h) to the imdct_half member of
 * FFTContext.
 * NOTE(review): presumably a compile-time offsetof() check so the assembly
 * and the C struct layout cannot drift apart -- confirm against the
 * AV_CHECK_OFFSET definition. */
AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
#endif

/* NEON synth filter. Only declared here; the definition lives outside this
 * file. ff_synth_filter_init_aarch64() stores it in
 * SynthFilterContext.synth_filter_float when have_neon(cpu_flags) holds. */
void ff_synth_filter_float_neon(FFTContext *imdct,
                                float *synth_buf_ptr, int *synth_buf_offset,
                                float synth_buf2[32], const float window[512],
                                float out[32], const float in[32],
                                float scale);
/**
 * Install the AArch64-specific synth filter, if the CPU supports it.
 *
 * Reads the runtime CPU flags and, when have_neon() accepts them, points
 * s->synth_filter_float at ff_synth_filter_float_neon. Otherwise the
 * context is left exactly as the caller passed it in.
 *
 * @param s synth filter context whose function pointer may be replaced
 */
av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    s->synth_filter_float = ff_synth_filter_float_neon;
}
libavcodec/arm/Makefile
View file @
209f50e1
...
...
@@ -36,7 +36,8 @@ OBJS-$(CONFIG_VP8DSP) += arm/vp8dsp_init_arm.o
# decoders/encoders
OBJS-$(CONFIG_AAC_DECODER)
+=
arm/aacpsdsp_init_arm.o
\
arm/sbrdsp_init_arm.o
OBJS-$(CONFIG_DCA_DECODER)
+=
arm/dcadsp_init_arm.o
OBJS-$(CONFIG_DCA_DECODER)
+=
arm/dcadsp_init_arm.o
\
arm/synth_filter_init_arm.o
OBJS-$(CONFIG_HEVC_DECODER)
+=
arm/hevcdsp_init_arm.o
OBJS-$(CONFIG_MLP_DECODER)
+=
arm/mlpdsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv40dsp_init_arm.o
...
...
libavcodec/arm/dcadsp_init_arm.c
View file @
209f50e1
...
...
@@ -37,18 +37,6 @@ void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
const
float
window
[
512
],
float
*
samples_out
,
float
raXin
[
32
],
float
scale
);
void
ff_synth_filter_float_vfp
(
FFTContext
*
imdct
,
float
*
synth_buf_ptr
,
int
*
synth_buf_offset
,
float
synth_buf2
[
32
],
const
float
window
[
512
],
float
out
[
32
],
const
float
in
[
32
],
float
scale
);
void
ff_synth_filter_float_neon
(
FFTContext
*
imdct
,
float
*
synth_buf_ptr
,
int
*
synth_buf_offset
,
float
synth_buf2
[
32
],
const
float
window
[
512
],
float
out
[
32
],
const
float
in
[
32
],
float
scale
);
av_cold
void
ff_dcadsp_init_arm
(
DCADSPContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -63,13 +51,3 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
s
->
lfe_fir
[
1
]
=
ff_dca_lfe_fir1_neon
;
}
}
av_cold
void
ff_synth_filter_init_arm
(
SynthFilterContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
if
(
have_vfp_vm
(
cpu_flags
))
s
->
synth_filter_float
=
ff_synth_filter_float_vfp
;
if
(
have_neon
(
cpu_flags
))
s
->
synth_filter_float
=
ff_synth_filter_float_neon
;
}
libavcodec/arm/synth_filter_init_arm.c
0 → 100644
View file @
209f50e1
/*
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/arm/cpu.h"
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/fft.h"
#include "libavcodec/synth_filter.h"
/* VFP and NEON builds of the float synth filter. Both are only declared
 * here (defined outside this file) and share one signature, so either can
 * be stored in SynthFilterContext.synth_filter_float by
 * ff_synth_filter_init_arm(). */
void ff_synth_filter_float_vfp(FFTContext *imdct,
                               float *synth_buf_ptr, int *synth_buf_offset,
                               float synth_buf2[32], const float window[512],
                               float out[32], const float in[32],
                               float scale);

void ff_synth_filter_float_neon(FFTContext *imdct,
                                float *synth_buf_ptr, int *synth_buf_offset,
                                float synth_buf2[32], const float window[512],
                                float out[32], const float in[32],
                                float scale);
/**
 * Install the best ARM synth filter the running CPU supports.
 *
 * The NEON version is chosen whenever have_neon() accepts the CPU flags;
 * failing that, the VFP version is chosen when have_vfp_vm() accepts them.
 * If neither predicate holds, s is not modified.
 *
 * @param s synth filter context whose function pointer may be replaced
 */
av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
{
    const int flags = av_get_cpu_flags();

    /* NEON wins over VFP when both are reported. */
    if (have_neon(flags))
        s->synth_filter_float = ff_synth_filter_float_neon;
    else if (have_vfp_vm(flags))
        s->synth_filter_float = ff_synth_filter_float_vfp;
}
libavcodec/x86/Makefile
View file @
209f50e1
...
...
@@ -44,7 +44,8 @@ OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp_init.o
OBJS-$(CONFIG_ALAC_DECODER)
+=
x86/alacdsp_init.o
OBJS-$(CONFIG_APNG_DECODER)
+=
x86/pngdsp_init.o
OBJS-$(CONFIG_CAVS_DECODER)
+=
x86/cavsdsp.o
OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp_init.o
OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp_init.o
\
x86/synth_filter_init.o
OBJS-$(CONFIG_DNXHD_ENCODER)
+=
x86/dnxhdenc_init.o
OBJS-$(CONFIG_HEVC_DECODER)
+=
x86/hevcdsp_init.o
OBJS-$(CONFIG_JPEG2000_DECODER)
+=
x86/jpeg2000dsp_init.o
...
...
@@ -132,7 +133,8 @@ YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER)
+=
x86/g722dsp.o
YASM-OBJS-$(CONFIG_ALAC_DECODER)
+=
x86/alacdsp.o
YASM-OBJS-$(CONFIG_APNG_DECODER)
+=
x86/pngdsp.o
YASM-OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp.o
YASM-OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp.o
\
x86/synth_filter.o
YASM-OBJS-$(CONFIG_DIRAC_DECODER)
+=
x86/diracdsp_mmx.o
x86/diracdsp_yasm.o
\
x86/dwt_yasm.o
YASM-OBJS-$(CONFIG_DNXHD_ENCODER)
+=
x86/dnxhdenc.o
...
...
libavcodec/x86/dcadsp.asm
View file @
209f50e1
...
...
@@ -121,225 +121,3 @@ DCA_LFE_FIR 1
INIT_XMM
fma3
DCA_LFE_FIR
0
%endif
%macro
SETZERO
1
%if
cpuflag
(
sse2
)
&&
notcpuflag
(
avx
)
pxor
%1
,
%1
%else
xorps
%1
,
%1
,
%1
%endif
%endmacro
%macro
SHUF
3
%if
cpuflag
(
avx
)
mova
%3
,
[
%2
-
16
]
vperm2f128
%1
,
%3
,
%3
,
1
vshufps
%1
,
%1
,
%1
,
q0123
%elif
cpuflag
(
sse2
)
pshufd
%1
,
[
%2
]
,
q0123
%else
mova
%1
,
[
%2
]
shufps
%1
,
%1
,
q0123
%endif
%endmacro
%macro
INNER_LOOP
1
; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
;~ a += window[i + j] * (-synth_buf[15 - i + j])
;~ b += window[i + j + 16] * (synth_buf[i + j])
SHUF
m5
,
ptr2
+
j
+
(
15
-
3
)
*
4
,
m6
mova
m6
,
[
ptr1
+
j
]
%if
ARCH_X86_64
SHUF
m11
,
ptr2
+
j
+
(
15
-
3
)
*
4
-
mmsize
,
m12
mova
m12
,
[
ptr1
+
j
+
mmsize
]
%endif
%if
cpuflag
(
fma3
)
fmaddps
m2
,
m6
,
[
win
+
%1
+
j
+
16
*
4
]
,
m2
fnmaddps
m1
,
m5
,
[
win
+
%1
+
j
]
,
m1
%if
ARCH_X86_64
fmaddps
m8
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
16
*
4
]
,
m8
fnmaddps
m7
,
m11
,
[
win
+
%1
+
j
+
mmsize
]
,
m7
%endif
%else
; non-FMA
mulps
m6
,
m6
,
[
win
+
%1
+
j
+
16
*
4
]
mulps
m5
,
m5
,
[
win
+
%1
+
j
]
%if
ARCH_X86_64
mulps
m12
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
16
*
4
]
mulps
m11
,
m11
,
[
win
+
%1
+
j
+
mmsize
]
%endif
addps
m2
,
m2
,
m6
subps
m1
,
m1
,
m5
%if
ARCH_X86_64
addps
m8
,
m8
,
m12
subps
m7
,
m7
,
m11
%endif
%endif
; cpuflag(fma3)
;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
SHUF
m6
,
ptr2
+
j
+
(
31
-
3
)
*
4
,
m5
mova
m5
,
[
ptr1
+
j
+
16
*
4
]
%if
ARCH_X86_64
SHUF
m12
,
ptr2
+
j
+
(
31
-
3
)
*
4
-
mmsize
,
m11
mova
m11
,
[
ptr1
+
j
+
mmsize
+
16
*
4
]
%endif
%if
cpuflag
(
fma3
)
fmaddps
m3
,
m5
,
[
win
+
%1
+
j
+
32
*
4
]
,
m3
fmaddps
m4
,
m6
,
[
win
+
%1
+
j
+
48
*
4
]
,
m4
%if
ARCH_X86_64
fmaddps
m9
,
m11
,
[
win
+
%1
+
j
+
mmsize
+
32
*
4
]
,
m9
fmaddps
m10
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
48
*
4
]
,
m10
%endif
%else
; non-FMA
mulps
m5
,
m5
,
[
win
+
%1
+
j
+
32
*
4
]
mulps
m6
,
m6
,
[
win
+
%1
+
j
+
48
*
4
]
%if
ARCH_X86_64
mulps
m11
,
m11
,
[
win
+
%1
+
j
+
mmsize
+
32
*
4
]
mulps
m12
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
48
*
4
]
%endif
addps
m3
,
m3
,
m5
addps
m4
,
m4
,
m6
%if
ARCH_X86_64
addps
m9
,
m9
,
m11
addps
m10
,
m10
,
m12
%endif
%endif
; cpuflag(fma3)
sub
j
,
64
*
4
%endmacro
; void ff_synth_filter_inner_<opt>(float *synth_buf, float synth_buf2[32],
; const float window[512], float out[32],
; intptr_t offset, float scale)
%macro
SYNTH_FILTER
0
cglobal
synth_filter_inner
,
0
,
6
+
4
*
ARCH_X86_64
,
7
+
6
*
ARCH_X86_64
,
\
synth_buf
,
synth_buf2
,
window
,
out
,
off
,
scale
%define
scale
m0
%if
ARCH_X86_32
||
WIN64
%if
cpuflag
(
sse2
)
&&
notcpuflag
(
avx
)
movd
scale
,
scalem
SPLATD
m0
%else
VBROADCASTSS
m0
,
scalem
%endif
; Make sure offset is in a register and not on the stack
%define
OFFQ
r4q
%else
SPLATD
xmm0
%if
cpuflag
(
avx
)
vinsertf128
m0
,
m0
,
xmm0
,
1
%endif
%define
OFFQ
offq
%endif
; prepare inner counter limit 1
mov
r5q
,
480
sub
r5q
,
offmp
and
r5q
,
-
64
shl
r5q
,
2
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
mov
OFFQ
,
r5q
%define
i
r5q
mov
i
,
16
*
4
-
(
ARCH_X86_64
+
1
)
*
mmsize
; main loop counter
%else
%define
i
0
%define
OFFQ
r5q
%endif
%define
buf2
synth_buf2q
%if
ARCH_X86_32
mov
buf2
,
synth_buf2mp
%endif
.
mainloop
:
; m1 = a m2 = b m3 = c m4 = d
SETZERO
m3
SETZERO
m4
mova
m1
,
[
buf2
+
i
]
mova
m2
,
[
buf2
+
i
+
16
*
4
]
%if
ARCH_X86_32
%define
ptr1
r0q
%define
ptr2
r1q
%define
win
r2q
%define
j
r3q
mov
win
,
windowm
mov
ptr1
,
synth_bufm
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
add
win
,
i
add
ptr1
,
i
%endif
%else
; ARCH_X86_64
%define
ptr1
r6q
%define
ptr2
r7q
; must be loaded
%define
win
r8q
%define
j
r9q
SETZERO
m9
SETZERO
m10
mova
m7
,
[
buf2
+
i
+
mmsize
]
mova
m8
,
[
buf2
+
i
+
mmsize
+
16
*
4
]
lea
win
,
[
windowq
+
i
]
lea
ptr1
,
[
synth_bufq
+
i
]
%endif
mov
ptr2
,
synth_bufmp
; prepare the inner loop counter
mov
j
,
OFFQ
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
sub
ptr2
,
i
%endif
.
loop1
:
INNER_LOOP
0
jge
.
loop1
mov
j
,
448
*
4
sub
j
,
OFFQ
jz
.
end
sub
ptr1
,
j
sub
ptr2
,
j
add
win
,
OFFQ
; now at j-64, so define OFFSET
sub
j
,
64
*
4
.
loop2
:
INNER_LOOP
64
*
4
jge
.
loop2
.
end
:
%if
ARCH_X86_32
mov
buf2
,
synth_buf2m
; needed for next iteration anyway
mov
outq
,
outmp
; j, which will be set again during it
%endif
;~ out[i] = a * scale;
;~ out[i + 16] = b * scale;
mulps
m1
,
m1
,
scale
mulps
m2
,
m2
,
scale
%if
ARCH_X86_64
mulps
m7
,
m7
,
scale
mulps
m8
,
m8
,
scale
%endif
;~ synth_buf2[i] = c;
;~ synth_buf2[i + 16] = d;
mova
[
buf2
+
i
+
0
*
4
]
,
m3
mova
[
buf2
+
i
+
16
*
4
]
,
m4
%if
ARCH_X86_64
mova
[
buf2
+
i
+
0
*
4
+
mmsize
]
,
m9
mova
[
buf2
+
i
+
16
*
4
+
mmsize
]
,
m10
%endif
;~ out[i] = a;
;~ out[i + 16] = a;
mova
[
outq
+
i
+
0
*
4
]
,
m1
mova
[
outq
+
i
+
16
*
4
]
,
m2
%if
ARCH_X86_64
mova
[
outq
+
i
+
0
*
4
+
mmsize
]
,
m7
mova
[
outq
+
i
+
16
*
4
+
mmsize
]
,
m8
%endif
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
sub
i
,
(
ARCH_X86_64
+
1
)
*
mmsize
jge
.
mainloop
%endif
RET
%endmacro
%if
ARCH_X86_32
INIT_XMM
sse
SYNTH_FILTER
%endif
INIT_XMM
sse2
SYNTH_FILTER
INIT_YMM
avx
SYNTH_FILTER
INIT_YMM
fma3
SYNTH_FILTER
libavcodec/x86/dcadsp_init.c
View file @
209f50e1
...
...
@@ -40,54 +40,3 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
s
->
lfe_fir
[
0
]
=
ff_dca_lfe_fir0_fma3
;
}
}
#define SYNTH_FILTER_FUNC(opt) \
void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \
const float window[512], \
float out[32], intptr_t offset, float scale); \
static void synth_filter_##opt(FFTContext *imdct, \
float *synth_buf_ptr, int *synth_buf_offset, \
float synth_buf2[32], const float window[512], \
float out[32], const float in[32], float scale) \
{ \
float *synth_buf= synth_buf_ptr + *synth_buf_offset; \
\
imdct->imdct_half(imdct, synth_buf, in); \
\
ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window, \
out, *synth_buf_offset, scale); \
\
*synth_buf_offset = (*synth_buf_offset - 32) & 511; \
} \
#if HAVE_YASM
#if ARCH_X86_32
SYNTH_FILTER_FUNC
(
sse
)
#endif
SYNTH_FILTER_FUNC
(
sse2
)
SYNTH_FILTER_FUNC
(
avx
)
SYNTH_FILTER_FUNC
(
fma3
)
#endif
/* HAVE_YASM */
av_cold
void
ff_synth_filter_init_x86
(
SynthFilterContext
*
s
)
{
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
#if ARCH_X86_32
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
s
->
synth_filter_float
=
synth_filter_sse
;
}
#endif
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
s
->
synth_filter_float
=
synth_filter_sse2
;
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
s
->
synth_filter_float
=
synth_filter_avx
;
}
if
(
EXTERNAL_FMA3
(
cpu_flags
)
&&
!
(
cpu_flags
&
AV_CPU_FLAG_AVXSLOW
))
{
s
->
synth_filter_float
=
synth_filter_fma3
;
}
#endif
/* HAVE_YASM */
}
libavcodec/x86/synth_filter.asm
0 → 100644
View file @
209f50e1
;******************************************************************************
;* SSE, AVX and FMA3 optimized synth filter functions for the DCA decoder
;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
SECTION
.
text
; SETZERO reg
; Zero the vector register %1 by XORing it with itself: pxor when assembling
; for SSE2 without AVX, the three-operand xorps form in every other case.
%macro SETZERO 1
%if cpuflag(sse2) && notcpuflag(avx)
    pxor          %1, %1
%else
    xorps         %1, %1, %1
%endif
%endmacro
; SHUF dst, addr, tmp
; Load one vector from memory at %2 into %1, permuted with the q0123 shuffle.
; NOTE(review): q0123 is defined by the included x86util/x86inc, not here; it
; should be the element-reversing pattern, in line with the "reading
; backwards" use in INNER_LOOP -- confirm.
;   AVX : loads [%2 - 16] into the scratch register %3, swaps its two 128-bit
;         halves into %1 (vperm2f128 ..., 1), then shuffles within each half.
;   SSE2: a single pshufd straight from memory.
;   SSE : mova followed by an in-place shufps.
; %3 is only touched on the AVX path.
%macro SHUF 3
%if cpuflag(avx)
    mova          %3, [%2 - 16]
    vperm2f128    %1, %3, %3, 1
    vshufps       %1, %1, %1, q0123
%elif cpuflag(sse2)
    pshufd        %1, [%2], q0123
%else
    mova          %1, [%2]
    shufps        %1, %1, q0123
%endif
%endmacro
; INNER_LOOP win_offset
; One step of the accumulation over j. %1 is a byte offset added to every
; window address (the callers pass 0 and 64 * 4).
;
; Relies on ptr1, ptr2, win and j having been %define'd by the invoking macro.
; Accumulators: m1 = a (products are subtracted), m2 = b, m3 = c, m4 = d.
; On x86-64 a second set m7/m8/m9/m10 handles the next mmsize bytes.
; Scratch: m5/m6, plus m11/m12 on x86-64.
;
; The last instruction is "sub j, 64 * 4"; callers branch with jge directly
; on the flags it leaves behind, so nothing may be appended after it.
%macro INNER_LOOP   1
    ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
    ;~ a += window[i + j]      * (-synth_buf[15 - i + j])
    ;~ b += window[i + j + 16] * (synth_buf[i + j])
    SHUF                  m5,  ptr2 + j + (15 - 3) * 4, m6
    mova                  m6, [ptr1 + j]
%if ARCH_X86_64
    SHUF                 m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
    mova                 m12, [ptr1 + j + mmsize]
%endif
%if cpuflag(fma3)
    fmaddps               m2,  m6,  [win + %1 + j + 16 * 4], m2
    fnmaddps              m1,  m5,  [win + %1 + j], m1
%if ARCH_X86_64
    fmaddps               m8,  m12, [win + %1 + j + mmsize + 16 * 4], m8
    fnmaddps              m7,  m11, [win + %1 + j + mmsize], m7
%endif
%else ; non-FMA
    mulps                 m6,  m6,  [win + %1 + j + 16 * 4]
    mulps                 m5,  m5,  [win + %1 + j]
%if ARCH_X86_64
    mulps                m12, m12,  [win + %1 + j + mmsize + 16 * 4]
    mulps                m11, m11,  [win + %1 + j + mmsize]
%endif
    addps                 m2,  m2, m6
    subps                 m1,  m1, m5
%if ARCH_X86_64
    addps                 m8,  m8, m12
    subps                 m7,  m7, m11
%endif
%endif ; cpuflag(fma3)
    ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
    ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
    SHUF                  m6,  ptr2 + j + (31 - 3) * 4, m5
    mova                  m5, [ptr1 + j + 16 * 4]
%if ARCH_X86_64
    SHUF                 m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
    mova                 m11, [ptr1 + j + mmsize + 16 * 4]
%endif
%if cpuflag(fma3)
    fmaddps               m3,  m5,  [win + %1 + j + 32 * 4], m3
    fmaddps               m4,  m6,  [win + %1 + j + 48 * 4], m4
%if ARCH_X86_64
    fmaddps               m9,  m11, [win + %1 + j + mmsize + 32 * 4], m9
    fmaddps              m10,  m12, [win + %1 + j + mmsize + 48 * 4], m10
%endif
%else ; non-FMA
    mulps                 m5,  m5,  [win + %1 + j + 32 * 4]
    mulps                 m6,  m6,  [win + %1 + j + 48 * 4]
%if ARCH_X86_64
    mulps                m11, m11,  [win + %1 + j + mmsize + 32 * 4]
    mulps                m12, m12,  [win + %1 + j + mmsize + 48 * 4]
%endif
    addps                 m3,  m3, m5
    addps                 m4,  m4, m6
%if ARCH_X86_64
    addps                 m9,  m9, m11
    addps                m10, m10, m12
%endif
%endif ; cpuflag(fma3)
    ; step j down by 64 floats; the resulting flags drive the caller's jge
    sub                    j, 64 * 4
%endmacro
; void ff_synth_filter_inner_<opt>(float *synth_buf, float synth_buf2[32],
;                                  const float window[512], float out[32],
;                                  intptr_t offset, float scale)
;
; SYNTH_FILTER emits synth_filter_inner for whichever instruction set was
; selected by the preceding INIT_XMM / INIT_YMM.
;
; Outline:
;   * scale is broadcast to every lane of m0. On x86-32 and WIN64 it is read
;     from its argument slot (scalem); otherwise it is taken from xmm0.
;   * the first inner-loop limit is ((480 - offset) & -64) << 2, i.e. a byte
;     count, kept in OFFQ.
;   * .mainloop handles (ARCH_X86_64 + 1) * mmsize bytes of each half of
;     out / synth_buf2 per pass. In x86-64 AVX/FMA3 builds i is the constant 0
;     and the body runs exactly once; in all other builds i counts down from
;     16 * 4 - (ARCH_X86_64 + 1) * mmsize.
;   * .loop1 / .loop2 accumulate a, b, c, d through INNER_LOOP; .loop2 is
;     skipped when 448 * 4 - OFFQ is zero.
;   * at .end, a * scale and b * scale are stored to out and c, d are stored
;     to synth_buf2.
%macro SYNTH_FILTER 0
cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
                              synth_buf, synth_buf2, window, out, off, scale
%define scale m0
%if ARCH_X86_32 || WIN64
%if cpuflag(sse2) && notcpuflag(avx)
    movd       scale, scalem
    SPLATD        m0
%else
    VBROADCASTSS  m0, scalem
%endif
; Make sure offset is in a register and not on the stack
%define OFFQ  r4q
%else
    SPLATD      xmm0
%if cpuflag(avx)
    ; replicate the splatted low half into the upper 128 bits
    vinsertf128   m0, m0, xmm0, 1
%endif
%define OFFQ  offq
%endif
    ; prepare inner counter limit 1
    mov          r5q, 480
    sub          r5q, offmp
    and          r5q, -64
    shl          r5q, 2
%if ARCH_X86_32 || notcpuflag(avx)
    mov         OFFQ, r5q
%define i        r5q
    mov            i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize  ; main loop counter
%else
    ; single pass: i is a constant, so r5q itself can hold the limit
%define i 0
%define OFFQ  r5q
%endif

%define buf2     synth_buf2q
%if ARCH_X86_32
    mov         buf2, synth_buf2mp
%endif
.mainloop:
    ; m1 = a  m2 = b  m3 = c  m4 = d
    SETZERO       m3
    SETZERO       m4
    mova          m1, [buf2 + i]
    mova          m2, [buf2 + i + 16 * 4]
%if ARCH_X86_32
%define ptr1     r0q
%define ptr2     r1q
%define win      r2q
%define j        r3q
    mov          win, windowm
    mov         ptr1, synth_bufm
%if ARCH_X86_32 || notcpuflag(avx)
    add          win, i
    add         ptr1, i
%endif
%else ; ARCH_X86_64
%define ptr1     r6q
%define ptr2     r7q ; must be loaded
%define win      r8q
%define j        r9q
    ; second accumulator set (m7 = a', m8 = b', m9 = c', m10 = d')
    SETZERO       m9
    SETZERO      m10
    mova          m7, [buf2 + i + mmsize]
    mova          m8, [buf2 + i + mmsize + 16 * 4]
    lea          win, [windowq + i]
    lea         ptr1, [synth_bufq + i]
%endif
    mov         ptr2, synth_bufmp
    ; prepare the inner loop counter
    mov            j, OFFQ
%if ARCH_X86_32 || notcpuflag(avx)
    sub         ptr2, i
%endif
.loop1:
    INNER_LOOP  0
    jge       .loop1

    mov            j, 448 * 4
    sub            j, OFFQ
    jz          .end
    sub         ptr1, j
    sub         ptr2, j
    add          win, OFFQ ; now at j-64, so define OFFSET
    sub            j, 64 * 4
.loop2:
    INNER_LOOP  64 * 4
    jge       .loop2

.end:
%if ARCH_X86_32
    mov         buf2, synth_buf2m ; needed for next iteration anyway
    mov         outq, outmp       ; j, which will be set again during it
%endif
    ;~ a *= scale;
    ;~ b *= scale;
    mulps         m1, m1, scale
    mulps         m2, m2, scale
%if ARCH_X86_64
    mulps         m7, m7, scale
    mulps         m8, m8, scale
%endif
    ;~ synth_buf2[i]      = c;
    ;~ synth_buf2[i + 16] = d;
    mova   [buf2 + i +  0 * 4], m3
    mova   [buf2 + i + 16 * 4], m4
%if ARCH_X86_64
    mova   [buf2 + i +  0 * 4 + mmsize], m9
    mova   [buf2 + i + 16 * 4 + mmsize], m10
%endif
    ;~ out[i]      = a;   (already multiplied by scale above)
    ;~ out[i + 16] = b;
    mova   [outq + i +  0 * 4], m1
    mova   [outq + i + 16 * 4], m2
%if ARCH_X86_64
    mova   [outq + i +  0 * 4 + mmsize], m7
    mova   [outq + i + 16 * 4 + mmsize], m8
%endif
%if ARCH_X86_32 || notcpuflag(avx)
    sub            i, (ARCH_X86_64 + 1) * mmsize
    jge    .mainloop
%endif
    RET
%endmacro
; Instantiate synth_filter_inner once per instruction set. The plain SSE
; version is only assembled for 32-bit x86; SSE2 uses XMM registers, AVX and
; FMA3 use YMM registers.
%if ARCH_X86_32
INIT_XMM sse
SYNTH_FILTER
%endif
INIT_XMM sse2
SYNTH_FILTER
INIT_YMM avx
SYNTH_FILTER
INIT_YMM fma3
SYNTH_FILTER
libavcodec/x86/synth_filter_init.c
0 → 100644
View file @
209f50e1
/*
* Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/synth_filter.h"
/**
 * Declare the external kernel ff_synth_filter_inner_<opt>() and define the
 * static C wrapper synth_filter_<opt>() around it.
 *
 * The wrapper has the signature stored in
 * SynthFilterContext.synth_filter_float. It:
 *   1. locates the current slot, synth_buf_ptr + *synth_buf_offset;
 *   2. runs imdct->imdct_half() from in[] into that slot;
 *   3. calls the kernel with the slot, synth_buf2, window, out, the current
 *      offset and scale;
 *   4. steps *synth_buf_offset back by 32, wrapping with "& 511".
 *
 * The last line of the definition must NOT end in a line continuation:
 * a trailing backslash there splices the following preprocessor directive
 * into the macro body.
 */
#define SYNTH_FILTER_FUNC(opt)                                                 \
void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
                                 const float window[512],                      \
                                 float out[32], intptr_t offset, float scale); \
static void synth_filter_##opt(FFTContext *imdct,                              \
                               float *synth_buf_ptr, int *synth_buf_offset,    \
                               float synth_buf2[32], const float window[512],  \
                               float out[32], const float in[32], float scale) \
{                                                                              \
    float *synth_buf = synth_buf_ptr + *synth_buf_offset;                      \
                                                                               \
    imdct->imdct_half(imdct, synth_buf, in);                                   \
                                                                               \
    ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window,                 \
                                out, *synth_buf_offset, scale);                \
                                                                               \
    *synth_buf_offset = (*synth_buf_offset - 32) & 511;                        \
}

#if HAVE_YASM
#if ARCH_X86_32
/* Plain SSE is only built for 32-bit x86. */
SYNTH_FILTER_FUNC(sse)
#endif
SYNTH_FILTER_FUNC(sse2)
SYNTH_FILTER_FUNC(avx)
SYNTH_FILTER_FUNC(fma3)
#endif /* HAVE_YASM */
/**
 * Install the best x86 synth filter wrapper the running CPU supports.
 *
 * Candidates are tried from most to least preferred:
 *   - FMA3, unless the CPU flags carry AV_CPU_FLAG_AVXSLOW;
 *   - AVX (EXTERNAL_AVX_FAST);
 *   - SSE2;
 *   - SSE, on 32-bit builds only.
 * If none matches, or HAVE_YASM is 0, s is not modified.
 *
 * @param s synth filter context whose function pointer may be replaced
 */
av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
{
#if HAVE_YASM
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_FMA3(flags) && !(flags & AV_CPU_FLAG_AVXSLOW))
        s->synth_filter_float = synth_filter_fma3;
    else if (EXTERNAL_AVX_FAST(flags))
        s->synth_filter_float = synth_filter_avx;
    else if (EXTERNAL_SSE2(flags))
        s->synth_filter_float = synth_filter_sse2;
#if ARCH_X86_32
    else if (EXTERNAL_SSE(flags))
        s->synth_filter_float = synth_filter_sse;
#endif
#endif /* HAVE_YASM */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment