Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
97cb9236
Commit
97cb9236
authored
Sep 11, 2012
by
Mans Rullgard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ac3: move ac3_downmix() from dsputil to ac3dsp
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
b901c30d
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
154 additions
and
153 deletions
+154
-153
ac3dec.c
libavcodec/ac3dec.c
+6
-34
ac3dec.h
libavcodec/ac3dec.h
+0
-3
ac3dsp.c
libavcodec/ac3dsp.c
+26
-0
ac3dsp.h
libavcodec/ac3dsp.h
+3
-0
dsputil.c
libavcodec/dsputil.c
+0
-4
dsputil.h
libavcodec/dsputil.h
+0
-1
ac3dsp_init.c
libavcodec/x86/ac3dsp_init.c
+119
-0
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-111
No files found.
libavcodec/ac3dec.c
View file @
97cb9236
...
@@ -620,34 +620,6 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
...
@@ -620,34 +620,6 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
}
}
}
}
/**
* Downmix the output to mono or stereo.
*/
void
ff_ac3_downmix_c
(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
)
{
int
i
,
j
;
float
v0
,
v1
;
if
(
out_ch
==
2
)
{
for
(
i
=
0
;
i
<
len
;
i
++
)
{
v0
=
v1
=
0
.
0
f
;
for
(
j
=
0
;
j
<
in_ch
;
j
++
)
{
v0
+=
samples
[
j
][
i
]
*
matrix
[
j
][
0
];
v1
+=
samples
[
j
][
i
]
*
matrix
[
j
][
1
];
}
samples
[
0
][
i
]
=
v0
;
samples
[
1
][
i
]
=
v1
;
}
}
else
if
(
out_ch
==
1
)
{
for
(
i
=
0
;
i
<
len
;
i
++
)
{
v0
=
0
.
0
f
;
for
(
j
=
0
;
j
<
in_ch
;
j
++
)
v0
+=
samples
[
j
][
i
]
*
matrix
[
j
][
0
];
samples
[
0
][
i
]
=
v0
;
}
}
}
/**
/**
* Upmix delay samples from stereo to original channel layout.
* Upmix delay samples from stereo to original channel layout.
*/
*/
...
@@ -1266,19 +1238,19 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
...
@@ -1266,19 +1238,19 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
do_imdct
(
s
,
s
->
channels
);
do_imdct
(
s
,
s
->
channels
);
if
(
downmix_output
)
{
if
(
downmix_output
)
{
s
->
dsp
.
ac3_
downmix
(
s
->
output
,
s
->
downmix_coeffs
,
s
->
ac3dsp
.
downmix
(
s
->
output
,
s
->
downmix_coeffs
,
s
->
out_channels
,
s
->
fbw_channels
,
256
);
s
->
out_channels
,
s
->
fbw_channels
,
256
);
}
}
}
else
{
}
else
{
if
(
downmix_output
)
{
if
(
downmix_output
)
{
s
->
dsp
.
ac3_
downmix
(
s
->
transform_coeffs
+
1
,
s
->
downmix_coeffs
,
s
->
ac3dsp
.
downmix
(
s
->
transform_coeffs
+
1
,
s
->
downmix_coeffs
,
s
->
out_channels
,
s
->
fbw_channels
,
256
);
s
->
out_channels
,
s
->
fbw_channels
,
256
);
}
}
if
(
downmix_output
&&
!
s
->
downmixed
)
{
if
(
downmix_output
&&
!
s
->
downmixed
)
{
s
->
downmixed
=
1
;
s
->
downmixed
=
1
;
s
->
dsp
.
ac3_
downmix
(
s
->
delay
,
s
->
downmix_coeffs
,
s
->
out_channels
,
s
->
ac3dsp
.
downmix
(
s
->
delay
,
s
->
downmix_coeffs
,
s
->
out_channels
,
s
->
fbw_channels
,
128
);
s
->
fbw_channels
,
128
);
}
}
do_imdct
(
s
,
s
->
out_channels
);
do_imdct
(
s
,
s
->
out_channels
);
...
...
libavcodec/ac3dec.h
View file @
97cb9236
...
@@ -221,9 +221,6 @@ int ff_eac3_parse_header(AC3DecodeContext *s);
...
@@ -221,9 +221,6 @@ int ff_eac3_parse_header(AC3DecodeContext *s);
*/
*/
void
ff_eac3_decode_transform_coeffs_aht_ch
(
AC3DecodeContext
*
s
,
int
ch
);
void
ff_eac3_decode_transform_coeffs_aht_ch
(
AC3DecodeContext
*
s
,
int
ch
);
void
ff_ac3_downmix_c
(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
);
/**
/**
* Apply spectral extension to each channel by copying lower frequency
* Apply spectral extension to each channel by copying lower frequency
* coefficients to higher frequency bins and applying side information to
* coefficients to higher frequency bins and applying side information to
...
...
libavcodec/ac3dsp.c
View file @
97cb9236
...
@@ -171,6 +171,31 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
...
@@ -171,6 +171,31 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
}
}
}
}
static
void
ac3_downmix_c
(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
)
{
int
i
,
j
;
float
v0
,
v1
;
if
(
out_ch
==
2
)
{
for
(
i
=
0
;
i
<
len
;
i
++
)
{
v0
=
v1
=
0
.
0
f
;
for
(
j
=
0
;
j
<
in_ch
;
j
++
)
{
v0
+=
samples
[
j
][
i
]
*
matrix
[
j
][
0
];
v1
+=
samples
[
j
][
i
]
*
matrix
[
j
][
1
];
}
samples
[
0
][
i
]
=
v0
;
samples
[
1
][
i
]
=
v1
;
}
}
else
if
(
out_ch
==
1
)
{
for
(
i
=
0
;
i
<
len
;
i
++
)
{
v0
=
0
.
0
f
;
for
(
j
=
0
;
j
<
in_ch
;
j
++
)
v0
+=
samples
[
j
][
i
]
*
matrix
[
j
][
0
];
samples
[
0
][
i
]
=
v0
;
}
}
}
av_cold
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
,
int
bit_exact
)
av_cold
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
,
int
bit_exact
)
{
{
c
->
ac3_exponent_min
=
ac3_exponent_min_c
;
c
->
ac3_exponent_min
=
ac3_exponent_min_c
;
...
@@ -182,6 +207,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
...
@@ -182,6 +207,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
c
->
update_bap_counts
=
ac3_update_bap_counts_c
;
c
->
update_bap_counts
=
ac3_update_bap_counts_c
;
c
->
compute_mantissa_size
=
ac3_compute_mantissa_size_c
;
c
->
compute_mantissa_size
=
ac3_compute_mantissa_size_c
;
c
->
extract_exponents
=
ac3_extract_exponents_c
;
c
->
extract_exponents
=
ac3_extract_exponents_c
;
c
->
downmix
=
ac3_downmix_c
;
if
(
ARCH_ARM
)
if
(
ARCH_ARM
)
ff_ac3dsp_init_arm
(
c
,
bit_exact
);
ff_ac3dsp_init_arm
(
c
,
bit_exact
);
...
...
libavcodec/ac3dsp.h
View file @
97cb9236
...
@@ -125,6 +125,9 @@ typedef struct AC3DSPContext {
...
@@ -125,6 +125,9 @@ typedef struct AC3DSPContext {
int
(
*
compute_mantissa_size
)(
uint16_t
mant_cnt
[
6
][
16
]);
int
(
*
compute_mantissa_size
)(
uint16_t
mant_cnt
[
6
][
16
]);
void
(
*
extract_exponents
)(
uint8_t
*
exp
,
int32_t
*
coef
,
int
nb_coefs
);
void
(
*
extract_exponents
)(
uint8_t
*
exp
,
int32_t
*
coef
,
int
nb_coefs
);
void
(
*
downmix
)(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
);
}
AC3DSPContext
;
}
AC3DSPContext
;
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
,
int
bit_exact
);
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
,
int
bit_exact
);
...
...
libavcodec/dsputil.c
View file @
97cb9236
...
@@ -36,7 +36,6 @@
...
@@ -36,7 +36,6 @@
#include "mathops.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "mpegvideo.h"
#include "config.h"
#include "config.h"
#include "ac3dec.h"
#include "vorbis.h"
#include "vorbis.h"
uint8_t
ff_cropTbl
[
256
+
2
*
MAX_NEG_CROP
]
=
{
0
,
};
uint8_t
ff_cropTbl
[
256
+
2
*
MAX_NEG_CROP
]
=
{
0
,
};
...
@@ -2862,9 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
...
@@ -2862,9 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
#if CONFIG_VORBIS_DECODER
#if CONFIG_VORBIS_DECODER
c
->
vorbis_inverse_coupling
=
ff_vorbis_inverse_coupling
;
c
->
vorbis_inverse_coupling
=
ff_vorbis_inverse_coupling
;
#endif
#if CONFIG_AC3_DECODER
c
->
ac3_downmix
=
ff_ac3_downmix_c
;
#endif
#endif
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_add
=
vector_fmul_add_c
;
c
->
vector_fmul_add
=
vector_fmul_add_c
;
...
...
libavcodec/dsputil.h
View file @
97cb9236
...
@@ -374,7 +374,6 @@ typedef struct DSPContext {
...
@@ -374,7 +374,6 @@ typedef struct DSPContext {
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void
(
*
vorbis_inverse_coupling
)(
float
*
mag
,
float
*
ang
,
int
blocksize
);
void
(
*
vorbis_inverse_coupling
)(
float
*
mag
,
float
*
ang
,
int
blocksize
);
void
(
*
ac3_downmix
)(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
);
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
...
...
libavcodec/x86/ac3dsp_init.c
View file @
97cb9236
...
@@ -19,9 +19,11 @@
...
@@ -19,9 +19,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
*/
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/x86/cpu.h"
#include "dsputil_mmx.h"
#include "dsputil_mmx.h"
#include "libavcodec/ac3.h"
#include "libavcodec/ac3dsp.h"
#include "libavcodec/ac3dsp.h"
extern
void
ff_ac3_exponent_min_mmx
(
uint8_t
*
exp
,
int
num_reuse_blocks
,
int
nb_coefs
);
extern
void
ff_ac3_exponent_min_mmx
(
uint8_t
*
exp
,
int
num_reuse_blocks
,
int
nb_coefs
);
...
@@ -49,6 +51,119 @@ extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_c
...
@@ -49,6 +51,119 @@ extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_c
extern
void
ff_ac3_extract_exponents_sse2
(
uint8_t
*
exp
,
int32_t
*
coef
,
int
nb_coefs
);
extern
void
ff_ac3_extract_exponents_sse2
(
uint8_t
*
exp
,
int32_t
*
coef
,
int
nb_coefs
);
extern
void
ff_ac3_extract_exponents_ssse3
(
uint8_t
*
exp
,
int32_t
*
coef
,
int
nb_coefs
);
extern
void
ff_ac3_extract_exponents_ssse3
(
uint8_t
*
exp
,
int32_t
*
coef
,
int
nb_coefs
);
#if HAVE_SSE_INLINE
#define IF1(x) x
#define IF0(x)
#define MIX5(mono, stereo) \
__asm__ volatile ( \
"movss 0(%2), %%xmm5 \n" \
"movss 8(%2), %%xmm6 \n" \
"movss 24(%2), %%xmm7 \n" \
"shufps $0, %%xmm5, %%xmm5 \n" \
"shufps $0, %%xmm6, %%xmm6 \n" \
"shufps $0, %%xmm7, %%xmm7 \n" \
"1: \n" \
"movaps (%0, %1), %%xmm0 \n" \
"movaps 0x400(%0, %1), %%xmm1 \n" \
"movaps 0x800(%0, %1), %%xmm2 \n" \
"movaps 0xc00(%0, %1), %%xmm3 \n" \
"movaps 0x1000(%0, %1), %%xmm4 \n" \
"mulps %%xmm5, %%xmm0 \n" \
"mulps %%xmm6, %%xmm1 \n" \
"mulps %%xmm5, %%xmm2 \n" \
"mulps %%xmm7, %%xmm3 \n" \
"mulps %%xmm7, %%xmm4 \n" \
stereo("addps %%xmm1, %%xmm0 \n") \
"addps %%xmm1, %%xmm2 \n" \
"addps %%xmm3, %%xmm0 \n" \
"addps %%xmm4, %%xmm2 \n" \
mono("addps %%xmm2, %%xmm0 \n") \
"movaps %%xmm0, (%0, %1) \n" \
stereo("movaps %%xmm2, 0x400(%0, %1) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i) \
: "r"(samples[0] + len), "r"(matrix) \
: XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", \
"%xmm4", "%xmm5", "%xmm6", "%xmm7",) \
"memory" \
);
#define MIX_MISC(stereo) \
__asm__ volatile ( \
"1: \n" \
"movaps (%3, %0), %%xmm0 \n" \
stereo("movaps %%xmm0, %%xmm1 \n") \
"mulps %%xmm4, %%xmm0 \n" \
stereo("mulps %%xmm5, %%xmm1 \n") \
"lea 1024(%3, %0), %1 \n" \
"mov %5, %2 \n" \
"2: \n" \
"movaps (%1), %%xmm2 \n" \
stereo("movaps %%xmm2, %%xmm3 \n") \
"mulps (%4, %2), %%xmm2 \n" \
stereo("mulps 16(%4, %2), %%xmm3 \n") \
"addps %%xmm2, %%xmm0 \n" \
stereo("addps %%xmm3, %%xmm1 \n") \
"add $1024, %1 \n" \
"add $32, %2 \n" \
"jl 2b \n" \
"movaps %%xmm0, (%3, %0) \n" \
stereo("movaps %%xmm1, 1024(%3, %0) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i), "=&r"(j), "=&r"(k) \
: "r"(samples[0] + len), "r"(matrix_simd + in_ch), \
"g"((intptr_t) - 32 * (in_ch - 1)) \
: "memory" \
);
static
void
ac3_downmix_sse
(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
)
{
int
(
*
matrix_cmp
)[
2
]
=
(
int
(
*
)[
2
])
matrix
;
intptr_t
i
,
j
,
k
;
i
=
-
len
*
sizeof
(
float
);
if
(
in_ch
==
5
&&
out_ch
==
2
&&
!
(
matrix_cmp
[
0
][
1
]
|
matrix_cmp
[
2
][
0
]
|
matrix_cmp
[
3
][
1
]
|
matrix_cmp
[
4
][
0
]
|
(
matrix_cmp
[
1
][
0
]
^
matrix_cmp
[
1
][
1
])
|
(
matrix_cmp
[
0
][
0
]
^
matrix_cmp
[
2
][
1
])))
{
MIX5
(
IF0
,
IF1
);
}
else
if
(
in_ch
==
5
&&
out_ch
==
1
&&
matrix_cmp
[
0
][
0
]
==
matrix_cmp
[
2
][
0
]
&&
matrix_cmp
[
3
][
0
]
==
matrix_cmp
[
4
][
0
])
{
MIX5
(
IF1
,
IF0
);
}
else
{
DECLARE_ALIGNED
(
16
,
float
,
matrix_simd
)[
AC3_MAX_CHANNELS
][
2
][
4
];
j
=
2
*
in_ch
*
sizeof
(
float
);
__asm__
volatile
(
"1:
\n
"
"sub $8, %0
\n
"
"movss (%2, %0), %%xmm4
\n
"
"movss 4(%2, %0), %%xmm5
\n
"
"shufps $0, %%xmm4, %%xmm4
\n
"
"shufps $0, %%xmm5, %%xmm5
\n
"
"movaps %%xmm4, (%1, %0, 4)
\n
"
"movaps %%xmm5, 16(%1, %0, 4)
\n
"
"jg 1b
\n
"
:
"+&r"
(
j
)
:
"r"
(
matrix_simd
),
"r"
(
matrix
)
:
"memory"
);
if
(
out_ch
==
2
)
{
MIX_MISC
(
IF1
);
}
else
{
MIX_MISC
(
IF0
);
}
}
}
#endif
/* HAVE_SSE_INLINE */
av_cold
void
ff_ac3dsp_init_x86
(
AC3DSPContext
*
c
,
int
bit_exact
)
av_cold
void
ff_ac3dsp_init_x86
(
AC3DSPContext
*
c
,
int
bit_exact
)
{
{
int
mm_flags
=
av_get_cpu_flags
();
int
mm_flags
=
av_get_cpu_flags
();
...
@@ -89,4 +204,8 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
...
@@ -89,4 +204,8 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c
->
extract_exponents
=
ff_ac3_extract_exponents_ssse3
;
c
->
extract_exponents
=
ff_ac3_extract_exponents_ssse3
;
}
}
}
}
if
(
INLINE_SSE
(
mm_flags
))
{
c
->
downmix
=
ac3_downmix_sse
;
}
}
}
libavcodec/x86/dsputil_mmx.c
View file @
97cb9236
...
@@ -28,7 +28,6 @@
...
@@ -28,7 +28,6 @@
#include "libavcodec/h264dsp.h"
#include "libavcodec/h264dsp.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/simple_idct.h"
#include "libavcodec/simple_idct.h"
#include "libavcodec/ac3dec.h"
#include "dsputil_mmx.h"
#include "dsputil_mmx.h"
#include "idct_xvid.h"
#include "idct_xvid.h"
...
@@ -2248,115 +2247,6 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
...
@@ -2248,115 +2247,6 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
}
}
}
}
#define IF1(x) x
#define IF0(x)
#define MIX5(mono, stereo) \
__asm__ volatile ( \
"movss 0(%2), %%xmm5 \n" \
"movss 8(%2), %%xmm6 \n" \
"movss 24(%2), %%xmm7 \n" \
"shufps $0, %%xmm5, %%xmm5 \n" \
"shufps $0, %%xmm6, %%xmm6 \n" \
"shufps $0, %%xmm7, %%xmm7 \n" \
"1: \n" \
"movaps (%0, %1), %%xmm0 \n" \
"movaps 0x400(%0, %1), %%xmm1 \n" \
"movaps 0x800(%0, %1), %%xmm2 \n" \
"movaps 0xc00(%0, %1), %%xmm3 \n" \
"movaps 0x1000(%0, %1), %%xmm4 \n" \
"mulps %%xmm5, %%xmm0 \n" \
"mulps %%xmm6, %%xmm1 \n" \
"mulps %%xmm5, %%xmm2 \n" \
"mulps %%xmm7, %%xmm3 \n" \
"mulps %%xmm7, %%xmm4 \n" \
stereo("addps %%xmm1, %%xmm0 \n") \
"addps %%xmm1, %%xmm2 \n" \
"addps %%xmm3, %%xmm0 \n" \
"addps %%xmm4, %%xmm2 \n" \
mono("addps %%xmm2, %%xmm0 \n") \
"movaps %%xmm0, (%0, %1) \n" \
stereo("movaps %%xmm2, 0x400(%0, %1) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i) \
: "r"(samples[0] + len), "r"(matrix) \
: XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", \
"%xmm4", "%xmm5", "%xmm6", "%xmm7",) \
"memory" \
);
#define MIX_MISC(stereo) \
__asm__ volatile ( \
"1: \n" \
"movaps (%3, %0), %%xmm0 \n" \
stereo("movaps %%xmm0, %%xmm1 \n") \
"mulps %%xmm4, %%xmm0 \n" \
stereo("mulps %%xmm5, %%xmm1 \n") \
"lea 1024(%3, %0), %1 \n" \
"mov %5, %2 \n" \
"2: \n" \
"movaps (%1), %%xmm2 \n" \
stereo("movaps %%xmm2, %%xmm3 \n") \
"mulps (%4, %2), %%xmm2 \n" \
stereo("mulps 16(%4, %2), %%xmm3 \n") \
"addps %%xmm2, %%xmm0 \n" \
stereo("addps %%xmm3, %%xmm1 \n") \
"add $1024, %1 \n" \
"add $32, %2 \n" \
"jl 2b \n" \
"movaps %%xmm0, (%3, %0) \n" \
stereo("movaps %%xmm1, 1024(%3, %0) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i), "=&r"(j), "=&r"(k) \
: "r"(samples[0] + len), "r"(matrix_simd + in_ch), \
"g"((intptr_t) - 32 * (in_ch - 1)) \
: "memory" \
);
static
void
ac3_downmix_sse
(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
)
{
int
(
*
matrix_cmp
)[
2
]
=
(
int
(
*
)[
2
])
matrix
;
intptr_t
i
,
j
,
k
;
i
=
-
len
*
sizeof
(
float
);
if
(
in_ch
==
5
&&
out_ch
==
2
&&
!
(
matrix_cmp
[
0
][
1
]
|
matrix_cmp
[
2
][
0
]
|
matrix_cmp
[
3
][
1
]
|
matrix_cmp
[
4
][
0
]
|
(
matrix_cmp
[
1
][
0
]
^
matrix_cmp
[
1
][
1
])
|
(
matrix_cmp
[
0
][
0
]
^
matrix_cmp
[
2
][
1
])))
{
MIX5
(
IF0
,
IF1
);
}
else
if
(
in_ch
==
5
&&
out_ch
==
1
&&
matrix_cmp
[
0
][
0
]
==
matrix_cmp
[
2
][
0
]
&&
matrix_cmp
[
3
][
0
]
==
matrix_cmp
[
4
][
0
])
{
MIX5
(
IF1
,
IF0
);
}
else
{
DECLARE_ALIGNED
(
16
,
float
,
matrix_simd
)[
AC3_MAX_CHANNELS
][
2
][
4
];
j
=
2
*
in_ch
*
sizeof
(
float
);
__asm__
volatile
(
"1:
\n
"
"sub $8, %0
\n
"
"movss (%2, %0), %%xmm4
\n
"
"movss 4(%2, %0), %%xmm5
\n
"
"shufps $0, %%xmm4, %%xmm4
\n
"
"shufps $0, %%xmm5, %%xmm5
\n
"
"movaps %%xmm4, (%1, %0, 4)
\n
"
"movaps %%xmm5, 16(%1, %0, 4)
\n
"
"jg 1b
\n
"
:
"+&r"
(
j
)
:
"r"
(
matrix_simd
),
"r"
(
matrix
)
:
"memory"
);
if
(
out_ch
==
2
)
{
MIX_MISC
(
IF1
);
}
else
{
MIX_MISC
(
IF0
);
}
}
}
#if HAVE_6REGS
#if HAVE_6REGS
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
const
float
*
src1
,
const
float
*
win
,
...
@@ -2849,7 +2739,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
...
@@ -2849,7 +2739,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
}
}
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_sse
;
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_sse
;
c
->
ac3_downmix
=
ac3_downmix_sse
;
#if HAVE_6REGS
#if HAVE_6REGS
c
->
vector_fmul_window
=
vector_fmul_window_sse
;
c
->
vector_fmul_window
=
vector_fmul_window_sse
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment