Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
d74a8cb7
Commit
d74a8cb7
authored
Feb 21, 2015
by
Anton Khirnov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: drop unused functions
parent
ee964145
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
0 additions
and
760 deletions
+0
-760
Makefile
libavcodec/arm/Makefile
+0
-1
fmtconvert_init_arm.c
libavcodec/arm/fmtconvert_init_arm.c
+0
-14
fmtconvert_neon.S
libavcodec/arm/fmtconvert_neon.S
+0
-0
fmtconvert_vfp_armv6.S
libavcodec/arm/fmtconvert_vfp_armv6.S
+0
-78
fmtconvert.c
libavcodec/fmtconvert.c
+0
-49
fmtconvert.h
libavcodec/fmtconvert.h
+0
-48
fmtconvert_altivec.c
libavcodec/ppc/fmtconvert_altivec.c
+0
-111
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+0
-362
fmtconvert_init.c
libavcodec/x86/fmtconvert_init.c
+0
-97
No files found.
libavcodec/arm/Makefile
View file @
d74a8cb7
...
@@ -91,7 +91,6 @@ VFP-OBJS += arm/fmtconvert_vfp.o
...
@@ -91,7 +91,6 @@ VFP-OBJS += arm/fmtconvert_vfp.o
# subsystems
# subsystems
VFP-OBJS-$(CONFIG_FFT)
+=
arm/fft_vfp.o
VFP-OBJS-$(CONFIG_FFT)
+=
arm/fft_vfp.o
VFP-OBJS-$(CONFIG_MDCT)
+=
arm/mdct_vfp.o
VFP-OBJS-$(CONFIG_MDCT)
+=
arm/mdct_vfp.o
VFP-OBJS-$(HAVE_ARMV6)
+=
arm/fmtconvert_vfp_armv6.o
# decoders/encoders
# decoders/encoders
VFP-OBJS-$(CONFIG_DCA_DECODER)
+=
arm/dcadsp_vfp.o
\
VFP-OBJS-$(CONFIG_DCA_DECODER)
+=
arm/dcadsp_vfp.o
\
...
...
libavcodec/arm/fmtconvert_init_arm.c
View file @
d74a8cb7
...
@@ -34,11 +34,6 @@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst,
...
@@ -34,11 +34,6 @@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst,
const
int32_t
*
src
,
const
float
*
mul
,
const
int32_t
*
src
,
const
float
*
mul
,
int
len
);
int
len
);
void
ff_float_to_int16_neon
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_interleave_neon
(
int16_t
*
,
const
float
**
,
long
,
int
);
void
ff_float_to_int16_vfp
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
av_cold
void
ff_fmt_convert_init_arm
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
av_cold
void
ff_fmt_convert_init_arm
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
{
{
int
cpu_flags
=
av_get_cpu_flags
();
int
cpu_flags
=
av_get_cpu_flags
();
...
@@ -48,18 +43,9 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
...
@@ -48,18 +43,9 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_vfp
;
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_vfp
;
c
->
int32_to_float_fmul_array8
=
ff_int32_to_float_fmul_array8_vfp
;
c
->
int32_to_float_fmul_array8
=
ff_int32_to_float_fmul_array8_vfp
;
}
}
if
(
have_armv6
(
cpu_flags
))
{
c
->
float_to_int16
=
ff_float_to_int16_vfp
;
}
}
}
if
(
have_neon
(
cpu_flags
))
{
if
(
have_neon
(
cpu_flags
))
{
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_neon
;
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_neon
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
ff_float_to_int16_neon
;
c
->
float_to_int16_interleave
=
ff_float_to_int16_interleave_neon
;
}
}
}
}
}
libavcodec/arm/fmtconvert_neon.S
View file @
d74a8cb7
This diff is collapsed.
Click to expand it.
libavcodec/arm/fmtconvert_vfp_armv6.S
deleted
100644 → 0
View file @
ee964145
/*
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/arm/asm.S"
/**
 * ARM VFP optimized float to int16 conversion.
 * Assumes that len is positive and a multiple of 8, that the destination
 * buffer is at least 4-byte aligned (8-byte alignment is better for
 * performance), and that the byte order is little-endian.
 *
 * Register use, following the argument order of the prototype below:
 * r0 = dst (advanced by 16 bytes per iteration), r1 = src (advanced by
 * 32 bytes per load), r2 = remaining element count.
 *
 * The loop is software-pipelined: the first 8 floats are loaded and
 * converted before the loop, and inside the loop the load/convert of the
 * next 8 values is interleaved with saturating and packing the current 8.
 */
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
function ff_float_to_int16_vfp, export=1
/* Save the core registers used as scratch (r4-r8) and the return address. */
push {r4-r8,lr}
/* Save d8-d11; they overlay s16-s23, which are used as the load buffer. */
vpush {d8-d11}
/* Prologue: load the first 8 floats from src (r1, with writeback) ... */
vldmia r1!, {s16-s23}
/* ... and convert them to signed 32-bit integers in s0-s7. */
vcvt.s32.f32 s0, s16
vcvt.s32.f32 s1, s17
vcvt.s32.f32 s2, s18
vcvt.s32.f32 s3, s19
vcvt.s32.f32 s4, s20
vcvt.s32.f32 s5, s21
vcvt.s32.f32 s6, s22
vcvt.s32.f32 s7, s23
1:
/* Count down 8 elements. No later instruction in the loop body carries an
   S suffix, so the flags set here drive every "gt" condition below,
   including the final branch. */
subs r2, r2, #8
/* Move the 8 converted integers into core registers:
   r3-r8 receive elements 0-5, ip/lr receive elements 6-7. */
vmov r3, r4, s0, s1
vmov r5, r6, s2, s3
vmov r7, r8, s4, s5
vmov ip, lr, s6, s7
/* Only if more input remains: fetch the next 8 floats. */
it gt
vldmiagt r1!, {s16-s23}
/* Saturate elements 0-3 to the signed 16-bit range. */
ssat r4, #16, r4
ssat r3, #16, r3
ssat r6, #16, r6
ssat r5, #16, r5
/* Pack pairs into one word each: even element in the low halfword,
   odd element in the high halfword. */
pkhbt r3, r3, r4, lsl #16
pkhbt r4, r5, r6, lsl #16
/* Only if more input remains: convert the next batch into s0-s7
   (two IT blocks because one IT block covers at most four instructions). */
itttt gt
vcvtgt.s32.f32 s0, s16
vcvtgt.s32.f32 s1, s17
vcvtgt.s32.f32 s2, s18
vcvtgt.s32.f32 s3, s19
itttt gt
vcvtgt.s32.f32 s4, s20
vcvtgt.s32.f32 s5, s21
vcvtgt.s32.f32 s6, s22
vcvtgt.s32.f32 s7, s23
/* Saturate and pack elements 4-7 the same way. */
ssat r8, #16, r8
ssat r7, #16, r7
ssat lr, #16, lr
ssat ip, #16, ip
pkhbt r5, r7, r8, lsl #16
pkhbt r6, ip, lr, lsl #16
/* Store the 4 packed words (8 int16 values) to dst (r0, with writeback). */
stmia r0!, {r3-r6}
/* Loop while the remaining count is still positive. */
bgt 1b
/* Restore the saved registers; popping into pc returns to the caller. */
vpop {d8-d11}
pop {r4-r8,pc}
endfunc
libavcodec/fmtconvert.c
View file @
d74a8cb7
...
@@ -41,59 +41,10 @@ static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
...
@@ -41,59 +41,10 @@ static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
c
->
int32_to_float_fmul_scalar
(
&
dst
[
i
],
&
src
[
i
],
*
mul
++
,
8
);
c
->
int32_to_float_fmul_scalar
(
&
dst
[
i
],
&
src
[
i
],
*
mul
++
,
8
);
}
}
/**
 * Convert a single float to an integer with lrintf() and clip the result
 * with av_clip_int16().
 *
 * @param src pointer to the float to convert
 * @return the clipped integer value
 */
static av_always_inline int float_to_int16_one(const float *src)
{
    const long rounded = lrintf(*src);

    return av_clip_int16(rounded);
}
/**
 * Convert an array of floats to int16_t, one element at a time, using
 * float_to_int16_one().
 *
 * @param dst destination array; must have room for len elements
 * @param src source array of len floats
 * @param len number of elements to convert; nothing is written if len <= 0
 */
static void float_to_int16_c(int16_t *dst, const float *src, long len)
{
    /* The index has the same type as len so that it cannot overflow before
     * reaching the bound on platforms where long is wider than int. */
    long i;

    for (i = 0; i < len; i++)
        dst[i] = float_to_int16_one(src + i);
}
/**
 * Convert several planar float arrays to one interleaved int16_t array.
 *
 * Sample i of channel c is written to dst[i * channels + c]; each value is
 * converted with float_to_int16_one().
 *
 * @param dst      destination array; must have room for len * channels
 *                 elements
 * @param src      array of channels pointers, each to len floats
 * @param len      number of samples per channel
 * @param channels number of channels; nothing is written if it is <= 0
 */
static void float_to_int16_interleave_c(int16_t *dst, const float **src,
                                        long len, int channels)
{
    /* Sample and output indices use the type of len: with int indices the
     * output position overflowed once len * channels exceeded INT_MAX. */
    long i, j;
    int c;

    if (channels == 2) {
        /* Stereo fast path: both channels are handled in a single pass. */
        for (i = 0; i < len; i++) {
            dst[2 * i]     = float_to_int16_one(src[0] + i);
            dst[2 * i + 1] = float_to_int16_one(src[1] + i);
        }
    } else {
        /* Generic path: one strided pass over dst per channel. */
        for (c = 0; c < channels; c++)
            for (i = 0, j = c; i < len; i++, j += channels)
                dst[j] = float_to_int16_one(src[c] + i);
    }
}
/**
 * Interleave several planar float arrays into one float array.
 *
 * Sample i of channel c is copied to dst[i * channels + c].
 *
 * @param dst      destination array; must have room for len * channels
 *                 elements
 * @param src      array of channels pointers, each to len floats
 * @param len      number of samples per channel
 * @param channels number of channels; nothing is written if it is <= 0
 */
void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
                           int channels)
{
    /* Positions are tracked as size_t: an int output index overflowed once
     * len * channels exceeded INT_MAX, and 2 * i wrapped in 32-bit unsigned
     * arithmetic for very large len. */
    size_t i, j;
    int c;

    if (channels == 2) {
        /* Stereo fast path: both channels are handled in a single pass. */
        for (i = 0; i < len; i++) {
            dst[2 * i]     = src[0][i];
            dst[2 * i + 1] = src[1][i];
        }
    } else if (channels == 1 && len < INT_MAX / sizeof(float)) {
        /* A single channel is a plain copy; the bound keeps the byte count
         * from overflowing. Larger lengths use the generic loop below. */
        memcpy(dst, src[0], len * sizeof(float));
    } else {
        /* Generic path: one strided pass over dst per channel. */
        for (c = 0; c < channels; c++)
            for (i = 0, j = c; i < len; i++, j += channels)
                dst[j] = src[c][i];
    }
}
av_cold
void
ff_fmt_convert_init
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
av_cold
void
ff_fmt_convert_init
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
{
{
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_c
;
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_c
;
c
->
int32_to_float_fmul_array8
=
int32_to_float_fmul_array8_c
;
c
->
int32_to_float_fmul_array8
=
int32_to_float_fmul_array8_c
;
c
->
float_to_int16
=
float_to_int16_c
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_c
;
c
->
float_interleave
=
ff_float_interleave_c
;
if
(
ARCH_ARM
)
ff_fmt_convert_init_arm
(
c
,
avctx
);
if
(
ARCH_ARM
)
ff_fmt_convert_init_arm
(
c
,
avctx
);
if
(
ARCH_PPC
)
ff_fmt_convert_init_ppc
(
c
,
avctx
);
if
(
ARCH_PPC
)
ff_fmt_convert_init_ppc
(
c
,
avctx
);
...
...
libavcodec/fmtconvert.h
View file @
d74a8cb7
...
@@ -54,56 +54,8 @@ typedef struct FmtConvertContext {
...
@@ -54,56 +54,8 @@ typedef struct FmtConvertContext {
float
*
dst
,
const
int32_t
*
src
,
float
*
dst
,
const
int32_t
*
src
,
const
float
*
mul
,
int
len
);
const
float
*
mul
,
int
len
);
/**
* Convert an array of float to an array of int16_t.
*
 * Convert floats in the range [-32768.0,32767.0] to ints
 * without rescaling.
*
* @param dst destination array of int16_t.
* constraints: 16-byte aligned
* @param src source array of float.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
*/
void
(
*
float_to_int16
)(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
/**
* Convert multiple arrays of float to an interleaved array of int16_t.
*
 * Convert floats in the range [-32768.0,32767.0] to ints
 * without rescaling.
*
* @param dst destination array of interleaved int16_t.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/
void
(
*
float_to_int16_interleave
)(
int16_t
*
dst
,
const
float
**
src
,
long
len
,
int
channels
);
/**
* Convert multiple arrays of float to an array of interleaved float.
*
* @param dst destination array of interleaved float.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/
void
(
*
float_interleave
)(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
,
int
channels
);
}
FmtConvertContext
;
}
FmtConvertContext
;
void
ff_float_interleave_c
(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
,
int
channels
);
void
ff_fmt_convert_init
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_fmt_convert_init
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_fmt_convert_init_arm
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_fmt_convert_init_arm
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
);
...
...
libavcodec/ppc/fmtconvert_altivec.c
View file @
d74a8cb7
...
@@ -52,113 +52,6 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src,
...
@@ -52,113 +52,6 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src,
}
}
}
}
static
vector
signed
short
float_to_int16_one_altivec
(
const
float
*
src
)
{
vector
float
s0
=
vec_ld
(
0
,
src
);
vector
float
s1
=
vec_ld
(
16
,
src
);
vector
signed
int
t0
=
vec_cts
(
s0
,
0
);
vector
signed
int
t1
=
vec_cts
(
s1
,
0
);
return
vec_packs
(
t0
,
t1
);
}
static
void
float_to_int16_altivec
(
int16_t
*
dst
,
const
float
*
src
,
long
len
)
{
int
i
;
vector
signed
short
d0
,
d1
,
d
;
vector
unsigned
char
align
;
if
(((
long
)
dst
)
&
15
)
{
//FIXME
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d0
=
vec_ld
(
0
,
dst
+
i
);
d
=
float_to_int16_one_altivec
(
src
+
i
);
d1
=
vec_ld
(
15
,
dst
+
i
);
d1
=
vec_perm
(
d1
,
d0
,
vec_lvsl
(
0
,
dst
+
i
));
align
=
vec_lvsr
(
0
,
dst
+
i
);
d0
=
vec_perm
(
d1
,
d
,
align
);
d1
=
vec_perm
(
d
,
d1
,
align
);
vec_st
(
d0
,
0
,
dst
+
i
);
vec_st
(
d1
,
15
,
dst
+
i
);
}
}
else
{
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d
=
float_to_int16_one_altivec
(
src
+
i
);
vec_st
(
d
,
0
,
dst
+
i
);
}
}
}
/* Store element elem of vector v at dst with vec_ste(), then advance dst by
 * inc elements. */
#define VSTE_INC(dst, v, elem, inc) do {                \
        vector signed short s = vec_splat(v, elem);     \
        vec_ste(s, 0, dst);                             \
        dst += inc;                                     \
    } while (0)

/**
 * Convert floats to int16_t in groups of 8 and write the results to dst
 * with a spacing of stride elements between consecutive outputs.
 *
 * Only complete groups are processed (the loop runs while i < len - 7).
 *
 * @param dst    first output position
 * @param src    source array of float
 * @param len    number of elements to convert
 * @param stride distance, in int16_t elements, between consecutive outputs
 */
static void float_to_int16_stride_altivec(int16_t *dst, const float *src,
                                          long len, int stride)
{
    int pos;

    for (pos = 0; pos < len - 7; pos += 8) {
        vector signed short conv = float_to_int16_one_altivec(src + pos);

        /* The element index is passed as a literal in each invocation,
         * so the eight stores are written out explicitly. */
        VSTE_INC(dst, conv, 0, stride);
        VSTE_INC(dst, conv, 1, stride);
        VSTE_INC(dst, conv, 2, stride);
        VSTE_INC(dst, conv, 3, stride);
        VSTE_INC(dst, conv, 4, stride);
        VSTE_INC(dst, conv, 5, stride);
        VSTE_INC(dst, conv, 6, stride);
        VSTE_INC(dst, conv, 7, stride);
    }
}
static
void
float_to_int16_interleave_altivec
(
int16_t
*
dst
,
const
float
**
src
,
long
len
,
int
channels
)
{
int
i
;
vector
signed
short
d0
,
d1
,
d2
,
c0
,
c1
,
t0
,
t1
;
vector
unsigned
char
align
;
if
(
channels
==
1
)
float_to_int16_altivec
(
dst
,
src
[
0
],
len
);
else
{
if
(
channels
==
2
)
{
if
(((
long
)
dst
)
&
15
)
{
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d0
=
vec_ld
(
0
,
dst
+
i
);
t0
=
float_to_int16_one_altivec
(
src
[
0
]
+
i
);
d1
=
vec_ld
(
31
,
dst
+
i
);
t1
=
float_to_int16_one_altivec
(
src
[
1
]
+
i
);
c0
=
vec_mergeh
(
t0
,
t1
);
c1
=
vec_mergel
(
t0
,
t1
);
d2
=
vec_perm
(
d1
,
d0
,
vec_lvsl
(
0
,
dst
+
i
));
align
=
vec_lvsr
(
0
,
dst
+
i
);
d0
=
vec_perm
(
d2
,
c0
,
align
);
d1
=
vec_perm
(
c0
,
c1
,
align
);
vec_st
(
d0
,
0
,
dst
+
i
);
d0
=
vec_perm
(
c1
,
d2
,
align
);
vec_st
(
d1
,
15
,
dst
+
i
);
vec_st
(
d0
,
31
,
dst
+
i
);
dst
+=
8
;
}
}
else
{
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
t0
=
float_to_int16_one_altivec
(
src
[
0
]
+
i
);
t1
=
float_to_int16_one_altivec
(
src
[
1
]
+
i
);
d0
=
vec_mergeh
(
t0
,
t1
);
d1
=
vec_mergel
(
t0
,
t1
);
vec_st
(
d0
,
0
,
dst
+
i
);
vec_st
(
d1
,
16
,
dst
+
i
);
dst
+=
8
;
}
}
}
else
{
for
(
i
=
0
;
i
<
channels
;
i
++
)
float_to_int16_stride_altivec
(
dst
+
i
,
src
[
i
],
len
,
channels
);
}
}
}
#endif
/* HAVE_ALTIVEC */
#endif
/* HAVE_ALTIVEC */
av_cold
void
ff_fmt_convert_init_ppc
(
FmtConvertContext
*
c
,
av_cold
void
ff_fmt_convert_init_ppc
(
FmtConvertContext
*
c
,
...
@@ -169,9 +62,5 @@ av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c,
...
@@ -169,9 +62,5 @@ av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c,
return
;
return
;
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_altivec
;
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_altivec
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
float_to_int16_altivec
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_altivec
;
}
#endif
/* HAVE_ALTIVEC */
#endif
/* HAVE_ALTIVEC */
}
}
libavcodec/x86/fmtconvert.asm
View file @
d74a8cb7
...
@@ -23,14 +23,6 @@
...
@@ -23,14 +23,6 @@
SECTION_TEXT
SECTION_TEXT
%macro
CVTPS2PI
2
%if
cpuflag
(
sse
)
cvtps2pi
%1
,
%2
%elif
cpuflag
(
3
dnow
)
pf2id
%1
,
%2
%endif
%endmacro
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul,
; void ff_int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul,
; int len);
; int len);
...
@@ -76,357 +68,3 @@ INIT_XMM sse
...
@@ -76,357 +68,3 @@ INIT_XMM sse
INT32_TO_FLOAT_FMUL_SCALAR
5
INT32_TO_FLOAT_FMUL_SCALAR
5
INIT_XMM
sse2
INIT_XMM
sse2
INT32_TO_FLOAT_FMUL_SCALAR
3
INT32_TO_FLOAT_FMUL_SCALAR
3
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16
1
cglobal
float_to_int16
,
3
,
3
,
%1
,
dst
,
src
,
len
add
lenq
,
lenq
lea
srcq
,
[
srcq
+
2
*
lenq
]
add
dstq
,
lenq
neg
lenq
.
loop
:
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
packssdw
m0
,
m1
mova
[
dstq
+
lenq
]
,
m0
%else
CVTPS2PI
m0
,
[
srcq
+
2
*
lenq
]
CVTPS2PI
m1
,
[
srcq
+
2
*
lenq
+
8
]
CVTPS2PI
m2
,
[
srcq
+
2
*
lenq
+
16
]
CVTPS2PI
m3
,
[
srcq
+
2
*
lenq
+
24
]
packssdw
m0
,
m1
packssdw
m2
,
m3
mova
[
dstq
+
lenq
]
,
m0
mova
[
dstq
+
lenq
+
8
]
,
m2
%endif
add
lenq
,
16
js
.
loop
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_XMM
sse2
FLOAT_TO_INT16
2
INIT_MMX
sse
FLOAT_TO_INT16
0
INIT_MMX
3
dnow
FLOAT_TO_INT16
0
;------------------------------------------------------------------------------
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
;------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_STEP
1
cglobal
float_to_int16_step
,
4
,
7
,
%1
,
dst
,
src
,
len
,
step
,
step3
,
v1
,
v2
add
lenq
,
lenq
lea
srcq
,
[
srcq
+
2
*
lenq
]
lea
step3q
,
[
stepq
*
3
]
neg
lenq
.
loop
:
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
packssdw
m0
,
m1
movd
v1d
,
m0
psrldq
m0
,
4
movd
v2d
,
m0
psrldq
m0
,
4
mov
[dstq],
v1w
mov
[
dstq
+
stepq
*
4
]
,
v2w
shr
v1d
,
16
shr
v2d
,
16
mov
[
dstq
+
stepq
*
2
]
,
v1w
mov
[
dstq
+
step3q
*
2
]
,
v2w
lea
dstq
,
[
dstq
+
stepq
*
8
]
movd
v1d
,
m0
psrldq
m0
,
4
movd
v2d
,
m0
mov
[dstq],
v1w
mov
[
dstq
+
stepq
*
4
]
,
v2w
shr
v1d
,
16
shr
v2d
,
16
mov
[
dstq
+
stepq
*
2
]
,
v1w
mov
[
dstq
+
step3q
*
2
]
,
v2w
lea
dstq
,
[
dstq
+
stepq
*
8
]
%else
CVTPS2PI
m0
,
[
srcq
+
2
*
lenq
]
CVTPS2PI
m1
,
[
srcq
+
2
*
lenq
+
8
]
CVTPS2PI
m2
,
[
srcq
+
2
*
lenq
+
16
]
CVTPS2PI
m3
,
[
srcq
+
2
*
lenq
+
24
]
packssdw
m0
,
m1
packssdw
m2
,
m3
movd
v1d
,
m0
psrlq
m0
,
32
movd
v2d
,
m0
mov
[dstq],
v1w
mov
[
dstq
+
stepq
*
4
]
,
v2w
shr
v1d
,
16
shr
v2d
,
16
mov
[
dstq
+
stepq
*
2
]
,
v1w
mov
[
dstq
+
step3q
*
2
]
,
v2w
lea
dstq
,
[
dstq
+
stepq
*
8
]
movd
v1d
,
m2
psrlq
m2
,
32
movd
v2d
,
m2
mov
[dstq],
v1w
mov
[
dstq
+
stepq
*
4
]
,
v2w
shr
v1d
,
16
shr
v2d
,
16
mov
[
dstq
+
stepq
*
2
]
,
v1w
mov
[
dstq
+
step3q
*
2
]
,
v2w
lea
dstq
,
[
dstq
+
stepq
*
8
]
%endif
add
lenq
,
16
js
.
loop
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_XMM
sse2
FLOAT_TO_INT16_STEP
2
INIT_MMX
sse
FLOAT_TO_INT16_STEP
0
INIT_MMX
3
dnow
FLOAT_TO_INT16_STEP
0
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_INTERLEAVE2
0
cglobal
float_to_int16_interleave2
,
3
,
4
,
2
,
dst
,
src0
,
src1
,
len
lea
lenq
,
[
4
*
r2q
]
mov
src1q
,
[
src0q
+
gprsize
]
mov
src0q
,
[src0q]
add
dstq
,
lenq
add
src0q
,
lenq
add
src1q
,
lenq
neg
lenq
.
loop
:
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
src0q
+
lenq
]
cvtps2dq
m1
,
[
src1q
+
lenq
]
packssdw
m0
,
m1
movhlps
m1
,
m0
punpcklwd
m0
,
m1
mova
[
dstq
+
lenq
]
,
m0
%else
CVTPS2PI
m0
,
[
src0q
+
lenq
]
CVTPS2PI
m1
,
[
src0q
+
lenq
+
8
]
CVTPS2PI
m2
,
[
src1q
+
lenq
]
CVTPS2PI
m3
,
[
src1q
+
lenq
+
8
]
packssdw
m0
,
m1
packssdw
m2
,
m3
mova
m1
,
m0
punpcklwd
m0
,
m2
punpckhwd
m1
,
m2
mova
[
dstq
+
lenq
]
,
m0
mova
[
dstq
+
lenq
+
8
]
,
m1
%endif
add
lenq
,
16
js
.
loop
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_MMX
3
dnow
FLOAT_TO_INT16_INTERLEAVE2
INIT_MMX
sse
FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM
sse2
FLOAT_TO_INT16_INTERLEAVE2
;-----------------------------------------------------------------------------
; void ff_float_to_int16_interleave6(int16_t *dst, const float **src, int len)
;-----------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_INTERLEAVE6
0
cglobal
float_to_int16_interleave6
,
2
,
8
,
0
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
%if
ARCH_X86_64
mov
lend
,
r2d
%else
%
define
lend
dword
r2m
%endif
mov
src1q
,
[
srcq
+
1
*
gprsize
]
mov
src2q
,
[
srcq
+
2
*
gprsize
]
mov
src3q
,
[
srcq
+
3
*
gprsize
]
mov
src4q
,
[
srcq
+
4
*
gprsize
]
mov
src5q
,
[
srcq
+
5
*
gprsize
]
mov
srcq
,
[srcq]
sub
src1q
,
srcq
sub
src2q
,
srcq
sub
src3q
,
srcq
sub
src4q
,
srcq
sub
src5q
,
srcq
.
loop
:
CVTPS2PI
mm0
,
[srcq]
CVTPS2PI
mm1
,
[
srcq
+
src1q
]
CVTPS2PI
mm2
,
[
srcq
+
src2q
]
CVTPS2PI
mm3
,
[
srcq
+
src3q
]
CVTPS2PI
mm4
,
[
srcq
+
src4q
]
CVTPS2PI
mm5
,
[
srcq
+
src5q
]
packssdw
mm0
,
mm3
packssdw
mm1
,
mm4
packssdw
mm2
,
mm5
PSWAPD
mm3
,
mm0
punpcklwd
mm0
,
mm1
punpckhwd
mm1
,
mm2
punpcklwd
mm2
,
mm3
PSWAPD
mm3
,
mm0
punpckldq
mm0
,
mm2
punpckhdq
mm2
,
mm1
punpckldq
mm1
,
mm3
movq
[
dstq
]
,
mm0
movq
[
dstq
+
16
]
,
mm2
movq
[
dstq
+
8
]
,
mm1
add
srcq
,
8
add
dstq
,
24
sub
lend
,
2
jg
.
loop
emms
RET
%endmacro
; FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX
sse
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX
3
dnow
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX
3
dnowext
FLOAT_TO_INT16_INTERLEAVE6
;-----------------------------------------------------------------------------
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
%macro
FLOAT_INTERLEAVE6
1
cglobal
float_interleave6
,
2
,
8
,
%1
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
%if
ARCH_X86_64
mov
lend
,
r2d
%else
%
define
lend
dword
r2m
%endif
mov
src1q
,
[
srcq
+
1
*
gprsize
]
mov
src2q
,
[
srcq
+
2
*
gprsize
]
mov
src3q
,
[
srcq
+
3
*
gprsize
]
mov
src4q
,
[
srcq
+
4
*
gprsize
]
mov
src5q
,
[
srcq
+
5
*
gprsize
]
mov
srcq
,
[srcq]
sub
src1q
,
srcq
sub
src2q
,
srcq
sub
src3q
,
srcq
sub
src4q
,
srcq
sub
src5q
,
srcq
.
loop
:
%if
cpuflag
(
sse
)
movaps
m0
,
[srcq]
movaps
m1
,
[
srcq
+
src1q
]
movaps
m2
,
[
srcq
+
src2q
]
movaps
m3
,
[
srcq
+
src3q
]
movaps
m4
,
[
srcq
+
src4q
]
movaps
m5
,
[
srcq
+
src5q
]
SBUTTERFLYPS
0
,
1
,
6
SBUTTERFLYPS
2
,
3
,
6
SBUTTERFLYPS
4
,
5
,
6
movaps
m6
,
m4
shufps
m4
,
m0
,
0xe4
movlhps
m0
,
m2
movhlps
m6
,
m2
movaps
[
dstq
]
,
m0
movaps
[
dstq
+
16
]
,
m4
movaps
[
dstq
+
32
]
,
m6
movaps
m6
,
m5
shufps
m5
,
m1
,
0xe4
movlhps
m1
,
m3
movhlps
m6
,
m3
movaps
[
dstq
+
48
]
,
m1
movaps
[
dstq
+
64
]
,
m5
movaps
[
dstq
+
80
]
,
m6
%else
; mmx
movq
m0
,
[srcq]
movq
m1
,
[
srcq
+
src1q
]
movq
m2
,
[
srcq
+
src2q
]
movq
m3
,
[
srcq
+
src3q
]
movq
m4
,
[
srcq
+
src4q
]
movq
m5
,
[
srcq
+
src5q
]
SBUTTERFLY
dq
,
0
,
1
,
6
SBUTTERFLY
dq
,
2
,
3
,
6
SBUTTERFLY
dq
,
4
,
5
,
6
movq
[
dstq
]
,
m0
movq
[
dstq
+
8
]
,
m2
movq
[
dstq
+
16
]
,
m4
movq
[
dstq
+
24
]
,
m1
movq
[
dstq
+
32
]
,
m3
movq
[
dstq
+
40
]
,
m5
%endif
add
srcq
,
mmsize
add
dstq
,
mmsize
*
6
sub
lend
,
mmsize
/
4
jg
.
loop
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_MMX
mmx
FLOAT_INTERLEAVE6
0
INIT_XMM
sse
FLOAT_INTERLEAVE6
7
;-----------------------------------------------------------------------------
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
%macro
FLOAT_INTERLEAVE2
1
cglobal
float_interleave2
,
3
,
4
,
%1
,
dst
,
src
,
len
,
src1
mov
src1q
,
[
srcq
+
gprsize
]
mov
srcq
,
[
srcq
]
sub
src1q
,
srcq
.
loop
:
mova
m0
,
[
srcq
]
mova
m1
,
[
srcq
+
src1q
]
mova
m3
,
[
srcq
+
mmsize
]
mova
m4
,
[
srcq
+
src1q
+
mmsize
]
mova
m2
,
m0
PUNPCKLDQ
m0
,
m1
PUNPCKHDQ
m2
,
m1
mova
m1
,
m3
PUNPCKLDQ
m3
,
m4
PUNPCKHDQ
m1
,
m4
mova
[
dstq
]
,
m0
mova
[
dstq
+
1
*
mmsize
]
,
m2
mova
[
dstq
+
2
*
mmsize
]
,
m3
mova
[
dstq
+
3
*
mmsize
]
,
m1
add
srcq
,
mmsize
*
2
add
dstq
,
mmsize
*
4
sub
lend
,
mmsize
/
2
jg
.
loop
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_MMX
mmx
%define
PUNPCKLDQ
punpckldq
%define
PUNPCKHDQ
punpckhdq
FLOAT_INTERLEAVE2
0
INIT_XMM
sse
%define
PUNPCKLDQ
unpcklps
%define
PUNPCKHDQ
unpckhps
FLOAT_INTERLEAVE2
5
libavcodec/x86/fmtconvert_init.c
View file @
d74a8cb7
...
@@ -33,84 +33,6 @@
...
@@ -33,84 +33,6 @@
void
ff_int32_to_float_fmul_scalar_sse
(
float
*
dst
,
const
int32_t
*
src
,
float
mul
,
int
len
);
void
ff_int32_to_float_fmul_scalar_sse
(
float
*
dst
,
const
int32_t
*
src
,
float
mul
,
int
len
);
void
ff_int32_to_float_fmul_scalar_sse2
(
float
*
dst
,
const
int32_t
*
src
,
float
mul
,
int
len
);
void
ff_int32_to_float_fmul_scalar_sse2
(
float
*
dst
,
const
int32_t
*
src
,
float
mul
,
int
len
);
void
ff_float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_step_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
,
long
step
);
void
ff_float_to_int16_step_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
,
long
step
);
void
ff_float_to_int16_step_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
,
long
step
);
void
ff_float_to_int16_interleave2_3dnow
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave2_sse
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave2_sse2
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave6_sse
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dnow
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dnowext
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
/*
 * Generate the interleaving converters for one cpu suffix:
 *
 *  - float_to_int16_interleave_misc_<cpu>() calls
 *    ff_float_to_int16_step_<cpu>() once per channel, writing to dst + c
 *    with a step of channels.
 *  - float_to_int16_interleave_<cpu>() dispatches on the channel count:
 *    1 -> ff_float_to_int16_<cpu>(), 2 -> ff_float_to_int16_interleave2_<cpu>(),
 *    6 -> ff_float_to_int16_interleave6_<cpu>(), anything else -> the misc
 *    helper above.
 *
 * The misc helper is a separate av_noinline function because gcc pessimizes
 * register allocation if this is in the same function as
 * float_to_int16_interleave_sse2.
 *
 * Every line of the macro body must end in a backslash; the explanatory
 * comment lives here rather than inside the body so that it cannot break
 * the line continuation.
 */
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
    int c;\
    for(c=0; c<channels; c++){\
        ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
    }\
}\
\
static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
    if(channels==1)\
        ff_float_to_int16_##cpu(dst, src[0], len);\
    else if(channels==2){\
        ff_float_to_int16_interleave2_##cpu(dst, src, len);\
    }else if(channels==6){\
        ff_float_to_int16_interleave6_##cpu(dst, src, len);\
    }else\
        float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
}
FLOAT_TO_INT16_INTERLEAVE
(
3
dnow
)
FLOAT_TO_INT16_INTERLEAVE
(
sse
)
FLOAT_TO_INT16_INTERLEAVE
(
sse2
)
/**
 * Interleaving float to int16_t conversion for the 3dnowext variant.
 *
 * Six-channel input goes to ff_float_to_int16_interleave6_3dnowext(); every
 * other channel count is forwarded to float_to_int16_interleave_3dnow().
 */
static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
                                               long len, int channels)
{
    if (channels != 6) {
        float_to_int16_interleave_3dnow(dst, src, len, channels);
        return;
    }

    ff_float_to_int16_interleave6_3dnowext(dst, src, len);
}
void
ff_float_interleave2_mmx
(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
);
void
ff_float_interleave2_sse
(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
);
void
ff_float_interleave6_mmx
(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
);
void
ff_float_interleave6_sse
(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
);
/**
 * Float interleaving dispatcher for the MMX variants.
 *
 * Two and six channels use ff_float_interleave2_mmx() and
 * ff_float_interleave6_mmx(); any other channel count falls back to
 * ff_float_interleave_c().
 */
static void float_interleave_mmx(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_mmx(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_mmx(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
/**
 * Float interleaving dispatcher for the SSE variants.
 *
 * Two and six channels use ff_float_interleave2_sse() and
 * ff_float_interleave6_sse(); any other channel count falls back to
 * ff_float_interleave_c().
 */
static void float_interleave_sse(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_sse(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_sse(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
#endif
/* HAVE_YASM */
#endif
/* HAVE_YASM */
av_cold
void
ff_fmt_convert_init_x86
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
av_cold
void
ff_fmt_convert_init_x86
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
...
@@ -118,30 +40,11 @@ av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx
...
@@ -118,30 +40,11 @@ av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx
#if HAVE_YASM
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
int
cpu_flags
=
av_get_cpu_flags
();
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
c
->
float_interleave
=
float_interleave_mmx
;
}
if
(
EXTERNAL_AMD3DNOW
(
cpu_flags
))
{
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
ff_float_to_int16_3dnow
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dnow
;
}
}
if
(
EXTERNAL_AMD3DNOWEXT
(
cpu_flags
))
{
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dnowext
;
}
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse
;
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse
;
c
->
float_to_int16
=
ff_float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
float_interleave
=
float_interleave_sse
;
}
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse2
;
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse2
;
c
->
float_to_int16
=
ff_float_to_int16_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
}
}
#endif
/* HAVE_YASM */
#endif
/* HAVE_YASM */
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment