Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
708ab7dd
Commit
708ab7dd
authored
Oct 09, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: port float_to_int16() x86 inline asm to yasm
parent
45add995
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
55 additions
and
76 deletions
+55
-76
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+42
-0
fmtconvert_mmx.c
libavcodec/x86/fmtconvert_mmx.c
+13
-76
No files found.
libavcodec/x86/fmtconvert.asm
View file @
708ab7dd
...
...
@@ -24,6 +24,48 @@
SECTION_TEXT
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
; FLOAT_TO_INT16 cpu_suffix, cglobal_arg4
;   %1 - CPU name appended to the symbol (float_to_int16_%1); also selects the
;        code path: "sse2" uses the cvtps2dq path, anything else the cvtps2pi
;        path (which the 3dnow instantiation redefines to pf2id).
;   %2 - forwarded unchanged as the 4th cglobal argument
;        (presumably the number of XMM registers used - confirm against x86inc).
;
; Converts len floats at src to int16 at dst; packssdw saturates the results.
; Each iteration writes 16 bytes of output (8 samples) and the loop body runs
; before the counter is tested, so len must be a positive multiple of 8.
%macro FLOAT_TO_INT16 2
cglobal float_to_int16_%1, 3,3,%2, dst, src, len
    add         lenq, lenq              ; lenq = 2*len = output size in bytes
    lea         srcq, [srcq+2*lenq]     ; srcq -> end of input  (4 bytes/sample)
    add         dstq, lenq              ; dstq -> end of output (2 bytes/sample)
    neg         lenq                    ; negative byte offset, counts up to 0
.loop:
%ifidn %1, sse2
    ; 2 x 4 floats -> 8 dwords -> 8 saturated words
    cvtps2dq    m0, [srcq+2*lenq]
    cvtps2dq    m1, [srcq+2*lenq+16]
    packssdw    m0, m1
    mova        [dstq+lenq], m0
%else
    ; 4 x 2 floats -> 8 dwords -> 8 saturated words, in two 8-byte stores
    cvtps2pi    m0, [srcq+2*lenq]
    cvtps2pi    m1, [srcq+2*lenq+8]
    cvtps2pi    m2, [srcq+2*lenq+16]
    cvtps2pi    m3, [srcq+2*lenq+24]
    packssdw    m0, m1
    packssdw    m2, m3
    mova        [dstq+lenq], m0
    mova        [dstq+lenq+8], m2
%endif
    add         lenq, 16                ; advance by 16 output bytes
    js .loop                            ; continue while the offset is negative
%ifnidn %1, sse2
    emms                                ; MMX registers were used: reset x87 state
%endif
    REP_RET
%endmacro
; Instantiate float_to_int16_{sse2,sse,3dnow} from the FLOAT_TO_INT16 macro.
INIT_XMM
FLOAT_TO_INT16 sse2, 2
INIT_MMX
FLOAT_TO_INT16 sse, 0
; The 3dnow version reuses the non-sse2 path with pf2id substituted for
; cvtps2pi; the alias is removed again right after the instantiation.
%define cvtps2pi pf2id
FLOAT_TO_INT16 3dnow, 0
%undef cvtps2pi
; PSWAPD_SSE dst, src
; Emulates a dword swap with pshufw: the immediate 0x4e (binary 01 00 11 10)
; selects source words 2,3,0,1, i.e. exchanges the two 32-bit halves.
%macro PSWAPD_SSE 2
    pshufw      %1, %2, 0x4e
%endmacro
...
...
libavcodec/x86/fmtconvert_mmx.c
View file @
708ab7dd
...
...
@@ -70,80 +70,16 @@ static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mu
);
}
/**
 * Convert an array of floats to signed 16-bit integers using 3DNow! (pf2id).
 *
 * Results are packed with signed saturation (packssdw).
 *
 * @param dst output samples
 * @param src input samples
 * @param len number of samples; the loop converts 8 samples per iteration and
 *            runs its body before testing the counter, so len must be a
 *            positive multiple of 8
 */
static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
    x86_reg reglen = len;
    // not bit-exact: pf2id uses different rounding than C and SSE
    __asm__ volatile(
        "add        %0         , %0        \n\t" /* reglen = 2*len = output bytes   */
        "lea        (%2,%0,2)  , %2        \n\t" /* src -> end of input             */
        "add        %0         , %1        \n\t" /* dst -> end of output            */
        "neg        %0                     \n\t" /* negative offset, counts up to 0 */
        "1:                                \n\t"
        "pf2id       (%2,%0,2) , %%mm0     \n\t"
        "pf2id      8(%2,%0,2) , %%mm1     \n\t"
        "pf2id     16(%2,%0,2) , %%mm2     \n\t"
        "pf2id     24(%2,%0,2) , %%mm3     \n\t"
        "packssdw   %%mm1      , %%mm0     \n\t"
        "packssdw   %%mm3      , %%mm2     \n\t"
        "movq       %%mm0      ,  (%1,%0)  \n\t"
        "movq       %%mm2      , 8(%1,%0)  \n\t"
        "add        $16        , %0        \n\t"
        " js 1b                            \n\t"
        "femms                             \n\t"
        :"+r"(reglen), "+r"(dst), "+r"(src)
        :
        /* The asm loads from src and stores to dst through pointers that are
         * only passed as register operands; without this clobber the compiler
         * may reorder or cache accesses to those buffers around the asm. */
        :"memory"
    );
}
#if HAVE_YASM
/**
 * Convert an array of floats to signed 16-bit integers using SSE (cvtps2pi)
 * with MMX registers as the destination.
 *
 * Results are packed with signed saturation (packssdw).
 *
 * @param dst output samples
 * @param src input samples
 * @param len number of samples; the loop converts 8 samples per iteration and
 *            runs its body before testing the counter, so len must be a
 *            positive multiple of 8
 */
static void float_to_int16_sse(int16_t *dst, const float *src, long len){
    x86_reg reglen = len;
    __asm__ volatile(
        "add        %0         , %0        \n\t" /* reglen = 2*len = output bytes   */
        "lea        (%2,%0,2)  , %2        \n\t" /* src -> end of input             */
        "add        %0         , %1        \n\t" /* dst -> end of output            */
        "neg        %0                     \n\t" /* negative offset, counts up to 0 */
        "1:                                \n\t"
        "cvtps2pi    (%2,%0,2) , %%mm0     \n\t"
        "cvtps2pi   8(%2,%0,2) , %%mm1     \n\t"
        "cvtps2pi  16(%2,%0,2) , %%mm2     \n\t"
        "cvtps2pi  24(%2,%0,2) , %%mm3     \n\t"
        "packssdw   %%mm1      , %%mm0     \n\t"
        "packssdw   %%mm3      , %%mm2     \n\t"
        "movq       %%mm0      ,  (%1,%0)  \n\t"
        "movq       %%mm2      , 8(%1,%0)  \n\t"
        "add        $16        , %0        \n\t"
        " js 1b                            \n\t"
        "emms                              \n\t"
        :"+r"(reglen), "+r"(dst), "+r"(src)
        :
        /* The asm loads from src and stores to dst through pointers that are
         * only passed as register operands; without this clobber the compiler
         * may reorder or cache accesses to those buffers around the asm. */
        :"memory"
    );
}
/**
 * Convert an array of floats to signed 16-bit integers using SSE2 (cvtps2dq).
 *
 * Results are packed with signed saturation (packssdw). The store uses
 * movdqa, so dst must be 16-byte aligned at every 8-sample step; the
 * memory-operand form of cvtps2dq imposes the same requirement on src.
 *
 * @param dst output samples, 16-byte aligned
 * @param src input samples, 16-byte aligned
 * @param len number of samples; the loop converts 8 samples per iteration and
 *            runs its body before testing the counter, so len must be a
 *            positive multiple of 8
 */
static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
    x86_reg reglen = len;
    __asm__ volatile(
        "add        %0         , %0        \n\t" /* reglen = 2*len = output bytes   */
        "lea        (%2,%0,2)  , %2        \n\t" /* src -> end of input             */
        "add        %0         , %1        \n\t" /* dst -> end of output            */
        "neg        %0                     \n\t" /* negative offset, counts up to 0 */
        "1:                                \n\t"
        "cvtps2dq    (%2,%0,2) , %%xmm0    \n\t"
        "cvtps2dq  16(%2,%0,2) , %%xmm1    \n\t"
        "packssdw   %%xmm1     , %%xmm0    \n\t"
        "movdqa     %%xmm0     ,  (%1,%0)  \n\t"
        "add        $16        , %0        \n\t"
        " js 1b                            \n\t"
        :"+r"(reglen), "+r"(dst), "+r"(src)
        :
        /* The asm loads from src and stores to dst through pointers that are
         * only passed as register operands; without this clobber the compiler
         * may reorder or cache accesses to those buffers around the asm. */
        :"memory"
    );
}
/* External float -> int16 converters: dst receives len converted samples
 * read from src. */
void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);

/* External 6-channel interleaving converters: src is an array of per-channel
 * input pointers. */
void ff_float_to_int16_interleave6_sse  (int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2 (int16_t *dst, const float **src, int len);
/* When HAVE_YASM is false, map the interleave6 names onto the
 * float_to_int16_interleave_misc_*() helpers with the channel count fixed
 * to 6. The 3dn2 name shares the 3dnow helper. */
#if !HAVE_YASM
#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#endif
/* The sse2 name is an alias for the sse interleave6 routine. */
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
...
...
@@ -152,7 +88,7 @@ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
int i,j,c;\
for(c=0; c<channels; c++){\
float_to_int16_##cpu(tmp, src[c], len);\
ff_float_to_int16_##cpu(tmp, src[c], len);\
for(i=0, j=c; i<len; i++, j+=channels)\
dst[j] = tmp[i];\
}\
...
...
@@ -160,7 +96,7 @@ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
\
static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
if(channels==1)\
float_to_int16_##cpu(dst, src[0], len);\
ff_float_to_int16_##cpu(dst, src[0], len);\
else if(channels==2){\
x86_reg reglen = len; \
const float *src0 = src[0];\
...
...
@@ -235,7 +171,6 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
float_to_int16_interleave_3dnow
(
dst
,
src
,
len
,
channels
);
}
#if HAVE_YASM
/* External 2-channel float interleavers: src is an array of per-channel
 * input pointers, dst receives the interleaved output. */
void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
...
...
@@ -272,11 +207,10 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
#if HAVE_YASM
c
->
float_interleave
=
float_interleave_mmx
;
#endif
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
){
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)){
c
->
float_to_int16
=
float_to_int16_3dnow
;
c
->
float_to_int16
=
ff_float_to_int16_3dnow
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dnow
;
}
}
...
...
@@ -285,18 +219,21 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dn2
;
}
}
#endif
if
(
mm_flags
&
AV_CPU_FLAG_SSE
){
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse
;
c
->
float_to_int16
=
float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
#if HAVE_YASM
c
->
float_to_int16
=
ff_float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
float_interleave
=
float_interleave_sse
;
#endif
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
){
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse2
;
c
->
float_to_int16
=
float_to_int16_sse2
;
#if HAVE_YASM
c
->
float_to_int16
=
ff_float_to_int16_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
#endif
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment