Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
aad3429d
Commit
aad3429d
authored
Oct 10, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
parent
4e8e2624
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
61 additions
and
60 deletions
+61
-60
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+52
-0
fmtconvert_mmx.c
libavcodec/x86/fmtconvert_mmx.c
+9
-60
No files found.
libavcodec/x86/fmtconvert.asm
View file @
aad3429d
...
...
@@ -112,6 +112,58 @@ FLOAT_TO_INT16 3dnow, 0
%undef
cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_INTERLEAVE2
1
cglobal
float_to_int16_interleave2_
%1
,
3
,
4
,
2
,
dst
,
src0
,
src1
,
len
lea
lenq
,
[
4
*
r2q
]
mov
src1q
,
[
src0q
+
gprsize
]
mov
src0q
,
[src0q]
add
dstq
,
lenq
add
src0q
,
lenq
add
src1q
,
lenq
neg
lenq
.
loop
:
%ifidn
%1
,
sse2
cvtps2dq
m0
,
[
src0q
+
lenq
]
cvtps2dq
m1
,
[
src1q
+
lenq
]
packssdw
m0
,
m1
movhlps
m1
,
m0
punpcklwd
m0
,
m1
mova
[
dstq
+
lenq
]
,
m0
%else
cvtps2pi
m0
,
[
src0q
+
lenq
]
cvtps2pi
m1
,
[
src0q
+
lenq
+
8
]
cvtps2pi
m2
,
[
src1q
+
lenq
]
cvtps2pi
m3
,
[
src1q
+
lenq
+
8
]
packssdw
m0
,
m1
packssdw
m2
,
m3
mova
m1
,
m0
punpcklwd
m0
,
m2
punpckhwd
m1
,
m2
mova
[
dstq
+
lenq
]
,
m0
mova
[
dstq
+
lenq
+
8
]
,
m1
%endif
add
lenq
,
16
js
.
loop
%ifnidn
%1
,
sse2
emms
%endif
REP_RET
%endmacro
INIT_MMX
%define
cvtps2pi
pf2id
FLOAT_TO_INT16_INTERLEAVE2
3
dnow
%undef
cvtps2pi
%define
movdqa
movaps
FLOAT_TO_INT16_INTERLEAVE2
sse
%undef
movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2
sse2
%macro
PSWAPD_SSE
2
pshufw
%1
,
%2
,
0x4e
%endmacro
...
...
libavcodec/x86/fmtconvert_mmx.c
View file @
aad3429d
...
...
@@ -35,13 +35,17 @@ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void
ff_float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_interleave2_3dnow
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave2_sse
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave2_sse2
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave6_sse
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dnow
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dn2
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define FLOAT_TO_INT16_INTERLEAVE(cpu
, body
) \
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/
\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
...
...
@@ -57,71 +61,16 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
if(channels==1)\
ff_float_to_int16_##cpu(dst, src[0], len);\
else if(channels==2){\
x86_reg reglen = len; \
const float *src0 = src[0];\
const float *src1 = src[1];\
__asm__ volatile(\
"shl $2, %0 \n"\
"add %0, %1 \n"\
"add %0, %2 \n"\
"add %0, %3 \n"\
"neg %0 \n"\
body\
:"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
);\
ff_float_to_int16_interleave2_##cpu(dst, src, len);\
}else if(channels==6){\
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
}else\
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
}
FLOAT_TO_INT16_INTERLEAVE
(
3
dnow
,
"1:
\n
"
"pf2id (%2,%0), %%mm0
\n
"
"pf2id 8(%2,%0), %%mm1
\n
"
"pf2id (%3,%0), %%mm2
\n
"
"pf2id 8(%3,%0), %%mm3
\n
"
"packssdw %%mm1, %%mm0
\n
"
"packssdw %%mm3, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
"
"punpcklwd %%mm2, %%mm0
\n
"
"punpckhwd %%mm2, %%mm1
\n
"
"movq %%mm0, (%1,%0)
\n
"
"movq %%mm1, 8(%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
"femms
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
sse
,
"1:
\n
"
"cvtps2pi (%2,%0), %%mm0
\n
"
"cvtps2pi 8(%2,%0), %%mm1
\n
"
"cvtps2pi (%3,%0), %%mm2
\n
"
"cvtps2pi 8(%3,%0), %%mm3
\n
"
"packssdw %%mm1, %%mm0
\n
"
"packssdw %%mm3, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
"
"punpcklwd %%mm2, %%mm0
\n
"
"punpckhwd %%mm2, %%mm1
\n
"
"movq %%mm0, (%1,%0)
\n
"
"movq %%mm1, 8(%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
"emms
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
sse2
,
"1:
\n
"
"cvtps2dq (%2,%0), %%xmm0
\n
"
"cvtps2dq (%3,%0), %%xmm1
\n
"
"packssdw %%xmm1, %%xmm0
\n
"
"movhlps %%xmm0, %%xmm1
\n
"
"punpcklwd %%xmm1, %%xmm0
\n
"
"movdqa %%xmm0, (%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
3
dnow
)
FLOAT_TO_INT16_INTERLEAVE
(
sse
)
FLOAT_TO_INT16_INTERLEAVE
(
sse2
)
static
void
float_to_int16_interleave_3dn2
(
int16_t
*
dst
,
const
float
**
src
,
long
len
,
int
channels
){
if
(
channels
==
6
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment