Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
be923ed6
Commit
be923ed6
authored
Jul 15, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: fmtconvert: port to cpuflags
parent
588fafe7
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
71 additions
and
70 deletions
+71
-70
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+71
-70
No files found.
libavcodec/x86/fmtconvert.asm
View file @
be923ed6
...
...
@@ -26,11 +26,11 @@ SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
%macro
INT32_TO_FLOAT_FMUL_SCALAR
2
%macro
INT32_TO_FLOAT_FMUL_SCALAR
1
%if
UNIX64
cglobal
int32_to_float_fmul_scalar
_
%1
,
3
,
3
,
%2
,
dst
,
src
,
len
cglobal
int32_to_float_fmul_scalar
,
3
,
3
,
%1
,
dst
,
src
,
len
%else
cglobal
int32_to_float_fmul_scalar
_
%1
,
4
,
4
,
%2
,
dst
,
src
,
mul
,
len
cglobal
int32_to_float_fmul_scalar
,
4
,
4
,
%1
,
dst
,
src
,
mul
,
len
%endif
%if
WIN64
SWAP
0
,
2
...
...
@@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
add
dstq
,
lenq
neg
lenq
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtdq2ps
m1
,
[
srcq
+
lenq
]
cvtdq2ps
m2
,
[
srcq
+
lenq
+
16
]
%else
...
...
@@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
REP_RET
%endmacro
INIT_XMM
INIT_XMM
sse
%define
SPLATD
SPLATD_SSE
%define
movdqa
movaps
INT32_TO_FLOAT_FMUL_SCALAR
sse
,
5
%undef
movdqa
INT32_TO_FLOAT_FMUL_SCALAR
5
INIT_XMM
sse2
%define
SPLATD
SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR
sse2
,
3
INT32_TO_FLOAT_FMUL_SCALAR
3
%undef
SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16
2
cglobal
float_to_int16
_
%1
,
3
,
3
,
%2
,
dst
,
src
,
len
%macro
FLOAT_TO_INT16
1
cglobal
float_to_int16
,
3
,
3
,
%1
,
dst
,
src
,
len
add
lenq
,
lenq
lea
srcq
,
[
srcq
+
2
*
lenq
]
add
dstq
,
lenq
neg
lenq
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
packssdw
m0
,
m1
...
...
@@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
%endif
add
lenq
,
16
js
.
loop
%if
nidn
%1
,
sse2
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_XMM
FLOAT_TO_INT16
sse2
,
2
INIT_MMX
FLOAT_TO_INT16
sse
,
0
INIT_XMM
sse2
FLOAT_TO_INT16
2
INIT_MMX
sse
FLOAT_TO_INT16
0
%define
cvtps2pi
pf2id
FLOAT_TO_INT16
3
dnow
,
0
INIT_MMX
3
dnow
FLOAT_TO_INT16
0
%undef
cvtps2pi
;------------------------------------------------------------------------------
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
;------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_STEP
2
cglobal
float_to_int16_step
_
%1
,
4
,
7
,
%2
,
dst
,
src
,
len
,
step
,
step3
,
v1
,
v2
%macro
FLOAT_TO_INT16_STEP
1
cglobal
float_to_int16_step
,
4
,
7
,
%1
,
dst
,
src
,
len
,
step
,
step3
,
v1
,
v2
add
lenq
,
lenq
lea
srcq
,
[
srcq
+
2
*
lenq
]
lea
step3q
,
[
stepq
*
3
]
neg
lenq
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
packssdw
m0
,
m1
...
...
@@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
%endif
add
lenq
,
16
js
.
loop
%if
nidn
%1
,
sse2
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_XMM
FLOAT_TO_INT16_STEP
sse2
,
2
INIT_MMX
FLOAT_TO_INT16_STEP
sse
,
0
INIT_XMM
sse2
FLOAT_TO_INT16_STEP
2
INIT_MMX
sse
FLOAT_TO_INT16_STEP
0
%define
cvtps2pi
pf2id
FLOAT_TO_INT16_STEP
3
dnow
,
0
INIT_MMX
3
dnow
FLOAT_TO_INT16_STEP
0
%undef
cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_INTERLEAVE2
1
cglobal
float_to_int16_interleave2
_
%1
,
3
,
4
,
2
,
dst
,
src0
,
src1
,
len
%macro
FLOAT_TO_INT16_INTERLEAVE2
0
cglobal
float_to_int16_interleave2
,
3
,
4
,
2
,
dst
,
src0
,
src1
,
len
lea
lenq
,
[
4
*
r2q
]
mov
src1q
,
[
src0q
+
gprsize
]
mov
src0q
,
[src0q]
...
...
@@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
add
src1q
,
lenq
neg
lenq
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
src0q
+
lenq
]
cvtps2dq
m1
,
[
src1q
+
lenq
]
packssdw
m0
,
m1
...
...
@@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
%endif
add
lenq
,
16
js
.
loop
%if
nidn
%1
,
sse2
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_MMX
INIT_MMX
3
dnow
%define
cvtps2pi
pf2id
FLOAT_TO_INT16_INTERLEAVE2
3
dnow
FLOAT_TO_INT16_INTERLEAVE2
%undef
cvtps2pi
%define
movdqa
movaps
FLOAT_TO_INT16_INTERLEAVE2
sse
%undef
movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2
sse2
INIT_MMX
sse
FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM
sse2
FLOAT_TO_INT16_INTERLEAVE2
%macro
PSWAPD_SSE
2
...
...
@@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
punpckldq
%1
,
%2
%endmacro
%macro
FLOAT_TO_INT16_INTERLEAVE6
1
%macro
FLOAT_TO_INT16_INTERLEAVE6
0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal
float_to_int16_interleave6
_
%1
,
2
,
8
,
0
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
cglobal
float_to_int16_interleave6
,
2
,
8
,
0
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
%if
ARCH_X86_64
mov
lend
,
r2d
%else
...
...
@@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
RET
%endmacro
; FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX
sse
%define
pswapd
PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6
sse
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX
3
dnow
%define
cvtps2pi
pf2id
%define
pswapd
PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6
3
dnow
FLOAT_TO_INT16_INTERLEAVE6
%undef
pswapd
FLOAT_TO_INT16_INTERLEAVE6
3
dnowext
INIT_MMX
3
dnowext
FLOAT_TO_INT16_INTERLEAVE6
%undef
cvtps2pi
;-----------------------------------------------------------------------------
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
%macro
FLOAT_INTERLEAVE6
2
cglobal
float_interleave6
_
%1
,
2
,
8
,
%2
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
%macro
FLOAT_INTERLEAVE6
1
cglobal
float_interleave6
,
2
,
8
,
%1
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
%if
ARCH_X86_64
mov
lend
,
r2d
%else
...
...
@@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
sub
src4q
,
srcq
sub
src5q
,
srcq
.
loop
:
%if
idn
%1
,
sse
%if
cpuflag
(
sse
)
movaps
m0
,
[srcq]
movaps
m1
,
[
srcq
+
src1q
]
movaps
m2
,
[
srcq
+
src2q
]
...
...
@@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
add
dstq
,
mmsize
*
6
sub
lend
,
mmsize
/
4
jg
.
loop
%if
idn
%1
,
mmx
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_MMX
FLOAT_INTERLEAVE6
mmx
,
0
INIT_XMM
FLOAT_INTERLEAVE6
sse
,
7
INIT_MMX
mmx
FLOAT_INTERLEAVE6
0
INIT_XMM
sse
FLOAT_INTERLEAVE6
7
;-----------------------------------------------------------------------------
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
%macro
FLOAT_INTERLEAVE2
2
cglobal
float_interleave2
_
%1
,
3
,
4
,
%2
,
dst
,
src
,
len
,
src1
%macro
FLOAT_INTERLEAVE2
1
cglobal
float_interleave2
,
3
,
4
,
%1
,
dst
,
src
,
len
,
src1
mov
src1q
,
[
srcq
+
gprsize
]
mov
srcq
,
[
srcq
]
sub
src1q
,
srcq
.
loop
:
MOVPS
m0
,
[
srcq
]
MOVPS
m1
,
[
srcq
+
src1q
]
MOVPS
m3
,
[
srcq
+
mmsize
]
MOVPS
m4
,
[
srcq
+
src1q
+
mmsize
]
mova
m0
,
[
srcq
]
mova
m1
,
[
srcq
+
src1q
]
mova
m3
,
[
srcq
+
mmsize
]
mova
m4
,
[
srcq
+
src1q
+
mmsize
]
MOVPS
m2
,
m0
mova
m2
,
m0
PUNPCKLDQ
m0
,
m1
PUNPCKHDQ
m2
,
m1
MOVPS
m1
,
m3
mova
m1
,
m3
PUNPCKLDQ
m3
,
m4
PUNPCKHDQ
m1
,
m4
MOVPS
[
dstq
]
,
m0
MOVPS
[
dstq
+
1
*
mmsize
]
,
m2
MOVPS
[
dstq
+
2
*
mmsize
]
,
m3
MOVPS
[
dstq
+
3
*
mmsize
]
,
m1
mova
[
dstq
]
,
m0
mova
[
dstq
+
1
*
mmsize
]
,
m2
mova
[
dstq
+
2
*
mmsize
]
,
m3
mova
[
dstq
+
3
*
mmsize
]
,
m1
add
srcq
,
mmsize
*
2
add
dstq
,
mmsize
*
4
sub
lend
,
mmsize
/
2
jg
.
loop
%if
idn
%1
,
mmx
%if
mmsize
==
8
emms
%endif
REP_RET
%endmacro
INIT_MMX
%define
MOVPS
movq
INIT_MMX
mmx
%define
PUNPCKLDQ
punpckldq
%define
PUNPCKHDQ
punpckhdq
FLOAT_INTERLEAVE2
mmx
,
0
INIT_XMM
%define
MOVPS
movaps
FLOAT_INTERLEAVE2
0
INIT_XMM
sse
%define
PUNPCKLDQ
unpcklps
%define
PUNPCKHDQ
unpckhps
FLOAT_INTERLEAVE2
sse
,
5
FLOAT_INTERLEAVE2
5
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment