Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
be923ed6
Commit
be923ed6
authored
Jul 15, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: fmtconvert: port to cpuflags
parent
588fafe7
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
71 additions
and
70 deletions
+71
-70
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+71
-70
No files found.
libavcodec/x86/fmtconvert.asm
View file @
be923ed6
...
@@ -26,11 +26,11 @@ SECTION_TEXT
...
@@ -26,11 +26,11 @@ SECTION_TEXT
;---------------------------------------------------------------------------------
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
;---------------------------------------------------------------------------------
%macro
INT32_TO_FLOAT_FMUL_SCALAR
2
%macro
INT32_TO_FLOAT_FMUL_SCALAR
1
%if
UNIX64
%if
UNIX64
cglobal
int32_to_float_fmul_scalar
_
%1
,
3
,
3
,
%2
,
dst
,
src
,
len
cglobal
int32_to_float_fmul_scalar
,
3
,
3
,
%1
,
dst
,
src
,
len
%else
%else
cglobal
int32_to_float_fmul_scalar
_
%1
,
4
,
4
,
%2
,
dst
,
src
,
mul
,
len
cglobal
int32_to_float_fmul_scalar
,
4
,
4
,
%1
,
dst
,
src
,
mul
,
len
%endif
%endif
%if
WIN64
%if
WIN64
SWAP
0
,
2
SWAP
0
,
2
...
@@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
...
@@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
add
dstq
,
lenq
add
dstq
,
lenq
neg
lenq
neg
lenq
.
loop
:
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtdq2ps
m1
,
[
srcq
+
lenq
]
cvtdq2ps
m1
,
[
srcq
+
lenq
]
cvtdq2ps
m2
,
[
srcq
+
lenq
+
16
]
cvtdq2ps
m2
,
[
srcq
+
lenq
+
16
]
%else
%else
...
@@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
...
@@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
REP_RET
REP_RET
%endmacro
%endmacro
INIT_XMM
INIT_XMM
sse
%define
SPLATD
SPLATD_SSE
%define
SPLATD
SPLATD_SSE
%define
movdqa
movaps
INT32_TO_FLOAT_FMUL_SCALAR
5
INT32_TO_FLOAT_FMUL_SCALAR
sse
,
5
INIT_XMM
sse2
%undef
movdqa
%define
SPLATD
SPLATD_SSE2
%define
SPLATD
SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR
sse2
,
3
INT32_TO_FLOAT_FMUL_SCALAR
3
%undef
SPLATD
%undef
SPLATD
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16
2
%macro
FLOAT_TO_INT16
1
cglobal
float_to_int16
_
%1
,
3
,
3
,
%2
,
dst
,
src
,
len
cglobal
float_to_int16
,
3
,
3
,
%1
,
dst
,
src
,
len
add
lenq
,
lenq
add
lenq
,
lenq
lea
srcq
,
[
srcq
+
2
*
lenq
]
lea
srcq
,
[
srcq
+
2
*
lenq
]
add
dstq
,
lenq
add
dstq
,
lenq
neg
lenq
neg
lenq
.
loop
:
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
packssdw
m0
,
m1
packssdw
m0
,
m1
...
@@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
...
@@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
%endif
%endif
add
lenq
,
16
add
lenq
,
16
js
.
loop
js
.
loop
%if
nidn
%1
,
sse2
%if
mmsize
==
8
emms
emms
%endif
%endif
REP_RET
REP_RET
%endmacro
%endmacro
INIT_XMM
INIT_XMM
sse2
FLOAT_TO_INT16
sse2
,
2
FLOAT_TO_INT16
2
INIT_MMX
INIT_MMX
sse
FLOAT_TO_INT16
sse
,
0
FLOAT_TO_INT16
0
%define
cvtps2pi
pf2id
%define
cvtps2pi
pf2id
FLOAT_TO_INT16
3
dnow
,
0
INIT_MMX
3
dnow
FLOAT_TO_INT16
0
%undef
cvtps2pi
%undef
cvtps2pi
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_STEP
2
%macro
FLOAT_TO_INT16_STEP
1
cglobal
float_to_int16_step
_
%1
,
4
,
7
,
%2
,
dst
,
src
,
len
,
step
,
step3
,
v1
,
v2
cglobal
float_to_int16_step
,
4
,
7
,
%1
,
dst
,
src
,
len
,
step
,
step3
,
v1
,
v2
add
lenq
,
lenq
add
lenq
,
lenq
lea
srcq
,
[
srcq
+
2
*
lenq
]
lea
srcq
,
[
srcq
+
2
*
lenq
]
lea
step3q
,
[
stepq
*
3
]
lea
step3q
,
[
stepq
*
3
]
neg
lenq
neg
lenq
.
loop
:
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
packssdw
m0
,
m1
packssdw
m0
,
m1
...
@@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
...
@@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
%endif
%endif
add
lenq
,
16
add
lenq
,
16
js
.
loop
js
.
loop
%if
nidn
%1
,
sse2
%if
mmsize
==
8
emms
emms
%endif
%endif
REP_RET
REP_RET
%endmacro
%endmacro
INIT_XMM
INIT_XMM
sse2
FLOAT_TO_INT16_STEP
sse2
,
2
FLOAT_TO_INT16_STEP
2
INIT_MMX
INIT_MMX
sse
FLOAT_TO_INT16_STEP
sse
,
0
FLOAT_TO_INT16_STEP
0
%define
cvtps2pi
pf2id
%define
cvtps2pi
pf2id
FLOAT_TO_INT16_STEP
3
dnow
,
0
INIT_MMX
3
dnow
FLOAT_TO_INT16_STEP
0
%undef
cvtps2pi
%undef
cvtps2pi
;-------------------------------------------------------------------------------
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
;-------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_INTERLEAVE2
1
%macro
FLOAT_TO_INT16_INTERLEAVE2
0
cglobal
float_to_int16_interleave2
_
%1
,
3
,
4
,
2
,
dst
,
src0
,
src1
,
len
cglobal
float_to_int16_interleave2
,
3
,
4
,
2
,
dst
,
src0
,
src1
,
len
lea
lenq
,
[
4
*
r2q
]
lea
lenq
,
[
4
*
r2q
]
mov
src1q
,
[
src0q
+
gprsize
]
mov
src1q
,
[
src0q
+
gprsize
]
mov
src0q
,
[src0q]
mov
src0q
,
[src0q]
...
@@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
...
@@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
add
src1q
,
lenq
add
src1q
,
lenq
neg
lenq
neg
lenq
.
loop
:
.
loop
:
%if
idn
%1
,
sse2
%if
cpuflag
(
sse2
)
cvtps2dq
m0
,
[
src0q
+
lenq
]
cvtps2dq
m0
,
[
src0q
+
lenq
]
cvtps2dq
m1
,
[
src1q
+
lenq
]
cvtps2dq
m1
,
[
src1q
+
lenq
]
packssdw
m0
,
m1
packssdw
m0
,
m1
...
@@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
...
@@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
%endif
%endif
add
lenq
,
16
add
lenq
,
16
js
.
loop
js
.
loop
%if
nidn
%1
,
sse2
%if
mmsize
==
8
emms
emms
%endif
%endif
REP_RET
REP_RET
%endmacro
%endmacro
INIT_MMX
INIT_MMX
3
dnow
%define
cvtps2pi
pf2id
%define
cvtps2pi
pf2id
FLOAT_TO_INT16_INTERLEAVE2
3
dnow
FLOAT_TO_INT16_INTERLEAVE2
%undef
cvtps2pi
%undef
cvtps2pi
%define
movdqa
movaps
INIT_MMX
sse
FLOAT_TO_INT16_INTERLEAVE2
sse
FLOAT_TO_INT16_INTERLEAVE2
%undef
movdqa
INIT_XMM
sse2
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2
FLOAT_TO_INT16_INTERLEAVE2
sse2
%macro
PSWAPD_SSE
2
%macro
PSWAPD_SSE
2
...
@@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
...
@@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
punpckldq
%1
,
%2
punpckldq
%1
,
%2
%endmacro
%endmacro
%macro
FLOAT_TO_INT16_INTERLEAVE6
1
%macro
FLOAT_TO_INT16_INTERLEAVE6
0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal
float_to_int16_interleave6
_
%1
,
2
,
8
,
0
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
cglobal
float_to_int16_interleave6
,
2
,
8
,
0
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
%if
ARCH_X86_64
%if
ARCH_X86_64
mov
lend
,
r2d
mov
lend
,
r2d
%else
%else
...
@@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
...
@@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
RET
RET
%endmacro
; FLOAT_TO_INT16_INTERLEAVE6
%endmacro
; FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX
sse
%define
pswapd
PSWAPD_SSE
%define
pswapd
PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6
sse
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX
3
dnow
%define
cvtps2pi
pf2id
%define
cvtps2pi
pf2id
%define
pswapd
PSWAPD_3DNOW
%define
pswapd
PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6
3
dnow
FLOAT_TO_INT16_INTERLEAVE6
%undef
pswapd
%undef
pswapd
FLOAT_TO_INT16_INTERLEAVE6
3
dnowext
INIT_MMX
3
dnowext
FLOAT_TO_INT16_INTERLEAVE6
%undef
cvtps2pi
%undef
cvtps2pi
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro
FLOAT_INTERLEAVE6
2
%macro
FLOAT_INTERLEAVE6
1
cglobal
float_interleave6
_
%1
,
2
,
8
,
%2
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
cglobal
float_interleave6
,
2
,
8
,
%1
,
dst
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
,
len
%if
ARCH_X86_64
%if
ARCH_X86_64
mov
lend
,
r2d
mov
lend
,
r2d
%else
%else
...
@@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
...
@@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
sub
src4q
,
srcq
sub
src4q
,
srcq
sub
src5q
,
srcq
sub
src5q
,
srcq
.
loop
:
.
loop
:
%if
idn
%1
,
sse
%if
cpuflag
(
sse
)
movaps
m0
,
[srcq]
movaps
m0
,
[srcq]
movaps
m1
,
[
srcq
+
src1q
]
movaps
m1
,
[
srcq
+
src1q
]
movaps
m2
,
[
srcq
+
src2q
]
movaps
m2
,
[
srcq
+
src2q
]
...
@@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
...
@@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
add
dstq
,
mmsize
*
6
add
dstq
,
mmsize
*
6
sub
lend
,
mmsize
/
4
sub
lend
,
mmsize
/
4
jg
.
loop
jg
.
loop
%if
idn
%1
,
mmx
%if
mmsize
==
8
emms
emms
%endif
%endif
REP_RET
REP_RET
%endmacro
%endmacro
INIT_MMX
INIT_MMX
mmx
FLOAT_INTERLEAVE6
mmx
,
0
FLOAT_INTERLEAVE6
0
INIT_XMM
INIT_XMM
sse
FLOAT_INTERLEAVE6
sse
,
7
FLOAT_INTERLEAVE6
7
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro
FLOAT_INTERLEAVE2
2
%macro
FLOAT_INTERLEAVE2
1
cglobal
float_interleave2
_
%1
,
3
,
4
,
%2
,
dst
,
src
,
len
,
src1
cglobal
float_interleave2
,
3
,
4
,
%1
,
dst
,
src
,
len
,
src1
mov
src1q
,
[
srcq
+
gprsize
]
mov
src1q
,
[
srcq
+
gprsize
]
mov
srcq
,
[
srcq
]
mov
srcq
,
[
srcq
]
sub
src1q
,
srcq
sub
src1q
,
srcq
.
loop
:
.
loop
:
MOVPS
m0
,
[
srcq
]
mova
m0
,
[
srcq
]
MOVPS
m1
,
[
srcq
+
src1q
]
mova
m1
,
[
srcq
+
src1q
]
MOVPS
m3
,
[
srcq
+
mmsize
]
mova
m3
,
[
srcq
+
mmsize
]
MOVPS
m4
,
[
srcq
+
src1q
+
mmsize
]
mova
m4
,
[
srcq
+
src1q
+
mmsize
]
MOVPS
m2
,
m0
mova
m2
,
m0
PUNPCKLDQ
m0
,
m1
PUNPCKLDQ
m0
,
m1
PUNPCKHDQ
m2
,
m1
PUNPCKHDQ
m2
,
m1
MOVPS
m1
,
m3
mova
m1
,
m3
PUNPCKLDQ
m3
,
m4
PUNPCKLDQ
m3
,
m4
PUNPCKHDQ
m1
,
m4
PUNPCKHDQ
m1
,
m4
MOVPS
[
dstq
]
,
m0
mova
[
dstq
]
,
m0
MOVPS
[
dstq
+
1
*
mmsize
]
,
m2
mova
[
dstq
+
1
*
mmsize
]
,
m2
MOVPS
[
dstq
+
2
*
mmsize
]
,
m3
mova
[
dstq
+
2
*
mmsize
]
,
m3
MOVPS
[
dstq
+
3
*
mmsize
]
,
m1
mova
[
dstq
+
3
*
mmsize
]
,
m1
add
srcq
,
mmsize
*
2
add
srcq
,
mmsize
*
2
add
dstq
,
mmsize
*
4
add
dstq
,
mmsize
*
4
sub
lend
,
mmsize
/
2
sub
lend
,
mmsize
/
2
jg
.
loop
jg
.
loop
%if
idn
%1
,
mmx
%if
mmsize
==
8
emms
emms
%endif
%endif
REP_RET
REP_RET
%endmacro
%endmacro
INIT_MMX
INIT_MMX
mmx
%define
MOVPS
movq
%define
PUNPCKLDQ
punpckldq
%define
PUNPCKLDQ
punpckldq
%define
PUNPCKHDQ
punpckhdq
%define
PUNPCKHDQ
punpckhdq
FLOAT_INTERLEAVE2
mmx
,
0
FLOAT_INTERLEAVE2
0
INIT_XMM
INIT_XMM
sse
%define
MOVPS
movaps
%define
PUNPCKLDQ
unpcklps
%define
PUNPCKLDQ
unpcklps
%define
PUNPCKHDQ
unpckhps
%define
PUNPCKHDQ
unpckhps
FLOAT_INTERLEAVE2
sse
,
5
FLOAT_INTERLEAVE2
5
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment