Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
6092dafb
Commit
6092dafb
authored
May 02, 2012
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
lavr: x86: optimized 6-channel s16 to fltp conversion
parent
91851a7b
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
110 additions
and
0 deletions
+110
-0
audio_convert.asm
libavresample/x86/audio_convert.asm
+83
-0
audio_convert_init.c
libavresample/x86/audio_convert_init.c
+15
-0
x86util.asm
libavutil/x86/x86util.asm
+12
-0
No files found.
libavresample/x86/audio_convert.asm
View file @
6092dafb
...
@@ -962,3 +962,86 @@ CONV_S16_TO_FLTP_2CH
...
@@ -962,3 +962,86 @@ CONV_S16_TO_FLTP_2CH
INIT_XMM
avx
INIT_XMM
avx
CONV_S16_TO_FLTP_2CH
CONV_S16_TO_FLTP_2CH
%endif
%endif
;------------------------------------------------------------------------------
; void ff_conv_s16_to_fltp_6ch(float *const *dst, int16_t *src, int len,
;                              int channels);
;
; Splits interleaved 6-channel int16 samples read from src into six separate
; float buffers whose pointers are read from dst[0]..dst[5]. Every sample is
; sign-extended to 32 bits, converted to float and multiplied by the constant
; vector loaded from pf_s16_inv_scale.
;
; Per loop iteration:
;   - 3*mmsize bytes are read from src,
;   - mmsize bytes are written to each of the six output buffers,
;   - len is decreased by mmsize/4.
; The lane comments in the loop body assume 8 words per register (XMM).
;
; Notes:
;   - The `channels` argument is never read by this code.
;   - The loop condition is tested after the body, so the body always executes
;     at least once.
;   - NOTE(review): all loads/stores use mova (x86inc's aligned move), so src
;     and every channel buffer are presumably required to be mmsize-aligned
;     and len a positive multiple of mmsize/4 -- confirm against the caller.
;   - NOTE(review): pf_s16_inv_scale is defined outside this view; presumably
;     a splatted 1.0/32768 -- confirm.
;------------------------------------------------------------------------------
%macro CONV_S16_TO_FLTP_6CH 0
; x86-64: dst, src and len are loaded into registers; 8 GPRs and 7 vector
; registers (m0-m6) are requested.
%if ARCH_X86_64
cglobal conv_s16_to_fltp_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5
%else
; x86-32: only dst and src are loaded into registers (7 GPRs requested); len
; has no register of its own, so `lend` is aliased to the dword memory operand
; r2m (x86inc's memory form of argument 2) and is decremented in place.
cglobal conv_s16_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
%define lend dword r2m
%endif
    ; Fetch the channel pointers dst[1]..dst[5]; dst[0] is fetched last
    ; because it overwrites dstq, the base of the pointer array.
    mov     dst1q, [dstq+  gprsize]
    mov     dst2q, [dstq+2*gprsize]
    mov     dst3q, [dstq+3*gprsize]
    mov     dst4q, [dstq+4*gprsize]
    mov     dst5q, [dstq+5*gprsize]
    mov      dstq, [dstq          ]
    ; Turn dst1..dst5 into byte offsets relative to dst[0], so that advancing
    ; dstq alone moves the write position of all six channels.
    sub     dst1q, dstq
    sub     dst2q, dstq
    sub     dst3q, dstq
    sub     dst4q, dstq
    sub     dst5q, dstq
    mova       m6, [pf_s16_inv_scale]   ; m6 = scale factor, constant for the loop
.loop:
    ; Load three vectors of interleaved words. The numbers in the comments
    ; are word indices into src; index i belongs to channel i % 6.
    mova       m0, [srcq+0*mmsize]  ; m0 =  0,  1,  2,  3,  4,  5,  6,  7
    mova       m3, [srcq+1*mmsize]  ; m3 =  8,  9, 10, 11, 12, 13, 14, 15
    mova       m2, [srcq+2*mmsize]  ; m2 = 16, 17, 18, 19, 20, 21, 22, 23
    ; Re-slice the 24 words into four registers that each start on a sample
    ; boundary (words 0, 6, 12, 18). "x" marks lanes whose content is unused.
    ; m4 is passed as the extra operand of PALIGNR (presumably scratch for the
    ; non-SSSE3 variant -- the macro is defined outside this view).
    PALIGNR    m1, m3, m0, 12, m4   ; m1 =  6,  7,  8,  9, 10, 11,  x,  x
    shufps     m3, m2, q1032        ; m3 = 12, 13, 14, 15, 16, 17, 18, 19
    psrldq     m2, 4                ; m2 = 18, 19, 20, 21, 22, 23,  x,  x
    ; Transpose: interleave words, then dwords, so that words of the same
    ; channel from the four samples end up adjacent. The last operand (4) of
    ; SBUTTERFLY2 names m4 (presumably a temporary; macro not in view).
    SBUTTERFLY2 wd, 0, 1, 4         ; m0 =  0,  6,  1,  7,  2,  8,  3,  9
                                    ; m1 =  4, 10,  5, 11,  x,  x,  x,  x
    SBUTTERFLY2 wd, 3, 2, 4         ; m3 = 12, 18, 13, 19, 14, 20, 15, 21
                                    ; m2 = 16, 22, 17, 23,  x,  x,  x,  x
    SBUTTERFLY2 dq, 0, 3, 4         ; m0 =  0,  6, 12, 18,  1,  7, 13, 19
                                    ; m3 =  2,  8, 14, 20,  3,  9, 15, 21
    punpckldq  m1, m2               ; m1 =  4, 10, 16, 22,  5, 11, 17, 23
    ; Each register now holds two channels (4 words each). Widen the words to
    ; dwords, splitting every register into one register per channel.
    S16_TO_S32_SX 0, 2              ; m0 =  0,  6, 12, 18
                                    ; m2 =  1,  7, 13, 19
    S16_TO_S32_SX 3, 4              ; m3 =  2,  8, 14, 20
                                    ; m4 =  3,  9, 15, 21
    S16_TO_S32_SX 1, 5              ; m1 =  4, 10, 16, 22
                                    ; m5 =  5, 11, 17, 23
    ; Rename registers so that mN holds channel N for N = 0..5, matching the
    ; store order below (m0 and m5 already hold channels 0 and 5).
    SWAP 1, 2, 3, 4
    ; int32 -> float
    cvtdq2ps   m0, m0
    cvtdq2ps   m1, m1
    cvtdq2ps   m2, m2
    cvtdq2ps   m3, m3
    cvtdq2ps   m4, m4
    cvtdq2ps   m5, m5
    ; Apply the scale factor held in m6.
    mulps      m0, m6
    mulps      m1, m6
    mulps      m2, m6
    mulps      m3, m6
    mulps      m4, m6
    mulps      m5, m6
    ; Store one vector per channel; dstN are offsets relative to channel 0.
    mova  [dstq      ], m0
    mova  [dstq+dst1q], m1
    mova  [dstq+dst2q], m2
    mova  [dstq+dst3q], m3
    mova  [dstq+dst4q], m4
    mova  [dstq+dst5q], m5
    add      srcq, mmsize*3         ; consumed three input vectors
    add      dstq, mmsize           ; produced one output vector per channel
    sub      lend, mmsize/4         ; mmsize/4 floats written per channel
    jg .loop                        ; continue while the remaining count is > 0
    REP_RET
%endmacro
; Instantiate CONV_S16_TO_FLTP_6CH once per instruction set. PALIGNR must be
; (re)defined before the expansion it applies to, so the order of the lines
; below matters.

; SSE2 build: PALIGNR expands to PALIGNR_MMX (presumably the variant that
; emulates palignr without SSSE3 -- the macro is defined outside this view).
%define PALIGNR PALIGNR_MMX
INIT_XMM sse2
CONV_S16_TO_FLTP_6CH

; From here on PALIGNR expands to PALIGNR_SSSE3; this definition stays in
; effect for the ssse3, sse4 and avx expansions.
%define PALIGNR PALIGNR_SSSE3
INIT_XMM ssse3
CONV_S16_TO_FLTP_6CH
INIT_XMM sse4
CONV_S16_TO_FLTP_6CH

; The AVX version is only assembled when the build enables HAVE_AVX.
%if HAVE_AVX
INIT_XMM avx
CONV_S16_TO_FLTP_6CH
%endif
libavresample/x86/audio_convert_init.c
View file @
6092dafb
...
@@ -111,6 +111,15 @@ extern void ff_conv_s16_to_fltp_2ch_sse2(float *const *dst, int16_t *src,
...
@@ -111,6 +111,15 @@ extern void ff_conv_s16_to_fltp_2ch_sse2(float *const *dst, int16_t *src,
extern
void
ff_conv_s16_to_fltp_2ch_avx
(
float
*
const
*
dst
,
int16_t
*
src
,
extern
void
ff_conv_s16_to_fltp_2ch_avx
(
float
*
const
*
dst
,
int16_t
*
src
,
int
len
,
int
channels
);
int
len
,
int
channels
);
/* 6-channel interleaved int16 -> planar float converters, one per instruction
 * set (SSE2, SSSE3, SSE4, AVX). All four share the same signature. */
extern void ff_conv_s16_to_fltp_6ch_sse2 (float *const *dst, int16_t *src,
                                          int len, int channels);
extern void ff_conv_s16_to_fltp_6ch_ssse3(float *const *dst, int16_t *src,
                                          int len, int channels);
extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src,
                                          int len, int channels);
extern void ff_conv_s16_to_fltp_6ch_avx  (float *const *dst, int16_t *src,
                                          int len, int channels);
av_cold
void
ff_audio_convert_init_x86
(
AudioConvert
*
ac
)
av_cold
void
ff_audio_convert_init_x86
(
AudioConvert
*
ac
)
{
{
#if HAVE_YASM
#if HAVE_YASM
...
@@ -164,6 +173,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
...
@@ -164,6 +173,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
6
,
16
,
4
,
"SSE2"
,
ff_conv_s16_to_s16p_6ch_sse2
);
6
,
16
,
4
,
"SSE2"
,
ff_conv_s16_to_s16p_6ch_sse2
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLTP
,
AV_SAMPLE_FMT_S16
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLTP
,
AV_SAMPLE_FMT_S16
,
2
,
16
,
8
,
"SSE2"
,
ff_conv_s16_to_fltp_2ch_sse2
);
2
,
16
,
8
,
"SSE2"
,
ff_conv_s16_to_fltp_2ch_sse2
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLTP
,
AV_SAMPLE_FMT_S16
,
6
,
16
,
4
,
"SSE2"
,
ff_conv_s16_to_fltp_6ch_sse2
);
}
}
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
&&
HAVE_SSE
)
{
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
&&
HAVE_SSE
)
{
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_S16P
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_S16P
,
...
@@ -174,6 +185,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
...
@@ -174,6 +185,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2
,
16
,
8
,
"SSSE3"
,
ff_conv_s16_to_s16p_2ch_ssse3
);
2
,
16
,
8
,
"SSSE3"
,
ff_conv_s16_to_s16p_2ch_ssse3
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16P
,
AV_SAMPLE_FMT_S16
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16P
,
AV_SAMPLE_FMT_S16
,
6
,
16
,
4
,
"SSSE3"
,
ff_conv_s16_to_s16p_6ch_ssse3
);
6
,
16
,
4
,
"SSSE3"
,
ff_conv_s16_to_s16p_6ch_ssse3
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLTP
,
AV_SAMPLE_FMT_S16
,
6
,
16
,
4
,
"SSSE3"
,
ff_conv_s16_to_fltp_6ch_ssse3
);
}
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE4
&&
HAVE_SSE
)
{
if
(
mm_flags
&
AV_CPU_FLAG_SSE4
&&
HAVE_SSE
)
{
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_S16
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_S16
,
...
@@ -204,6 +217,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
...
@@ -204,6 +217,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
6
,
16
,
4
,
"AVX"
,
ff_conv_s16_to_s16p_6ch_avx
);
6
,
16
,
4
,
"AVX"
,
ff_conv_s16_to_s16p_6ch_avx
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLTP
,
AV_SAMPLE_FMT_S16
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLTP
,
AV_SAMPLE_FMT_S16
,
2
,
16
,
8
,
"AVX"
,
ff_conv_s16_to_fltp_2ch_avx
);
2
,
16
,
8
,
"AVX"
,
ff_conv_s16_to_fltp_2ch_avx
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLTP
,
AV_SAMPLE_FMT_S16
,
6
,
16
,
4
,
"AVX"
,
ff_conv_s16_to_fltp_6ch_avx
);
}
}
#endif
#endif
}
}
libavutil/x86/x86util.asm
View file @
6092dafb
...
@@ -637,3 +637,15 @@
...
@@ -637,3 +637,15 @@
%
rotate
1
%
rotate
1
%
endrep
%
endrep
%endmacro
%endmacro
;------------------------------------------------------------------------------
; PMOVSXWD dst, src
;
; Sign-extends the low words of src to dwords and writes them to dst.
;   - With SSE4 enabled for the current function, a single pmovsxwd is
;     emitted.
;   - Otherwise the operation is emulated: src is copied to dst (skipped when
;     both operands are textually identical), each low word is duplicated into
;     both halves of a dword, and an arithmetic right shift by 16 leaves the
;     sign-extended value.
;
; NOTE(review): the fallback uses mova for the copy, so a memory src operand
;   would presumably be subject to mova's full-width aligned load, unlike the
;   pmovsxwd path -- confirm before passing memory operands.
;------------------------------------------------------------------------------
%macro PMOVSXWD 2; dst, src
%if cpuflag(sse4)
    pmovsxwd     %1, %2
%else
    ; Copy only when dst and src differ (textual comparison of the operands).
    %ifnidn %1, %2
    mova         %1, %2
    %endif
    punpcklwd    %1, %1             ; each low word w becomes the dword w:w
    psrad        %1, 16             ; arithmetic shift keeps the sign of w
%endif
%endmacro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment