Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
8bb59e67
Commit
8bb59e67
authored
Jun 12, 2017
by
James Almer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86/aacpsdsp: add ff_ps_hybrid_analysis_ileave_sse
About 2x faster than the C version.
parent
e229df94
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
109 additions
and
0 deletions
+109
-0
aacpsdsp.asm
libavcodec/x86/aacpsdsp.asm
+106
-0
aacpsdsp_init.c
libavcodec/x86/aacpsdsp_init.c
+3
-0
No files found.
libavcodec/x86/aacpsdsp.asm
View file @
8bb59e67
...
...
@@ -166,6 +166,112 @@ align 16
jl
.
loop
REP_RET
;**********************************************************
;void ps_hybrid_analysis_ileave_sse(float (*out)[32][2],
;                                   float L[2][38][64],
;                                   int i, int len)
;
; Interleaves the two planes L[0] and L[1] into out, starting at
; column i of L and writing from out[i] onwards, up to column 63.
; For every column k and row n handled:
;     out[k][n][0] = L[0][n][k]
;     out[k][n][1] = L[1][n][k]
; The "in" register below holds the L argument.
;
; The len argument is never read: only the first three arguments
; are loaded, and the len register is overwritten with a constant.
;
; NOTE(review): all loads/stores except the movss/movlps/movhps
; ones are movaps, so out and the addressed parts of L are
; presumably 16-byte aligned at those points -- confirm with caller.
; NOTE(review): when i == 64 the column count becomes 0, neither
; parity test fires and control falls into .loop16, which still
; runs one 4-column pass. Presumably callers always pass i < 64 --
; confirm.
;**********************************************************
INIT_XMM sse
; 3 arguments loaded, 7 general-purpose registers, 5 XMM registers.
cglobal ps_hybrid_analysis_ileave, 3, 7, 5, out, in, i, len, in0, in1, tmp
    movsxdifnidn        iq, id          ; widen i to pointer size where needed (x86inc helper)
    mov               lend, 32 << 3     ; len = 256 bytes: one 64-float row of L, and one out[k] ([32][2] floats)
    lea                inq, [inq+iq*4]  ; in  += i floats  -> column i of L[0][0]
    mov               tmpd, id
    shl               tmpd, 8           ; tmp  = i * 256 bytes
    add               outq, tmpq        ; out += i entries -> out[i]
    mov               tmpd, 64
    sub               tmpd, id
    mov                 id, tmpd        ; i = 64 - i: number of columns left to process

    ; Peel off 1 and/or 2 columns first so that the remaining count
    ; is a multiple of 4 for the main loop.
    test                id, 1
    jne .loop4                          ; odd count: do a single column first
    test                id, 2
    jne .loop8                          ; count % 4 == 2: do a pair of columns first

; ---- 4 columns per outer iteration ----
align 16
.loop16:
    mov               in0q, inq         ; in0 walks plane L[0]
    mov               in1q, 38*64*4     ; byte size of one [38][64] float plane
    add               in1q, in0q        ; in1 walks plane L[1], same row/column
    mov               tmpd, lend        ; inner byte counter: 256 / mmsize iterations

.inner_loop16:
    ; Load 4 consecutive columns from two consecutive rows of both planes.
    movaps              m0, [in0q]      ; L[0], row n
    movaps              m1, [in1q]      ; L[1], row n
    movaps              m2, [in0q+lenq] ; L[0], row n+1
    movaps              m3, [in1q+lenq] ; L[1], row n+1
    ; Presumably a plain 4x4 transpose (macro defined elsewhere, m4 as
    ; scratch): afterwards each register holds one column as
    ; {L0[n], L1[n], L0[n+1], L1[n+1]}.
    TRANSPOSE4x4PS 0, 1, 2, 3, 4
    movaps          [outq], m0          ; column k
    movaps     [outq+lenq], m1          ; column k+1 (next out entry)
    movaps   [outq+lenq*2], m2          ; column k+2
    movaps [outq+3*32*2*4], m3          ; column k+3 (3 * 256 bytes)
    lea               in0q, [in0q+lenq*2] ; advance two rows in L[0]
    lea               in1q, [in1q+lenq*2] ; advance two rows in L[1]
    add               outq, mmsize      ; two (re, im) pairs written per column
    sub               tmpd, mmsize
    jg .inner_loop16
    add                inq, 16          ; next 4 columns (4 floats)
    add               outq, 3*32*2*4    ; inner loop already advanced one entry; skip 3 more
    sub                 id, 4
    jg .loop16
    RET

; ---- 2 columns, then continue with the 4-column loop ----
align 16
.loop8:
    mov               in0q, inq         ; in0 walks plane L[0]
    mov               in1q, 38*64*4     ; byte size of one [38][64] float plane
    add               in1q, in0q        ; in1 walks plane L[1]
    mov               tmpd, lend        ; inner byte counter

.inner_loop8:
    ; Two columns from two consecutive rows: row n in the low half,
    ; row n+1 in the high half of each register.
    movlps              m0, [in0q]
    movlps              m1, [in1q]
    movhps              m0, [in0q+lenq]
    movhps              m1, [in1q+lenq]
    ; Presumably interleaves m0/m1 element-wise, then regroups 64-bit
    ; halves so that m0 holds column k and m1 column k+1 (macros defined
    ; elsewhere, m2 as scratch) -- confirm against the macro definitions.
    SBUTTERFLYPS 0, 1, 2
    SBUTTERFLYPD 0, 1, 2
    movaps          [outq], m0          ; column k
    movaps     [outq+lenq], m1          ; column k+1
    lea               in0q, [in0q+lenq*2] ; advance two rows in L[0]
    lea               in1q, [in1q+lenq*2] ; advance two rows in L[1]
    add               outq, mmsize
    sub               tmpd, mmsize
    jg .inner_loop8
    add                inq, 8           ; next 2 columns (2 floats)
    add               outq, lenq        ; inner loop advanced one entry; skip 1 more
    sub                 id, 2
    jg .loop16                          ; remaining count is handled 4 columns at a time
    RET

; ---- 1 column, then continue with the 2- or 4-column loops ----
align 16
.loop4:
    mov               in0q, inq         ; in0 walks plane L[0]
    mov               in1q, 38*64*4     ; byte size of one [38][64] float plane
    add               in1q, in0q        ; in1 walks plane L[1]
    mov               tmpd, lend        ; inner byte counter

.inner_loop4:
    ; One column from two consecutive rows of both planes.
    movss               m0, [in0q]      ; L[0], row n
    movss               m1, [in1q]      ; L[1], row n
    movss               m2, [in0q+lenq] ; L[0], row n+1
    movss               m3, [in1q+lenq] ; L[1], row n+1
    movlhps             m0, m1          ; m0 = {L0[n],   -, L1[n],   -}
    movlhps             m2, m3          ; m2 = {L0[n+1], -, L1[n+1], -}
    ; q2020 presumably picks elements 0 and 2 of each source, giving
    ; {L0[n], L1[n], L0[n+1], L1[n+1]} -- confirm against the constant.
    shufps              m0, m2, q2020
    movaps          [outq], m0
    lea               in0q, [in0q+lenq*2] ; advance two rows in L[0]
    lea               in1q, [in1q+lenq*2] ; advance two rows in L[1]
    add               outq, mmsize
    sub               tmpd, mmsize
    jg .inner_loop4
    add                inq, 4           ; next column (1 float); out already sits at the next entry
    sub                 id, 1
    test                id, 2
    jne .loop8                          ; count % 4 == 2: do a pair next
    cmp                 id, 4
    jge .loop16                         ; at least 4 columns left
    RET
;***********************************************************
;void ps_hybrid_synthesis_deint_sse4(float out[2][38][64],
; float (*in)[32][2],
...
...
libavcodec/x86/aacpsdsp_init.c
View file @
8bb59e67
...
...
@@ -44,6 +44,8 @@ void ff_ps_hybrid_synthesis_deint_sse(float out[2][38][64], float (*in)[32][2],
int
i
,
int
len
);
void
ff_ps_hybrid_synthesis_deint_sse4
(
float
out
[
2
][
38
][
64
],
float
(
*
in
)[
32
][
2
],
int
i
,
int
len
);
void
ff_ps_hybrid_analysis_ileave_sse
(
float
(
*
out
)[
32
][
2
],
float
L
[
2
][
38
][
64
],
int
i
,
int
len
);
av_cold
void
ff_psdsp_init_x86
(
PSDSPContext
*
s
)
{
...
...
@@ -52,6 +54,7 @@ av_cold void ff_psdsp_init_x86(PSDSPContext *s)
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
s
->
add_squares
=
ff_ps_add_squares_sse
;
s
->
mul_pair_single
=
ff_ps_mul_pair_single_sse
;
s
->
hybrid_analysis_ileave
=
ff_ps_hybrid_analysis_ileave_sse
;
s
->
hybrid_synthesis_deint
=
ff_ps_hybrid_synthesis_deint_sse
;
s
->
hybrid_analysis
=
ff_ps_hybrid_analysis_sse
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment