Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
57b5b84e
Commit
57b5b84e
authored
Mar 26, 2014
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: dsputil: Move ff_apply_window_int16_* bits to ac3dsp, where they belong
parent
c2c5be57
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
131 additions
and
130 deletions
+131
-130
ac3dsp.asm
libavcodec/x86/ac3dsp.asm
+131
-0
dsputil.asm
libavcodec/x86/dsputil.asm
+0
-130
No files found.
libavcodec/x86/ac3dsp.asm
View file @
57b5b84e
...
...
@@ -35,6 +35,10 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
; Packed dword constants: the same value repeated in four 32-bit lanes.
pd_1:        times 4 dd 1
pd_151:      times 4 dd 151

; used in ff_apply_window_int16()
; pb_revwords is loaded into a register and passed to pshufb by the
; SSSE3 (non-Atom) path of REVERSE_WORDS; pd_16384 (1 << 14) is the
; rounding term added before the >> 15 in the bit-exact MMXEXT/SSE2 path.
pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
pd_16384:    times 4 dd 16384

SECTION .text
;-----------------------------------------------------------------------------
...
...
@@ -419,3 +423,130 @@ AC3_EXTRACT_EXPONENTS
INIT_XMM
ssse3
AC3_EXTRACT_EXPONENTS
%endif
;-----------------------------------------------------------------------------
; void ff_apply_window_int16(int16_t *output, const int16_t *input,
; const int16_t *window, unsigned int len)
;-----------------------------------------------------------------------------
; REVERSE_WORDS reg [, mask]
;
; Reverses the order of the 16-bit words in register %1, in place.
;   %1  register to permute (also the destination)
;   %2  register holding the pshufb control mask. Only the SSSE3 (non-Atom)
;       variant reads it, and the resulting permutation is whatever that
;       mask encodes; the other variants ignore %2.
; Expands to nothing when none of the instruction sets below is selected.
%macro REVERSE_WORDS 1-2
%if cpuflag(ssse3) && notcpuflag(atom)
    pshufb   %1, %2             ; single byte shuffle driven by the mask in %2
%elif cpuflag(sse2)
    pshuflw  %1, %1, 0x1B       ; reverse the four words of the low qword
    pshufhw  %1, %1, 0x1B       ; reverse the four words of the high qword
    pshufd   %1, %1, 0x4E       ; swap the low and high qwords
%elif cpuflag(mmxext)
    pshufw   %1, %1, 0x1B       ; reverse the four words of the register
%endif
%endmacro
; MUL16FIXED dst, src, tmp
;
; Multiplies the packed 16-bit words of %1 by those of %2 and scales the
; products down by 15 bits, leaving the result in %1.
;   %1  dst  (multiplicand on entry, result on exit)
;   %2  src
;   %3  scratch register; overwritten by the MMXEXT/SSE2 variant, untouched
;       by the SSSE3 variant
; Expands to nothing when neither ssse3 nor mmxext is selected.
%macro MUL16FIXED 3
%if cpuflag(ssse3)
    ; dst, src, unused
    ; dst = ((dst * src) + (1<<14)) >> 15
    pmulhrsw %1, %2
%elif cpuflag(mmxext)
    ; dst, src, temp
    ; dst = (dst * src) >> 15
    ; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
    ; in from the pmullw result.
    mova     %3, %1             ; keep a copy of dst for the low-half product
    pmulhw   %1, %2             ; %1 = high 16 bits of each product
    pmullw   %3, %2             ; %3 = low 16 bits of each product
    psrlw    %3, 15             ; move bit 15 of the low half down to bit 0
    psllw    %1, 1              ; make room for that bit
    por      %1, %3             ; merge it in
%endif
%endmacro
; APPLY_WINDOW_INT16 bitexact
;
; Emits one windowing function for the instruction set currently selected
; with INIT_MMX / INIT_XMM:
;   %1 != 0  ->  apply_window_int16        (bitexact version)
;   %1 == 0  ->  apply_window_int16_round
;
; Arguments of the emitted function (C prototype in the banner comment):
;   output, input, window  pointers to int16_t data
;   offset                 the 'len' argument; added to the pointers as-is
; offset2 is a fifth, scratch general-purpose register; m0-m5 are used.
;
; Each iteration loads one vector of window coefficients from
; [window+offset2], applies it to the input at offset2, then reverses the
; coefficient words and applies them to the input at offset. offset moves up
; and offset2 moves down by mmsize per iteration; the loop ends when the
; subtraction on offset2 borrows.
; NOTE(review): no remainder or zero-length handling is done here, and mova
; is used for all memory accesses - presumably callers guarantee a suitable
; length and alignment; confirm against the call sites.
%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
%if %1
cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
%else
cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
%endif
    ; 'len' is a 32-bit unsigned int, and the x86-64 calling conventions leave
    ; the upper half of its argument register undefined. Zero-extend it before
    ; offsetq is used for address arithmetic; the loop updates further down
    ; are 32-bit operations and keep the upper half clear from then on.
    mov           offsetd, offsetd
    lea           offset2q, [offsetq-mmsize]
%if cpuflag(ssse3) && notcpuflag(atom)
    mova          m5, [pb_revwords]     ; pshufb mask for REVERSE_WORDS
    ALIGN 16                            ; align .loop (this variant only)
%elif %1
    mova          m5, [pd_16384]        ; rounding term for the 32-bit path
%endif
.loop:
%if cpuflag(ssse3)
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The ssse3 version is bit-identical.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    pmulhrsw      m1, m0
    REVERSE_WORDS m0, m5
    pmulhrsw      m0, [inputq+offsetq]
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m0
%elif %1
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
    ;
    ; m0/m2 are zeroed before the unpack, so every dword holds (0, window).
    ; m1 is unpacked without being initialised: its stale words pair up with
    ; those zero words in pmaddwd and therefore do not affect the sum.
    mova          m3, [windowq+offset2q]
    mova          m4, [inputq+offset2q]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offset2q], m0
    REVERSE_WORDS m3
    mova          m4, [inputq+offsetq]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offsetq], m0
%else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
    ; therefore are not bit-identical to the C version.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    mova          m2, [inputq+offsetq]
    MUL16FIXED    m1, m0, m3
    REVERSE_WORDS m0
    MUL16FIXED    m2, m0, m3
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m2
%endif
    add           offsetd, mmsize
    sub           offset2d, mmsize      ; borrows once offset2 passes zero
    jae .loop
    REP_RET
%endmacro
; Expand APPLY_WINDOW_INT16 once per supported instruction set.

; %1 = 0: apply_window_int16_round
INIT_MMX mmxext
APPLY_WINDOW_INT16 0
INIT_XMM sse2
APPLY_WINDOW_INT16 0

; %1 = 1: apply_window_int16 (bitexact version)
INIT_MMX mmxext
APPLY_WINDOW_INT16 1
INIT_XMM sse2
APPLY_WINDOW_INT16 1
INIT_XMM ssse3
APPLY_WINDOW_INT16 1
INIT_XMM ssse3, atom
APPLY_WINDOW_INT16 1
libavcodec/x86/dsputil.asm
View file @
57b5b84e
...
...
@@ -27,8 +27,6 @@ pb_zzzzzzzz77777777: times 8 db -1
; Byte constant: the value 7 repeated eight times.
pb_7:                times 8 db 7

; 16-entry byte tables.
; NOTE(review): the names and the -1 entries suggest pshufb control masks,
; but their users are not visible in this excerpt - confirm before relying
; on that.
pb_zzzz3333zzzzbbbb: db -1, -1, -1, -1,  3,  3,  3,  3, -1, -1, -1, -1, 11, 11, 11, 11
pb_zz11zz55zz99zzdd: db -1, -1,  1,  1, -1, -1,  5,  5, -1, -1,  9,  9, -1, -1, 13, 13

; Constants used by ff_apply_window_int16(): pb_revwords is the pshufb mask
; passed to REVERSE_WORDS, pd_16384 (1 << 14) the rounding term added before
; the >> 15 in the bit-exact MMXEXT/SSE2 path.
pb_revwords:         SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
pd_16384:            times 4 dd 16384

; Byte indices that reverse the byte order inside each group of four.
pb_bswap32:          db  3,  2,  1,  0,  7,  6,  5,  4, 11, 10,  9,  8, 15, 14, 13, 12

SECTION_TEXT
...
...
@@ -205,134 +203,6 @@ SCALARPRODUCT_LOOP 0
RET
;-----------------------------------------------------------------------------
; void ff_apply_window_int16(int16_t *output, const int16_t *input,
; const int16_t *window, unsigned int len)
;-----------------------------------------------------------------------------
; REVERSE_WORDS reg [, mask]
;
; Reverses the order of the 16-bit words in register %1, in place.
;   %1  register to permute (also the destination)
;   %2  register holding the pshufb control mask. Only the SSSE3 (non-Atom)
;       variant reads it, and the resulting permutation is whatever that
;       mask encodes; the other variants ignore %2.
; Expands to nothing when none of the instruction sets below is selected.
%macro REVERSE_WORDS 1-2
%if cpuflag(ssse3) && notcpuflag(atom)
    pshufb   %1, %2             ; single byte shuffle driven by the mask in %2
%elif cpuflag(sse2)
    pshuflw  %1, %1, 0x1B       ; reverse the four words of the low qword
    pshufhw  %1, %1, 0x1B       ; reverse the four words of the high qword
    pshufd   %1, %1, 0x4E       ; swap the low and high qwords
%elif cpuflag(mmxext)
    pshufw   %1, %1, 0x1B       ; reverse the four words of the register
%endif
%endmacro
; MUL16FIXED dst, src, tmp
;
; Multiplies the packed 16-bit words of %1 by those of %2 and scales the
; products down by 15 bits, leaving the result in %1.
;   %1  dst  (multiplicand on entry, result on exit)
;   %2  src
;   %3  scratch register; overwritten by the MMXEXT/SSE2 variant, untouched
;       by the SSSE3 variant
; Expands to nothing when neither ssse3 nor mmxext is selected.
%macro MUL16FIXED 3
%if cpuflag(ssse3)
    ; dst, src, unused
    ; dst = ((dst * src) + (1<<14)) >> 15
    pmulhrsw %1, %2
%elif cpuflag(mmxext)
    ; dst, src, temp
    ; dst = (dst * src) >> 15
    ; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
    ; in from the pmullw result.
    mova     %3, %1             ; keep a copy of dst for the low-half product
    pmulhw   %1, %2             ; %1 = high 16 bits of each product
    pmullw   %3, %2             ; %3 = low 16 bits of each product
    psrlw    %3, 15             ; move bit 15 of the low half down to bit 0
    psllw    %1, 1              ; make room for that bit
    por      %1, %3             ; merge it in
%endif
%endmacro
; APPLY_WINDOW_INT16 bitexact
;
; Emits one windowing function for the instruction set currently selected
; with INIT_MMX / INIT_XMM:
;   %1 != 0  ->  apply_window_int16        (bitexact version)
;   %1 == 0  ->  apply_window_int16_round
;
; Arguments of the emitted function (C prototype in the banner comment):
;   output, input, window  pointers to int16_t data
;   offset                 the 'len' argument; added to the pointers as-is
; offset2 is a fifth, scratch general-purpose register; m0-m5 are used.
;
; Each iteration loads one vector of window coefficients from
; [window+offset2], applies it to the input at offset2, then reverses the
; coefficient words and applies them to the input at offset. offset moves up
; and offset2 moves down by mmsize per iteration; the loop ends when the
; subtraction on offset2 borrows.
; NOTE(review): no remainder or zero-length handling is done here, and mova
; is used for all memory accesses - presumably callers guarantee a suitable
; length and alignment; confirm against the call sites.
%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
%if %1
cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
%else
cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
%endif
    ; 'len' is a 32-bit unsigned int, and the x86-64 calling conventions leave
    ; the upper half of its argument register undefined. Zero-extend it before
    ; offsetq is used for address arithmetic; the loop updates further down
    ; are 32-bit operations and keep the upper half clear from then on.
    mov           offsetd, offsetd
    lea           offset2q, [offsetq-mmsize]
%if cpuflag(ssse3) && notcpuflag(atom)
    mova          m5, [pb_revwords]     ; pshufb mask for REVERSE_WORDS
    ALIGN 16                            ; align .loop (this variant only)
%elif %1
    mova          m5, [pd_16384]        ; rounding term for the 32-bit path
%endif
.loop:
%if cpuflag(ssse3)
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The ssse3 version is bit-identical.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    pmulhrsw      m1, m0
    REVERSE_WORDS m0, m5
    pmulhrsw      m0, [inputq+offsetq]
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m0
%elif %1
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
    ;
    ; m0/m2 are zeroed before the unpack, so every dword holds (0, window).
    ; m1 is unpacked without being initialised: its stale words pair up with
    ; those zero words in pmaddwd and therefore do not affect the sum.
    mova          m3, [windowq+offset2q]
    mova          m4, [inputq+offset2q]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offset2q], m0
    REVERSE_WORDS m3
    mova          m4, [inputq+offsetq]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offsetq], m0
%else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
    ; therefore are not bit-identical to the C version.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    mova          m2, [inputq+offsetq]
    MUL16FIXED    m1, m0, m3
    REVERSE_WORDS m0
    MUL16FIXED    m2, m0, m3
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m2
%endif
    add           offsetd, mmsize
    sub           offset2d, mmsize      ; borrows once offset2 passes zero
    jae .loop
    REP_RET
%endmacro
; Expand APPLY_WINDOW_INT16 once per supported instruction set.

; %1 = 0: apply_window_int16_round
INIT_MMX mmxext
APPLY_WINDOW_INT16 0
INIT_XMM sse2
APPLY_WINDOW_INT16 0

; %1 = 1: apply_window_int16 (bitexact version)
INIT_MMX mmxext
APPLY_WINDOW_INT16 1
INIT_XMM sse2
APPLY_WINDOW_INT16 1
INIT_XMM ssse3
APPLY_WINDOW_INT16 1
INIT_XMM ssse3, atom
APPLY_WINDOW_INT16 1
; void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w,
; int *left, int *left_top)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment