Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
f1a9eee4
Commit
f1a9eee4
authored
Jun 11, 2016
by
Martin Storsjö
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: Add missing movsxd for the int stride parameter
Signed-off-by: Martin Storsjö <martin@martin.st>
parent
a2ddfadc
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 29 additions and 0 deletions
+29
-0
h264_idct.asm
libavcodec/x86/h264_idct.asm
+21
-0
h264_idct_10bit.asm
libavcodec/x86/h264_idct_10bit.asm
+8
-0
No files found.
libavcodec/x86/h264_idct.asm
View file @
f1a9eee4
...
...
@@ -82,6 +82,7 @@ SECTION .text
INIT_MMX mmx
; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
;
; In x86inc's cglobal convention the three numbers are: arguments loaded
; into registers (r0 = dst, r1 = block, r2 = stride), general-purpose
; registers used, and vector registers used.
; The actual transform-and-add work is done by the IDCT4_ADD macro, which is
; defined outside this excerpt.
cglobal h264_idct_add_8, 3, 3, 0
    ; stride is a 32-bit C int; widen it (signed) to the full register width
    ; before it is passed on as r2. The "ifnidn" form emits nothing when r2
    ; and r2d name the same register.
    ; NOTE(review): that should be the 32-bit build -- confirm in x86inc.asm.
    movsxdifnidn r2, r2d
    IDCT4_ADD    r0, r1, r2
    RET
...
...
@@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0
INIT_MMX
mmx
; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_8
,
3
,
4
,
0
movsxdifnidn
r2
,
r2d
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
INIT_XMM sse2
; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
;
; cglobal operands (x86inc convention): 3 arguments loaded into r0..r2,
; 4 general-purpose registers in use (r3 is handed to the macro below as an
; extra register), 10 vector registers.
; The 8x8 transform itself lives in the IDCT8_ADD_SSE macro, which is defined
; outside this excerpt.
cglobal h264_idct8_add_8, 3, 4, 10
    ; stride is a 32-bit C int; sign-extend it to the full register width
    ; before it is passed on as r2. Expands to nothing when r2 and r2d are
    ; the same register.
    ; NOTE(review): that should be the 32-bit build -- confirm in x86inc.asm.
    movsxdifnidn  r2, r2d
    IDCT8_ADD_SSE r0, r1, r2, r3
    RET
...
...
@@ -310,6 +313,7 @@ INIT_MMX mmxext
; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
%if
ARCH_X86_64
cglobal
h264_idct_dc_add_8
,
3
,
4
,
0
movsxd
r2
,
r2d
movsx
r3
,
word
[r1]
mov
dword
[r1],
0
DC_ADD_MMXEXT_INIT
r3
,
r2
...
...
@@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8
,
3
,
4
,
0
movsxd
r2
,
r2d
movsx
r3
,
word
[r1]
mov
dword
[r1],
0
DC_ADD_MMXEXT_INIT
r3
,
r2
...
...
@@ -352,6 +357,7 @@ INIT_MMX mmx
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -375,6 +381,7 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
movsxdifnidn
r3
,
r3d
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -409,6 +416,7 @@ INIT_MMX mmxext
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -456,6 +464,7 @@ INIT_MMX mmx
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -481,6 +490,7 @@ INIT_MMX mmxext
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -525,6 +535,7 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -587,6 +598,7 @@ INIT_XMM sse2
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
10
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -638,6 +650,7 @@ INIT_XMM cpuname
INIT_MMX
mmx
h264_idct_add8_mmx_plane
:
movsxdifnidn
r3
,
r3d
.
nextblock
:
movzx
r6
,
byte
[
scan8
+
r5
]
movzx
r6
,
byte
[
r4
+
r6
]
...
...
@@ -664,6 +677,7 @@ h264_idct_add8_mmx_plane:
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
mov
r5
,
16
add
r2
,
512
%ifdef
PIC
...
...
@@ -684,6 +698,7 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride,
RET
h264_idct_add8_mmxext_plane
:
movsxdifnidn
r3
,
r3d
.
nextblock
:
movzx
r6
,
byte
[
scan8
+
r5
]
movzx
r6
,
byte
[
r4
+
r6
]
...
...
@@ -730,6 +745,7 @@ INIT_MMX mmxext
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
mov
r5
,
16
add
r2
,
512
%if
ARCH_X86_64
...
...
@@ -751,6 +767,7 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride,
; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered
h264_idct_dc_add8_mmxext
:
movsxdifnidn
r3
,
r3d
movd
m0
,
[
r2
]
; 0 0 X D
mov
word
[
r2
+
0
]
,
0
punpcklwd
m0
,
[
r2
+
32
]
; x X d D
...
...
@@ -771,6 +788,7 @@ ALIGN 16
INIT_XMM
sse2
; r0 = uint8_t *dst (clobbered), r2 = int16_t *block, r3 = int stride
h264_add8x4_idct_sse2
:
movsxdifnidn
r3
,
r3d
movq
m0
,
[
r2
+
0
]
movq
m1
,
[
r2
+
8
]
movq
m2
,
[
r2
+
16
]
...
...
@@ -814,6 +832,7 @@ h264_add8x4_idct_sse2:
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
5
+
ARCH_X86_64
,
8
movsxdifnidn
r3
,
r3d
%if
ARCH_X86_64
mov
r5
,
r0
%endif
...
...
@@ -862,6 +881,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
ARCH_X86_64
,
8
movsxdifnidn
r3
,
r3d
%if
ARCH_X86_64
mov
r7
,
r0
%endif
...
...
@@ -914,6 +934,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
7
+
ARCH_X86_64
,
8
movsxdifnidn
r3
,
r3d
add
r2
,
512
%if
ARCH_X86_64
mov
r7
,
r0
...
...
libavcodec/x86/h264_idct_10bit.asm
View file @
f1a9eee4
...
...
@@ -77,6 +77,7 @@ SECTION .text
; IDCT_ADD_10: parameterless macro that emits the h264_idct_add_10 entry
; point for the instruction set selected before the macro is invoked.
; cglobal operands (x86inc convention): 3 arguments loaded into r0..r2 and
; 3 general-purpose registers in use.
; NOTE(review): r0/r1/r2 are presumably dst/block/stride, as in the 8-bit
; variants -- the prototype is not visible in this excerpt.
%macro IDCT_ADD_10 0
cglobal h264_idct_add_10, 3, 3
    ; Sign-extend the 32-bit value in r2d to the full register width before
    ; it is passed on as r2. Expands to nothing when r2 and r2d are the same
    ; register.
    movsxdifnidn r2, r2d
    ; IDCT4_ADD_10 is defined outside this excerpt.
    IDCT4_ADD_10 r0, r1, r2
    RET
%endmacro
...
...
@@ -134,6 +135,7 @@ ADD4x4IDCT
%macro
IDCT_ADD16_10
0
cglobal
h264_idct_add16_10
,
5
,
6
movsxdifnidn
r3
,
r3d
ADD16_OP
0
,
4
+
1
*
8
ADD16_OP
1
,
5
+
1
*
8
ADD16_OP
2
,
4
+
2
*
8
...
...
@@ -190,6 +192,7 @@ IDCT_ADD16_10
INIT_MMX
mmxext
cglobal
h264_idct_dc_add_10
,
3
,
3
movsxdifnidn
r2
,
r2d
movd
m0
,
[r1]
mov
dword
[r1],
0
paddd
m0
,
[
pd_32
]
...
...
@@ -205,6 +208,7 @@ cglobal h264_idct_dc_add_10,3,3
;-----------------------------------------------------------------------------
%macro
IDCT8_DC_ADD
0
cglobal
h264_idct8_dc_add_10
,
3
,
4
,
7
movsxdifnidn
r2
,
r2d
movd
m0
,
[r1]
mov
dword
[r1],
0
paddd
m0
,
[
pd_32
]
...
...
@@ -272,6 +276,7 @@ idct_dc_add %+ SUFFIX:
ret
cglobal
h264_idct_add16intra_10
,
5
,
7
,
8
movsxdifnidn
r3
,
r3d
ADD16_OP_INTRA
0
,
4
+
1
*
8
ADD16_OP_INTRA
2
,
4
+
2
*
8
ADD16_OP_INTRA
4
,
6
+
1
*
8
...
...
@@ -304,6 +309,7 @@ IDCT_ADD16INTRA_10
;-----------------------------------------------------------------------------
%macro
IDCT_ADD8
0
cglobal
h264_idct_add8_10
,
5
,
8
,
7
movsxdifnidn
r3
,
r3d
%if
ARCH_X86_64
mov
r7
,
r0
%endif
...
...
@@ -438,6 +444,7 @@ IDCT_ADD8
%macro
IDCT8_ADD
0
cglobal
h264_idct8_add_10
,
3
,
4
,
16
movsxdifnidn
r2
,
r2d
%if
UNIX64
==
0
%
assign
pad
16
-
gprsize
-
(
stack_offset
&
15
)
sub
rsp
,
pad
...
...
@@ -560,6 +567,7 @@ IDCT8_ADD
%macro
IDCT8_ADD4
0
cglobal
h264_idct8_add4_10
,
0
,
7
,
16
movsxdifnidn
r3
,
r3d
%
assign
pad
16
-
gprsize
-
(
stack_offset
&
15
)
SUB
rsp
,
pad
mov
r5
,
r0mp
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment