Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
6405ca7d
Commit
6405ca7d
authored
Oct 06, 2013
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: h264_idct: Update comments to match 8/10-bit depth optimization split
parent
3e2fa991
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
42 additions
and
32 deletions
+42
-32
h264_idct.asm
libavcodec/x86/h264_idct.asm
+42
-32
No files found.
libavcodec/x86/h264_idct.asm
View file @
6405ca7d
...
...
@@ -80,7 +80,7 @@ SECTION .text
%endmacro
INIT_MMX
mmx
; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct_add_
8_
mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_add_8
,
3
,
3
,
0
IDCT4_ADD
r0
,
r1
,
r2
RET
...
...
@@ -202,7 +202,7 @@ cglobal h264_idct_add_8, 3, 3, 0
%endmacro
INIT_MMX
mmx
; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_add_
8_
mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_8
,
3
,
4
,
0
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -270,7 +270,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
%endmacro
INIT_XMM
sse2
; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_add_
8_
sse2(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_8
,
3
,
4
,
10
IDCT8_ADD_SSE
r0
,
r1
,
r2
,
r3
RET
...
...
@@ -307,7 +307,7 @@ cglobal h264_idct8_add_8, 3, 4, 10
%endmacro
INIT_MMX
mmxext
; ff_h264_idct_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct_dc_add_
8_
mmxext(uint8_t *dst, int16_t *block, int stride)
%if
ARCH_X86_64
cglobal
h264_idct_dc_add_8
,
3
,
4
,
0
movsx
r3
,
word
[r1]
...
...
@@ -316,7 +316,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
DC_ADD_MMXEXT_OP
movh
,
r0
,
r2
,
r3
RET
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_dc_add_
8_
mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8
,
3
,
4
,
0
movsx
r3
,
word
[r1]
mov
dword
[r1],
0
...
...
@@ -326,6 +326,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0
DC_ADD_MMXEXT_OP
mova
,
r0
,
r2
,
r3
RET
%else
; ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_dc_add_8
,
2
,
3
,
0
movsx
r2
,
word
[r1]
mov
dword
[r1],
0
...
...
@@ -334,7 +335,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0
DC_ADD_MMXEXT_OP
movh
,
r0
,
r1
,
r2
RET
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_dc_add_
8_
mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8
,
2
,
3
,
0
movsx
r2
,
word
[r1]
mov
dword
[r1],
0
...
...
@@ -347,8 +348,9 @@ cglobal h264_idct8_dc_add_8, 2, 3, 0
%endif
INIT_MMX
mmx
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct_add16_8_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
xor
r5
,
r5
%ifdef
PIC
...
...
@@ -369,8 +371,9 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
jl
.
nextblock
REP_RET
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct8_add4_8_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -402,8 +405,9 @@ cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
RET
INIT_MMX
mmxext
; ff_h264_idct_add16_mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct_add16_8_mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
%ifdef
PIC
...
...
@@ -448,8 +452,9 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
REP_RET
INIT_MMX
mmx
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct_add16intra_8_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
xor
r5
,
r5
%ifdef
PIC
...
...
@@ -472,9 +477,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, st
REP_RET
INIT_MMX
mmxext
; ff_h264_idct_add16intra_mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
;
const uint8_t nnzc[6*
8])
; ff_h264_idct_add16intra_
8_
mmxext(uint8_t *dst, const int *block_offset,
;
int16_t *block, int stride,
;
const uint8_t nnzc[6 *
8])
cglobal
h264_idct_add16intra_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
%ifdef
PIC
...
...
@@ -516,9 +521,9 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
jl
.
nextblock
REP_RET
; ff_h264_idct8_add4_mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
;
const uint8_t nnzc[6*
8])
; ff_h264_idct8_add4_
8_
mmxext(uint8_t *dst, const int *block_offset,
;
int16_t *block, int stride,
;
const uint8_t nnzc[6 *
8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -578,8 +583,9 @@ cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
RET
INIT_XMM
sse2
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct8_add4_8_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
10
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
%ifdef
PIC
...
...
@@ -654,8 +660,8 @@ h264_idct_add8_mmx_plane:
jnz
.
nextblock
rep
ret
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
;
int16_t *block, int stride, const uint8_t nnzc[6*
8])
; ff_h264_idct_add8_
8_
mmx(uint8_t **dest, const int *block_offset,
;
int16_t *block, int stride, const uint8_t nnzc[6 *
8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
mov
r5
,
16
add
r2
,
512
...
...
@@ -719,8 +725,9 @@ h264_idct_add8_mmxext_plane:
rep
ret
INIT_MMX
mmxext
; ff_h264_idct_add8_mmxext(uint8_t **dest, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct_add8_8_mmxext(uint8_t **dest, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
mov
r5
,
16
add
r2
,
512
...
...
@@ -802,8 +809,9 @@ h264_add8x4_idct_sse2:
%endif
%endmacro
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct_add16_8_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
5
+
ARCH_X86_64
,
8
%if
ARCH_X86_64
mov
r5
,
r0
...
...
@@ -849,8 +857,9 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
%endif
%endmacro
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct_add16intra_8_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
ARCH_X86_64
,
8
%if
ARCH_X86_64
mov
r7
,
r0
...
...
@@ -900,8 +909,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
%endif
%endmacro
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; ff_h264_idct_add8_8_sse2(uint8_t **dest, const int *block_offset,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
7
+
ARCH_X86_64
,
8
add
r2
,
512
%if
ARCH_X86_64
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment