Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
a5bbb124
Commit
a5bbb124
authored
Jul 28, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
h264_loopfilter: port x86 simd to cpuflags.
parent
23565c26
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
120 additions
and
121 deletions
+120
-121
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+52
-52
h264_deblock_10bit.asm
libavcodec/x86/h264_deblock_10bit.asm
+38
-39
h264dsp_mmx.c
libavcodec/x86/h264dsp_mmx.c
+30
-30
No files found.
libavcodec/x86/h264_deblock.asm
View file @
a5bbb124
...
...
@@ -282,8 +282,8 @@ cextern pb_A1
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
%macro
DEBLOCK_LUMA
1
cglobal
deblock_v_luma_8
_
%1
,
5
,
5
,
10
%macro
DEBLOCK_LUMA
0
cglobal
deblock_v_luma_8
,
5
,
5
,
10
movd
m8
,
[r4]
; tc0
lea
r4
,
[
r1
*
3
]
dec
r2d
; alpha-1
...
...
@@ -327,8 +327,8 @@ cglobal deblock_v_luma_8_%1, 5,5,10
;-----------------------------------------------------------------------------
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
INIT_MMX
cglobal
deblock_h_luma_8
_
%1
,
5
,
9
INIT_MMX
cpuname
cglobal
deblock_h_luma_8
,
5
,
9
movsxd
r7
,
r1d
lea
r8
,
[
r7
+
r7
*
2
]
lea
r6
,
[
r0
-
4
]
...
...
@@ -355,7 +355,7 @@ cglobal deblock_h_luma_8_%1, 5,9
%if
WIN64
mov
[
rsp
+
0x20
]
,
r4
%endif
call
deblock_v_luma_8
_
%1
call
deblock_v_luma_8
; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
add
r6
,
2
...
...
@@ -384,24 +384,24 @@ cglobal deblock_h_luma_8_%1, 5,9
RET
%endmacro
INIT_XMM
DEBLOCK_LUMA
sse2
INIT_
AVX
DEBLOCK_LUMA
avx
INIT_XMM
sse2
DEBLOCK_LUMA
INIT_
XMM
avx
DEBLOCK_LUMA
%else
%macro
DEBLOCK_LUMA
3
%macro
DEBLOCK_LUMA
2
;-----------------------------------------------------------------------------
; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal
deblock_
%
2
_luma_8_
%1
,
5
,
5
cglobal
deblock_
%
1
_luma_8
,
5
,
5
lea
r4
,
[
r1
*
3
]
dec
r2
; alpha-1
neg
r4
dec
r3
; beta-1
add
r4
,
r0
; pix-3*stride
%
assign
pad
2
*
%
3
+
12
-
(
stack_offset
&
15
)
%
assign
pad
2
*
%
2
+
12
-
(
stack_offset
&
15
)
SUB
esp
,
pad
mova
m0
,
[
r4
+
r1
]
; p1
...
...
@@ -415,7 +415,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
movd
m4
,
[r3]
; tc0
punpcklbw
m4
,
m4
punpcklbw
m4
,
m4
; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
mova
[
esp
+
%
3
]
,
m4
; tc
mova
[
esp
+
%
2
]
,
m4
; tc
pcmpgtb
m4
,
m3
mova
m3
,
[r4]
; p2
pand
m4
,
m7
...
...
@@ -423,7 +423,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
DIFF_GT2
m1
,
m3
,
m5
,
m6
,
m7
; |p2-p0| > beta-1
pand
m6
,
m4
pand
m4
,
[
esp
+
%
3
]
; tc
pand
m4
,
[
esp
+
%
2
]
; tc
psubb
m7
,
m4
,
m6
pand
m6
,
m4
LUMA_Q1
m0
,
m3
,
[r4],
[
r4
+
r1
]
,
m6
,
m4
...
...
@@ -431,7 +431,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
mova
m4
,
[
r0
+
2
*
r1
]
; q2
DIFF_GT2
m2
,
m4
,
m5
,
m6
,
m3
; |q2-q0| > beta-1
pand
m6
,
[esp]
; mask
mova
m5
,
[
esp
+
%
3
]
; tc
mova
m5
,
[
esp
+
%
2
]
; tc
psubb
m7
,
m6
pand
m5
,
m6
mova
m3
,
[
r0
+
r1
]
...
...
@@ -446,8 +446,8 @@ cglobal deblock_%2_luma_8_%1, 5,5
;-----------------------------------------------------------------------------
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
INIT_MMX
cglobal
deblock_h_luma_8
_
%1
,
0
,
5
INIT_MMX
cpuname
cglobal
deblock_h_luma_8
,
0
,
5
mov
r0
,
r0mp
mov
r3
,
r1m
lea
r4
,
[
r3
*
3
]
...
...
@@ -470,11 +470,11 @@ cglobal deblock_h_luma_8_%1, 0,5
PUSH
dword
r2m
PUSH
dword
16
PUSH
dword
r0
call
deblock_
%
2
_luma_8_
%1
%ifidn
%
2
,
v8
call
deblock_
%
1
_luma_8
%ifidn
%
1
,
v8
add
dword
[
esp
]
,
8
; pix_tmp+0x38
add
dword
[
esp
+
16
]
,
2
; tc0+2
call
deblock_
%
2
_luma_8_
%1
call
deblock_
%
1
_luma_8
%endif
ADD
esp
,
20
...
...
@@ -501,12 +501,12 @@ cglobal deblock_h_luma_8_%1, 0,5
RET
%endmacro
; DEBLOCK_LUMA
INIT_MMX
DEBLOCK_LUMA
mmxext
,
v8
,
8
INIT_XMM
DEBLOCK_LUMA
sse2
,
v
,
16
INIT_
AVX
DEBLOCK_LUMA
avx
,
v
,
16
INIT_MMX
mmx2
DEBLOCK_LUMA
v8
,
8
INIT_XMM
sse2
DEBLOCK_LUMA
v
,
16
INIT_
XMM
avx
DEBLOCK_LUMA
v
,
16
%endif
; ARCH
...
...
@@ -608,7 +608,7 @@ DEBLOCK_LUMA avx, v, 16
%
define
mask1p
mask1q
%endmacro
%macro
DEBLOCK_LUMA_INTRA
2
%macro
DEBLOCK_LUMA_INTRA
1
%
define
p1
m0
%
define
p0
m1
%
define
q0
m2
...
...
@@ -643,7 +643,7 @@ DEBLOCK_LUMA avx, v, 16
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_
%
2
_luma_intra_8_
%1
,
4
,
6
,
16
cglobal
deblock_
%
1
_luma_intra_8
,
4
,
6
,
16
%if
ARCH_X86_64
==
0
sub
esp
,
0x60
%endif
...
...
@@ -700,12 +700,12 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16
%endif
RET
INIT_MMX
INIT_MMX
cpuname
%if
ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_h_luma_intra_8
_
%1
,
4
,
9
cglobal
deblock_h_luma_intra_8
,
4
,
9
movsxd
r7
,
r1d
lea
r8
,
[
r7
*
3
]
lea
r6
,
[
r0
-
4
]
...
...
@@ -721,7 +721,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9
lea
r0
,
[
pix_tmp
+
0x40
]
mov
r1
,
0x10
call
deblock_v_luma_intra_8
_
%1
call
deblock_v_luma_intra_8
; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
lea
r5
,
[
r6
+
r8
]
...
...
@@ -734,7 +734,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9
add
rsp
,
0x88
RET
%else
cglobal
deblock_h_luma_intra_8
_
%1
,
2
,
4
cglobal
deblock_h_luma_intra_8
,
2
,
4
lea
r3
,
[
r1
*
3
]
sub
r0
,
4
lea
r2
,
[
r0
+
r3
]
...
...
@@ -753,10 +753,10 @@ cglobal deblock_h_luma_intra_8_%1, 2,4
PUSH
dword
r2m
PUSH
dword
16
PUSH
r0
call
deblock_
%
2
_luma_intra_8_
%1
%ifidn
%
2
,
v8
call
deblock_
%
1
_luma_intra_8
%ifidn
%
1
,
v8
add
dword
[rsp],
8
; pix_tmp+8
call
deblock_
%
2
_luma_intra_8_
%1
call
deblock_
%
1
_luma_intra_8
%endif
ADD
esp
,
16
...
...
@@ -775,16 +775,16 @@ cglobal deblock_h_luma_intra_8_%1, 2,4
%endif
; ARCH_X86_64
%endmacro
; DEBLOCK_LUMA_INTRA
INIT_XMM
DEBLOCK_LUMA_INTRA
sse2
,
v
INIT_
AVX
DEBLOCK_LUMA_INTRA
avx
,
v
INIT_XMM
sse2
DEBLOCK_LUMA_INTRA
v
INIT_
XMM
avx
DEBLOCK_LUMA_INTRA
v
%if
ARCH_X86_64
==
0
INIT_MMX
DEBLOCK_LUMA_INTRA
mmxext
,
v8
INIT_MMX
mmx2
DEBLOCK_LUMA_INTRA
v8
%endif
INIT_MMX
INIT_MMX
mmx2
%macro
CHROMA_V_START
0
dec
r2d
; alpha-1
...
...
@@ -809,13 +809,13 @@ INIT_MMX
;-----------------------------------------------------------------------------
; void ff_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal
deblock_v_chroma_8
_mmxext
,
5
,
6
cglobal
deblock_v_chroma_8
,
5
,
6
CHROMA_V_START
movq
m0
,
[t5]
movq
m1
,
[
t5
+
r1
]
movq
m2
,
[r0]
movq
m3
,
[
r0
+
r1
]
call
ff_chroma_inter_body_mmx
ext
call
ff_chroma_inter_body_mmx
2
movq
[
t5
+
r1
]
,
m1
movq
[r0],
m2
RET
...
...
@@ -823,7 +823,7 @@ cglobal deblock_v_chroma_8_mmxext, 5,6
;-----------------------------------------------------------------------------
; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal
deblock_h_chroma_8
_mmxext
,
5
,
7
cglobal
deblock_h_chroma_8
,
5
,
7
%if
UNIX64
%
define
buf0
[
rsp
-
24
]
%
define
buf1
[
rsp
-
16
]
...
...
@@ -839,7 +839,7 @@ cglobal deblock_h_chroma_8_mmxext, 5,7
TRANSPOSE4x8_LOAD
bw
,
wd
,
dq
,
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
movq
buf0
,
m0
movq
buf1
,
m3
call
ff_chroma_inter_body_mmx
ext
call
ff_chroma_inter_body_mmx
2
movq
m0
,
buf0
movq
m3
,
buf1
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
...
...
@@ -849,7 +849,7 @@ cglobal deblock_h_chroma_8_mmxext, 5,7
RET
ALIGN
16
ff_chroma_inter_body_mmx
ext
:
ff_chroma_inter_body_mmx
2
:
LOAD_MASK
r2d
,
r3d
movd
m6
,
[r4]
; tc0
punpcklbw
m6
,
m6
...
...
@@ -876,13 +876,13 @@ ff_chroma_inter_body_mmxext:
;-----------------------------------------------------------------------------
; void ff_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_v_chroma_intra_8
_mmxext
,
4
,
5
cglobal
deblock_v_chroma_intra_8
,
4
,
5
CHROMA_V_START
movq
m0
,
[t5]
movq
m1
,
[
t5
+
r1
]
movq
m2
,
[r0]
movq
m3
,
[
r0
+
r1
]
call
ff_chroma_intra_body_mmx
ext
call
ff_chroma_intra_body_mmx
2
movq
[
t5
+
r1
]
,
m1
movq
[r0],
m2
RET
...
...
@@ -890,15 +890,15 @@ cglobal deblock_v_chroma_intra_8_mmxext, 4,5
;-----------------------------------------------------------------------------
; void ff_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_h_chroma_intra_8
_mmxext
,
4
,
6
cglobal
deblock_h_chroma_intra_8
,
4
,
6
CHROMA_H_START
TRANSPOSE4x8_LOAD
bw
,
wd
,
dq
,
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
call
ff_chroma_intra_body_mmx
ext
call
ff_chroma_intra_body_mmx
2
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
RET
ALIGN
16
ff_chroma_intra_body_mmx
ext
:
ff_chroma_intra_body_mmx
2
:
LOAD_MASK
r2d
,
r3d
movq
m5
,
m1
movq
m6
,
m2
...
...
libavcodec/x86/h264_deblock_10bit.asm
View file @
a5bbb124
...
...
@@ -151,11 +151,11 @@ cextern pw_4
%endif
%endmacro
%macro
DEBLOCK_LUMA
1
%macro
DEBLOCK_LUMA
0
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal
deblock_v_luma_10
_
%1
,
5
,
5
,
8
*
(
mmsize
/
16
)
cglobal
deblock_v_luma_10
,
5
,
5
,
8
*
(
mmsize
/
16
)
%
assign
pad
5
*
mmsize
+
12
-
(
stack_offset
&
15
)
%
define
tcm
[rsp]
%
define
ms1
[
rsp
+
mmsize
]
...
...
@@ -210,7 +210,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
ADD
rsp
,
pad
RET
cglobal
deblock_h_luma_10
_
%1
,
5
,
6
,
8
*
(
mmsize
/
16
)
cglobal
deblock_h_luma_10
,
5
,
6
,
8
*
(
mmsize
/
16
)
%
assign
pad
7
*
mmsize
+
12
-
(
stack_offset
&
15
)
%
define
tcm
[rsp]
%
define
ms1
[
rsp
+
mmsize
]
...
...
@@ -301,7 +301,6 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
RET
%endmacro
INIT_XMM
%if
ARCH_X86_64
; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
; m12=alpha, m13=beta
...
...
@@ -339,8 +338,8 @@ INIT_XMM
SWAP
3
,
9
%endmacro
%macro
DEBLOCK_LUMA_64
1
cglobal
deblock_v_luma_10
_
%1
,
5
,
5
,
15
%macro
DEBLOCK_LUMA_64
0
cglobal
deblock_v_luma_10
,
5
,
5
,
15
%
define
p2
m8
%
define
p1
m0
%
define
p0
m1
...
...
@@ -377,7 +376,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15
jg
.
loop
REP_RET
cglobal
deblock_h_luma_10
_
%1
,
5
,
7
,
15
cglobal
deblock_h_luma_10
,
5
,
7
,
15
shl
r2d
,
2
shl
r3d
,
2
LOAD_AB
m12
,
m13
,
r2
,
r3
...
...
@@ -417,10 +416,10 @@ cglobal deblock_h_luma_10_%1, 5,7,15
REP_RET
%endmacro
INIT_XMM
DEBLOCK_LUMA_64
sse2
INIT_
AVX
DEBLOCK_LUMA_64
avx
INIT_XMM
sse2
DEBLOCK_LUMA_64
INIT_
XMM
avx
DEBLOCK_LUMA_64
%endif
%macro
SWAPMOVA
2
...
...
@@ -602,8 +601,8 @@ DEBLOCK_LUMA_64 avx
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
%macro
DEBLOCK_LUMA_INTRA_64
1
cglobal
deblock_v_luma_intra_10
_
%1
,
4
,
7
,
16
%macro
DEBLOCK_LUMA_INTRA_64
0
cglobal
deblock_v_luma_intra_10
,
4
,
7
,
16
%
define
t0
m1
%
define
t1
m2
%
define
t2
m4
...
...
@@ -653,7 +652,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,16
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_h_luma_intra_10
_
%1
,
4
,
7
,
16
cglobal
deblock_h_luma_intra_10
,
4
,
7
,
16
%
define
t0
m15
%
define
t1
m14
%
define
t2
m2
...
...
@@ -712,18 +711,18 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,16
RET
%endmacro
INIT_XMM
DEBLOCK_LUMA_INTRA_64
sse2
INIT_
AVX
DEBLOCK_LUMA_INTRA_64
avx
INIT_XMM
sse2
DEBLOCK_LUMA_INTRA_64
INIT_
XMM
avx
DEBLOCK_LUMA_INTRA_64
%endif
%macro
DEBLOCK_LUMA_INTRA
1
%macro
DEBLOCK_LUMA_INTRA
0
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_v_luma_intra_10
_
%1
,
4
,
7
,
8
*
(
mmsize
/
16
)
cglobal
deblock_v_luma_intra_10
,
4
,
7
,
8
*
(
mmsize
/
16
)
LUMA_INTRA_INIT
3
lea
r4
,
[
r1
*
4
]
lea
r5
,
[
r1
*
3
]
...
...
@@ -751,7 +750,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16)
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_h_luma_intra_10
_
%1
,
4
,
7
,
8
*
(
mmsize
/
16
)
cglobal
deblock_h_luma_intra_10
,
4
,
7
,
8
*
(
mmsize
/
16
)
LUMA_INTRA_INIT
8
%if
mmsize
==
8
lea
r4
,
[
r1
*
3
]
...
...
@@ -793,15 +792,15 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16)
%endmacro
%if
ARCH_X86_64
==
0
INIT_MMX
DEBLOCK_LUMA
mmxext
DEBLOCK_LUMA_INTRA
mmxext
INIT_XMM
DEBLOCK_LUMA
sse2
DEBLOCK_LUMA_INTRA
sse2
INIT_
AVX
DEBLOCK_LUMA
avx
DEBLOCK_LUMA_INTRA
avx
INIT_MMX
mmx2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
INIT_XMM
sse2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
INIT_
XMM
avx
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%endif
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
...
...
@@ -843,11 +842,11 @@ DEBLOCK_LUMA_INTRA avx
psraw
%1
,
6
%endmacro
%macro
DEBLOCK_CHROMA
1
%macro
DEBLOCK_CHROMA
0
;-----------------------------------------------------------------------------
; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal
deblock_v_chroma_10
_
%1
,
5
,
7
-
(
mmsize
/
16
),
8
*
(
mmsize
/
16
)
cglobal
deblock_v_chroma_10
,
5
,
7
-
(
mmsize
/
16
),
8
*
(
mmsize
/
16
)
mov
r5
,
r0
sub
r0
,
r1
sub
r0
,
r1
...
...
@@ -881,7 +880,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
;-----------------------------------------------------------------------------
; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_v_chroma_intra_10
_
%1
,
4
,
6
-
(
mmsize
/
16
),
8
*
(
mmsize
/
16
)
cglobal
deblock_v_chroma_intra_10
,
4
,
6
-
(
mmsize
/
16
),
8
*
(
mmsize
/
16
)
mov
r4
,
r0
sub
r0
,
r1
sub
r0
,
r1
...
...
@@ -908,10 +907,10 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
%endmacro
%if
ARCH_X86_64
==
0
INIT_MMX
DEBLOCK_CHROMA
mmxext
INIT_MMX
mmx2
DEBLOCK_CHROMA
%endif
INIT_XMM
DEBLOCK_CHROMA
sse2
INIT_
AVX
DEBLOCK_CHROMA
avx
INIT_XMM
sse2
DEBLOCK_CHROMA
INIT_
XMM
avx
DEBLOCK_CHROMA
libavcodec/x86/h264dsp_mmx.c
View file @
a5bbb124
...
...
@@ -249,12 +249,12 @@ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, in
int alpha, int beta);
#define LF_FUNCS(type, depth)\
LF_FUNC (h, chroma, depth, mmx
ext
)\
LF_IFUNC(h, chroma_intra, depth, mmx
ext
)\
LF_FUNC (v, chroma, depth, mmx
ext
)\
LF_IFUNC(v, chroma_intra, depth, mmx
ext
)\
LF_FUNC (h, luma, depth, mmx
ext
)\
LF_IFUNC(h, luma_intra, depth, mmx
ext
)\
LF_FUNC (h, chroma, depth, mmx
2
)\
LF_IFUNC(h, chroma_intra, depth, mmx
2
)\
LF_FUNC (v, chroma, depth, mmx
2
)\
LF_IFUNC(v, chroma_intra, depth, mmx
2
)\
LF_FUNC (h, luma, depth, mmx
2
)\
LF_IFUNC(h, luma_intra, depth, mmx
2
)\
LF_FUNC (h, luma, depth, sse2)\
LF_IFUNC(h, luma_intra, depth, sse2)\
LF_FUNC (v, luma, depth, sse2)\
...
...
@@ -276,24 +276,24 @@ LF_FUNCS( uint8_t, 8)
LF_FUNCS
(
uint16_t
,
10
)
#if ARCH_X86_32
LF_FUNC
(
v8
,
luma
,
8
,
mmx
ext
)
static
void
ff_deblock_v_luma_8_mmx
ext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
)
LF_FUNC
(
v8
,
luma
,
8
,
mmx
2
)
static
void
ff_deblock_v_luma_8_mmx
2
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
)
{
if
((
tc0
[
0
]
&
tc0
[
1
])
>=
0
)
ff_deblock_v8_luma_8_mmx
ext
(
pix
+
0
,
stride
,
alpha
,
beta
,
tc0
);
ff_deblock_v8_luma_8_mmx
2
(
pix
+
0
,
stride
,
alpha
,
beta
,
tc0
);
if
((
tc0
[
2
]
&
tc0
[
3
])
>=
0
)
ff_deblock_v8_luma_8_mmx
ext
(
pix
+
8
,
stride
,
alpha
,
beta
,
tc0
+
2
);
ff_deblock_v8_luma_8_mmx
2
(
pix
+
8
,
stride
,
alpha
,
beta
,
tc0
+
2
);
}
LF_IFUNC
(
v8
,
luma_intra
,
8
,
mmx
ext
)
static
void
ff_deblock_v_luma_intra_8_mmx
ext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
)
LF_IFUNC
(
v8
,
luma_intra
,
8
,
mmx
2
)
static
void
ff_deblock_v_luma_intra_8_mmx
2
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
)
{
ff_deblock_v8_luma_intra_8_mmx
ext
(
pix
+
0
,
stride
,
alpha
,
beta
);
ff_deblock_v8_luma_intra_8_mmx
ext
(
pix
+
8
,
stride
,
alpha
,
beta
);
ff_deblock_v8_luma_intra_8_mmx
2
(
pix
+
0
,
stride
,
alpha
,
beta
);
ff_deblock_v8_luma_intra_8_mmx
2
(
pix
+
8
,
stride
,
alpha
,
beta
);
}
#endif
/* ARCH_X86_32 */
LF_FUNC
(
v
,
luma
,
10
,
mmx
ext
)
LF_IFUNC
(
v
,
luma_intra
,
10
,
mmx
ext
)
LF_FUNC
(
v
,
luma
,
10
,
mmx
2
)
LF_IFUNC
(
v
,
luma_intra
,
10
,
mmx
2
)
/***********************************/
/* weighted prediction */
...
...
@@ -373,17 +373,17 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c
->
h264_idct_add8
=
ff_h264_idct_add8_8_mmx2
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_8_mmx2
;
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_8_mmx
ext
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_8_mmx
ext
;
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_8_mmx
2
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_8_mmx
2
;
if
(
chroma_format_idc
==
1
)
{
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_mmx
ext
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_8_mmx
ext
;
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_mmx
2
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_8_mmx
2
;
}
#if ARCH_X86_32
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_8_mmx
ext
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_8_mmx
ext
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_8_mmx
ext
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_8_mmx
ext
;
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_8_mmx
2
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_8_mmx
2
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_8_mmx
2
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_8_mmx
2
;
#endif
c
->
weight_h264_pixels_tab
[
0
]
=
ff_h264_weight_16_mmx2
;
c
->
weight_h264_pixels_tab
[
1
]
=
ff_h264_weight_8_mmx2
;
...
...
@@ -436,12 +436,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
if
(
mm_flags
&
AV_CPU_FLAG_MMX2
)
{
#if ARCH_X86_32
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_10_mmx
ext
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_10_mmx
ext
;
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_10_mmx
ext
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_10_mmx
ext
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_10_mmx
ext
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_10_mmx
ext
;
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_10_mmx
2
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_10_mmx
2
;
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_10_mmx
2
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_10_mmx
2
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_10_mmx
2
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_10_mmx
2
;
#endif
c
->
h264_idct_dc_add
=
ff_h264_idct_dc_add_10_mmx2
;
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment