Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
7042a55c
Commit
7042a55c
authored
Jan 13, 2016
by
James Darnley
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/h264: mmxext 4:2:2 chroma deblock/loop filter
2.6 times faster (366 vs. 142 cycles)
parent
95564466
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
47 additions
and
3 deletions
+47
-3
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+43
-3
h264dsp_init.c
libavcodec/x86/h264dsp_init.c
+4
-0
No files found.
libavcodec/x86/h264_deblock.asm
View file @
7042a55c
...
@@ -864,7 +864,50 @@ ff_chroma_inter_body_mmxext:
...
@@ -864,7 +864,50 @@ ff_chroma_inter_body_mmxext:
DEBLOCK_P0_Q0
DEBLOCK_P0_Q0
ret
ret
%define t5 r4
%define t6 r5

;------------------------------------------------------------------------------
; CHROMA422_H_FILTER_8ROWS tcmem
;   Runs one load / filter / store pass over the 8 rows currently addressed
;   through PASS8ROWS(t5, r0, r1, t6).
;   %1 = memory operand holding the 4 bytes (two tc0 values, each already
;        duplicated once) that apply to these rows.
;   Uses buf0/buf1 (defined by the caller) to keep m0 and m3 across the
;   filter, and overwrites m6 and m7.
;------------------------------------------------------------------------------
%macro CHROMA422_H_FILTER_8ROWS 1
    TRANSPOSE4x8B_LOAD  PASS8ROWS(t5, r0, r1, t6)
    movq      buf0, m0              ; park m0/m3 in memory; they are reloaded
    movq      buf1, m3              ; unchanged just before the store
    LOAD_MASK r2d, r3d              ; r2d/r3d: presumably alpha/beta (args 2, 3)
    movd      m6, %1                ; t,t,u,u
    punpcklwd m6, m6                ; t,t,t,t,u,u,u,u: one tc byte per row
    pand      m7, m6                ; m7 presumably holds the LOAD_MASK result
    DEBLOCK_P0_Q0
    movq      m0, buf0
    movq      m3, buf1
    TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
%endmacro

;------------------------------------------------------------------------------
; deblock_h_chroma422_8: 5 arguments, 6 GPRs, stack scratch of
; (1 + 2*ARCH_X86_64)*mmsize bytes requested via cglobal.
; r4 is read as a pointer to 4 tc0 bytes; r0/r1 are used as base and row
; step for PASS8ROWS; r2d/r3d are handed to LOAD_MASK.
; Processes 16 rows as two consecutive passes of 8 rows each.
;------------------------------------------------------------------------------
cglobal deblock_h_chroma422_8, 5, 6, 0, 0-(1+ARCH_X86_64*2)*mmsize
    ; Scratch slots for m0 and m3: on x86-64 they live in the stack area
    ; allocated above, otherwise the r0m and r2m argument slots are reused.
%if ARCH_X86_64
    %define buf0 [rsp+16]
    %define buf1 [rsp+8]
%else
    %define buf0 r0m
    %define buf1 r2m
%endif

    ; Read tc0 first: t5 is an alias of r4, so the pointer has to be consumed
    ; before t5 is used for row addressing.
    ; NOTE(review): CHROMA_H_START is defined elsewhere; it presumably sets
    ; up t5/t6/r0 for PASS8ROWS -- confirm.
    movd      m6, [r4]              ; a,b,c,d
    punpcklbw m6, m6                ; a,a,b,b,c,c,d,d
    movq      [rsp], m6             ; low dword -> rows 0-7, high dword -> rows 8-15
    CHROMA_H_START

    ; rows 0-7
    CHROMA422_H_FILTER_8ROWS [rsp]

    ; step both row pointers down by 8 rows
    lea       r0, [r0+r1*8]
    lea       t5, [t5+r1*8]

    ; rows 8-15
    CHROMA422_H_FILTER_8ROWS [rsp+4]
    RET
; in: %1=p0 %2=p1 %3=q1
; in: %1=p0 %2=p1 %3=q1
; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
...
@@ -877,9 +920,6 @@ ff_chroma_inter_body_mmxext:
...
@@ -877,9 +920,6 @@ ff_chroma_inter_body_mmxext:
pavgb
%1
,
%2
; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
pavgb
%1
,
%2
; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
%endmacro
%endmacro
%define
t5
r4
%define
t6
r5
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
...
...
libavcodec/x86/h264dsp_init.c
View file @
7042a55c
...
@@ -129,6 +129,8 @@ LF_IFUNC(v, chroma_intra, depth, avx)
...
@@ -129,6 +129,8 @@ LF_IFUNC(v, chroma_intra, depth, avx)
LF_FUNCS
(
uint8_t
,
8
)
LF_FUNCS
(
uint8_t
,
8
)
LF_FUNCS
(
uint16_t
,
10
)
LF_FUNCS
(
uint16_t
,
10
)
void
ff_deblock_h_chroma422_8_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
);
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
LF_FUNC
(
v8
,
luma
,
8
,
mmxext
)
LF_FUNC
(
v8
,
luma
,
8
,
mmxext
)
static
void
deblock_v_luma_8_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
static
void
deblock_v_luma_8_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
...
@@ -245,6 +247,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
...
@@ -245,6 +247,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
if
(
chroma_format_idc
<=
1
)
{
if
(
chroma_format_idc
<=
1
)
{
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_mmxext
;
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_mmxext
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_8_mmxext
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_8_mmxext
;
}
else
{
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma422_8_mmxext
;
}
}
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
c
->
h264_v_loop_filter_luma
=
deblock_v_luma_8_mmxext
;
c
->
h264_v_loop_filter_luma
=
deblock_v_luma_8_mmxext
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment