Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
91d85bb1
Commit
91d85bb1
authored
Feb 05, 2014
by
Clément Bœsch
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86/vp9lpf: add ff_vp9_loop_filter_[vh]_44_16_{sse2,ssse3,avx}.
parent
acd75053
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
105 additions
and
27 deletions
+105
-27
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+5
-0
vp9lpf.asm
libavcodec/x86/vp9lpf.asm
+100
-27
No files found.
libavcodec/x86/vp9dsp_init.c
View file @
91d85bb1
...
...
@@ -187,6 +187,9 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
lpf_funcs
(
16
,
16
,
sse2
);
lpf_funcs
(
16
,
16
,
ssse3
);
lpf_funcs
(
16
,
16
,
avx
);
lpf_funcs
(
44
,
16
,
sse2
);
lpf_funcs
(
44
,
16
,
ssse3
);
lpf_funcs
(
44
,
16
,
avx
);
lpf_funcs
(
84
,
16
,
sse2
);
lpf_funcs
(
84
,
16
,
ssse3
);
lpf_funcs
(
84
,
16
,
avx
);
...
...
@@ -234,6 +237,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (ARCH_X86_64) { \
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
...
...
libavcodec/x86/vp9lpf.asm
View file @
91d85bb1
...
...
@@ -290,23 +290,23 @@ SECTION .text
SWAP
%3
,
%2
%endmacro
%macro
DEFINE_REAL_P7_TO_Q7
0
%define
P7
dst1q
+
2
*
mstrideq
%define
P6
dst1q
+
mstrideq
%define
P5
dst1q
%define
P4
dst1q
+
strideq
%define
P3
dstq
+
4
*
mstrideq
%define
P2
dstq
+
mstride3q
%define
P1
dstq
+
2
*
mstrideq
%define
P0
dstq
+
mstrideq
%define
Q0
dstq
%define
Q1
dstq
+
strideq
%define
Q2
dstq
+
2
*
strideq
%define
Q3
dstq
+
stride3q
%define
Q4
dstq
+
4
*
strideq
%define
Q5
dst2q
+
mstrideq
%define
Q6
dst2q
%define
Q7
dst2q
+
strideq
; DEFINE_REAL_P7_TO_Q7 [disp]
;
; Binds the sixteen names P7..P0 and Q0..Q7 to address expressions built from
; the dst1q / dstq / dst2q pointers and the strideq, mstrideq, stride3q and
; mstride3q registers. Each name expands to a bare expression (no brackets),
; so users write e.g. [P0] or [Q3] at the point of use.
;
;   %1  optional constant added to every expression; defaults to 0 when the
;       macro is invoked without an argument.
;
; NOTE(review): mstrideq / mstride3q look like the negated stride and 3x the
; negated stride -- confirm against the function prologue that sets them up.
%macro DEFINE_REAL_P7_TO_Q7 0-1 0
    ; addresses taken relative to dst1q
    %define P7 dst1q + 2*mstrideq + %1
    %define P6 dst1q + mstrideq + %1
    %define P5 dst1q + %1
    %define P4 dst1q + strideq + %1

    ; addresses taken relative to dstq
    %define P3 dstq + 4*mstrideq + %1
    %define P2 dstq + mstride3q + %1
    %define P1 dstq + 2*mstrideq + %1
    %define P0 dstq + mstrideq + %1
    %define Q0 dstq + %1
    %define Q1 dstq + strideq + %1
    %define Q2 dstq + 2*strideq + %1
    %define Q3 dstq + stride3q + %1
    %define Q4 dstq + 4*strideq + %1

    ; addresses taken relative to dst2q
    %define Q5 dst2q + mstrideq + %1
    %define Q6 dst2q + %1
    %define Q7 dst2q + strideq + %1
%endmacro
%macro
SPLATB_MASK
2
...
...
@@ -460,8 +460,9 @@ SECTION .text
pand
m3
,
m5
; fm final value
; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3)
; calc flat8in and hev masks
; calc flat8in
(if not 44_16)
and hev masks
mova
m6
,
[
pb_81
]
; [1 1 1 1 ...] ^ 0x80
%if
%2
!
=
44
ABSSUB_CMP
m2
,
m8
,
m11
,
m6
,
m4
,
m5
; abs(p3 - p0) <= 1
mova
m8
,
[
pb_80
]
ABSSUB_CMP
m1
,
m9
,
m11
,
m6
,
m4
,
m5
,
m8
; abs(p2 - p0) <= 1
...
...
@@ -498,6 +499,19 @@ SECTION .text
%if
%2
==
84
||
%2
==
48
pand
m2
,
[
mask_mix
%2
]
%endif
%else
mova
m6
,
[
pb_80
]
movd
m7
,
Hd
SPLATB_MASK
m7
,
[
mask_mix
]
pxor
m7
,
m6
ABSSUB
m4
,
m10
,
m11
,
m1
; abs(p1 - p0)
pxor
m4
,
m6
pcmpgtb
m0
,
m4
,
m7
; abs(p1 - p0) > H (1/2 hev condition)
ABSSUB
m4
,
m13
,
m12
,
m1
; abs(q1 - q0)
pxor
m4
,
m6
pcmpgtb
m5
,
m4
,
m7
; abs(q1 - q0) > H (2/2 hev condition)
por
m0
,
m5
; hev final value
%endif
%if
%2
==
16
; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3)
...
...
@@ -539,9 +553,11 @@ SECTION .text
; f2: fm & ~f14 & ~f6 & hev => fm & ~(out & in) & ~(~out & in) & hev => fm & ~in & hev
; f4: fm & ~f14 & ~f6 & ~f2 => fm & ~(out & in) & ~(~out & in) & ~(~in & hev) => fm & ~in & ~hev
; (m0: hev, [m1: flat8out],
m2: flat8in
, m3: fm, m8..15: p5 p4 p1 p0 q0 q1 q6 q7)
; (m0: hev, [m1: flat8out],
[m2: flat8in]
, m3: fm, m8..15: p5 p4 p1 p0 q0 q1 q6 q7)
; filter2()
mova
m6
,
[
pb_80
]
%if
%2
!
=
44
mova
m6
,
[
pb_80
]
; already in m6 if 44_16
%endif
pxor
m15
,
m12
,
m6
; q0 ^ 0x80
pxor
m14
,
m11
,
m6
; p0 ^ 0x80
psubsb
m15
,
m14
; (signed) q0 - p0
...
...
@@ -557,12 +573,16 @@ SECTION .text
SRSHIFT3B_2X
m6
,
m4
,
m14
,
m7
; f1 and f2 sign byte shift by 3
SIGN_SUB
m7
,
m12
,
m6
,
m5
,
m9
; m7 = q0 - f1
SIGN_ADD
m8
,
m11
,
m4
,
m5
,
m9
; m8 = p0 + f2
%if
%2
!
=
44
pandn
m6
,
m2
,
m3
; ~mask(in) & mask(fm)
pand
m6
,
m0
; (~mask(in) & mask(fm)) & mask(hev)
%else
pand
m6
,
m3
,
m0
%endif
MASK_APPLY
m7
,
m12
,
m6
,
m5
; m7 = filter2(q0) & mask / we write it in filter4()
MASK_APPLY
m8
,
m11
,
m6
,
m5
; m8 = filter2(p0) & mask / we write it in filter4()
; (m0: hev, [m1: flat8out],
m2: flat8in
, m3: fm, m7..m8: q0' p0', m10..13: p1 p0 q0 q1, m14: pb_10, m15: q0-p0)
; (m0: hev, [m1: flat8out],
[m2: flat8in]
, m3: fm, m7..m8: q0' p0', m10..13: p1 p0 q0 q1, m14: pb_10, m15: q0-p0)
; filter4()
mova
m4
,
m15
paddsb
m15
,
m4
; 2 * (q0 - p0)
...
...
@@ -570,14 +590,22 @@ SECTION .text
paddsb
m6
,
m15
,
[
pb_4
]
; m6: f1 = clip(f + 4, 127)
paddsb
m15
,
[
pb_3
]
; m15: f2 = clip(f + 3, 127)
SRSHIFT3B_2X
m6
,
m15
,
m14
,
m9
; f1 and f2 sign byte shift by 3
%if
%2
!
=
44
%define
p0tmp
m7
%define
q0tmp
m9
pandn
m5
,
m2
,
m3
; ~mask(in) & mask(fm)
pandn
m0
,
m5
; ~mask(hev) & (~mask(in) & mask(fm))
SIGN_SUB
m9
,
m12
,
m6
,
m4
,
m14
; q0 - f1
MASK_APPLY
m9
,
m7
,
m0
,
m5
; m9 = filter4(q0) & mask
mova
[Q0],
m9
SIGN_ADD
m7
,
m11
,
m15
,
m4
,
m14
; p0 + f2
MASK_APPLY
m7
,
m8
,
m0
,
m5
; m7 = filter4(p0) & mask
mova
[P0],
m7
%else
%define
p0tmp
m1
%define
q0tmp
m2
pandn
m0
,
m3
%endif
SIGN_SUB
q0tmp
,
m12
,
m6
,
m4
,
m14
; q0 - f1
MASK_APPLY
q0tmp
,
m7
,
m0
,
m5
; filter4(q0) & mask
mova
[Q0],
q0tmp
SIGN_ADD
p0tmp
,
m11
,
m15
,
m4
,
m14
; p0 + f2
MASK_APPLY
p0tmp
,
m8
,
m0
,
m5
; filter4(p0) & mask
mova
[P0],
p0tmp
paddb
m6
,
[
pb_80
]
;
pxor
m8
,
m8
; f=(f1+1)>>1
pavgb
m6
,
m8
;
...
...
@@ -591,6 +619,7 @@ SECTION .text
; ([m1: flat8out], m2: flat8in, m3: fm, m10..13: p1 p0 q0 q1)
; filter6()
%if
%2
!
=
44
pxor
m0
,
m0
%if
%2
>
16
pand
m3
,
m2
...
...
@@ -608,6 +637,7 @@ SECTION .text
FILTER_UPDATE
m6
,
m7
,
m4
,
m5
,
[Q0],
m14
,
m11
,
m12
,
m9
,
3
,
m3
; [q0] -p3 -p0 +q0 +q3
FILTER_UPDATE
m4
,
m5
,
m6
,
m7
,
[Q1],
m15
,
m12
,
m13
,
m9
,
3
,
m3
; [q1] -p2 -q0 +q1 +q3
FILTER_UPDATE
m6
,
m7
,
m4
,
m5
,
[Q2],
m10
,
m13
,
m8
,
m9
,
3
,
m3
,
m8
; [q2] -p1 -q1 +q2 +q3
%endif
; (m0: 0, [m1: flat8out], m2: fm & flat8in, m8..15: q2 q3 p1 p0 q0 q1 p3 p2)
; filter14()
...
...
@@ -688,6 +718,48 @@ SECTION .text
movu
[Q5],
m13
movu
[Q6],
m14
movu
[Q7],
m15
%elif
%2
==
44
SWAP
0
,
7
; m0 = p1
SWAP
3
,
4
; m3 = q1
DEFINE_REAL_P7_TO_Q7
2
SBUTTERFLY
bw
,
0
,
1
,
8
SBUTTERFLY
bw
,
2
,
3
,
8
SBUTTERFLY
wd
,
0
,
2
,
8
SBUTTERFLY
wd
,
1
,
3
,
8
SBUTTERFLY
dq
,
0
,
4
,
8
SBUTTERFLY
dq
,
1
,
5
,
8
SBUTTERFLY
dq
,
2
,
6
,
8
SBUTTERFLY
dq
,
3
,
7
,
8
PUNPCKHQDQ_SWAP
0
,
8
,
15
movd
[P7],
m0
PUNPCKHQDQ_SWAP
1
,
9
,
0
PUNPCKHQDQ_SWAP
2
,
10
,
0
PUNPCKHQDQ_SWAP
3
,
11
,
0
PUNPCKHQDQ_SWAP
4
,
12
,
0
PUNPCKHQDQ_SWAP
5
,
13
,
0
PUNPCKHQDQ_SWAP
6
,
14
,
0
PUNPCKHQDQ_SWAP
7
,
15
,
0
SWAP
1
,
8
SWAP
2
,
4
SWAP
3
,
12
SWAP
5
,
10
SWAP
7
,
14
SWAP
11
,
13
movd
[P6],
m1
movd
[P5],
m2
movd
[P4],
m3
movd
[P3],
m4
movd
[P2],
m5
movd
[P1],
m6
movd
[P0],
m7
movd
[Q0],
m8
movd
[Q1],
m9
movd
[Q2],
m10
movd
[Q3],
m11
movd
[Q4],
m12
movd
[Q5],
m13
movd
[Q6],
m14
movd
[Q7],
m15
%else
mova
m4
,
[P3]
mova
m5
,
[P2]
...
...
@@ -776,6 +848,7 @@ LPF_16_VH %1, avx
%endmacro
LPF_16_VH_ALL_OPTS
16
LPF_16_VH_ALL_OPTS
44
LPF_16_VH_ALL_OPTS
48
LPF_16_VH_ALL_OPTS
84
LPF_16_VH_ALL_OPTS
88
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment