Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
f3df42e8
Commit
f3df42e8
authored
Mar 17, 2018
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/x86/vf_blend : add SIMD for 16 bit version of
grainextract grainmerge average extremity negation
parent
8eb0bb11
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
128 additions
and
60 deletions
+128
-60
vf_blend.asm
libavfilter/x86/vf_blend.asm
+108
-60
vf_blend_init.c
libavfilter/x86/vf_blend_init.c
+20
-0
No files found.
libavfilter/x86/vf_blend.asm
View file @
f3df42e8
...
...
@@ -27,6 +27,8 @@
SECTION_RODATA
ps_255
:
times
4
dd
255
.
0
pd_32768
:
times
4
dd
32768
pd_65535
:
times
4
dd
65535
pw_1
:
times
8
dw
1
pw_128
:
times
8
dw
128
pw_255
:
times
8
dw
255
...
...
@@ -79,26 +81,33 @@ BLEND_INIT %1, 2, %3
BLEND_END
%endmacro
%macro
GRAINEXTRACT
0
BLEND_INIT
grainextract
,
6
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro
GRAINEXTRACT
3
-
4
BLEND_INIT
%1
,
6
,
%4
pxor
m4
,
m4
%if
%0
==
4
; 16 bit
VBROADCASTI128
m5
,
[
pd_32768
]
%else
VBROADCASTI128
m5
,
[
pw_128
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m1
,
[
topq
+
xq
]
movu
m3
,
[
bottomq
+
xq
]
punpcklbw
m0
,
m1
,
m4
punpckhbw
m1
,
m4
punpcklbw
m2
,
m3
,
m4
punpckhbw
m3
,
m4
paddw
m0
,
m5
paddw
m1
,
m5
psubw
m0
,
m2
psubw
m1
,
m3
punpckl%2
%3
m0
,
m1
,
m4
punpckh%2
%3
m1
,
m4
punpckl%2
%3
m2
,
m3
,
m4
punpckh%2
%3
m3
,
m4
padd%3
m0
,
m5
padd%3
m1
,
m5
psub%3
m0
,
m2
psub%3
m1
,
m3
packus%3
%2
m0
,
m1
packuswb
m0
,
m1
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
jl
.
loop
...
...
@@ -172,8 +181,9 @@ BLEND_INIT screen, 7
BLEND_END
%endmacro
%macro
AVERAGE
0
BLEND_INIT
average
,
3
;%1 name, %2 (b or w), %3 (set if 16 bit)
%macro
AVERAGE
2
-
3
BLEND_INIT
%1
,
3
,
%3
pcmpeqb
m2
,
m2
.
nextrow
:
...
...
@@ -184,7 +194,7 @@ BLEND_INIT average, 3
movu
m1
,
[
bottomq
+
xq
]
pxor
m0
,
m2
pxor
m1
,
m2
pavg
b
m0
,
m1
pavg
%2
m0
,
m1
pxor
m0
,
m2
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
...
...
@@ -192,29 +202,34 @@ BLEND_INIT average, 3
BLEND_END
%endmacro
%macro
GRAINMERGE
0
BLEND_INIT
grainmerge
,
6
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro
GRAINMERGE
3
-
4
BLEND_INIT
%1
,
6
,
%4
pxor
m4
,
m4
%if
%0
==
4
; 16 bit
VBROADCASTI128
m5
,
[
pd_32768
]
%else
VBROADCASTI128
m5
,
[
pw_128
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m1
,
[
topq
+
xq
]
movu
m3
,
[
bottomq
+
xq
]
punpcklbw
m0
,
m1
,
m4
punpckhbw
m1
,
m4
punpcklbw
m2
,
m3
,
m4
punpckhbw
m3
,
m4
paddw
m0
,
m2
paddw
m1
,
m3
psubw
m0
,
m5
psubw
m1
,
m5
punpckl%2
%3
m0
,
m1
,
m4
punpckh%2
%3
m1
,
m4
punpckl%2
%3
m2
,
m3
,
m4
punpckh%2
%3
m3
,
m4
padd%3
m0
,
m2
padd%3
m1
,
m3
psub%3
m0
,
m5
psub%3
m1
,
m5
packus%3
%2
m0
,
m1
packuswb
m0
,
m1
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
jl
.
loop
...
...
@@ -324,52 +339,73 @@ BLEND_INIT %1, 5, %4
BLEND_END
%endmacro
%macro
BLEND_ABS
0
BLEND_INIT
extremity
,
8
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro
EXTREMITY
3
-
4
BLEND_INIT
%1
,
8
,
%4
pxor
m2
,
m2
%if
%0
==
4
; 16 bit
VBROADCASTI128
m4
,
[
pd_65535
]
%else
VBROADCASTI128
m4
,
[
pw_255
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m0
,
[
topq
+
xq
]
movu
m1
,
[
bottomq
+
xq
]
punpckhbw
m5
,
m0
,
m2
punpcklbw
m0
,
m2
punpckhbw
m6
,
m1
,
m2
punpcklbw
m1
,
m2
psubw
m3
,
m4
,
m0
psubw
m7
,
m4
,
m5
psubw
m3
,
m1
psubw
m7
,
m6
punpckh%2
%3
m5
,
m0
,
m2
punpckl%2
%3
m0
,
m2
punpckh%2
%3
m6
,
m1
,
m2
punpckl%2
%3
m1
,
m2
psub%3
m3
,
m4
,
m0
psub%3
m7
,
m4
,
m5
psub%3
m3
,
m1
psub%3
m7
,
m6
%if
%0
==
4
; 16 bit
pabsd
m3
,
m3
pabsd
m7
,
m7
%else
ABS2
m3
,
m7
,
m1
,
m6
packuswb
m3
,
m7
%endif
packus%3
%2
m3
,
m7
mova
[
dstq
+
xq
]
,
m3
add
xq
,
mmsize
jl
.
loop
BLEND_END
%endmacro
BLEND_INIT
negation
,
8
%macro
NEGATION
3
-
4
BLEND_INIT
%1
,
8
,
%4
pxor
m2
,
m2
%if
%0
==
4
; 16 bit
VBROADCASTI128
m4
,
[
pd_65535
]
%else
VBROADCASTI128
m4
,
[
pw_255
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m0
,
[
topq
+
xq
]
movu
m1
,
[
bottomq
+
xq
]
punpckhbw
m5
,
m0
,
m2
punpcklbw
m0
,
m2
punpckhbw
m6
,
m1
,
m2
punpcklbw
m1
,
m2
psubw
m3
,
m4
,
m0
psubw
m7
,
m4
,
m5
psubw
m3
,
m1
psubw
m7
,
m6
punpckh%2
%3
m5
,
m0
,
m2
punpckl%2
%3
m0
,
m2
punpckh%2
%3
m6
,
m1
,
m2
punpckl%2
%3
m1
,
m2
psub%3
m3
,
m4
,
m0
psub%3
m7
,
m4
,
m5
psub%3
m3
,
m1
psub%3
m7
,
m6
%if
%0
==
4
; 16 bit
pabsd
m3
,
m3
pabsd
m7
,
m7
%else
ABS2
m3
,
m7
,
m1
,
m6
psubw
m0
,
m4
,
m3
psubw
m1
,
m4
,
m7
packuswb
m0
,
m1
%endif
psub%3
m0
,
m4
,
m3
psub%3
m1
,
m4
,
m7
packus%3
%2
m0
,
m1
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
jl
.
loop
...
...
@@ -384,17 +420,17 @@ BLEND_SIMPLE addition, addusb
BLEND_SIMPLE
subtract
,
subusb
BLEND_SIMPLE
darken
,
minub
BLEND_SIMPLE
lighten
,
maxub
GRAINEXTRACT
GRAINEXTRACT
grainextract
,
b
,
w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE
GRAINMERGE
AVERAGE
average
,
b
GRAINMERGE
grainmerge
,
b
,
w
HARDMIX
PHOENIX
phoenix
,
b
DIFFERENCE
difference
,
b
,
w
DIVIDE
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
%if
ARCH_X86_64
BLEND_SIMPLE
addition_16
,
addusw
,
1
...
...
@@ -402,18 +438,24 @@ BLEND_SIMPLE and_16, and, 1
BLEND_SIMPLE
or_16
,
or
,
1
BLEND_SIMPLE
subtract_16
,
subusw
,
1
BLEND_SIMPLE
xor_16
,
xor
,
1
AVERAGE
average_16
,
w
,
1
%endif
INIT_XMM
ssse3
DIFFERENCE
difference
,
b
,
w
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
INIT_XMM
sse4
%if
ARCH_X86_64
BLEND_SIMPLE
darken_16
,
minuw
,
1
BLEND_SIMPLE
lighten_16
,
maxuw
,
1
GRAINEXTRACT
grainextract_16
,
w
,
d
,
1
GRAINMERGE
grainmerge_16
,
w
,
d
,
1
PHOENIX
phoenix_16
,
w
,
1
DIFFERENCE
difference_16
,
w
,
d
,
1
EXTREMITY
extremity_16
,
w
,
d
,
1
NEGATION
negation_16
,
w
,
d
,
1
%endif
%if
HAVE_AVX2_EXTERNAL
...
...
@@ -425,16 +467,17 @@ BLEND_SIMPLE addition, addusb
BLEND_SIMPLE
subtract
,
subusb
BLEND_SIMPLE
darken
,
minub
BLEND_SIMPLE
lighten
,
maxub
GRAINEXTRACT
GRAINEXTRACT
grainextract
,
b
,
w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE
GRAINMERGE
AVERAGE
average
,
b
GRAINMERGE
grainmerge
,
b
,
w
HARDMIX
PHOENIX
phoenix
,
b
DIFFERENCE
difference
,
b
,
w
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
%if
ARCH_X86_64
BLEND_SIMPLE
addition_16
,
addusw
,
1
...
...
@@ -444,7 +487,12 @@ BLEND_SIMPLE lighten_16, maxuw, 1
BLEND_SIMPLE
or_16
,
or
,
1
BLEND_SIMPLE
subtract_16
,
subusw
,
1
BLEND_SIMPLE
xor_16
,
xor
,
1
GRAINEXTRACT
grainextract_16
,
w
,
d
,
1
AVERAGE
average_16
,
w
,
1
GRAINMERGE
grainmerge_16
,
w
,
d
,
1
PHOENIX
phoenix_16
,
w
,
1
DIFFERENCE
difference_16
,
w
,
d
,
1
EXTREMITY
extremity_16
,
w
,
d
,
1
NEGATION
negation_16
,
w
,
d
,
1
%endif
%endif
libavfilter/x86/vf_blend_init.c
View file @
f3df42e8
...
...
@@ -72,12 +72,22 @@ BLEND_FUNC(negation, avx2)
#if ARCH_X86_64
BLEND_FUNC
(
addition_16
,
sse2
)
BLEND_FUNC
(
addition_16
,
avx2
)
BLEND_FUNC
(
grainmerge_16
,
sse4
)
BLEND_FUNC
(
grainmerge_16
,
avx2
)
BLEND_FUNC
(
average_16
,
sse2
)
BLEND_FUNC
(
average_16
,
avx2
)
BLEND_FUNC
(
and_16
,
sse2
)
BLEND_FUNC
(
and_16
,
avx2
)
BLEND_FUNC
(
darken_16
,
sse4
)
BLEND_FUNC
(
darken_16
,
avx2
)
BLEND_FUNC
(
grainextract_16
,
sse4
)
BLEND_FUNC
(
grainextract_16
,
avx2
)
BLEND_FUNC
(
difference_16
,
sse4
)
BLEND_FUNC
(
difference_16
,
avx2
)
BLEND_FUNC
(
extremity_16
,
sse4
)
BLEND_FUNC
(
extremity_16
,
avx2
)
BLEND_FUNC
(
negation_16
,
sse4
)
BLEND_FUNC
(
negation_16
,
avx2
)
BLEND_FUNC
(
lighten_16
,
sse4
)
BLEND_FUNC
(
lighten_16
,
avx2
)
BLEND_FUNC
(
or_16
,
sse2
)
...
...
@@ -152,6 +162,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
switch
(
param
->
mode
)
{
case
BLEND_ADDITION
:
param
->
blend
=
ff_blend_addition_16_sse2
;
break
;
case
BLEND_AND
:
param
->
blend
=
ff_blend_and_16_sse2
;
break
;
case
BLEND_AVERAGE
:
param
->
blend
=
ff_blend_average_16_sse2
;
break
;
case
BLEND_OR
:
param
->
blend
=
ff_blend_or_16_sse2
;
break
;
case
BLEND_SUBTRACT
:
param
->
blend
=
ff_blend_subtract_16_sse2
;
break
;
case
BLEND_XOR
:
param
->
blend
=
ff_blend_xor_16_sse2
;
break
;
...
...
@@ -159,8 +170,12 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
}
if
(
EXTERNAL_SSE4
(
cpu_flags
)
&&
param
->
opacity
==
1
)
{
switch
(
param
->
mode
)
{
case
BLEND_GRAINMERGE
:
param
->
blend
=
ff_blend_grainmerge_16_sse4
;
break
;
case
BLEND_DARKEN
:
param
->
blend
=
ff_blend_darken_16_sse4
;
break
;
case
BLEND_GRAINEXTRACT
:
param
->
blend
=
ff_blend_grainextract_16_sse4
;
break
;
case
BLEND_DIFFERENCE
:
param
->
blend
=
ff_blend_difference_16_sse4
;
break
;
case
BLEND_EXTREMITY
:
param
->
blend
=
ff_blend_extremity_16_sse4
;
break
;
case
BLEND_NEGATION
:
param
->
blend
=
ff_blend_negation_16_sse4
;
break
;
case
BLEND_LIGHTEN
:
param
->
blend
=
ff_blend_lighten_16_sse4
;
break
;
case
BLEND_PHOENIX
:
param
->
blend
=
ff_blend_phoenix_16_sse4
;
break
;
}
...
...
@@ -168,9 +183,14 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
param
->
opacity
==
1
)
{
switch
(
param
->
mode
)
{
case
BLEND_ADDITION
:
param
->
blend
=
ff_blend_addition_16_avx2
;
break
;
case
BLEND_GRAINMERGE
:
param
->
blend
=
ff_blend_grainmerge_16_avx2
;
break
;
case
BLEND_AND
:
param
->
blend
=
ff_blend_and_16_avx2
;
break
;
case
BLEND_AVERAGE
:
param
->
blend
=
ff_blend_average_16_avx2
;
break
;
case
BLEND_DARKEN
:
param
->
blend
=
ff_blend_darken_16_avx2
;
break
;
case
BLEND_GRAINEXTRACT
:
param
->
blend
=
ff_blend_grainextract_16_avx2
;
break
;
case
BLEND_DIFFERENCE
:
param
->
blend
=
ff_blend_difference_16_avx2
;
break
;
case
BLEND_EXTREMITY
:
param
->
blend
=
ff_blend_extremity_16_avx2
;
break
;
case
BLEND_NEGATION
:
param
->
blend
=
ff_blend_negation_16_avx2
;
break
;
case
BLEND_LIGHTEN
:
param
->
blend
=
ff_blend_lighten_16_avx2
;
break
;
case
BLEND_OR
:
param
->
blend
=
ff_blend_or_16_avx2
;
break
;
case
BLEND_PHOENIX
:
param
->
blend
=
ff_blend_phoenix_16_avx2
;
break
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment