Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
869efbf9
Commit
869efbf9
authored
Dec 07, 2017
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
parent
713f9c5b
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
34 additions
and
21 deletions
+34
-21
vf_threshold.asm
libavfilter/x86/vf_threshold.asm
+14
-7
vf_threshold_init.c
libavfilter/x86/vf_threshold_init.c
+20
-14
No files found.
libavfilter/x86/vf_threshold.asm
View file @
869efbf9
...
@@ -25,16 +25,18 @@
...
@@ -25,16 +25,18 @@
SECTION_RODATA
SECTION_RODATA
pb_128
:
times
16
db
128
pb_128
:
times
16
db
128
pb_128_0
:
times
8
db
0
,
128
SECTION
.
text
SECTION
.
text
%macro
THRESHOLD_8
0
;%1 depth (8 or 16) ; %2 b or w ; %3 constant
%macro
THRESHOLD
3
%if
ARCH_X86_64
%if
ARCH_X86_64
cglobal
threshold
8
,
10
,
13
,
5
,
in
,
threshold
,
min
,
max
,
out
,
ilinesize
,
tlinesize
,
flinesize
,
slinesize
,
olinesize
,
w
,
h
,
x
cglobal
threshold
%1
,
10
,
13
,
5
,
in
,
threshold
,
min
,
max
,
out
,
ilinesize
,
tlinesize
,
flinesize
,
slinesize
,
olinesize
,
w
,
h
,
x
mov
wd
,
dword
wm
mov
wd
,
dword
wm
mov
hd
,
dword
hm
mov
hd
,
dword
hm
%else
%else
cglobal
threshold
8
,
5
,
7
,
5
,
in
,
threshold
,
min
,
max
,
out
,
w
,
x
cglobal
threshold
%1
,
5
,
7
,
5
,
in
,
threshold
,
min
,
max
,
out
,
w
,
x
mov
wd
,
r10m
mov
wd
,
r10m
%define
ilinesizeq
r5mp
%define
ilinesizeq
r5mp
%define
tlinesizeq
r6mp
%define
tlinesizeq
r6mp
...
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
...
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
%define
olinesizeq
r9mp
%define
olinesizeq
r9mp
%define
hd
r11mp
%define
hd
r11mp
%endif
%endif
VBROADCASTI128
m4
,
[
pb_128
]
VBROADCASTI128
m4
,
[
%3
]
%if
%1
==
16
add
wq
,
wq
; w *= 2 (16 bits instead of 8)
%endif
add
inq
,
wq
add
inq
,
wq
add
thresholdq
,
wq
add
thresholdq
,
wq
add
minq
,
wq
add
minq
,
wq
...
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
...
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
movu
m3
,
[
maxq
+
xq
]
movu
m3
,
[
maxq
+
xq
]
pxor
m0
,
m4
pxor
m0
,
m4
pxor
m1
,
m4
pxor
m1
,
m4
pcmpgt
b
m0
,
m1
pcmpgt
%2
m0
,
m1
PBLENDVB
m3
,
m2
,
m0
PBLENDVB
m3
,
m2
,
m0
movu
[
outq
+
xq
]
,
m3
movu
[
outq
+
xq
]
,
m3
add
xq
,
mmsize
add
xq
,
mmsize
...
@@ -77,9 +82,11 @@ RET
...
@@ -77,9 +82,11 @@ RET
%endmacro
%endmacro
INIT_XMM
sse4
INIT_XMM
sse4
THRESHOLD_8
THRESHOLD
8
,
b
,
pb_128
THRESHOLD
16
,
w
,
pb_128_0
%if
HAVE_AVX2_EXTERNAL
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
INIT_YMM
avx2
THRESHOLD_8
THRESHOLD
8
,
b
,
pb_128
THRESHOLD
16
,
w
,
pb_128_0
%endif
%endif
libavfilter/x86/vf_threshold_init.c
View file @
869efbf9
...
@@ -23,20 +23,19 @@
...
@@ -23,20 +23,19 @@
#include "libavutil/x86/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/threshold.h"
#include "libavfilter/threshold.h"
void
ff_threshold8_sse4
(
const
uint8_t
*
in
,
const
uint8_t
*
threshold
,
#define THRESHOLD_FUNC(depth, opt) \
const
uint8_t
*
min
,
const
uint8_t
*
max
,
void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t *threshold,\
uint8_t
*
out
,
const uint8_t *min, const uint8_t *max, \
ptrdiff_t
ilinesize
,
ptrdiff_t
tlinesize
,
uint8_t *out, \
ptrdiff_t
flinesize
,
ptrdiff_t
slinesize
,
ptrdiff_t ilinesize, ptrdiff_t tlinesize, \
ptrdiff_t
olinesize
,
ptrdiff_t flinesize, ptrdiff_t slinesize, \
int
w
,
int
h
);
ptrdiff_t olinesize, \
void
ff_threshold8_avx2
(
const
uint8_t
*
in
,
const
uint8_t
*
threshold
,
int w, int h);
const
uint8_t
*
min
,
const
uint8_t
*
max
,
uint8_t
*
out
,
THRESHOLD_FUNC
(
8
,
sse4
)
ptrdiff_t
ilinesize
,
ptrdiff_t
tlinesize
,
THRESHOLD_FUNC
(
8
,
avx2
)
ptrdiff_t
flinesize
,
ptrdiff_t
slinesize
,
THRESHOLD_FUNC
(
16
,
sse4
)
ptrdiff_t
olinesize
,
THRESHOLD_FUNC
(
16
,
avx2
)
int
w
,
int
h
);
av_cold
void
ff_threshold_init_x86
(
ThresholdContext
*
s
)
av_cold
void
ff_threshold_init_x86
(
ThresholdContext
*
s
)
{
{
...
@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
...
@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
s
->
threshold
=
ff_threshold8_avx2
;
s
->
threshold
=
ff_threshold8_avx2
;
}
}
}
else
if
(
s
->
depth
==
16
)
{
if
(
EXTERNAL_SSE4
(
cpu_flags
))
{
s
->
threshold
=
ff_threshold16_sse4
;
}
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
s
->
threshold
=
ff_threshold16_avx2
;
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment