Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
921eb21b
Commit
921eb21b
authored
Sep 15, 2019
by
Paul B Mahol
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/x86/vf_360: add most of >8 depth asm
parent
8e8fd252
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
79 additions
and
0 deletions
+79
-0
vf_v360.asm
libavfilter/x86/vf_v360.asm
+67
-0
vf_v360_init.c
libavfilter/x86/vf_v360_init.c
+12
-0
No files found.
libavfilter/x86/vf_v360.asm
View file @
921eb21b
...
@@ -26,7 +26,9 @@
...
@@ -26,7 +26,9 @@
SECTION_RODATA
SECTION_RODATA
pb_mask
:
db
0
,
4
,
8
,
12
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
pb_mask
:
db
0
,
4
,
8
,
12
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
; Byte-shuffle control used with pshufb in remap1_16bit_line: picks bytes
; 0,1 / 4,5 / 8,9 / 12,13 (the low 16-bit word of each dword) into the low
; 8 bytes of the destination. The -1 entries have their top bit set, which
; makes pshufb write zero to the corresponding destination bytes.
pw_mask: db 0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1
pd_255
:
times
4
dd
255
pd_255
:
times
4
dd
255
; Four dwords of 65535 (0x0000FFFF). remap2_16bit_line broadcasts the first
; dword and ANDs it with every gathered dword, keeping only the low 16 bits.
pd_65535: times 4 dd 65535
SECTION
.
text
SECTION
.
text
...
@@ -60,6 +62,34 @@ cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
...
@@ -60,6 +62,34 @@ cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
jl
.
loop
jl
.
loop
RET
RET
;------------------------------------------------------------------------------
; remap1_16bit_line(dst, width, src, in_linesize, u, v)
;
; One-tap remap for 16-bit samples, AVX2. For each output index x the source
; byte offset is  v[x] * in_linesize + u[x] * 2 ; a dword is gathered from
; src at that offset and its low word is stored to dst[x].
;
; Registers:
;   m0 = in_linesize replicated in every dword
;   m3 = pw_mask replicated in both 128-bit lanes
;   m4 = all-ones (template for the gather mask)
;   x  = output index, advanced by mmsize / 4 (8 samples) per iteration
;
; NOTE: the loop body runs before the width test and always handles a whole
; group of 8 samples, so 16 bytes are stored to dst on every iteration.
;------------------------------------------------------------------------------
INIT_YMM avx2
cglobal remap1_16bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
    movsxdifnidn    widthq, widthd
    movd            xm0, in_linesized
    vpbroadcastd    m0, xm0
    VBROADCASTI128  m3, [pw_mask]
    pcmpeqw         m4, m4
    xor             xq, xq

.next_group:
    ; m2 = u[x..x+7] * 2   (horizontal byte offset of each 16-bit sample)
    pmovsxwd        m2, [uq + xq * 2]
    pslld           m2, 0x1
    ; m1 = v[x..x+7] * in_linesize + m2   (full byte offset into src)
    pmovsxwd        m1, [vq + xq * 2]
    pmulld          m1, m0
    paddd           m1, m2

    ; vpgatherdd clears its mask operand, so hand it a fresh copy each time
    mova            m2, m4
    vpgatherdd      m5, [srcq + m1], m2

    ; squeeze the low word of every gathered dword into 8 bytes per lane
    pshufb          m1, m5, m3
    vextracti128    xm2, m1, 1
    movq            [dstq + xq * 2], xm1
    movq            [dstq + xq * 2 + 8], xm2

    add             xq, mmsize / 4
    cmp             xq, widthq
    jl              .next_group
    RET
INIT_YMM
avx2
INIT_YMM
avx2
cglobal
remap2_8bit_line
,
7
,
8
,
8
,
dst
,
width
,
src
,
in_linesize
,
u
,
v
,
ker
,
x
cglobal
remap2_8bit_line
,
7
,
8
,
8
,
dst
,
width
,
src
,
in_linesize
,
u
,
v
,
ker
,
x
movsxdifnidn
widthq
,
widthd
movsxdifnidn
widthq
,
widthd
...
@@ -96,6 +126,43 @@ DEFINE_ARGS dst, width, src, x, u, v, ker
...
@@ -96,6 +126,43 @@ DEFINE_ARGS dst, width, src, x, u, v, ker
jl
.
loop
jl
.
loop
RET
RET
;------------------------------------------------------------------------------
; remap2_16bit_line(dst, width, src, in_linesize, u, v, ker)
;
; Four-tap weighted remap for 16-bit samples, AVX2. u, v and ker each hold
; four 16-bit entries per output sample (indexed with x * 8 bytes). For each
; tap a dword is gathered from src at  v * in_linesize + u * 2 , masked to
; its low 16 bits and multiplied by the matching ker entry; the four products
; of a sample are summed, shifted right by 14 and the low word is stored.
;
; Registers:
;   m0 = in_linesize replicated in every dword
;   m6 = 65535 replicated in every dword (16-bit sample mask)
;   m7 = all-ones (template for the gather mask)
;   x  = output index, advanced by mmsize / 16 (2 samples) per iteration
;
; On x86-32 the arguments are renamed after in_linesize has been copied to
; xm0, so that x takes the place of in_linesize in the argument list.
;
; NOTE: the loop body runs before the width test and always handles two
; samples per iteration.
;------------------------------------------------------------------------------
INIT_YMM avx2
cglobal remap2_16bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
    movsxdifnidn    widthq, widthd
    movd            xm0, in_linesized
%if ARCH_X86_32
DEFINE_ARGS dst, width, src, x, u, v, ker
%endif
    vpbroadcastd    m0, xm0
    vpbroadcastd    m6, [pd_65535]
    pcmpeqw         m7, m7
    xor             xq, xq

.next_pair:
    ; m3 = u * 2   (horizontal byte offset of each tap)
    pmovsxwd        m3, [uq + xq * 8]
    pslld           m3, 0x1
    ; m4 = v * in_linesize + m3   (full byte offset of each tap)
    pmovsxwd        m2, [vq + xq * 8]
    pmulld          m4, m2, m0
    paddd           m4, m3

    ; vpgatherdd clears its mask operand, so hand it a fresh copy each time
    mova            m3, m7
    vpgatherdd      m2, [srcq + m4], m3
    pand            m2, m6

    ; weight every tap, then add the four products of each sample together
    pmovsxwd        m1, [kerq + xq * 8]
    pmulld          m2, m1
    phaddd          m2, m2
    phaddd          m1, m2, m2
    psrld           m1, m1, 0xe

    ; one result per 128-bit lane: low lane -> dst[x], high lane -> dst[x+1]
    vextracti128    xm2, m1, 1
    pextrw          [dstq + xq * 2], xm1, 0
    pextrw          [dstq + xq * 2 + 2], xm2, 0

    add             xq, mmsize / 16
    cmp             xq, widthq
    jl              .next_pair
    RET
%if
ARCH_X86_64
%if
ARCH_X86_64
INIT_YMM
avx2
INIT_YMM
avx2
...
...
libavfilter/x86/vf_v360_init.c
View file @
921eb21b
...
@@ -32,6 +32,12 @@ void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdi
...
@@ -32,6 +32,12 @@ void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdi
void
ff_remap4_8bit_line_avx2
(
uint8_t
*
dst
,
int
width
,
const
uint8_t
*
src
,
ptrdiff_t
in_linesize
,
void
ff_remap4_8bit_line_avx2
(
uint8_t
*
dst
,
int
width
,
const
uint8_t
*
src
,
ptrdiff_t
in_linesize
,
const
uint16_t
*
u
,
const
uint16_t
*
v
,
const
int16_t
*
ker
);
const
uint16_t
*
u
,
const
uint16_t
*
v
,
const
int16_t
*
ker
);
/*
 * AVX2 line remapper for >8-bit depth. ff_v360_init_x86() installs it as
 * s->remap_line when interp == NEAREST and depth > 8.
 * NOTE(review): presumably the C-visible name of the remap1_16bit_line
 * routine in vf_v360.asm, which declares only six arguments (no ker) -
 * confirm against the assembly.
 */
void ff_remap1_16bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
/*
 * AVX2 line remapper for >8-bit depth. ff_v360_init_x86() installs it as
 * s->remap_line when interp == BILINEAR and depth > 8.
 * NOTE(review): presumably the C-visible name of the remap2_16bit_line
 * routine in vf_v360.asm, which takes all seven arguments including ker -
 * confirm against the assembly.
 */
void ff_remap2_16bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
av_cold
void
ff_v360_init_x86
(
V360Context
*
s
,
int
depth
)
av_cold
void
ff_v360_init_x86
(
V360Context
*
s
,
int
depth
)
{
{
int
cpu_flags
=
av_get_cpu_flags
();
int
cpu_flags
=
av_get_cpu_flags
();
...
@@ -42,6 +48,12 @@ av_cold void ff_v360_init_x86(V360Context *s, int depth)
...
@@ -42,6 +48,12 @@ av_cold void ff_v360_init_x86(V360Context *s, int depth)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
s
->
interp
==
BILINEAR
&&
depth
<=
8
)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
s
->
interp
==
BILINEAR
&&
depth
<=
8
)
s
->
remap_line
=
ff_remap2_8bit_line_avx2
;
s
->
remap_line
=
ff_remap2_8bit_line_avx2
;
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
s
->
interp
==
NEAREST
&&
depth
>
8
)
s
->
remap_line
=
ff_remap1_16bit_line_avx2
;
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
s
->
interp
==
BILINEAR
&&
depth
>
8
)
s
->
remap_line
=
ff_remap2_16bit_line_avx2
;
#if ARCH_X86_64
#if ARCH_X86_64
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
(
s
->
interp
==
BICUBIC
||
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
(
s
->
interp
==
BICUBIC
||
s
->
interp
==
LANCZOS
)
&&
depth
<=
8
)
s
->
interp
==
LANCZOS
)
&&
depth
<=
8
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment