Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
4353c350
Commit
4353c350
authored
Dec 02, 2017
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/x86/lossless_videodsp : add avx2 version for add_left_pred
parent
cfbcea1c
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
22 deletions
+44
-22
lossless_videodsp.asm
libavcodec/x86/lossless_videodsp.asm
+41
-22
lossless_videodsp_init.c
libavcodec/x86/lossless_videodsp_init.c
+3
-0
No files found.
libavcodec/x86/lossless_videodsp.asm
View file @
4353c350
...
@@ -114,40 +114,54 @@ MEDIAN_PRED
...
@@ -114,40 +114,54 @@ MEDIAN_PRED
add
dstq
,
wq
add
dstq
,
wq
neg
wq
neg
wq
%%
.
loop
:
%%
.
loop
:
pshufb
xm0
,
xm5
%if
%2
%if
%2
mova
m1
,
[
srcq
+
wq
]
mova
m1
,
[
srcq
+
wq
]
%else
%else
movu
m1
,
[
srcq
+
wq
]
movu
m1
,
[
srcq
+
wq
]
%endif
%endif
mova
m2
,
m1
psllw
m2
,
m1
,
8
psllw
m1
,
8
paddb
m1
,
m2
paddb
m1
,
m2
mova
m2
,
m1
pshufb
m2
,
m1
,
m3
pshufb
m1
,
m3
paddb
m1
,
m2
paddb
m1
,
m2
pshufb
m0
,
m5
pshufb
m2
,
m1
,
m4
mova
m2
,
m1
pshufb
m1
,
m4
paddb
m1
,
m2
paddb
m1
,
m2
%if
mmsize
==
16
%if
mmsize
>=
16
mova
m2
,
m1
pshufb
m2
,
m1
,
m6
pshufb
m1
,
m6
paddb
m1
,
m2
paddb
m1
,
m2
%endif
%endif
paddb
m0
,
m1
paddb
xm0
,
x
m1
%if
%1
%if
%1
mova
[
dstq
+
wq
]
,
m0
mova
[
dstq
+
wq
]
,
x
m0
%else
%else
movq
[
dstq
+
wq
]
,
m0
movq
[
dstq
+
wq
]
,
xm0
movhps
[
dstq
+
wq
+
8
]
,
m0
movhps
[
dstq
+
wq
+
8
]
,
xm0
%endif
%if
mmsize
==
32
vextracti128
xm2
,
m1
,
1
; get second lane of the ymm
pshufb
xm0
,
xm5
; set alls val to last val of the first lane
paddb
xm0
,
xm2
;store val
%if
%1
mova
[
dstq
+
wq
+
16
]
,
xm0
%else
;
movq
[
dstq
+
wq
+
16
]
,
xm0
movhps
[
dstq
+
wq
+
16
+
8
]
,
xm0
%endif
%endif
%endif
add
wq
,
mmsize
add
wq
,
mmsize
jl
%%
.
loop
jl
%%
.
loop
%if
mmsize
==
32
mov
eax
,
[
dstq
-
1
]
and
eax
,
0xff
%else
;
mov
eax
,
mmsize
-
1
mov
eax
,
mmsize
-
1
sub
eax
,
wd
sub
eax
,
wd
movd
m1
,
eax
movd
m1
,
eax
pshufb
m0
,
m1
pshufb
m0
,
m1
movd
eax
,
m0
movd
eax
,
m0
%endif
RET
RET
%endmacro
%endmacro
...
@@ -166,15 +180,15 @@ cglobal add_left_pred, 3,3,7, dst, src, w, left
...
@@ -166,15 +180,15 @@ cglobal add_left_pred, 3,3,7, dst, src, w, left
%macro
ADD_LEFT_PRED_UNALIGNED
0
%macro
ADD_LEFT_PRED_UNALIGNED
0
cglobal
add_left_pred_unaligned
,
3
,
3
,
7
,
dst
,
src
,
w
,
left
cglobal
add_left_pred_unaligned
,
3
,
3
,
7
,
dst
,
src
,
w
,
left
mova
m5
,
[
pb_15
]
mova
x
m5
,
[
pb_15
]
mova
m6
,
[
pb_zzzzzzzz77777777
]
VBROADCASTI128
m6
,
[
pb_zzzzzzzz77777777
]
mova
m4
,
[
pb_zzzz3333zzzzbbbb
]
VBROADCASTI128
m4
,
[
pb_zzzz3333zzzzbbbb
]
mova
m3
,
[
pb_zz11zz55zz99zzdd
]
VBROADCASTI128
m3
,
[
pb_zz11zz55zz99zzdd
]
movd
m0
,
leftm
movd
x
m0
,
leftm
pslldq
m0
,
15
pslldq
x
m0
,
15
test
srcq
,
15
test
srcq
,
mmsize
-
1
jnz
.
src_unaligned
jnz
.
src_unaligned
test
dstq
,
15
test
dstq
,
mmsize
-
1
jnz
.
dst_unaligned
jnz
.
dst_unaligned
ADD_LEFT_LOOP
1
,
1
ADD_LEFT_LOOP
1
,
1
.
dst_unaligned
:
.
dst_unaligned
:
...
@@ -186,6 +200,11 @@ cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
...
@@ -186,6 +200,11 @@ cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
INIT_XMM
ssse3
INIT_XMM
ssse3
ADD_LEFT_PRED_UNALIGNED
ADD_LEFT_PRED_UNALIGNED
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
ADD_LEFT_PRED_UNALIGNED
%endif
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_add_bytes(uint8_t *dst, uint8_t *src, ptrdiff_t w);
; void ff_add_bytes(uint8_t *dst, uint8_t *src, ptrdiff_t w);
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
...
...
libavcodec/x86/lossless_videodsp_init.c
View file @
4353c350
...
@@ -38,6 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
...
@@ -38,6 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
ptrdiff_t
w
,
int
left
);
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_unaligned_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
ff_add_left_pred_unaligned_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
ptrdiff_t
w
,
int
left
);
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_unaligned_avx2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
...
@@ -118,5 +120,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
...
@@ -118,5 +120,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
}
}
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_left_pred
=
ff_add_left_pred_unaligned_avx2
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment