Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
630967ef
Commit
630967ef
authored
Dec 02, 2017
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/utvideodec : add SIMD (SSSE3 and AVX2) for gradient_pred
parent
4353c350
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
101 additions
and
1 deletion
+101
-1
lossless_videodsp.c
libavcodec/lossless_videodsp.c
+11
-0
lossless_videodsp.h
libavcodec/lossless_videodsp.h
+1
-0
utvideodec.c
libavcodec/utvideodec.c
+4
-1
lossless_videodsp.asm
libavcodec/x86/lossless_videodsp.asm
+80
-0
lossless_videodsp_init.c
libavcodec/x86/lossless_videodsp_init.c
+5
-0
No files found.
libavcodec/lossless_videodsp.c
View file @
630967ef
...
@@ -98,6 +98,16 @@ static int add_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned ma
...
@@ -98,6 +98,16 @@ static int add_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned ma
return
acc
;
return
acc
;
}
}
/**
 * Undo gradient prediction on one row, in place (plain C version).
 *
 * Every one of the first width bytes of src is replaced by
 * (top - top_left + left + residual) & 0xFF, where "top" is the byte one
 * stride before it, "top_left" is the byte just before "top", "left" is the
 * byte just before it in src (already rewritten for every index but the
 * first) and "residual" is the value currently stored at that position.
 *
 * @param src    row to rewrite; src[-1], src[-stride - 1] and the width bytes
 *               starting at src[-stride] are read as well
 * @param stride offset subtracted from src to reach the "top" row
 * @param width  number of bytes to process
 */
static void add_gradient_pred_c(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width)
{
    const uint8_t *above = src - stride;
    int x;

    for (x = 0; x < width; x++) {
        const int top      = above[x];
        const int top_left = above[x - 1];
        const int left     = src[x - 1];

        /* Same sum as top - top_left + left + residual, kept modulo 256. */
        src[x] = (top - top_left + left + src[x]) & 0xFF;
    }
}
void
ff_llviddsp_init
(
LLVidDSPContext
*
c
)
void
ff_llviddsp_init
(
LLVidDSPContext
*
c
)
{
{
...
@@ -106,6 +116,7 @@ void ff_llviddsp_init(LLVidDSPContext *c)
...
@@ -106,6 +116,7 @@ void ff_llviddsp_init(LLVidDSPContext *c)
c
->
add_left_pred
=
add_left_pred_c
;
c
->
add_left_pred
=
add_left_pred_c
;
c
->
add_left_pred_int16
=
add_left_pred_int16_c
;
c
->
add_left_pred_int16
=
add_left_pred_int16_c
;
c
->
add_gradient_pred
=
add_gradient_pred_c
;
if
(
ARCH_PPC
)
if
(
ARCH_PPC
)
ff_llviddsp_init_ppc
(
c
);
ff_llviddsp_init_ppc
(
c
);
...
...
libavcodec/lossless_videodsp.h
View file @
630967ef
...
@@ -39,6 +39,7 @@ typedef struct LLVidDSPContext {
...
@@ -39,6 +39,7 @@ typedef struct LLVidDSPContext {
int
(
*
add_left_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
src
,
int
(
*
add_left_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
left
);
unsigned
mask
,
ptrdiff_t
w
,
unsigned
left
);
void
(
*
add_gradient_pred
)(
uint8_t
*
src
/* align 32 */
,
const
ptrdiff_t
stride
,
const
ptrdiff_t
width
);
}
LLVidDSPContext
;
}
LLVidDSPContext
;
void
ff_llviddsp_init
(
LLVidDSPContext
*
llviddsp
);
void
ff_llviddsp_init
(
LLVidDSPContext
*
llviddsp
);
...
...
libavcodec/utvideodec.c
View file @
630967ef
...
@@ -460,6 +460,7 @@ static void restore_gradient_planar(UtvideoContext *c, uint8_t *src, ptrdiff_t s
...
@@ -460,6 +460,7 @@ static void restore_gradient_planar(UtvideoContext *c, uint8_t *src, ptrdiff_t s
uint8_t
*
bsrc
;
uint8_t
*
bsrc
;
int
slice_start
,
slice_height
;
int
slice_start
,
slice_height
;
const
int
cmask
=
~
rmode
;
const
int
cmask
=
~
rmode
;
int
min_width
=
FFMIN
(
width
,
32
);
for
(
slice
=
0
;
slice
<
slices
;
slice
++
)
{
for
(
slice
=
0
;
slice
<
slices
;
slice
++
)
{
slice_start
=
((
slice
*
height
)
/
slices
)
&
cmask
;
slice_start
=
((
slice
*
height
)
/
slices
)
&
cmask
;
...
@@ -479,12 +480,14 @@ static void restore_gradient_planar(UtvideoContext *c, uint8_t *src, ptrdiff_t s
...
@@ -479,12 +480,14 @@ static void restore_gradient_planar(UtvideoContext *c, uint8_t *src, ptrdiff_t s
for
(
j
=
1
;
j
<
slice_height
;
j
++
)
{
for
(
j
=
1
;
j
<
slice_height
;
j
++
)
{
// second line - first element has top prediction, the rest uses gradient
// second line - first element has top prediction, the rest uses gradient
bsrc
[
0
]
=
(
bsrc
[
0
]
+
bsrc
[
-
stride
])
&
0xFF
;
bsrc
[
0
]
=
(
bsrc
[
0
]
+
bsrc
[
-
stride
])
&
0xFF
;
for
(
i
=
1
;
i
<
width
;
i
++
)
{
for
(
i
=
1
;
i
<
min_width
;
i
++
)
{
/* the DSP function needs a 32-byte-aligned pointer, so the first min_width (<= 32) columns are handled here in C */
A
=
bsrc
[
i
-
stride
];
A
=
bsrc
[
i
-
stride
];
B
=
bsrc
[
i
-
(
stride
+
1
)];
B
=
bsrc
[
i
-
(
stride
+
1
)];
C
=
bsrc
[
i
-
1
];
C
=
bsrc
[
i
-
1
];
bsrc
[
i
]
=
(
A
-
B
+
C
+
bsrc
[
i
])
&
0xFF
;
bsrc
[
i
]
=
(
A
-
B
+
C
+
bsrc
[
i
])
&
0xFF
;
}
}
if
(
width
>
32
)
c
->
llviddsp
.
add_gradient_pred
(
bsrc
+
32
,
stride
,
width
-
32
);
bsrc
+=
stride
;
bsrc
+=
stride
;
}
}
}
}
...
...
libavcodec/x86/lossless_videodsp.asm
View file @
630967ef
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
;* SIMD lossless video DSP utils
;* SIMD lossless video DSP utils
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Michael Niedermayer
;* Copyright (c) 2014 Michael Niedermayer
;* Copyright (c) 2017 Jokyo Images
;*
;*
;* This file is part of FFmpeg.
;* This file is part of FFmpeg.
;*
;*
...
@@ -325,3 +326,82 @@ cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
...
@@ -325,3 +326,82 @@ cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
ADD_HFYU_LEFT_LOOP_INT16
u
,
a
ADD_HFYU_LEFT_LOOP_INT16
u
,
a
.
src_unaligned
:
.
src_unaligned
:
ADD_HFYU_LEFT_LOOP_INT16
u
,
u
ADD_HFYU_LEFT_LOOP_INT16
u
,
u
;---------------------------------------------------------------------------------------------
; void add_gradient_pred(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width)
;
; SIMD version of the in-place gradient prediction: for each x,
;     src[x] = src[x] + A - B + C    (byte arithmetic, wraps modulo 256)
; with A = src[x - stride], B = src[x - (stride + 1)], C = src[x - 1] (already updated).
; Because C is the previous output, the result is a running (prefix) sum of
; (A - B + src[x]) seeded with src[-1]; the code below computes it that way.
; The macro body is assembled once per INIT_* instruction set (see the end of this section).
;---------------------------------------------------------------------------------------------
%macro
ADD_GRADIENT_PRED
0
; three named arguments (src, stride, width) plus one scratch register (tmp)
cglobal
add_gradient_pred
,
3
,
4
,
5
,
src
,
stride
,
width
,
tmp
; xm0 = pshufb mask taken from pb_15 (defined outside this view; presumably
; sixteen bytes of value 15, i.e. "select byte 15 everywhere" - confirm)
mova
xm0
,
[
pb_15
]
; load src[-1] (the initial C) into the low byte of xm1
movd
xm1
,
[
srcq
-
1
]
; replicate that low byte into every byte of xm1
%if
cpuflag
(
avx2
)
vpbroadcastb
xm1
,
xm1
%else
; without AVX2: pshufb with an all-zero mask copies byte 0 to every position
pxor
xm2
,
xm2
pshufb
xm1
,
xm2
%endif
; point srcq at src + width and let widthq run from -width upwards,
; so [srcq + widthq] walks the row and the loop ends once widthq is no longer negative
add
srcq
,
widthq
neg
widthq
; negated stride: srcq + strideq now addresses src - stride (the row holding A and B)
neg
strideq
; each iteration handles mmsize bytes
; NOTE(review): when width is not a multiple of mmsize the last iteration still
; loads/stores a full vector - callers presumably provide padding; confirm
.
loop
:
; tmpq = end of the row holding A and B
lea
tmpq
,
[
srcq
+
strideq
]
mova
m2
,
[
tmpq
+
widthq
]
; A = src[x - stride] (aligned load)
movu
m3
,
[
tmpq
+
widthq
-
1
]
; B = src[x - (stride + 1)] (unaligned load, one byte earlier than A)
mova
m4
,
[
srcq
+
widthq
]
; current value src[x] (aligned load)
psubb
m2
,
m3
; m2 = A - B
; prefix sum of (A - B): shift-and-add by 1, 2, 4 and 8 bytes
; (pslldq shifts inside each 128-bit lane, so every lane gets its own running sum)
pslldq
m3
,
m2
,
1
paddb
m2
,
m3
pslldq
m3
,
m2
,
2
paddb
m2
,
m3
pslldq
m3
,
m2
,
4
paddb
m2
,
m3
pslldq
m3
,
m2
,
8
paddb
m2
,
m3
; prefix sum of the current values, same shift-and-add scheme
pslldq
m3
,
m4
,
1
paddb
m4
,
m3
pslldq
m3
,
m4
,
2
paddb
m4
,
m3
pslldq
m3
,
m4
,
4
paddb
m4
,
m3
pslldq
m3
,
m4
,
8
paddb
m4
,
m3
; combine both prefix sums
paddb
m2
,
m4
; m2 = prefix sum of (current + (A - B))
paddb
xm1
,
xm2
; add the carried-in C to the low 16 bytes
mova
[
srcq
+
widthq
]
,
xm1
; store the first 16 output bytes
pshufb
xm1
,
xm0
; shuffle xm1 with the pb_15 mask: carries the last output byte into the next 16 bytes
%if
mmsize
==
32
vextracti128
xm2
,
m2
,
1
; fetch the upper 128-bit lane of the ymm register
paddb
xm1
,
xm2
; add the carry from the lower 16 bytes
mova
[
srcq
+
widthq
+
16
]
,
xm1
; store the second 16 output bytes
pshufb
xm1
,
xm0
; shuffle xm1 with the pb_15 mask again: carry for the next loop iteration
%endif
; advance by one vector; keep looping while the counter is still negative
add
widthq
,
mmsize
jl
.
loop
RET
%endmacro
; instantiate the function for SSSE3 (xmm registers) ...
INIT_XMM
ssse3
ADD_GRADIENT_PRED
; ... and for AVX2 (ymm registers) when HAVE_AVX2_EXTERNAL is set
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
ADD_GRADIENT_PRED
%endif
libavcodec/x86/lossless_videodsp_init.c
View file @
630967ef
...
@@ -44,6 +44,9 @@ int ff_add_left_pred_unaligned_avx2(uint8_t *dst, const uint8_t *src,
...
@@ -44,6 +44,9 @@ int ff_add_left_pred_unaligned_avx2(uint8_t *dst, const uint8_t *src,
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
void
ff_add_gradient_pred_ssse3
(
uint8_t
*
src
,
const
ptrdiff_t
stride
,
const
ptrdiff_t
width
);
void
ff_add_gradient_pred_avx2
(
uint8_t
*
src
,
const
ptrdiff_t
stride
,
const
ptrdiff_t
width
);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
static
void
add_median_pred_cmov
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
static
void
add_median_pred_cmov
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
ptrdiff_t
w
,
const
uint8_t
*
diff
,
ptrdiff_t
w
,
...
@@ -109,6 +112,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
...
@@ -109,6 +112,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
c
->
add_left_pred
=
ff_add_left_pred_ssse3
;
c
->
add_left_pred
=
ff_add_left_pred_ssse3
;
c
->
add_left_pred_int16
=
ff_add_left_pred_int16_ssse3
;
c
->
add_left_pred_int16
=
ff_add_left_pred_int16_ssse3
;
c
->
add_gradient_pred
=
ff_add_gradient_pred_ssse3
;
}
}
if
(
EXTERNAL_SSSE3_FAST
(
cpu_flags
))
{
if
(
EXTERNAL_SSSE3_FAST
(
cpu_flags
))
{
...
@@ -121,5 +125,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
...
@@ -121,5 +125,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_left_pred
=
ff_add_left_pred_unaligned_avx2
;
c
->
add_left_pred
=
ff_add_left_pred_unaligned_avx2
;
c
->
add_gradient_pred
=
ff_add_gradient_pred_avx2
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment