Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
77f35967
Commit
77f35967
authored
Sep 16, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: add fullpel (avg) MC SIMD for 10/12bpp.
parent
6354ff03
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
120 additions
and
65 deletions
+120
-65
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+28
-28
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+6
-6
vp9dsp_init_16bpp.c
libavcodec/x86/vp9dsp_init_16bpp.c
+42
-16
vp9mc.asm
libavcodec/x86/vp9mc.asm
+44
-15
No files found.
libavcodec/x86/vp9dsp_init.c
View file @
77f35967
...
...
@@ -29,20 +29,20 @@
#if HAVE_YASM
decl_fpel_func
(
put
,
4
,
mmx
);
decl_fpel_func
(
put
,
8
,
mmx
);
decl_fpel_func
(
put
,
16
,
sse
);
decl_fpel_func
(
put
,
32
,
sse
);
decl_fpel_func
(
put
,
64
,
sse
);
decl_fpel_func
(
avg
,
4
,
mmxext
);
decl_fpel_func
(
avg
,
8
,
mmxext
);
decl_fpel_func
(
avg
,
16
,
sse2
);
decl_fpel_func
(
avg
,
32
,
sse2
);
decl_fpel_func
(
avg
,
64
,
sse2
);
decl_fpel_func
(
put
,
32
,
avx
);
decl_fpel_func
(
put
,
64
,
avx
);
decl_fpel_func
(
avg
,
32
,
avx2
);
decl_fpel_func
(
avg
,
64
,
avx2
);
decl_fpel_func
(
put
,
4
,
,
mmx
);
decl_fpel_func
(
put
,
8
,
,
mmx
);
decl_fpel_func
(
put
,
16
,
,
sse
);
decl_fpel_func
(
put
,
32
,
,
sse
);
decl_fpel_func
(
put
,
64
,
,
sse
);
decl_fpel_func
(
avg
,
4
,
_8
,
mmxext
);
decl_fpel_func
(
avg
,
8
,
_8
,
mmxext
);
decl_fpel_func
(
avg
,
16
,
_8
,
sse2
);
decl_fpel_func
(
avg
,
32
,
_8
,
sse2
);
decl_fpel_func
(
avg
,
64
,
_8
,
sse2
);
decl_fpel_func
(
put
,
32
,
,
avx
);
decl_fpel_func
(
put
,
64
,
,
avx
);
decl_fpel_func
(
avg
,
32
,
_8
,
avx2
);
decl_fpel_func
(
avg
,
64
,
_8
,
avx2
);
#define mc_func(avg, sz, dir, opt, type, f_sz) \
void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
...
...
@@ -378,8 +378,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
} while (0)
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
init_fpel_func
(
4
,
0
,
4
,
put
,
mmx
);
init_fpel_func
(
3
,
0
,
8
,
put
,
mmx
);
init_fpel_func
(
4
,
0
,
4
,
put
,
,
mmx
);
init_fpel_func
(
3
,
0
,
8
,
put
,
,
mmx
);
if
(
!
bitexact
)
{
dsp
->
itxfm_add
[
4
/* lossless */
][
DCT_DCT
]
=
dsp
->
itxfm_add
[
4
/* lossless */
][
ADST_DCT
]
=
...
...
@@ -392,8 +392,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_subpel2
(
4
,
0
,
4
,
put
,
mmxext
);
init_subpel2
(
4
,
1
,
4
,
avg
,
mmxext
);
init_fpel_func
(
4
,
1
,
4
,
avg
,
mmxext
);
init_fpel_func
(
3
,
1
,
8
,
avg
,
mmxext
);
init_fpel_func
(
4
,
1
,
4
,
avg
,
_8
,
mmxext
);
init_fpel_func
(
3
,
1
,
8
,
avg
,
_8
,
mmxext
);
dsp
->
itxfm_add
[
TX_4X4
][
DCT_DCT
]
=
ff_vp9_idct_idct_4x4_add_mmxext
;
init_dc_ipred
(
4
,
mmxext
);
init_dc_ipred
(
8
,
mmxext
);
...
...
@@ -401,9 +401,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
init_fpel_func
(
2
,
0
,
16
,
put
,
sse
);
init_fpel_func
(
1
,
0
,
32
,
put
,
sse
);
init_fpel_func
(
0
,
0
,
64
,
put
,
sse
);
init_fpel_func
(
2
,
0
,
16
,
put
,
,
sse
);
init_fpel_func
(
1
,
0
,
32
,
put
,
,
sse
);
init_fpel_func
(
0
,
0
,
64
,
put
,
,
sse
);
init_ipred
(
16
,
sse
,
v
,
VERT
);
init_ipred
(
32
,
sse
,
v
,
VERT
);
}
...
...
@@ -411,9 +411,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
init_subpel3_8to64
(
0
,
put
,
sse2
);
init_subpel3_8to64
(
1
,
avg
,
sse2
);
init_fpel_func
(
2
,
1
,
16
,
avg
,
sse2
);
init_fpel_func
(
1
,
1
,
32
,
avg
,
sse2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
sse2
);
init_fpel_func
(
2
,
1
,
16
,
avg
,
_8
,
sse2
);
init_fpel_func
(
1
,
1
,
32
,
avg
,
_8
,
sse2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
_8
,
sse2
);
init_lpf
(
sse2
);
dsp
->
itxfm_add
[
TX_4X4
][
ADST_DCT
]
=
ff_vp9_idct_iadst_4x4_add_sse2
;
dsp
->
itxfm_add
[
TX_4X4
][
DCT_ADST
]
=
ff_vp9_iadst_idct_4x4_add_sse2
;
...
...
@@ -483,14 +483,14 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
init_dir_tm_h_ipred
(
32
,
avx
);
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
init_fpel_func
(
1
,
0
,
32
,
put
,
avx
);
init_fpel_func
(
0
,
0
,
64
,
put
,
avx
);
init_fpel_func
(
1
,
0
,
32
,
put
,
,
avx
);
init_fpel_func
(
0
,
0
,
64
,
put
,
,
avx
);
init_ipred
(
32
,
avx
,
v
,
VERT
);
}
if
(
EXTERNAL_AVX2
(
cpu_flags
))
{
init_fpel_func
(
1
,
1
,
32
,
avg
,
avx2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
avx2
);
init_fpel_func
(
1
,
1
,
32
,
avg
,
_8
,
avx2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
_8
,
avx2
);
if
(
ARCH_X86_64
)
{
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
init_subpel3_32_64
(
0
,
put
,
avx2
);
...
...
libavcodec/x86/vp9dsp_init.h
View file @
77f35967
...
...
@@ -23,16 +23,16 @@
#ifndef AVCODEC_X86_VP9DSP_INIT_H
#define AVCODEC_X86_VP9DSP_INIT_H
#define decl_fpel_func(avg, sz, opt) \
void ff_vp9_##avg##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
#define decl_fpel_func(avg, sz,
bpp,
opt) \
void ff_vp9_##avg##sz##
bpp##
_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
#define init_fpel_func(idx1, idx2, sz, type, opt) \
#define init_fpel_func(idx1, idx2, sz, type,
bpp,
opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##_##opt
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##
bpp##
_##opt
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
,
int
bpp
);
...
...
libavcodec/x86/vp9dsp_init_16bpp.c
View file @
77f35967
...
...
@@ -29,14 +29,22 @@
#if HAVE_YASM
decl_fpel_func
(
put
,
8
,
mmx
);
decl_fpel_func
(
put
,
16
,
sse
);
decl_fpel_func
(
put
,
32
,
sse
);
decl_fpel_func
(
put
,
64
,
sse
);
decl_fpel_func
(
put
,
128
,
sse
);
decl_fpel_func
(
put
,
32
,
avx
);
decl_fpel_func
(
put
,
64
,
avx
);
decl_fpel_func
(
put
,
128
,
avx
);
decl_fpel_func
(
put
,
8
,
,
mmx
);
decl_fpel_func
(
avg
,
8
,
_16
,
mmxext
);
decl_fpel_func
(
put
,
16
,
,
sse
);
decl_fpel_func
(
put
,
32
,
,
sse
);
decl_fpel_func
(
put
,
64
,
,
sse
);
decl_fpel_func
(
put
,
128
,
,
sse
);
decl_fpel_func
(
avg
,
16
,
_16
,
sse2
);
decl_fpel_func
(
avg
,
32
,
_16
,
sse2
);
decl_fpel_func
(
avg
,
64
,
_16
,
sse2
);
decl_fpel_func
(
avg
,
128
,
_16
,
sse2
);
decl_fpel_func
(
put
,
32
,
,
avx
);
decl_fpel_func
(
put
,
64
,
,
avx
);
decl_fpel_func
(
put
,
128
,
,
avx
);
decl_fpel_func
(
avg
,
32
,
_16
,
avx2
);
decl_fpel_func
(
avg
,
64
,
_16
,
avx2
);
decl_fpel_func
(
avg
,
128
,
_16
,
avx2
);
#endif
/* HAVE_YASM */
...
...
@@ -46,19 +54,37 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp, int bpp)
int
cpu_flags
=
av_get_cpu_flags
();
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
init_fpel_func
(
4
,
0
,
8
,
put
,
mmx
);
init_fpel_func
(
4
,
0
,
8
,
put
,
,
mmx
);
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_fpel_func
(
4
,
1
,
8
,
avg
,
_16
,
mmxext
);
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
init_fpel_func
(
3
,
0
,
16
,
put
,
sse
);
init_fpel_func
(
2
,
0
,
32
,
put
,
sse
);
init_fpel_func
(
1
,
0
,
64
,
put
,
sse
);
init_fpel_func
(
0
,
0
,
128
,
put
,
sse
);
init_fpel_func
(
3
,
0
,
16
,
put
,
,
sse
);
init_fpel_func
(
2
,
0
,
32
,
put
,
,
sse
);
init_fpel_func
(
1
,
0
,
64
,
put
,
,
sse
);
init_fpel_func
(
0
,
0
,
128
,
put
,
,
sse
);
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
init_fpel_func
(
3
,
1
,
16
,
avg
,
_16
,
sse2
);
init_fpel_func
(
2
,
1
,
32
,
avg
,
_16
,
sse2
);
init_fpel_func
(
1
,
1
,
64
,
avg
,
_16
,
sse2
);
init_fpel_func
(
0
,
1
,
128
,
avg
,
_16
,
sse2
);
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
init_fpel_func
(
2
,
0
,
32
,
put
,
avx
);
init_fpel_func
(
1
,
0
,
64
,
put
,
avx
);
init_fpel_func
(
0
,
0
,
128
,
put
,
avx
);
init_fpel_func
(
2
,
0
,
32
,
put
,
,
avx
);
init_fpel_func
(
1
,
0
,
64
,
put
,
,
avx
);
init_fpel_func
(
0
,
0
,
128
,
put
,
,
avx
);
}
if
(
EXTERNAL_AVX2
(
cpu_flags
))
{
init_fpel_func
(
2
,
1
,
32
,
avg
,
_16
,
avx2
);
init_fpel_func
(
1
,
1
,
64
,
avg
,
_16
,
avx2
);
init_fpel_func
(
0
,
1
,
128
,
avg
,
_16
,
avx2
);
}
#endif
/* HAVE_YASM */
...
...
libavcodec/x86/vp9mc.asm
View file @
77f35967
...
...
@@ -553,7 +553,7 @@ filter_vx2_fn avg
%endif
; ARCH_X86_64
%macro
fpel_fn
6
-
7
4
%macro
fpel_fn
6
-
8
0
,
4
%if
%2
==
4
%define
%%
srcfn
movh
%define
%%
dstfn
movh
...
...
@@ -562,12 +562,22 @@ filter_vx2_fn avg
%define
%%
dstfn
mova
%endif
%if
%7
==
8
%define
%%
pavg
pavgb
%define
%%
szsuf
_8
%elif
%7
==
16
%define
%%
pavg
pavgw
%define
%%
szsuf
_16
%else
%define
%%
szsuf
%endif
%if
%2
<=
mmsize
cglobal
vp9_
%1%2
,
5
,
7
,
4
,
dst
,
dstride
,
src
,
sstride
,
h
,
dstride3
,
sstride3
cglobal
vp9_
%1%2
%%
szsuf
,
5
,
7
,
4
,
dst
,
dstride
,
src
,
sstride
,
h
,
dstride3
,
sstride3
lea
sstride3q
,
[
sstrideq
*
3
]
lea
dstride3q
,
[
dstrideq
*
3
]
%else
cglobal
vp9_
%1%2
,
5
,
5
,
%7
,
dst
,
dstride
,
src
,
sstride
,
h
cglobal
vp9_
%1%2
%%
szsuf
,
5
,
5
,
%8
,
dst
,
dstride
,
src
,
sstride
,
h
%endif
.
loop
:
%%
srcfn
m0
,
[srcq]
...
...
@@ -582,10 +592,16 @@ cglobal vp9_%1%2, 5, 5, %7, dst, dstride, src, sstride, h
%endif
lea
srcq
,
[
srcq
+
sstrideq
*
%6
]
%ifidn
%1
,
avg
pavgb
m0
,
[dstq]
pavgb
m1
,
[
dstq
+
d%3
]
pavgb
m2
,
[
dstq
+
d%4
]
pavgb
m3
,
[
dstq
+
d%5
]
%%
pavg
m0
,
[dstq]
%%
pavg
m1
,
[
dstq
+
d%3
]
%%
pavg
m2
,
[
dstq
+
d%4
]
%%
pavg
m3
,
[
dstq
+
d%5
]
%if
%2
/
mmsize
==
8
%%
pavg
m4
,
[
dstq
+
mmsize
*
4
]
%%
pavg
m5
,
[
dstq
+
mmsize
*
5
]
%%
pavg
m6
,
[
dstq
+
mmsize
*
6
]
%%
pavg
m7
,
[
dstq
+
mmsize
*
7
]
%endif
%endif
%%
dstfn
[dstq],
m0
%%
dstfn
[
dstq
+
d%3
]
,
m1
...
...
@@ -611,25 +627,38 @@ INIT_MMX mmx
fpel_fn
put
,
4
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
INIT_MMX
mmxext
fpel_fn
avg
,
4
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
4
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
fpel_fn
avg
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
INIT_XMM
sse
fpel_fn
put
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
put
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
8
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
0
,
8
INIT_XMM
sse2
fpel_fn
avg
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
avg
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
fpel_fn
avg
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
fpel_fn
avg
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
8
fpel_fn
avg
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
8
INIT_YMM
avx
fpel_fn
put
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
fpel_fn
avg
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
avg
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
fpel_fn
avg
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
8
%endif
INIT_MMX
mmxext
fpel_fn
avg
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
16
INIT_XMM
sse2
fpel_fn
avg
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
16
fpel_fn
avg
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
16
fpel_fn
avg
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
16
fpel_fn
avg
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
16
,
8
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
fpel_fn
avg
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
16
fpel_fn
avg
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
16
fpel_fn
avg
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
16
%endif
%undef
s16
%undef
d16
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment