Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
344d5190
Commit
344d5190
authored
Sep 16, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: add subpel MC SIMD for 10/12bpp.
parent
77f35967
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
244 additions
and
11 deletions
+244
-11
Makefile
libavcodec/x86/Makefile
+4
-1
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+0
-0
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+109
-1
vp9dsp_init_10bpp.c
libavcodec/x86/vp9dsp_init_10bpp.c
+25
-0
vp9dsp_init_12bpp.c
libavcodec/x86/vp9dsp_init_12bpp.c
+25
-0
vp9dsp_init_16bpp.c
libavcodec/x86/vp9dsp_init_16bpp.c
+1
-1
vp9dsp_init_16bpp_template.c
libavcodec/x86/vp9dsp_init_16bpp_template.c
+62
-0
vp9mc.asm
libavcodec/x86/vp9mc.asm
+18
-8
vp9mc_16bpp.asm
libavcodec/x86/vp9mc_16bpp.asm
+0
-0
No files found.
libavcodec/x86/Makefile
View file @
344d5190
...
...
@@ -63,6 +63,8 @@ OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o
OBJS-$(CONFIG_VORBIS_DECODER)
+=
x86/vorbisdsp_init.o
OBJS-$(CONFIG_VP6_DECODER)
+=
x86/vp6dsp_init.o
OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9dsp_init.o
\
x86/vp9dsp_init_10bpp.o
\
x86/vp9dsp_init_12bpp.o
\
x86/vp9dsp_init_16bpp.o
OBJS-$(CONFIG_WEBP_DECODER)
+=
x86/vp8dsp_init.o
...
...
@@ -157,5 +159,6 @@ YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
YASM-OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9intrapred.o
\
x86/vp9itxfm.o
\
x86/vp9lpf.o
\
x86/vp9mc.o
x86/vp9mc.o
\
x86/vp9mc_16bpp.o
YASM-OBJS-$(CONFIG_WEBP_DECODER)
+=
x86/vp8dsp.o
libavcodec/x86/vp9dsp_init.c
View file @
344d5190
This diff is collapsed.
Click to expand it.
libavcodec/x86/vp9dsp_init.h
View file @
344d5190
...
...
@@ -28,12 +28,120 @@ void ff_vp9_##avg##sz##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
/*
 * Declare (without a trailing semicolon) the prototype of one non-static
 * 1-D 8-tap MC routine named ff_vp9_<avg>_8tap_1d_<dir>_<sz>_<bpp>_<opt>.
 *
 *   avg   - name fragment; decl_mc_funcs passes "put" or "avg"
 *   sz    - name fragment; the block sizes used in this change are 4..64
 *   dir   - name fragment; decl_mc_funcs passes "h" or "v"
 *   opt   - name fragment for the instruction set (e.g. sse2)
 *   type  - element type of the filter table
 *   f_sz  - inner dimension of the filter table; the routine receives a
 *           pointer to arrays of f_sz elements of `type`
 *   bpp   - name fragment for the bit depth
 */
#define decl_mc_func(avg, sz, dir, opt, type, f_sz, bpp) \
void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, const type (*filter)[f_sz])
/*
 * Declare the four 1-D routines for one block size: put/avg combined with
 * the h/v directions. The last declaration has no semicolon, so the macro
 * invocation must be followed by one.
 */
#define decl_mc_funcs(sz, opt, type, fsz, bpp) \
decl_mc_func(put, sz, h, opt, type, fsz, bpp); \
decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \
decl_mc_func(put, sz, v, opt, type, fsz, bpp); \
decl_mc_func(avg, sz, v, opt, type, fsz, bpp)
/*
 * Define a static always-inline 1-D routine for block size `sz` by calling
 * the routine for size `hsz` twice: once at dst/src and once at
 * dst + hszb / src + hszb, with the same strides, height and filter.
 *
 * hszb is added directly to uint8_t pointers, so it is an offset in bytes.
 * The 16bpp template passes hszb = 2 * hsz (e.g. 16, 8, 16).
 */
#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
static av_always_inline void \
ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, const type (*filter)[f_sz]) \
{ \
ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst, dst_stride, src, \
src_stride, h, filter); \
ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst + hszb, dst_stride, src + hszb, \
src_stride, h, filter); \
}
/*
 * Instantiate mc_rep_func for put/avg in both the h and v directions.
 * The last expansion has no trailing semicolon.
 */
#define mc_rep_funcs(sz, hsz, hszb, opt, type, fsz, bpp) \
mc_rep_func(put, sz, hsz, hszb, h, opt, type, fsz, bpp); \
mc_rep_func(avg, sz, hsz, hszb, h, opt, type, fsz, bpp); \
mc_rep_func(put, sz, hsz, hszb, v, opt, type, fsz, bpp); \
mc_rep_func(avg, sz, hsz, hszb, v, opt, type, fsz, bpp)
/*
 * Define a static wrapper <op>_8tap_<fname>_<sz><dir>_<bpp>_<opt> with the
 * (dst, dst_stride, src, src_stride, h, mx, my) signature. It forwards to
 * ff_vp9_<op>_8tap_1d_<dir>_<sz>_<bpp>_<opt>, selecting the coefficients
 * ff_filters_<f_opt>[f][dvar - 1].
 *
 *   f     - first index into the filter table (a FILTER_8TAP_* value)
 *   dvar  - which wrapper parameter picks the filter row (mx or my); the
 *           "- 1" means a value of 1 selects row 0
 */
#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, bpp, opt) \
static void op##_8tap_##fname##_##sz##dir##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
ff_vp9_##op##_8tap_1d_##dir##_##sz##_##bpp##_##opt(dst, dst_stride, src, src_stride, \
h, ff_filters_##f_opt[f][dvar - 1]); \
}
/* Define the regular, sharp and smooth wrappers for one size/direction. */
#define filters_8tap_1d_fn(op, sz, dir, dvar, bpp, opt, f_opt) \
filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, dir, dvar, bpp, opt) \
filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, dir, dvar, bpp, opt) \
filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, dir, dvar, bpp, opt)
/*
 * Define both directions for one size: the h wrappers index the filter
 * with mx, the v wrappers index it with my.
 */
#define filters_8tap_1d_fn2(op, sz, bpp, opt, f_opt) \
filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \
filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt)
/*
 * Define the 1-D wrappers for every block size. Sizes 64, 32, 16 and 8
 * use the `opt8` suffix; size 4 uses the `opt4` suffix.
 */
#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) \
filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt)
/*
 * Define a static 2-D (hv) wrapper <op>_8tap_<fname>_<sz>hv_<bpp>_<opt>
 * built from two 1-D passes through a temporary buffer:
 *
 *   1. The "put" horizontal routine filters h + 7 rows, starting 3 rows
 *      above src, into `temp` with a row stride of 64 * bytes, using the
 *      filter row selected by mx - 1.
 *   2. The <op> vertical routine filters h rows from `temp`, starting
 *      3 rows in (temp + 3 * bytes * 64), into dst, using the filter row
 *      selected by my - 1.
 *
 *   align - suffix of the LOCAL_ALIGNED_<align> macro used for `temp`
 *   bytes - multiplier applied to the temp row stride and size; presumably
 *           bytes per pixel (the 16bpp template passes 2) - confirm
 *
 * `temp` holds 71 rows of 64 * bytes bytes; presumably 71 is the largest
 * block height (64) plus the 7 extra rows of pass 1 - confirm.
 */
#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) \
static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
LOCAL_ALIGNED_##align(uint8_t, temp, [71 * 64 * bytes]); \
ff_vp9_put_8tap_1d_h_##sz##_##bpp##_##opt(temp, 64 * bytes, src - 3 * src_stride, \
src_stride, h + 7, \
ff_filters_##f_opt[f][mx - 1]); \
ff_vp9_##op##_8tap_1d_v_##sz##_##bpp##_##opt(dst, dst_stride, temp + 3 * bytes * 64, \
64 * bytes, h, \
ff_filters_##f_opt[f][my - 1]); \
}
/* Define the regular, sharp and smooth 2-D wrappers for one block size. */
#define filters_8tap_2d_fn(op, sz, align, bpp, bytes, opt, f_opt) \
filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes, opt) \
filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, align, bpp, bytes, opt) \
filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, align, bpp, bytes, opt)
/*
 * Define the 2-D wrappers for every block size. Sizes 64, 32, 16 and 8
 * use the `opt8` suffix; size 4 uses the `opt4` suffix.
 */
#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) \
filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
/*
 * Store the single function ff_vp9_<type><sz><bpp>_<opt> into the [0][0]
 * slot of dsp->mc[idx1][*][idx2] for all four filter types (smooth,
 * regular, sharp and bilinear) via one chained assignment.
 * Expects a variable named `dsp` in scope at the point of use; the
 * expansion has no trailing semicolon.
 */
#define init_fpel_func(idx1, idx2, sz, type, bpp, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##bpp##_##opt
/*
 * NOTE(review): this two-argument prototype (dsp, bpp) conflicts with the
 * one-argument declaration of the same function further down, and with the
 * one-argument call made from the 16bpp template. It looks like the
 * pre-change line of the diff rather than live code - confirm against the
 * real header before relying on it.
 */
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
,
int
bpp
);
/*
 * Fill dsp->mc[idx1][filter][idx2][idxh][idxv] for the smooth, regular and
 * sharp 8-tap filters with <type>_8tap_<filter>_<sz><dir>_<bpp>_<opt>.
 * Expects a variable named `dsp` in scope; the last assignment has no
 * trailing semicolon.
 */
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, bpp, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = \
type##_8tap_smooth_##sz##dir##_##bpp##_##opt; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = \
type##_8tap_regular_##sz##dir##_##bpp##_##opt; \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = \
type##_8tap_sharp_##sz##dir##_##bpp##_##opt
/*
 * Register the three sub-pixel variants for one block size:
 * hv at [1][1], v at [0][1] and h at [1][0].
 */
#define init_subpel2(idx1, idx2, sz, type, bpp, opt) \
init_subpel1(idx1, idx2, 1, 1, sz, hv, type, bpp, opt); \
init_subpel1(idx1, idx2, 0, 1, sz, v, type, bpp, opt); \
init_subpel1(idx1, idx2, 1, 0, sz, h, type, bpp, opt)
/* Register sizes 64 (idx1 = 0) and 32 (idx1 = 1). */
#define init_subpel3_32_64(idx, type, bpp, opt) \
init_subpel2(0, idx, 64, type, bpp, opt); \
init_subpel2(1, idx, 32, type, bpp, opt)
/* As init_subpel3_32_64, plus sizes 16 (idx1 = 2) and 8 (idx1 = 3). */
#define init_subpel3_8to64(idx, type, bpp, opt) \
init_subpel3_32_64(idx, type, bpp, opt); \
init_subpel2(2, idx, 16, type, bpp, opt); \
init_subpel2(3, idx, 8, type, bpp, opt)
/* As init_subpel3_8to64, plus size 4 (idx1 = 4): all five block sizes. */
#define init_subpel3(idx, type, bpp, opt) \
init_subpel3_8to64(idx, type, bpp, opt); \
init_subpel2(4, idx, 4, type, bpp, opt)
/*
 * x86 init entry points for the high-bit-depth VP9 DSP context.
 * The 10bpp and 12bpp functions are generated from
 * vp9dsp_init_16bpp_template.c through its INIT_FUNC macro, and each of
 * them ends by calling ff_vp9dsp_init_16bpp_x86(dsp).
 */
void
ff_vp9dsp_init_10bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_12bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
);
#endif
/* AVCODEC_X86_VP9DSP_INIT_H */
libavcodec/x86/vp9dsp_init_10bpp.c
0 → 100644
View file @
344d5190
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Parameters consumed by the shared template included below:
 * BPC is passed as the bit-depth argument of the template's macro
 * invocations, and INIT_FUNC is the name of the init function the
 * template defines.
 */
#define BPC 10
#define INIT_FUNC ff_vp9dsp_init_10bpp_x86
#include "vp9dsp_init_16bpp_template.c"
libavcodec/x86/vp9dsp_init_12bpp.c
0 → 100644
View file @
344d5190
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Parameters consumed by the shared template included below:
 * BPC is passed as the bit-depth argument of the template's macro
 * invocations, and INIT_FUNC is the name of the init function the
 * template defines.
 */
#define BPC 12
#define INIT_FUNC ff_vp9dsp_init_12bpp_x86
#include "vp9dsp_init_16bpp_template.c"
libavcodec/x86/vp9dsp_init_16bpp.c
View file @
344d5190
...
...
@@ -48,7 +48,7 @@ decl_fpel_func(avg, 128, _16, avx2);
#endif
/* HAVE_YASM */
av_cold
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
,
int
bpp
)
av_cold
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
)
{
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
...
...
libavcodec/x86/vp9dsp_init_16bpp_template.c
0 → 100644
View file @
344d5190
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/vp9dsp.h"
#include "libavcodec/x86/vp9dsp_init.h"
#if HAVE_YASM
/*
 * 8-tap filter coefficient table shared by the 10/12bpp wrappers. The
 * wrapper macros index it as ff_filters_16bpp[filter type][mx - 1 or
 * my - 1] and pass the remaining [4][16] int16_t slice to the 1-D
 * routines. The asm source in this change annotates the same shape,
 * int16_t ff_filters_16bpp[3][15][4][16].
 */
extern
const
int16_t
ff_filters_16bpp
[
3
][
15
][
4
][
16
];
/*
 * Prototypes for the 4- and 8-pixel SSE2 1-D routines (put/avg, h/v) at
 * bit depth BPC. These are only declared here; presumably they are
 * implemented in vp9mc_16bpp.asm, which is collapsed in this view - confirm.
 */
decl_mc_funcs
(
4
,
sse2
,
int16_t
,
16
,
BPC
);
decl_mc_funcs
(
8
,
sse2
,
int16_t
,
16
,
BPC
);
/*
 * Build the wider block sizes in C by running the half-width routine twice:
 * 16 from 8, 32 from 16, and 64 from 32. The third argument is the byte
 * offset of the second half, which is twice the half width in each case.
 */
mc_rep_funcs
(
16
,
8
,
16
,
sse2
,
int16_t
,
16
,
BPC
);
mc_rep_funcs
(
32
,
16
,
32
,
sse2
,
int16_t
,
16
,
BPC
);
mc_rep_funcs
(
64
,
32
,
64
,
sse2
,
int16_t
,
16
,
BPC
);
/*
 * Define the static put/avg wrappers referenced by init_subpel3 in the init
 * function: first the 2-D (hv) variants, then the 1-D h and v variants, for
 * every block size and for the regular/sharp/smooth filters.
 * The 2-D invocations pass align = 16 and bytes = 2; all of them use sse2
 * for both the 4-pixel and the 8-pixel-and-up suffix.
 *
 * NOTE(review): the last argument appears here as `16` and `bpp` on
 * separate lines. The macros paste it onto ff_filters_, so it is presumably
 * the single token 16bpp (matching ff_filters_16bpp) split by the page
 * rendering - confirm against the real file.
 */
filters_8tap_2d_fn2
(
put
,
16
,
BPC
,
2
,
sse2
,
sse2
,
16
bpp
)
filters_8tap_2d_fn2
(
avg
,
16
,
BPC
,
2
,
sse2
,
sse2
,
16
bpp
)
filters_8tap_1d_fn3
(
put
,
BPC
,
sse2
,
sse2
,
16
bpp
)
filters_8tap_1d_fn3
(
avg
,
BPC
,
sse2
,
sse2
,
16
bpp
)
#endif
/* HAVE_YASM */
/*
 * Bit-depth-specific x86 init function; its real name is supplied by the
 * including file through the INIT_FUNC macro.
 *
 * When built with HAVE_YASM and the CPU flags pass the EXTERNAL_SSE2 check,
 * it installs the SSE2 8-tap sub-pixel wrappers into dsp->mc for all five
 * block sizes: the "put" set at second-to-last-filter index 0 and the "avg"
 * set at index 1 (the idx2 argument of init_subpel3). It then always calls
 * ff_vp9dsp_init_16bpp_x86(dsp).
 */
av_cold
void
INIT_FUNC
(
VP9DSPContext
*
dsp
)
{
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
/* idx2 = 0: "put" wrappers for hv, v and h at every block size. */
init_subpel3
(
0
,
put
,
BPC
,
sse2
);
/* idx2 = 1: "avg" wrappers for hv, v and h at every block size. */
init_subpel3
(
1
,
avg
,
BPC
,
sse2
);
}
#endif
/* HAVE_YASM */
/* Runs unconditionally, after the assignments above. */
ff_vp9dsp_init_16bpp_x86
(
dsp
);
}
libavcodec/x86/vp9mc.asm
View file @
344d5190
...
...
@@ -45,6 +45,13 @@ times 8 dw %7
times
8
dw
%8
%endmacro
%macro
F8_16BPP_TAPS
8
times
8
dw
%1
,
%2
times
8
dw
%3
,
%4
times
8
dw
%5
,
%6
times
8
dw
%7
,
%8
%endmacro
%macro
FILTER
1
const
filters_
%1
; smooth
F8_TAPS
-
3
,
-
1
,
32
,
64
,
38
,
1
,
-
3
,
0
...
...
@@ -102,12 +109,15 @@ FILTER ssse3
%define
F8_TAPS
F8_SSE2_TAPS
; int16_t ff_filters_sse2[3][15][8][8]
FILTER
sse2
%define
F8_TAPS
F8_16BPP_TAPS
; int16_t ff_filters_16bpp[3][15][4][16]
FILTER
16
bpp
SECTION
.
text
%macro
filter_sse2_h_fn
1
%assign
%%
px
mmsize
/
2
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
,
6
,
6
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
%
+
_8
,
6
,
6
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
pxor
m5
,
m5
mova
m6
,
[
pw_64
]
mova
m7
,
[
filteryq
+
0
]
...
...
@@ -192,7 +202,7 @@ filter_sse2_h_fn avg
%macro
filter_h_fn
1
%assign
%%
px
mmsize
/
2
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
,
6
,
6
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
%
+
_8
,
6
,
6
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
mova
m6
,
[
pw_256
]
mova
m7
,
[
filteryq
+
0
]
%if
ARCH_X86_64
&&
mmsize
>
8
...
...
@@ -253,7 +263,7 @@ filter_h_fn avg
%if
ARCH_X86_64
%macro
filter_hx2_fn
1
%assign
%%
px
mmsize
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
,
6
,
6
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
%
+
_8
,
6
,
6
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
mova
m13
,
[
pw_256
]
mova
m8
,
[
filteryq
+
0
]
mova
m9
,
[
filteryq
+
32
]
...
...
@@ -315,9 +325,9 @@ filter_hx2_fn avg
%macro
filter_sse2_v_fn
1
%assign
%%
px
mmsize
/
2
%if
ARCH_X86_64
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
6
,
8
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
6
,
8
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
%else
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
4
,
7
,
15
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
4
,
7
,
15
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
mov
filteryq
,
r5mp
%define
hd
r4mp
%endif
...
...
@@ -413,9 +423,9 @@ filter_sse2_v_fn avg
%macro
filter_v_fn
1
%assign
%%
px
mmsize
/
2
%if
ARCH_X86_64
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
6
,
8
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
6
,
8
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
%else
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
4
,
7
,
11
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
4
,
7
,
11
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
mov
filteryq
,
r5mp
%define
hd
r4mp
%endif
...
...
@@ -487,7 +497,7 @@ filter_v_fn avg
%macro
filter_vx2_fn
1
%assign
%%
px
mmsize
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
6
,
8
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
6
,
8
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
mova
m13
,
[
pw_256
]
lea
sstride3q
,
[
sstrideq
*
3
]
lea
src4q
,
[
srcq
+
sstrideq
]
...
...
libavcodec/x86/vp9mc_16bpp.asm
0 → 100644
View file @
344d5190
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment