Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
f1548c00
Commit
f1548c00
authored
Sep 22, 2013
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Full-pixel MC functions.
Decoding time of ped1080p.webm goes from 11.3sec to 11.1sec.
parent
c07ac8d4
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
100 additions
and
0 deletions
+100
-0
vp9dsp.asm
libavcodec/x86/vp9dsp.asm
+57
-0
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+43
-0
No files found.
libavcodec/x86/vp9dsp.asm
View file @
f1548c00
...
@@ -219,3 +219,60 @@ filter_v_fn avg
...
@@ -219,3 +219,60 @@ filter_v_fn avg
INIT_XMM
ssse3
INIT_XMM
ssse3
filter_v_fn
put
filter_v_fn
put
filter_v_fn
avg
filter_v_fn
avg
; fpel_fn type, width, off1, off2, off3, rows
;
; Emits one function named %1%2 through cglobal. Every loop iteration
; loads four vectors from src, optionally averages them with what is
; already stored at dst, and stores four vectors to dst.
;
;   %1     "put" or "avg"; the pavgb instructions are assembled only when
;          this argument is exactly "avg"
;   %2     width; picks the load/store instructions and the cglobal
;          register layout below
;   %3-%5  address offsets of the 2nd, 3rd and 4th load/store; each one is
;          pasted directly after an "s" prefix (source side) and a "d"
;          prefix (destination side), e.g. strideq -> sstrideq / dstrideq
;   %6     stride multiplier used to advance srcq/dstq once per iteration;
;          the same value is subtracted from the remaining height
%macro fpel_fn 6
; Width 4 uses movh for both directions; every other width loads with
; movu and stores with mova.
; NOTE(review): mova presumably requires an aligned dst - confirm with callers.
%if %2 == 4
%define %%srcfn movh
%define %%dstfn movh
%else
%define %%srcfn movu
%define %%dstfn mova
%endif

; Widths up to 16 request two extra registers and precompute 3*stride for
; both buffers; wider variants only use the five argument registers.
%if %2 <= 16
cglobal %1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
    lea           sstride3q, [sstrideq*3]
    lea           dstride3q, [dstrideq*3]
%else
cglobal %1%2, 5, 5, 4, dst, dstride, src, sstride, h
%endif

.loop:
    ; load four vectors from the source
    %%srcfn              m0, [srcq]
    %%srcfn              m1, [srcq+s%3]
    %%srcfn              m2, [srcq+s%4]
    %%srcfn              m3, [srcq+s%5]
    lea                srcq, [srcq+sstrideq*%6]
%ifidn %1, avg
    ; avg only: byte-wise average with the current destination contents
    pavgb                m0, [dstq]
    pavgb                m1, [dstq+d%3]
    pavgb                m2, [dstq+d%4]
    pavgb                m3, [dstq+d%5]
%endif
    ; store the four vectors to the destination
    %%dstfn          [dstq], m0
    %%dstfn      [dstq+d%3], m1
    %%dstfn      [dstq+d%4], m2
    %%dstfn      [dstq+d%5], m3
    lea                dstq, [dstq+dstrideq*%6]
    ; the loop exits only when h reaches exactly zero
    sub                  hd, %6
    jnz .loop
    RET
%endmacro
; fpel_fn pastes an "s"/"d" prefix in front of its offset arguments. These
; two defines give the pasted names s16/d16 the plain value 16.
; NOTE(review): presumably needed because mmsize is substituted as 16 in the
; XMM instantiations below - confirm against x86inc.
%define d16 16
%define s16 16

; arguments: type, width, 2nd offset, 3rd offset, 4th offset, rows per iteration
INIT_MMX mmx
fpel_fn put, 4,  strideq, strideq*2, stride3q,       4
fpel_fn put, 8,  strideq, strideq*2, stride3q,       4
INIT_MMX sse
fpel_fn avg, 4,  strideq, strideq*2, stride3q,       4
fpel_fn avg, 8,  strideq, strideq*2, stride3q,       4
INIT_XMM sse
fpel_fn put, 16, strideq, strideq*2, stride3q,       4
fpel_fn put, 32, mmsize,  strideq,   strideq+mmsize, 2
fpel_fn put, 64, mmsize,  mmsize*2,  mmsize*3,       1
INIT_XMM sse2
fpel_fn avg, 16, strideq, strideq*2, stride3q,       4
fpel_fn avg, 32, mmsize,  strideq,   strideq+mmsize, 2
fpel_fn avg, 64, mmsize,  mmsize*2,  mmsize*3,       1

; the helper defines are only meant for the instantiations above
%undef s16
%undef d16
libavcodec/x86/vp9dsp_init.c
View file @
f1548c00
...
@@ -27,6 +27,22 @@
...
@@ -27,6 +27,22 @@
#if HAVE_YASM
#if HAVE_YASM
/*
 * Prototype generator for the full-pixel functions.
 *
 * fpel_func(type, sz, opt) declares
 *     void ff_<type><sz>_<opt>(uint8_t *dst, ptrdiff_t dst_stride,
 *                              const uint8_t *src, ptrdiff_t src_stride,
 *                              int h, int mx, int my);
 * The macro only produces declarations; no definition is provided here.
 */
#define fpel_func(type, sz, opt)                                        \
    void ff_##type##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride,      \
                               const uint8_t *src, ptrdiff_t src_stride, \
                               int h, int mx, int my)

/* "put" variants */
fpel_func(put,  4, mmx);
fpel_func(put,  8, mmx);
fpel_func(put, 16, sse);
fpel_func(put, 32, sse);
fpel_func(put, 64, sse);

/* "avg" variants */
fpel_func(avg,  4, sse);
fpel_func(avg,  8, sse);
fpel_func(avg, 16, sse2);
fpel_func(avg, 32, sse2);
fpel_func(avg, 64, sse2);

#undef fpel_func
#define mc_func(avg, sz, dir, opt) \
#define mc_func(avg, sz, dir, opt) \
void ff_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
void ff_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
...
@@ -141,6 +157,13 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
...
@@ -141,6 +157,13 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
#if HAVE_YASM
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
int
cpu_flags
=
av_get_cpu_flags
();
/*
 * init_fpel(idx1, idx2, sz, type, opt)
 *
 * Stores the function ff_<type><sz>_<opt> into dsp->mc[idx1][F][idx2][0][0]
 * for all four filter indices F (8-tap smooth, regular, sharp and bilinear),
 * so every filter type shares one implementation for that [0][0] slot.
 * NOTE(review): [0][0] is presumably the "no sub-pixel offset" entry - confirm
 * against the VP9DSPContext definition.
 *
 * The expansion is one chained assignment expression without a trailing
 * semicolon; the caller supplies the ';'.
 */
#define init_fpel(idx1, idx2, sz, type, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_##type##sz##_##opt
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
...
@@ -158,11 +181,31 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
...
@@ -158,11 +181,31 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_subpel2(idx, 0, 1, v, type, opt); \
init_subpel2(idx, 0, 1, v, type, opt); \
init_subpel2(idx, 1, 0, h, type, opt)
init_subpel2(idx, 1, 0, h, type, opt)
if
(
cpu_flags
&
AV_CPU_FLAG_MMX
)
{
init_fpel
(
4
,
0
,
4
,
put
,
mmx
);
init_fpel
(
3
,
0
,
8
,
put
,
mmx
);
}
if
(
cpu_flags
&
AV_CPU_FLAG_SSE
)
{
init_fpel
(
2
,
0
,
16
,
put
,
sse
);
init_fpel
(
1
,
0
,
32
,
put
,
sse
);
init_fpel
(
0
,
0
,
64
,
put
,
sse
);
init_fpel
(
4
,
1
,
4
,
avg
,
sse
);
init_fpel
(
3
,
1
,
8
,
avg
,
sse
);
}
if
(
cpu_flags
&
AV_CPU_FLAG_SSE2
)
{
init_fpel
(
2
,
1
,
16
,
avg
,
sse2
);
init_fpel
(
1
,
1
,
32
,
avg
,
sse2
);
init_fpel
(
0
,
1
,
64
,
avg
,
sse2
);
}
if
(
cpu_flags
&
AV_CPU_FLAG_SSSE3
)
{
if
(
cpu_flags
&
AV_CPU_FLAG_SSSE3
)
{
init_subpel3
(
0
,
put
,
ssse3
);
init_subpel3
(
0
,
put
,
ssse3
);
init_subpel3
(
1
,
avg
,
ssse3
);
init_subpel3
(
1
,
avg
,
ssse3
);
}
}
#undef init_fpel
#undef init_subpel1
#undef init_subpel1
#undef init_subpel2
#undef init_subpel2
#undef init_subpel3
#undef init_subpel3
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment