Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
e25be471
Commit
e25be471
authored
Mar 02, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp8: convert idct/mc x86 assembly to use cpuflags().
parent
8249a23f
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
124 additions
and
116 deletions
+124
-116
vp8dsp-init.c
libavcodec/x86/vp8dsp-init.c
+56
-56
vp8dsp.asm
libavcodec/x86/vp8dsp.asm
+68
-60
No files found.
libavcodec/x86/vp8dsp-init.c
View file @
e25be471
...
...
@@ -29,16 +29,16 @@
/*
* MC functions
*/
extern
void
ff_put_vp8_epel4_h4_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_h4_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_h6_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_h6_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_v4_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_v4_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_v6_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_v6_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
...
...
@@ -80,7 +80,7 @@ extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear4_h_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_bilinear4_h_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear8_h_sse2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
...
...
@@ -93,7 +93,7 @@ extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear4_v_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_bilinear4_v_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear8_v_sse2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
...
...
@@ -139,27 +139,27 @@ static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
}
#if ARCH_X86_32
TAP_W8
(
mmx
ext
,
epel
,
h4
)
TAP_W8
(
mmx
ext
,
epel
,
h6
)
TAP_W16
(
mmx
ext
,
epel
,
h6
)
TAP_W8
(
mmx
ext
,
epel
,
v4
)
TAP_W8
(
mmx
ext
,
epel
,
v6
)
TAP_W16
(
mmx
ext
,
epel
,
v6
)
TAP_W8
(
mmx
ext
,
bilinear
,
h
)
TAP_W16
(
mmx
ext
,
bilinear
,
h
)
TAP_W8
(
mmx
ext
,
bilinear
,
v
)
TAP_W16
(
mmx
ext
,
bilinear
,
v
)
TAP_W8
(
mmx
2
,
epel
,
h4
)
TAP_W8
(
mmx
2
,
epel
,
h6
)
TAP_W16
(
mmx
2
,
epel
,
h6
)
TAP_W8
(
mmx
2
,
epel
,
v4
)
TAP_W8
(
mmx
2
,
epel
,
v6
)
TAP_W16
(
mmx
2
,
epel
,
v6
)
TAP_W8
(
mmx
2
,
bilinear
,
h
)
TAP_W16
(
mmx
2
,
bilinear
,
h
)
TAP_W8
(
mmx
2
,
bilinear
,
v
)
TAP_W16
(
mmx
2
,
bilinear
,
v
)
#endif
TAP_W16
(
sse2
,
epel
,
h6
)
TAP_W16
(
sse2
,
epel
,
v6
)
TAP_W16
(
sse2
,
bilinear
,
h
)
TAP_W16
(
sse2
,
bilinear
,
v
)
TAP_W16
(
sse2
,
epel
,
h6
)
TAP_W16
(
sse2
,
epel
,
v6
)
TAP_W16
(
sse2
,
bilinear
,
h
)
TAP_W16
(
sse2
,
bilinear
,
v
)
TAP_W16
(
ssse3
,
epel
,
h6
)
TAP_W16
(
ssse3
,
epel
,
v6
)
TAP_W16
(
ssse3
,
bilinear
,
h
)
TAP_W16
(
ssse3
,
bilinear
,
v
)
TAP_W16
(
ssse3
,
epel
,
h6
)
TAP_W16
(
ssse3
,
epel
,
v6
)
TAP_W16
(
ssse3
,
bilinear
,
h
)
TAP_W16
(
ssse3
,
bilinear
,
v
)
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
...
...
@@ -177,13 +177,13 @@ static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT
#if ARCH_X86_32
#define HVTAPMMX(x, y) \
HVTAP(mmx
ext
, 8, x, y, 4, 8) \
HVTAP(mmx
ext
, 8, x, y, 8, 16)
HVTAP(mmx
2
, 8, x, y, 4, 8) \
HVTAP(mmx
2
, 8, x, y, 8, 16)
HVTAP
(
mmx
ext
,
8
,
6
,
6
,
16
,
16
)
HVTAP
(
mmx
2
,
8
,
6
,
6
,
16
,
16
)
#else
#define HVTAPMMX(x, y) \
HVTAP(mmx
ext
, 8, x, y, 4, 8)
HVTAP(mmx
2
, 8, x, y, 4, 8)
#endif
HVTAPMMX
(
4
,
4
)
...
...
@@ -218,16 +218,16 @@ static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
dst, dststride, tmp, SIZE, height, mx, my); \
}
HVBILIN
(
mmx
ext
,
8
,
4
,
8
)
HVBILIN
(
mmx
2
,
8
,
4
,
8
)
#if ARCH_X86_32
HVBILIN
(
mmx
ext
,
8
,
8
,
16
)
HVBILIN
(
mmx
ext
,
8
,
16
,
16
)
HVBILIN
(
mmx
2
,
8
,
8
,
16
)
HVBILIN
(
mmx
2
,
8
,
16
,
16
)
#endif
HVBILIN
(
sse2
,
8
,
8
,
16
)
HVBILIN
(
sse2
,
8
,
16
,
16
)
HVBILIN
(
ssse3
,
8
,
4
,
8
)
HVBILIN
(
ssse3
,
8
,
8
,
16
)
HVBILIN
(
ssse3
,
8
,
16
,
16
)
HVBILIN
(
sse2
,
8
,
8
,
16
)
HVBILIN
(
sse2
,
8
,
16
,
16
)
HVBILIN
(
ssse3
,
8
,
4
,
8
)
HVBILIN
(
ssse3
,
8
,
8
,
16
)
HVBILIN
(
ssse3
,
8
,
16
,
16
)
extern
void
ff_vp8_idct_dc_add_mmx
(
uint8_t
*
dst
,
DCTELEM
block
[
16
],
ptrdiff_t
stride
);
...
...
@@ -283,7 +283,7 @@ extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \
int e, int i, int hvt);
DECLARE_LOOP_FILTER
(
mmx
)
DECLARE_LOOP_FILTER
(
mmx
ext
)
DECLARE_LOOP_FILTER
(
mmx
2
)
DECLARE_LOOP_FILTER
(
sse2
)
DECLARE_LOOP_FILTER
(
ssse3
)
DECLARE_LOOP_FILTER
(
sse4
)
...
...
@@ -351,26 +351,26 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
/* note that 4-tap width=16 functions are missing because w=16
* is only used for luma, and luma is always a copy or sixtap. */
if
(
mm_flags
&
AV_CPU_FLAG_MMX2
)
{
VP8_MC_FUNC
(
2
,
4
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
2
,
4
,
mmx
ext
);
VP8_MC_FUNC
(
2
,
4
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
2
,
4
,
mmx
2
);
#if ARCH_X86_32
VP8_LUMA_MC_FUNC
(
0
,
16
,
mmx
ext
);
VP8_MC_FUNC
(
1
,
8
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
0
,
16
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
1
,
8
,
mmx
ext
);
c
->
vp8_v_loop_filter_simple
=
ff_vp8_v_loop_filter_simple_mmx
ext
;
c
->
vp8_h_loop_filter_simple
=
ff_vp8_h_loop_filter_simple_mmx
ext
;
c
->
vp8_v_loop_filter16y_inner
=
ff_vp8_v_loop_filter16y_inner_mmx
ext
;
c
->
vp8_h_loop_filter16y_inner
=
ff_vp8_h_loop_filter16y_inner_mmx
ext
;
c
->
vp8_v_loop_filter8uv_inner
=
ff_vp8_v_loop_filter8uv_inner_mmx
ext
;
c
->
vp8_h_loop_filter8uv_inner
=
ff_vp8_h_loop_filter8uv_inner_mmx
ext
;
c
->
vp8_v_loop_filter16y
=
ff_vp8_v_loop_filter16y_mbedge_mmx
ext
;
c
->
vp8_h_loop_filter16y
=
ff_vp8_h_loop_filter16y_mbedge_mmx
ext
;
c
->
vp8_v_loop_filter8uv
=
ff_vp8_v_loop_filter8uv_mbedge_mmx
ext
;
c
->
vp8_h_loop_filter8uv
=
ff_vp8_h_loop_filter8uv_mbedge_mmx
ext
;
VP8_LUMA_MC_FUNC
(
0
,
16
,
mmx
2
);
VP8_MC_FUNC
(
1
,
8
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
0
,
16
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
1
,
8
,
mmx
2
);
c
->
vp8_v_loop_filter_simple
=
ff_vp8_v_loop_filter_simple_mmx
2
;
c
->
vp8_h_loop_filter_simple
=
ff_vp8_h_loop_filter_simple_mmx
2
;
c
->
vp8_v_loop_filter16y_inner
=
ff_vp8_v_loop_filter16y_inner_mmx
2
;
c
->
vp8_h_loop_filter16y_inner
=
ff_vp8_h_loop_filter16y_inner_mmx
2
;
c
->
vp8_v_loop_filter8uv_inner
=
ff_vp8_v_loop_filter8uv_inner_mmx
2
;
c
->
vp8_h_loop_filter8uv_inner
=
ff_vp8_h_loop_filter8uv_inner_mmx
2
;
c
->
vp8_v_loop_filter16y
=
ff_vp8_v_loop_filter16y_mbedge_mmx
2
;
c
->
vp8_h_loop_filter16y
=
ff_vp8_h_loop_filter16y_mbedge_mmx
2
;
c
->
vp8_v_loop_filter8uv
=
ff_vp8_v_loop_filter8uv_mbedge_mmx
2
;
c
->
vp8_h_loop_filter8uv
=
ff_vp8_h_loop_filter8uv_mbedge_mmx
2
;
#endif
}
...
...
libavcodec/x86/vp8dsp.asm
View file @
e25be471
...
...
@@ -173,8 +173,8 @@ SECTION .text
; int height, int mx, int my);
;-----------------------------------------------------------------------------
%macro
FILTER_SSSE3
3
cglobal
put_vp8_epel
%1
_h6
_ssse3
,
6
,
6
,
%2
%macro
FILTER_SSSE3
1
cglobal
put_vp8_epel
%1
_h6
,
6
,
6
,
8
lea
r5d
,
[
r5
*
3
]
mova
m3
,
[
filter_h6_shuf2
]
mova
m4
,
[
filter_h6_shuf3
]
...
...
@@ -189,7 +189,7 @@ cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2
movu
m0
,
[
r2
-
2
]
mova
m1
,
m0
mova
m2
,
m0
%if
idn
%1
,
4
%if
mmsize
==
8
; For epel4, we need 9 bytes, but only 8 get loaded; to compensate, do the
; shuffle with a memory operand
punpcklbw
m0
,
[
r2
+
3
]
...
...
@@ -215,7 +215,7 @@ cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2
jg
.
nextrow
REP_RET
cglobal
put_vp8_epel
%1
_h4
_ssse3
,
6
,
6
,
%3
cglobal
put_vp8_epel
%1
_h4
,
6
,
6
,
7
shl
r5d
,
4
mova
m2
,
[
pw_64
]
mova
m3
,
[
filter_h2_shuf
]
...
...
@@ -246,7 +246,7 @@ cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3
jg
.
nextrow
REP_RET
cglobal
put_vp8_epel
%1
_v4
_ssse3
,
7
,
7
,
%2
cglobal
put_vp8_epel
%1
_v4
,
7
,
7
,
8
shl
r6d
,
4
%ifdef
PIC
lea
r11
,
[
fourtap_filter_hb_m
]
...
...
@@ -285,7 +285,7 @@ cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2
jg
.
nextrow
REP_RET
cglobal
put_vp8_epel
%1
_v6
_ssse3
,
7
,
7
,
%2
cglobal
put_vp8_epel
%1
_v6
,
7
,
7
,
8
lea
r6d
,
[
r6
*
3
]
%ifdef
PIC
lea
r11
,
[
sixtap_filter_hb_m
]
...
...
@@ -333,13 +333,14 @@ cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2
REP_RET
%endmacro
INIT_MMX
FILTER_SSSE3
4
,
0
,
0
INIT_XMM
FILTER_SSSE3
8
,
8
,
7
INIT_MMX
ssse3
FILTER_SSSE3
4
INIT_XMM
ssse3
FILTER_SSSE3
8
; 4x4 block, H-only 4-tap filter
cglobal
put_vp8_epel4_h4_mmxext
,
6
,
6
INIT_MMX
mmx2
cglobal
put_vp8_epel4_h4
,
6
,
6
shl
r5d
,
4
%ifdef
PIC
lea
r11
,
[
fourtap_filter_hw_m
]
...
...
@@ -386,7 +387,8 @@ cglobal put_vp8_epel4_h4_mmxext, 6, 6
REP_RET
; 4x4 block, H-only 6-tap filter
cglobal
put_vp8_epel4_h6_mmxext
,
6
,
6
INIT_MMX
mmx2
cglobal
put_vp8_epel4_h6
,
6
,
6
lea
r5d
,
[
r5
*
3
]
%ifdef
PIC
lea
r11
,
[
sixtap_filter_hw_m
]
...
...
@@ -442,8 +444,8 @@ cglobal put_vp8_epel4_h6_mmxext, 6, 6
jg
.
nextrow
REP_RET
INIT_XMM
cglobal
put_vp8_epel8_h4
_sse2
,
6
,
6
,
10
INIT_XMM
sse2
cglobal
put_vp8_epel8_h4
,
6
,
6
,
10
shl
r5d
,
5
%ifdef
PIC
lea
r11
,
[
fourtap_filter_v_m
]
...
...
@@ -490,7 +492,8 @@ cglobal put_vp8_epel8_h4_sse2, 6, 6, 10
jg
.
nextrow
REP_RET
cglobal
put_vp8_epel8_h6_sse2
,
6
,
6
,
14
INIT_XMM
sse2
cglobal
put_vp8_epel8_h6
,
6
,
6
,
14
lea
r5d
,
[
r5
*
3
]
shl
r5d
,
4
%ifdef
PIC
...
...
@@ -552,9 +555,9 @@ cglobal put_vp8_epel8_h6_sse2, 6, 6, 14
jg
.
nextrow
REP_RET
%macro
FILTER_V
3
%macro
FILTER_V
1
; 4x4 block, V-only 4-tap filter
cglobal
put_vp8_epel
%
2
_v4_
%1
,
7
,
7
,
%3
cglobal
put_vp8_epel
%
1
_v4
,
7
,
7
,
8
shl
r6d
,
5
%ifdef
PIC
lea
r11
,
[
fourtap_filter_v_m
]
...
...
@@ -607,7 +610,7 @@ cglobal put_vp8_epel%2_v4_%1, 7, 7, %3
; 4x4 block, V-only 6-tap filter
cglobal
put_vp8_epel
%
2
_v6_
%1
,
7
,
7
,
%3
cglobal
put_vp8_epel
%
1
_v6
,
7
,
7
,
8
shl
r6d
,
4
lea
r6
,
[
r6
*
3
]
%ifdef
PIC
...
...
@@ -671,13 +674,13 @@ cglobal put_vp8_epel%2_v6_%1, 7, 7, %3
REP_RET
%endmacro
INIT_MMX
FILTER_V
mmxext
,
4
,
0
INIT_XMM
FILTER_V
sse2
,
8
,
8
INIT_MMX
mmx2
FILTER_V
4
INIT_XMM
sse2
FILTER_V
8
%macro
FILTER_BILINEAR
3
cglobal
put_vp8_bilinear
%
2
_v_
%1
,
7
,
7
,
%3
%macro
FILTER_BILINEAR
1
cglobal
put_vp8_bilinear
%
1
_v
,
7
,
7
,
7
mov
r5d
,
8
*
16
shl
r6d
,
4
sub
r5d
,
r6d
...
...
@@ -705,7 +708,7 @@ cglobal put_vp8_bilinear%2_v_%1, 7,7,%3
psraw
m2
,
2
pavgw
m0
,
m6
pavgw
m2
,
m6
%if
idn
%1
,
mmxext
%if
mmsize
==
8
packuswb
m0
,
m0
packuswb
m2
,
m2
movh
[
r0
+
r1
*
0
]
,
m0
...
...
@@ -722,7 +725,7 @@ cglobal put_vp8_bilinear%2_v_%1, 7,7,%3
jg
.
nextrow
REP_RET
cglobal
put_vp8_bilinear
%
2
_h_
%1
,
7
,
7
,
%3
cglobal
put_vp8_bilinear
%
1
_h
,
7
,
7
,
7
mov
r6d
,
8
*
16
shl
r5d
,
4
sub
r6d
,
r5d
...
...
@@ -751,7 +754,7 @@ cglobal put_vp8_bilinear%2_h_%1, 7,7,%3
psraw
m2
,
2
pavgw
m0
,
m6
pavgw
m2
,
m6
%if
idn
%1
,
mmxext
%if
mmsize
==
8
packuswb
m0
,
m0
packuswb
m2
,
m2
movh
[
r0
+
r1
*
0
]
,
m0
...
...
@@ -769,13 +772,13 @@ cglobal put_vp8_bilinear%2_h_%1, 7,7,%3
REP_RET
%endmacro
INIT_MMX
FILTER_BILINEAR
mmxext
,
4
,
0
INIT_XMM
FILTER_BILINEAR
sse2
,
8
,
7
INIT_MMX
mmx2
FILTER_BILINEAR
4
INIT_XMM
sse2
FILTER_BILINEAR
8
%macro
FILTER_BILINEAR_SSSE3
1
cglobal
put_vp8_bilinear
%1
_v
_ssse3
,
7
,
7
cglobal
put_vp8_bilinear
%1
_v
,
7
,
7
,
5
shl
r6d
,
4
%ifdef
PIC
lea
r11
,
[
bilinear_filter_vb_m
]
...
...
@@ -811,7 +814,7 @@ cglobal put_vp8_bilinear%1_v_ssse3, 7,7
jg
.
nextrow
REP_RET
cglobal
put_vp8_bilinear
%1
_h
_ssse3
,
7
,
7
cglobal
put_vp8_bilinear
%1
_h
,
7
,
7
,
5
shl
r5d
,
4
%ifdef
PIC
lea
r11
,
[
bilinear_filter_vb_m
]
...
...
@@ -848,12 +851,13 @@ cglobal put_vp8_bilinear%1_h_ssse3, 7,7
REP_RET
%endmacro
INIT_MMX
INIT_MMX
ssse3
FILTER_BILINEAR_SSSE3
4
INIT_XMM
INIT_XMM
ssse3
FILTER_BILINEAR_SSSE3
8
cglobal
put_vp8_pixels8_mmx
,
5
,
5
INIT_MMX
mmx
cglobal
put_vp8_pixels8
,
5
,
5
.
nextrow
:
movq
mm0
,
[
r2
+
r3
*
0
]
movq
mm1
,
[
r2
+
r3
*
1
]
...
...
@@ -866,7 +870,8 @@ cglobal put_vp8_pixels8_mmx, 5,5
REP_RET
%if
ARCH_X86_32
cglobal
put_vp8_pixels16_mmx
,
5
,
5
INIT_MMX
mmx
cglobal
put_vp8_pixels16
,
5
,
5
.
nextrow
:
movq
mm0
,
[
r2
+
r3
*
0
+
0
]
movq
mm1
,
[
r2
+
r3
*
0
+
8
]
...
...
@@ -883,7 +888,8 @@ cglobal put_vp8_pixels16_mmx, 5,5
REP_RET
%endif
cglobal
put_vp8_pixels16_sse
,
5
,
5
,
2
INIT_XMM
sse
cglobal
put_vp8_pixels16
,
5
,
5
,
2
.
nextrow
:
movups
xmm0
,
[
r2
+
r3
*
0
]
movups
xmm1
,
[
r2
+
r3
*
1
]
...
...
@@ -918,8 +924,8 @@ cglobal put_vp8_pixels16_sse, 5,5,2
%4
[
r1
+
r2
+
%3
]
,
m5
%endmacro
INIT_MMX
cglobal
vp8_idct_dc_add
_mmx
,
3
,
3
INIT_MMX
mmx
cglobal
vp8_idct_dc_add
,
3
,
3
; load data
movd
m0
,
[r1]
...
...
@@ -941,8 +947,8 @@ cglobal vp8_idct_dc_add_mmx, 3, 3
ADD_DC
m0
,
m1
,
0
,
movh
RET
INIT_XMM
cglobal
vp8_idct_dc_add
_sse4
,
3
,
3
,
6
INIT_XMM
sse4
cglobal
vp8_idct_dc_add
,
3
,
3
,
6
; load data
movd
m0
,
[r1]
pxor
m1
,
m1
...
...
@@ -976,8 +982,8 @@ cglobal vp8_idct_dc_add_sse4, 3, 3, 6
;-----------------------------------------------------------------------------
%if
ARCH_X86_32
INIT_MMX
cglobal
vp8_idct_dc_add4y
_mmx
,
3
,
3
INIT_MMX
mmx
cglobal
vp8_idct_dc_add4y
,
3
,
3
; load data
movd
m0
,
[
r1
+
32
*
0
]
; A
movd
m1
,
[
r1
+
32
*
2
]
; C
...
...
@@ -1012,8 +1018,8 @@ cglobal vp8_idct_dc_add4y_mmx, 3, 3
RET
%endif
INIT_XMM
cglobal
vp8_idct_dc_add4y
_sse2
,
3
,
3
,
6
INIT_XMM
sse2
cglobal
vp8_idct_dc_add4y
,
3
,
3
,
6
; load data
movd
m0
,
[
r1
+
32
*
0
]
; A
movd
m1
,
[
r1
+
32
*
2
]
; C
...
...
@@ -1046,8 +1052,8 @@ cglobal vp8_idct_dc_add4y_sse2, 3, 3, 6
; void vp8_idct_dc_add4uv_<opt>(uint8_t *dst, DCTELEM block[4][16], int stride);
;-----------------------------------------------------------------------------
INIT_MMX
cglobal
vp8_idct_dc_add4uv
_mmx
,
3
,
3
INIT_MMX
mmx
cglobal
vp8_idct_dc_add4uv
,
3
,
3
; load data
movd
m0
,
[
r1
+
32
*
0
]
; A
movd
m1
,
[
r1
+
32
*
2
]
; C
...
...
@@ -1118,9 +1124,8 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3
SWAP
%4
,
%3
%endmacro
INIT_MMX
%macro
VP8_IDCT_ADD
1
cglobal
vp8_idct_add_
%1
,
3
,
3
%macro
VP8_IDCT_ADD
0
cglobal
vp8_idct_add
,
3
,
3
; load block data
movq
m0
,
[
r1
+
0
]
movq
m1
,
[
r1
+
8
]
...
...
@@ -1128,7 +1133,7 @@ cglobal vp8_idct_add_%1, 3, 3
movq
m3
,
[
r1
+
24
]
movq
m6
,
[
pw_20091
]
movq
m7
,
[
pw_17734
]
%if
idn
%1
,
sse
%if
cpuflag
(
sse
)
xorps
xmm0
,
xmm0
movaps
[
r1
+
0
]
,
xmm0
movaps
[
r1
+
16
]
,
xmm0
...
...
@@ -1157,9 +1162,11 @@ cglobal vp8_idct_add_%1, 3, 3
%endmacro
%if
ARCH_X86_32
VP8_IDCT_ADD
mmx
INIT_MMX
mmx
VP8_IDCT_ADD
%endif
VP8_IDCT_ADD
sse
INIT_MMX
sse
VP8_IDCT_ADD
;-----------------------------------------------------------------------------
; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
...
...
@@ -1192,13 +1199,13 @@ VP8_IDCT_ADD sse
SWAP
%1
,
%4
,
%3
%endmacro
%macro
VP8_DC_WHT
1
cglobal
vp8_luma_dc_wht
_
%1
,
2
,
3
%macro
VP8_DC_WHT
0
cglobal
vp8_luma_dc_wht
,
2
,
3
movq
m0
,
[r1]
movq
m1
,
[
r1
+
8
]
movq
m2
,
[
r1
+
16
]
movq
m3
,
[
r1
+
24
]
%if
idn
%1
,
sse
%if
cpuflag
(
sse
)
xorps
xmm0
,
xmm0
movaps
[
r1
+
0
]
,
xmm0
movaps
[
r1
+
16
]
,
xmm0
...
...
@@ -1222,11 +1229,12 @@ cglobal vp8_luma_dc_wht_%1, 2,3
RET
%endmacro
INIT_MMX
%if
ARCH_X86_32
VP8_DC_WHT
mmx
INIT_MMX
mmx
VP8_DC_WHT
%endif
VP8_DC_WHT
sse
INIT_MMX
sse
VP8_DC_WHT
;-----------------------------------------------------------------------------
; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment