Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
2dd2f716
Commit
2dd2f716
authored
Jun 29, 2010
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MMX idct_add for VP8.
Originally committed as revision 23886 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
29e71937
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
106 additions
and
0 deletions
+106
-0
vp8dsp-init.c
libavcodec/x86/vp8dsp-init.c
+2
-0
vp8dsp.asm
libavcodec/x86/vp8dsp.asm
+89
-0
x86util.asm
libavcodec/x86/x86util.asm
+15
-0
No files found.
libavcodec/x86/vp8dsp-init.c
View file @
2dd2f716
...
...
@@ -196,6 +196,7 @@ HVBILIN(ssse3, 8, 16, 16)
/* Prototypes for VP8 inverse-transform routines that are not defined in this
 * C file.  ff_vp8_idct_add_mmx is the one installed as c->vp8_idct_add in
 * ff_vp8dsp_init_x86() when mm_flags has FF_MM_MMX set.
 * NOTE(review): the bodies presumably live in libavcodec/x86/vp8dsp.asm
 * (cglobal names without the ff_ prefix) - confirm the symbol prefixing. */
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
#endif
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
...
...
@@ -229,6 +230,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
#if HAVE_YASM
if
(
mm_flags
&
FF_MM_MMX
)
{
c
->
vp8_idct_dc_add
=
ff_vp8_idct_dc_add_mmx
;
c
->
vp8_idct_add
=
ff_vp8_idct_add_mmx
;
c
->
put_vp8_epel_pixels_tab
[
0
][
0
][
0
]
=
c
->
put_vp8_bilinear_pixels_tab
[
0
][
0
][
0
]
=
ff_put_vp8_pixels16_mmx
;
c
->
put_vp8_epel_pixels_tab
[
1
][
0
][
0
]
=
...
...
libavcodec/x86/vp8dsp.asm
View file @
2dd2f716
...
...
@@ -142,6 +142,9 @@ filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9
filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11

; IDCT multiplier constants, each replicated into 4 words.
; vp8_idct_add_mmx loads pw_20091 into m6 and pw_17734 into m7 before using
; VP8_MULTIPLY_SUMSUB.
; 17734 is 35468/2: VP8_MULTIPLY_SUMSUB doubles its input (psllw 1) before the
; pmulhw by this constant, presumably because 35468 itself does not fit in the
; signed 16-bit words that pmulhw multiplies.
pw_20091: times 4 dw 20091
pw_17734: times 4 dw 17734

; word constants defined outside this file
cextern pw_3
cextern pw_4
cextern pw_64
...
...
@@ -923,6 +926,92 @@ cglobal vp8_idct_dc_add_sse4, 3, 3, 6
pextrd
[
r1
+
r2
]
,
xmm2
,
3
RET
;-----------------------------------------------------------------------------
; void vp8_idct_add_<opt>(uint8_t *dst, DCTELEM block[16], int stride);
;-----------------------------------------------------------------------------
; SUMSUB a, b, tmp
; on exit: %1 = %1 + %2
;          %2 = %2 - (value %1 had on entry)
;          %3 = value %1 had on entry (temp register, clobbered)
; NOTE(review): nothing in the visible part of this file invokes SUMSUB (the
; IDCT macro below calls SUMSUB_BA) - confirm whether it is still needed.
%macro SUMSUB 3
    mova    %3, %1      ; keep the original %1, it is overwritten by the sum
    paddw   %1, %2
    psubw   %2, %3
%endmacro
; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2)
; (both right-hand sides use the values %1/%2 had on entry)
; %3/%4 are temporary registers and are clobbered
; this macro assumes that m6/m7 have words for 20091/17734 loaded
;
; as implemented here:
;   mul_20091(x) = x + high word of (x * 20091)
;   mul_35468(x) = high word of ((x << 1) * 17734)
; the operations on the two inputs are interleaved pairwise
%macro VP8_MULTIPLY_SUMSUB 4
    mova    %3, %1
    mova    %4, %2
    pmulhw  %3, m6      ; high word of %1 * 20091
    pmulhw  %4, m6      ; high word of %2 * 20091
    paddw   %3, %1      ; %3 = mul_20091(%1)
    paddw   %4, %2      ; %4 = mul_20091(%2)
    psllw   %1, 1       ; double the inputs so that the halved constant in m7
    psllw   %2, 1       ; gives a multiplication by 35468
    pmulhw  %1, m7      ; %1 = mul_35468(%1)
    pmulhw  %2, m7      ; %2 = mul_35468(%2)
    psubw   %1, %4      ; %1 = mul_35468(%1) - mul_20091(%2)
    paddw   %2, %3      ; %2 = mul_35468(%2) + mul_20091(%1)
%endmacro
; one 1-D pass of the 4x4 VP8 IDCT over four registers
; arguments are register numbers (used as m%N), not register names
; calculate x0=%1+%3; x1=%1-%3
;           x2=mul_35468(%2)-mul_20091(%4); x3=mul_20091(%2)+mul_35468(%4)
;           %1=x0+x3 (tmp0); %2=x1+x2 (tmp1); %3=x1-x2 (tmp2); %4=x0-x3 (tmp3)
; %5/%6 are temporary registers
; we assume m6/m7 have constant words 20091/17734 loaded in them
; NOTE(review): SUMSUB_BA and SWAP are not defined in the visible part of this
; file; the comments below assume SUMSUB_BA a, b, tmp leaves a+b in a and b-a
; in b, and that SWAP exchanges which physical register the two numbers name.
%macro VP8_IDCT_TRANSFORM4x4_1D 6
    SUMSUB_BA           m%3, m%1, m%5       ; t0, t1
    VP8_MULTIPLY_SUMSUB m%2, m%4, m%5, m%6  ; t2, t3
    SUMSUB_BA           m%4, m%3, m%5       ; tmp0, tmp3
    SUMSUB_BA           m%2, m%1, m%5       ; tmp1, tmp2
    ; results currently sit in m%4/m%2/m%1/m%3 = tmp0/tmp1/tmp2/tmp3;
    ; rename so that %1..%4 refer to tmp0..tmp3 as documented above
    SWAP                %4, %1
    SWAP                %4, %3
%endmacro
; transpose a 4x4 table of words held in m%1..m%4, using m%5 as scratch
; arguments are register numbers (used as m%N), not register names
; NOTE(review): vp8_idct_add_mmx in this file calls TRANSPOSE4x4W, not this
; macro - confirm whether TRANSPOSE4x4 is still used anywhere.
%macro TRANSPOSE4x4 5 ; output in %1/%4/%5/%3
    ; interleave words of rows 0/1 and of rows 2/3
    mova        m%5, m%1
    punpcklwd   m%1, m%2
    punpckhwd   m%5, m%2
    mova        m%2, m%3
    punpcklwd   m%3, m%4
    punpckhwd   m%2, m%4
    ; interleave dwords of the two halves to form the columns
    mova        m%4, m%1
    punpckldq   m%1, m%3    ; col0
    punpckhdq   m%4, m%3    ; col1
    mova        m%3, m%5
    punpckldq   m%5, m%2    ; col2
    punpckhdq   m%3, m%2    ; col3
    ; columns are now in %1/%4/%5/%3; the SWAPs below are presumably register
    ; renames that make %1..%4 refer to col0..col3 (leaving %5 as scratch)
    SWAP        %4, %2
    SWAP        %4, %5
    SWAP        %4, %3
%endmacro
; MMX version of vp8_idct_add: runs two 1-D IDCT passes (with a transpose after
; each) over the 4x4 coefficient block and adds the result, shifted right by 3,
; to the 4 rows of pixels starting at dst.
; r0 = dst, r1 = block, r2 = stride (argument order of the prototype in the
; section header comment); r1 is reused as dst+2*stride once the block is loaded
; m0-m3 = data rows, m4/m5 = scratch (m4 = zero while storing),
; m6/m7 = 20091/17734 constants during the IDCT, scratch while storing
; NOTE(review): stride is declared as int in the C prototype but r2 is used
; directly in address arithmetic - confirm the upper half of the register is
; well-defined on 64-bit targets.
INIT_MMX
cglobal vp8_idct_add_mmx, 3, 3
    ; load block data
    ; four loads 8 bytes apart (presumably 4 16-bit coefficients per row)
    movq    m0, [r1]
    movq    m1, [r1+8]
    movq    m2, [r1+16]
    movq    m3, [r1+24]
    ; constants expected in m6/m7 by VP8_IDCT_TRANSFORM4x4_1D
    movq    m6, [pw_20091]
    movq    m7, [pw_17734]

    ; actual IDCT
    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
    TRANSPOSE4x4W            0, 1, 2, 3, 4
    ; add 4 to the first row before the second pass; together with the
    ; shift by 3 in STORE_DIFFx2 this presumably implements (x + 4) >> 3
    paddw   m0, [pw_4]
    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
    TRANSPOSE4x4W            0, 1, 2, 3, 4

    ; store
    pxor    m4, m4                  ; zero register for STORE_DIFFx2
    lea     r1, [r0+2*r2]           ; r1 = dst + 2*stride (rows 2 and 3)
    ; m6/m7 no longer hold the constants; they serve as STORE_DIFFx2 scratch
    STORE_DIFFx2 m0, m1, m6, m7, m4, 3, r0, r2
    STORE_DIFFx2 m2, m3, m6, m7, m4, 3, r1, r2

    RET
;-----------------------------------------------------------------------------
; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
;-----------------------------------------------------------------------------
...
...
libavcodec/x86/x86util.asm
View file @
2dd2f716
...
...
@@ -365,3 +365,18 @@
packuswb
%1
,
%1
movh
%4
,
%1
%endmacro
; STORE_DIFFx2: add two rows of word residuals to two rows of pixels in memory
;   %1, %2 = residual rows (shifted in place, i.e. modified)
;   %3, %4 = scratch registers
;   %5     = register that must contain zero
;   %6     = arithmetic right shift applied to the residuals before adding
;   %7     = address of the first pixel row
;   %8     = stride; the second pixel row is at %7+%8
; the operations on the two rows are interleaved pairwise
%macro STORE_DIFFx2 8 ; add1, add2, reg1, reg2, zero, shift, source, stride
    movh       %3, [%7]         ; load pixels of both rows
    movh       %4, [%7+%8]
    punpcklbw  %3, %5           ; widen bytes to words (high bytes from zero)
    punpcklbw  %4, %5
    psraw      %1, %6           ; scale residuals
    psraw      %2, %6
    paddw      %3, %1           ; pixel + residual
    paddw      %4, %2
    packuswb   %3, %5           ; back to bytes with unsigned saturation
    packuswb   %4, %5
    movh       [%7], %3         ; write both rows back
    movh       [%7+%8], %4
%endmacro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment