Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
15ce1601
Commit
15ce1601
authored
Mar 10, 2015
by
Christophe Gisquet
Committed by
Michael Niedermayer
Mar 14, 2015
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: xvid_idct: SSE2 merged add version
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
decd5193
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
91 additions
and
10 deletions
+91
-10
xvididct.asm
libavcodec/x86/xvididct.asm
+89
-3
xvididct_init.c
libavcodec/x86/xvididct_init.c
+2
-7
No files found.
libavcodec/x86/xvididct.asm
View file @
15ce1601
...
@@ -384,6 +384,12 @@ SECTION .text
...
@@ -384,6 +384,12 @@ SECTION .text
; Must now load args as gprs are no longer used for masks
; Must now load args as gprs are no longer used for masks
; DEST is set to where address of dest was loaded
; DEST is set to where address of dest was loaded
%
if
ARCH_X86_32
%
if
ARCH_X86_32
%
if
%2
==
2
; Not enough xmms, store
movdqa
[
%1
+
1
*
16
]
,
TAN3
movdqa
[
%1
+
2
*
16
]
,
xmm3
movdqa
[
%1
+
5
*
16
]
,
REG0
movdqa
[
%1
+
6
*
16
]
,
xmm5
%
endif
%
xdefine
DEST
r2q
; BLOCK is r0, stride r1
%
xdefine
DEST
r2q
; BLOCK is r0, stride r1
movifnidn
DEST
,
destm
movifnidn
DEST
,
destm
movifnidn
strideq
,
stridem
movifnidn
strideq
,
stridem
...
@@ -397,8 +403,6 @@ SECTION .text
...
@@ -397,8 +403,6 @@ SECTION .text
movq
[
DEST
+
strideq
]
,
TAN3
movq
[
DEST
+
strideq
]
,
TAN3
movhps
[
DEST
+
2
*
strideq
]
,
TAN3
movhps
[
DEST
+
2
*
strideq
]
,
TAN3
; REG0 and TAN3 are now available (and likely used in second half)
; REG0 and TAN3 are now available (and likely used in second half)
%
else
%
warning
Unimplemented
%
endif
%
endif
%endif
%endif
%endmacro
%endmacro
...
@@ -427,7 +431,88 @@ SECTION .text
...
@@ -427,7 +431,88 @@ SECTION .text
movq
[
DEST
+
2
*
strideq
]
,
xmm5
movq
[
DEST
+
2
*
strideq
]
,
xmm5
movhps
[
DEST
+
strideq
]
,
xmm5
movhps
[
DEST
+
strideq
]
,
xmm5
%elif
%2
==
2
%elif
%2
==
2
%warning
Unimplemented
pxor
xmm0
,
xmm0
%
if
ARCH_X86_32
; free: m3 REG0=m4 m5
; input: m1, m7, m2, m6
movq
xmm3
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
punpcklbw
xmm3
,
xmm0
punpcklbw
xmm4
,
xmm0
paddsw
xmm3
,
%3
paddsw
xmm4
,
[
%1
+
1
*
16
]
movq
%3
,
[
DEST
+
2
*
strideq
]
movq
xmm5
,
[
DEST
+
r3q
]
punpcklbw
%3
,
xmm0
punpcklbw
xmm5
,
xmm0
paddsw
%3
,
[
%1
+
2
*
16
]
paddsw
xmm5
,
%5
packuswb
xmm3
,
xmm4
packuswb
%3
,
xmm5
movq
[
DEST
+
0
*
strideq
]
,
xmm3
movhps
[
DEST
+
1
*
strideq
]
,
xmm3
movq
[
DEST
+
2
*
strideq
]
,
%3
movhps
[
DEST
+
r3q
]
,
%3
lea
DEST
,
[
DEST
+
4
*
strideq
]
movq
xmm3
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
%3
,
[
DEST
+
2
*
strideq
]
movq
xmm5
,
[
DEST
+
r3q
]
punpcklbw
xmm3
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
%3
,
xmm0
punpcklbw
xmm5
,
xmm0
paddsw
xmm3
,
%6
paddsw
xmm4
,
[
%1
+
5
*
16
]
paddsw
%3
,
[
%1
+
6
*
16
]
paddsw
xmm5
,
%4
packuswb
xmm3
,
xmm4
packuswb
%3
,
xmm5
movq
[
DEST
+
0
*
strideq
]
,
xmm3
movhps
[
DEST
+
1
*
strideq
]
,
xmm3
movq
[
DEST
+
2
*
strideq
]
,
%3
movhps
[
DEST
+
r3q
]
,
%3
%
else
; l1:TAN3=m13 l2:m3 l5:REG0=m8 l6=m5
; input: m1, m7/SREG2=m9, TAN1=m14, REG4=m10
movq
xmm2
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
xmm12
,
[
DEST
+
2
*
strideq
]
movq
xmm11
,
[
DEST
+
r3q
]
punpcklbw
xmm2
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
xmm12
,
xmm0
punpcklbw
xmm11
,
xmm0
paddsw
xmm2
,
%3
paddsw
xmm4
,
TAN3
paddsw
xmm12
,
xmm3
paddsw
xmm11
,
%5
packuswb
xmm2
,
xmm4
packuswb
xmm12
,
xmm11
movq
[
DEST
+
0
*
strideq
]
,
xmm2
movhps
[
DEST
+
1
*
strideq
]
,
xmm2
movq
[
DEST
+
2
*
strideq
]
,
xmm12
movhps
[
DEST
+
r3q
]
,
xmm12
lea
DEST
,
[
DEST
+
4
*
strideq
]
movq
xmm2
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
xmm12
,
[
DEST
+
2
*
strideq
]
movq
xmm11
,
[
DEST
+
r3q
]
punpcklbw
xmm2
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
xmm12
,
xmm0
punpcklbw
xmm11
,
xmm0
paddsw
xmm2
,
%6
paddsw
xmm4
,
REG0
paddsw
xmm12
,
xmm5
paddsw
xmm11
,
%4
packuswb
xmm2
,
xmm4
packuswb
xmm12
,
xmm11
movq
[
DEST
+
0
*
strideq
]
,
xmm2
movhps
[
DEST
+
1
*
strideq
]
,
xmm2
movq
[
DEST
+
2
*
strideq
]
,
xmm12
movhps
[
DEST
+
r3q
]
,
xmm12
%
endif
%endif
%endif
%endmacro
%endmacro
...
@@ -623,6 +708,7 @@ cglobal xvid_idct_add, 0, NUM_GPRS, 8+7*ARCH_X86_64, dest, stride, block
...
@@ -623,6 +708,7 @@ cglobal xvid_idct_add, 0, NUM_GPRS, 8+7*ARCH_X86_64, dest, stride, block
INIT_XMM
sse2
INIT_XMM
sse2
IDCT_SSE2
0
IDCT_SSE2
0
IDCT_SSE2
1
IDCT_SSE2
1
IDCT_SSE2
2
%if
ARCH_X86_32
%if
ARCH_X86_32
...
...
libavcodec/x86/xvididct_init.c
View file @
15ce1601
...
@@ -27,12 +27,7 @@
...
@@ -27,12 +27,7 @@
#include "xvididct.h"
#include "xvididct.h"
void
ff_xvid_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
void
ff_xvid_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
void
ff_xvid_idct_add_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
static
void
xvid_idct_sse2_add
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
{
ff_xvid_idct_sse2
(
block
);
ff_add_pixels_clamped
(
block
,
dest
,
line_size
);
}
#if ARCH_X86_32
#if ARCH_X86_32
static
void
xvid_idct_mmx_put
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
static
void
xvid_idct_mmx_put
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
...
@@ -88,7 +83,7 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
...
@@ -88,7 +83,7 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
idct_put
=
ff_xvid_idct_put_sse2
;
c
->
idct_put
=
ff_xvid_idct_put_sse2
;
c
->
idct_add
=
xvid_idct_sse2_add
;
c
->
idct_add
=
ff_xvid_idct_add_sse2
;
c
->
idct
=
ff_xvid_idct_sse2
;
c
->
idct
=
ff_xvid_idct_sse2
;
c
->
perm_type
=
FF_IDCT_PERM_SSE2
;
c
->
perm_type
=
FF_IDCT_PERM_SSE2
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment