Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
1dae7ffa
Commit
1dae7ffa
authored
Nov 28, 2016
by
James Darnley
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/h264: mmx 4:2:2 idct add8 function
2.87 times faster (1830 vs. 638 cycles)
parent
815ea8c6
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
1 deletion
+38
-1
h264_idct.asm
libavcodec/x86/h264_idct.asm
+32
-0
h264dsp_init.c
libavcodec/x86/h264dsp_init.c
+6
-1
No files found.
libavcodec/x86/h264_idct.asm
View file @
1dae7ffa
...
...
@@ -697,6 +697,38 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride,
call
h264_idct_add8_mmx_plane
RET
cglobal
h264_idct_add8_422_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
movsxdifnidn
r3
,
r3d
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
%endif
%if
ARCH_X86_64
mov
dst2q
,
r0
%endif
mov
r5
,
16
; i
add
r2
,
512
; i * 16 * sizeof(dctcoef) ; #define dctcoef int16_t
call
h264_idct_add8_mmx_plane
add
r5
,
4
call
h264_idct_add8_mmx_plane
%if
ARCH_X86_64
add
dst2q
,
gprsize
; dest[1]
%else
add
r0mp
,
gprsize
%endif
add
r5
,
4
; set to 32
add
r2
,
256
; set to i * 16 * sizeof(dctcoef)
call
h264_idct_add8_mmx_plane
add
r5
,
4
call
h264_idct_add8_mmx_plane
RET
h264_idct_add8_mmxext_plane
:
movsxdifnidn
r3
,
r3d
.
nextblock
:
...
...
libavcodec/x86/h264dsp_init.c
View file @
1dae7ffa
...
...
@@ -78,6 +78,8 @@ IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2
(,
8
,
10
,
sse2
)
IDCT_ADD_REP_FUNC2
(,
8
,
10
,
avx
)
IDCT_ADD_REP_FUNC2
(,
8
_422
,
8
,
mmx
)
void
ff_h264_luma_dc_dequant_idct_mmx
(
int16_t
*
output
,
int16_t
*
input
,
int
qmul
);
void
ff_h264_luma_dc_dequant_idct_sse2
(
int16_t
*
output
,
int16_t
*
input
,
int
qmul
);
...
...
@@ -228,8 +230,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c
->
h264_idct_add16
=
ff_h264_idct_add16_8_mmx
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_8_mmx
;
if
(
chroma_format_idc
<=
1
)
if
(
chroma_format_idc
<=
1
)
{
c
->
h264_idct_add8
=
ff_h264_idct_add8_8_mmx
;
}
else
{
c
->
h264_idct_add8
=
ff_h264_idct_add8_422_8_mmx
;
}
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_8_mmx
;
if
(
cpu_flags
&
AV_CPU_FLAG_CMOV
)
c
->
h264_luma_dc_dequant_idct
=
ff_h264_luma_dc_dequant_idct_mmx
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment