Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
548a1c8a
Commit
548a1c8a
authored
Mar 07, 2006
by
Loren Merritt
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
h264_idct8_add_mmx
Originally committed as revision 5123 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
6da971f1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
160 additions
and
6 deletions
+160
-6
h264.c
libavcodec/h264.c
+31
-6
dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+2
-0
h264dsp_mmx.c
libavcodec/i386/h264dsp_mmx.c
+127
-0
No files found.
libavcodec/h264.c
View file @
548a1c8a
...
...
@@ -358,8 +358,12 @@ typedef struct H264Context{
uint8_t
zigzag_scan
[
16
];
uint8_t
field_scan
[
16
];
uint8_t
zigzag_scan8x8
[
64
];
uint8_t
zigzag_scan8x8_cavlc
[
64
];
const
uint8_t
*
zigzag_scan_q0
;
const
uint8_t
*
field_scan_q0
;
const
uint8_t
*
zigzag_scan8x8_q0
;
const
uint8_t
*
zigzag_scan8x8_cavlc_q0
;
int
x264_build
;
}
H264Context
;
...
...
@@ -2953,6 +2957,7 @@ static void free_tables(H264Context *h){
static
void
init_dequant8_coeff_table
(
H264Context
*
h
){
int
i
,
q
,
x
;
const
int
transpose
=
(
h
->
s
.
dsp
.
h264_idct8_add
!=
ff_h264_idct8_add_c
);
//FIXME ugly
h
->
dequant8_coeff
[
0
]
=
h
->
dequant8_buffer
[
0
];
h
->
dequant8_coeff
[
1
]
=
h
->
dequant8_buffer
[
1
];
...
...
@@ -2966,8 +2971,9 @@ static void init_dequant8_coeff_table(H264Context *h){
int
shift
=
div6
[
q
];
int
idx
=
rem6
[
q
];
for
(
x
=
0
;
x
<
64
;
x
++
)
h
->
dequant8_coeff
[
i
][
q
][
x
]
=
((
uint32_t
)
dequant8_coeff_init
[
idx
][
dequant8_coeff_init_scan
[((
x
>>
1
)
&
12
)
|
(
x
&
3
)]
]
*
h
->
pps
.
scaling_matrix8
[
i
][
x
])
<<
shift
;
h
->
dequant8_coeff
[
i
][
q
][
transpose
?
(
x
>>
3
)
|
((
x
&
7
)
<<
3
)
:
x
]
=
((
uint32_t
)
dequant8_coeff_init
[
idx
][
dequant8_coeff_init_scan
[((
x
>>
1
)
&
12
)
|
(
x
&
3
)]
]
*
h
->
pps
.
scaling_matrix8
[
i
][
x
])
<<
shift
;
}
}
}
...
...
@@ -4317,14 +4323,31 @@ static int decode_slice_header(H264Context *h){
#define T(x) (x>>2) | ((x<<2) & 0xF)
h
->
zigzag_scan
[
i
]
=
T
(
zigzag_scan
[
i
]);
h
->
field_scan
[
i
]
=
T
(
field_scan
[
i
]);
#undef T
}
}
if
(
s
->
dsp
.
h264_idct8_add
==
ff_h264_idct8_add_c
){
memcpy
(
h
->
zigzag_scan8x8
,
zigzag_scan8x8
,
64
*
sizeof
(
uint8_t
));
memcpy
(
h
->
zigzag_scan8x8_cavlc
,
zigzag_scan8x8_cavlc
,
64
*
sizeof
(
uint8_t
));
}
else
{
int
i
;
for
(
i
=
0
;
i
<
64
;
i
++
){
#define T(x) (x>>3) | ((x&7)<<3)
h
->
zigzag_scan8x8
[
i
]
=
T
(
zigzag_scan8x8
[
i
]);
h
->
zigzag_scan8x8_cavlc
[
i
]
=
T
(
zigzag_scan8x8_cavlc
[
i
]);
#undef T
}
}
if
(
h
->
sps
.
transform_bypass
){
//FIXME same ugly
h
->
zigzag_scan_q0
=
zigzag_scan
;
h
->
field_scan_q0
=
field_scan
;
h
->
zigzag_scan8x8_q0
=
zigzag_scan8x8
;
h
->
zigzag_scan8x8_cavlc_q0
=
zigzag_scan8x8_cavlc
;
}
else
{
h
->
zigzag_scan_q0
=
h
->
zigzag_scan
;
h
->
field_scan_q0
=
h
->
field_scan
;
h
->
zigzag_scan8x8_q0
=
h
->
zigzag_scan8x8
;
h
->
zigzag_scan8x8_cavlc_q0
=
h
->
zigzag_scan8x8_cavlc
;
}
alloc_tables
(
h
);
...
...
@@ -5101,7 +5124,7 @@ decode_intra_mb:
int
i8x8
,
i4x4
,
chroma_idx
;
int
chroma_qp
,
dquant
;
GetBitContext
*
gb
=
IS_INTRA
(
mb_type
)
?
h
->
intra_gb_ptr
:
h
->
inter_gb_ptr
;
const
uint8_t
*
scan
,
*
dc_scan
;
const
uint8_t
*
scan
,
*
scan8x8
,
*
dc_scan
;
// fill_non_zero_count_cache(h);
...
...
@@ -5112,6 +5135,7 @@ decode_intra_mb:
scan
=
s
->
qscale
?
h
->
zigzag_scan
:
h
->
zigzag_scan_q0
;
dc_scan
=
luma_dc_zigzag_scan
;
}
scan8x8
=
s
->
qscale
?
h
->
zigzag_scan8x8_cavlc
:
h
->
zigzag_scan8x8_cavlc_q0
;
dquant
=
get_se_golomb
(
&
s
->
gb
);
...
...
@@ -5153,7 +5177,7 @@ decode_intra_mb:
DCTELEM
*
buf
=
&
h
->
mb
[
64
*
i8x8
];
uint8_t
*
nnz
;
for
(
i4x4
=
0
;
i4x4
<
4
;
i4x4
++
){
if
(
decode_residual
(
h
,
gb
,
buf
,
i4x4
+
4
*
i8x8
,
zigzag_scan8x8_cavlc
+
16
*
i4x4
,
if
(
decode_residual
(
h
,
gb
,
buf
,
i4x4
+
4
*
i8x8
,
scan8x8
+
16
*
i4x4
,
h
->
dequant8_coeff
[
IS_INTRA
(
mb_type
)
?
0
:
1
][
s
->
qscale
],
16
)
<
0
)
return
-
1
;
}
...
...
@@ -6144,7 +6168,7 @@ decode_intra_mb:
s
->
current_picture
.
mb_type
[
mb_xy
]
=
mb_type
;
if
(
cbp
||
IS_INTRA16x16
(
mb_type
)
)
{
const
uint8_t
*
scan
,
*
dc_scan
;
const
uint8_t
*
scan
,
*
scan8x8
,
*
dc_scan
;
int
dqp
;
if
(
IS_INTERLACED
(
mb_type
)){
...
...
@@ -6154,6 +6178,7 @@ decode_intra_mb:
scan
=
s
->
qscale
?
h
->
zigzag_scan
:
h
->
zigzag_scan_q0
;
dc_scan
=
luma_dc_zigzag_scan
;
}
scan8x8
=
s
->
qscale
?
h
->
zigzag_scan8x8
:
h
->
zigzag_scan8x8_q0
;
h
->
last_qscale_diff
=
dqp
=
decode_cabac_mb_dqp
(
h
);
if
(
dqp
==
INT_MIN
){
...
...
@@ -6187,7 +6212,7 @@ decode_intra_mb:
if
(
cbp
&
(
1
<<
i8x8
)
)
{
if
(
IS_8x8DCT
(
mb_type
)
)
{
if
(
decode_cabac_residual
(
h
,
h
->
mb
+
64
*
i8x8
,
5
,
4
*
i8x8
,
zigzag_
scan8x8
,
h
->
dequant8_coeff
[
IS_INTRA
(
mb_type
)
?
0
:
1
][
s
->
qscale
],
64
)
<
0
)
scan8x8
,
h
->
dequant8_coeff
[
IS_INTRA
(
mb_type
)
?
0
:
1
][
s
->
qscale
],
64
)
<
0
)
return
-
1
;
}
else
for
(
i4x4
=
0
;
i4x4
<
4
;
i4x4
++
)
{
...
...
libavcodec/i386/dsputil_mmx.c
View file @
548a1c8a
...
...
@@ -2734,6 +2734,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
h264_idct_dc_add
=
c
->
h264_idct_add
=
ff_h264_idct_add_mmx
;
c
->
h264_idct8_dc_add
=
c
->
h264_idct8_add
=
ff_h264_idct8_add_mmx
;
if
(
mm_flags
&
MM_MMXEXT
)
{
c
->
put_pixels_tab
[
0
][
1
]
=
put_pixels16_x2_mmx2
;
...
...
libavcodec/i386/h264dsp_mmx.c
View file @
548a1c8a
...
...
@@ -104,6 +104,133 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
);
}
/*
 * One 1-D pass of the 8-point H.264 inverse transform (going by the name),
 * written as a single MMX inline-asm statement.
 *
 * block - base address used for all loads. Eight 64-bit words are read at
 *         byte offsets 0, 16, 32, ..., 112 from it; each movq carries four
 *         16-bit lanes, so four independent transforms run side by side.
 *
 * Load order: offsets 112, 80, 48, 16 first, then 32 and 96, and finally
 * 0 and 64. All arithmetic is per-lane 16-bit (psraw / paddw / psubw),
 * followed by seven SUMSUB_BA steps.
 *
 * Nothing is stored back to memory and the asm declares no outputs and no
 * clobbers: the results are left in %mm0..%mm7 and are consumed by the asm
 * statement the caller issues right after this function.
 *
 * NOTE(review): SUMSUB_BA is a macro defined outside this view; presumably
 * a sum/difference butterfly on its two register operands - confirm.
 * NOTE(review): the asm reads memory through %0 but lists neither an "m"
 * input nor a "memory" clobber, and it relies on the compiler leaving the
 * MMX registers untouched between asm statements - confirm this is safe
 * with the compilers in use.
 */
static
inline
void
h264_idct8_1d
(
int16_t
*
block
)
{
asm
volatile
(
"movq 112(%0), %%mm7
\n\t
"
"movq 80(%0), %%mm5
\n\t
"
"movq 48(%0), %%mm3
\n\t
"
"movq 16(%0), %%mm1
\n\t
"
"movq %%mm7, %%mm4
\n\t
"
"movq %%mm3, %%mm6
\n\t
"
"movq %%mm5, %%mm0
\n\t
"
"movq %%mm7, %%mm2
\n\t
"
"psraw $1, %%mm4
\n\t
"
"psraw $1, %%mm6
\n\t
"
"psubw %%mm7, %%mm0
\n\t
"
"psubw %%mm6, %%mm2
\n\t
"
"psubw %%mm4, %%mm0
\n\t
"
"psubw %%mm3, %%mm2
\n\t
"
"psubw %%mm3, %%mm0
\n\t
"
"paddw %%mm1, %%mm2
\n\t
"
"movq %%mm5, %%mm4
\n\t
"
"movq %%mm1, %%mm6
\n\t
"
"psraw $1, %%mm4
\n\t
"
"psraw $1, %%mm6
\n\t
"
"paddw %%mm5, %%mm4
\n\t
"
"paddw %%mm1, %%mm6
\n\t
"
"paddw %%mm7, %%mm4
\n\t
"
"paddw %%mm5, %%mm6
\n\t
"
"psubw %%mm1, %%mm4
\n\t
"
"paddw %%mm3, %%mm6
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm4, %%mm3
\n\t
"
"movq %%mm2, %%mm5
\n\t
"
"movq %%mm6, %%mm7
\n\t
"
"psraw $2, %%mm6
\n\t
"
"psraw $2, %%mm3
\n\t
"
"psraw $2, %%mm5
\n\t
"
"psraw $2, %%mm0
\n\t
"
"paddw %%mm6, %%mm1
\n\t
"
"paddw %%mm2, %%mm3
\n\t
"
"psubw %%mm4, %%mm5
\n\t
"
"psubw %%mm0, %%mm7
\n\t
"
"movq 32(%0), %%mm2
\n\t
"
"movq 96(%0), %%mm6
\n\t
"
"movq %%mm2, %%mm4
\n\t
"
"movq %%mm6, %%mm0
\n\t
"
"psraw $1, %%mm4
\n\t
"
"psraw $1, %%mm6
\n\t
"
"psubw %%mm0, %%mm4
\n\t
"
"paddw %%mm2, %%mm6
\n\t
"
"movq (%0), %%mm2
\n\t
"
"movq 64(%0), %%mm0
\n\t
"
SUMSUB_BA
(
%%
mm0
,
%%
mm2
)
SUMSUB_BA
(
%%
mm6
,
%%
mm0
)
SUMSUB_BA
(
%%
mm4
,
%%
mm2
)
SUMSUB_BA
(
%%
mm7
,
%%
mm6
)
SUMSUB_BA
(
%%
mm5
,
%%
mm4
)
SUMSUB_BA
(
%%
mm3
,
%%
mm2
)
SUMSUB_BA
(
%%
mm1
,
%%
mm0
)
::
"r"
(
block
)
);
}
/*
 * MMX implementation of the 8x8 H.264 inverse transform with the result
 * handed to add_pixels_clamped_mmx() for the destination picture.
 *
 * dst    - destination pixels; only passed on to add_pixels_clamped_mmx()
 * block  - 64 int16_t coefficients; block[0] is modified in place (+= 32)
 * stride - only passed on to add_pixels_clamped_mmx()
 *
 * Outline:
 *   1. Add 32 to block[0] (presumably the rounding bias for the final
 *      arithmetic shift right by 6 - confirm).
 *   2. First loop, i = 0..1: run h264_idct8_1d() on block + 4*i, then
 *      move the eight result registers into the 8-byte-aligned scratch
 *      buffer b2 at b2 + 32*i, going through TRANSPOSE4 in two groups of
 *      four registers.
 *   3. Second loop, i = 0..1: run h264_idct8_1d() on b2 + 4*i, shift all
 *      eight registers right by 6 and store them back to b2 + 4*i at a
 *      16-byte pitch.
 *   4. Call add_pixels_clamped_mmx(b2, dst, stride).
 *
 * Each asm statement here consumes %mm0..%mm7 exactly as h264_idct8_1d()
 * left them, so no code may be placed between that call and the asm that
 * follows it.
 *
 * NOTE(review): TRANSPOSE4 and add_pixels_clamped_mmx are defined outside
 * this view; the fifth TRANSPOSE4 argument is presumably a scratch
 * register - confirm.
 */
static
void
ff_h264_idct8_add_mmx
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
)
{
int
i
;
int16_t
__attribute__
((
aligned
(
8
)))
b2
[
64
];
block
[
0
]
+=
32
;
/* First pass: transform block + 4*i and write the register contents,
 * routed through TRANSPOSE4, into b2 + 32*i. */
for
(
i
=
0
;
i
<
2
;
i
++
){
/* Spill slot for %mm7: it is saved here, used as the fifth TRANSPOSE4
 * operand, and reloaded before the second TRANSPOSE4. */
uint64_t
tmp
;
h264_idct8_1d
(
block
+
4
*
i
);
/* Stores land at byte offsets 8/24/40/56 (first group) and 0/16/32/48
 * (second group) relative to b2 + 32*i. */
asm
volatile
(
"movq %%mm7, %0
\n\t
"
TRANSPOSE4
(
%%
mm0
,
%%
mm2
,
%%
mm4
,
%%
mm6
,
%%
mm7
)
"movq %%mm0, 8(%1)
\n\t
"
"movq %%mm6, 24(%1)
\n\t
"
"movq %%mm7, 40(%1)
\n\t
"
"movq %%mm4, 56(%1)
\n\t
"
"movq %0, %%mm7
\n\t
"
TRANSPOSE4
(
%%
mm7
,
%%
mm5
,
%%
mm3
,
%%
mm1
,
%%
mm0
)
"movq %%mm7, (%1)
\n\t
"
"movq %%mm1, 16(%1)
\n\t
"
"movq %%mm0, 32(%1)
\n\t
"
"movq %%mm3, 48(%1)
\n\t
"
:
"=m"
(
tmp
)
:
"r"
(
b2
+
32
*
i
)
:
"memory"
);
}
/* Second pass: transform b2 + 4*i in place. After the >>6 the registers
 * are written back in the order mm7, mm5, mm3, mm1, mm0, mm2, mm4, mm6
 * to byte offsets 0, 16, ..., 112. */
for
(
i
=
0
;
i
<
2
;
i
++
){
h264_idct8_1d
(
b2
+
4
*
i
);
asm
volatile
(
"psraw $6, %%mm7
\n\t
"
"psraw $6, %%mm6
\n\t
"
"psraw $6, %%mm5
\n\t
"
"psraw $6, %%mm4
\n\t
"
"psraw $6, %%mm3
\n\t
"
"psraw $6, %%mm2
\n\t
"
"psraw $6, %%mm1
\n\t
"
"psraw $6, %%mm0
\n\t
"
"movq %%mm7, (%0)
\n\t
"
"movq %%mm5, 16(%0)
\n\t
"
"movq %%mm3, 32(%0)
\n\t
"
"movq %%mm1, 48(%0)
\n\t
"
"movq %%mm0, 64(%0)
\n\t
"
"movq %%mm2, 80(%0)
\n\t
"
"movq %%mm4, 96(%0)
\n\t
"
"movq %%mm6, 112(%0)
\n\t
"
::
"r"
(
b2
+
4
*
i
)
:
"memory"
);
}
/* Hand the finished 64 values in b2 to the pixel-add helper. */
add_pixels_clamped_mmx
(
b2
,
dst
,
stride
);
}
static
void
ff_h264_idct_dc_add_mmx2
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
)
{
int
dc
=
(
block
[
0
]
+
32
)
>>
6
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment