Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
26301caa
Commit
26301caa
authored
Jul 09, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: mmx2 ---> mmxext in asm constructs
parent
da39cac8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
31 changed files
with
263 additions
and
259 deletions
+263
-259
ac3dsp.asm
libavcodec/x86/ac3dsp.asm
+2
-2
ac3dsp_init.c
libavcodec/x86/ac3dsp_init.c
+2
-2
dsputil.asm
libavcodec/x86/dsputil.asm
+4
-4
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+28
-28
dsputilenc.asm
libavcodec/x86/dsputilenc.asm
+1
-1
dsputilenc_mmx.c
libavcodec/x86/dsputilenc_mmx.c
+3
-3
h264_chromamc.asm
libavcodec/x86/h264_chromamc.asm
+7
-7
h264_chromamc_10bit.asm
libavcodec/x86/h264_chromamc_10bit.asm
+2
-2
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+10
-10
h264_deblock_10bit.asm
libavcodec/x86/h264_deblock_10bit.asm
+2
-2
h264_idct.asm
libavcodec/x86/h264_idct.asm
+24
-22
h264_idct_10bit.asm
libavcodec/x86/h264_idct_10bit.asm
+1
-1
h264_intrapred.asm
libavcodec/x86/h264_intrapred.asm
+13
-13
h264_intrapred_10bit.asm
libavcodec/x86/h264_intrapred_10bit.asm
+10
-10
h264_intrapred_init.c
libavcodec/x86/h264_intrapred_init.c
+40
-40
h264_weight.asm
libavcodec/x86/h264_weight.asm
+6
-6
h264dsp_init.c
libavcodec/x86/h264dsp_init.c
+0
-0
pngdsp.asm
libavcodec/x86/pngdsp.asm
+1
-1
pngdsp_init.c
libavcodec/x86/pngdsp_init.c
+3
-3
rv34dsp.asm
libavcodec/x86/rv34dsp.asm
+3
-3
rv34dsp_init.c
libavcodec/x86/rv34dsp_init.c
+5
-5
rv40dsp.asm
libavcodec/x86/rv40dsp.asm
+2
-2
rv40dsp_init.c
libavcodec/x86/rv40dsp_init.c
+15
-15
vc1dsp_init.c
libavcodec/x86/vc1dsp_init.c
+3
-3
vp3dsp.asm
libavcodec/x86/vp3dsp.asm
+2
-2
vp3dsp_init.c
libavcodec/x86/vp3dsp_init.c
+9
-7
vp8dsp.asm
libavcodec/x86/vp8dsp.asm
+15
-15
vp8dsp_init.c
libavcodec/x86/vp8dsp_init.c
+43
-43
x86util.asm
libavutil/x86/x86util.asm
+1
-1
output.asm
libswscale/x86/output.asm
+2
-2
swscale.c
libswscale/x86/swscale.c
+4
-4
No files found.
libavcodec/x86/ac3dsp.asm
View file @
26301caa
...
...
@@ -97,7 +97,7 @@ AC3_EXPONENT_MIN
por
%1
,
%2
pshuflw
%2
,
%1
,
q0001
por
%1
,
%2
%elif
cpuflag
(
mmx
2
)
%elif
cpuflag
(
mmx
ext
)
pshufw
%2
,
%1
,
q0032
por
%1
,
%2
pshufw
%2
,
%1
,
q0001
...
...
@@ -153,7 +153,7 @@ cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
INIT_MMX
mmx
%define
ABS2
ABS2_MMX
AC3_MAX_MSB_ABS_INT16
or_abs
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
%define
ABS2
ABS2_MMXEXT
AC3_MAX_MSB_ABS_INT16
min_max
INIT_XMM
sse2
...
...
libavcodec/x86/ac3dsp_init.c
View file @
26301caa
...
...
@@ -31,7 +31,7 @@ extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int n
extern
void
ff_ac3_exponent_min_sse2
(
uint8_t
*
exp
,
int
num_reuse_blocks
,
int
nb_coefs
);
extern
int
ff_ac3_max_msb_abs_int16_mmx
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_mmx
2
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_mmx
ext
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_sse2
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_ssse3
(
const
int16_t
*
src
,
int
len
);
...
...
@@ -182,7 +182,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
}
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_mmxext
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_mmx
2
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_mmx
ext
;
}
if
(
EXTERNAL_SSE
(
mm_flags
))
{
c
->
float_to_fixed24
=
ff_float_to_fixed24_sse
;
...
...
libavcodec/x86/dsputil.asm
View file @
26301caa
...
...
@@ -108,7 +108,7 @@ cglobal scalarproduct_and_madd_int16_%1, 4,4,8, v1, v2, v3, order, mul
%endmacro
INIT_MMX
SCALARPRODUCT
mmx
2
SCALARPRODUCT
mmx
ext
INIT_XMM
SCALARPRODUCT
sse2
...
...
@@ -327,8 +327,8 @@ APPLY_WINDOW_INT16 ssse3_atom, 0, 1
APPLY_WINDOW_INT16
ssse3
,
0
,
1
; void add_hfyu_median_prediction_mmx
2
(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
cglobal
add_hfyu_median_prediction_mmx
2
,
6
,
6
,
0
,
dst
,
top
,
diff
,
w
,
left
,
left_top
; void add_hfyu_median_prediction_mmx
ext
(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
cglobal
add_hfyu_median_prediction_mmx
ext
,
6
,
6
,
0
,
dst
,
top
,
diff
,
w
,
left
,
left_top
movq
mm0
,
[topq]
movq
mm2
,
mm0
movd
mm4
,
[
left_topq
]
...
...
@@ -804,7 +804,7 @@ ALIGN 128
mov
valh
,
vall
%if
%1
>=
8
movd
mm0
,
vald
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
pshufw
mm0
,
mm0
,
0
%else
; mmx
punpcklwd
mm0
,
mm0
...
...
libavcodec/x86/dsputil_mmx.c
View file @
26301caa
...
...
@@ -2045,21 +2045,21 @@ PREFETCH(prefetch_3dnow, prefetch)
void
ff_put_h264_chroma_mc8_rnd_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc8_rnd_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc8_rnd_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc8_rnd_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_h264_chroma_mc4_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc4_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
void
ff_avg_h264_chroma_mc4_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc4_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_h264_chroma_mc2_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
void
ff_put_h264_chroma_mc2_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc2_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
void
ff_avg_h264_chroma_mc2_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_h264_chroma_mc8_rnd_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
...
...
@@ -2077,10 +2077,10 @@ void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, \
int stride, int h, int x, int y);
CHROMA_MC
(
put
,
2
,
10
,
mmx
2
)
CHROMA_MC
(
avg
,
2
,
10
,
mmx
2
)
CHROMA_MC
(
put
,
4
,
10
,
mmx
2
)
CHROMA_MC
(
avg
,
4
,
10
,
mmx
2
)
CHROMA_MC
(
put
,
2
,
10
,
mmx
ext
)
CHROMA_MC
(
avg
,
2
,
10
,
mmx
ext
)
CHROMA_MC
(
put
,
4
,
10
,
mmx
ext
)
CHROMA_MC
(
avg
,
4
,
10
,
mmx
ext
)
CHROMA_MC
(
put
,
8
,
10
,
sse2
)
CHROMA_MC
(
avg
,
8
,
10
,
sse2
)
CHROMA_MC
(
put
,
8
,
10
,
avx
)
...
...
@@ -2283,13 +2283,13 @@ static void vector_clipf_sse(float *dst, const float *src,
#endif
/* HAVE_INLINE_ASM */
int32_t
ff_scalarproduct_int16_mmx
2
(
const
int16_t
*
v1
,
const
int16_t
*
v2
,
int
order
);
int32_t
ff_scalarproduct_int16_mmx
ext
(
const
int16_t
*
v1
,
const
int16_t
*
v2
,
int
order
);
int32_t
ff_scalarproduct_int16_sse2
(
const
int16_t
*
v1
,
const
int16_t
*
v2
,
int
order
);
int32_t
ff_scalarproduct_and_madd_int16_mmx
2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_mmx
ext
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_sse2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
...
...
@@ -2313,9 +2313,9 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
void
ff_bswap32_buf_ssse3
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_sse2
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_add_hfyu_median_prediction_mmx
2
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
);
void
ff_add_hfyu_median_prediction_mmx
ext
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
);
int
ff_add_hfyu_left_prediction_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
left
);
int
ff_add_hfyu_left_prediction_sse4
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
...
...
@@ -2548,24 +2548,24 @@ static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
#if HAVE_YASM
if
(
!
high_bit_depth
&&
CONFIG_H264CHROMA
)
{
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_rnd_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_mmx
2
;
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_rnd_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_mmx
ext
;
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_mmx
ext
;
}
if
(
bit_depth
==
10
&&
CONFIG_H264CHROMA
)
{
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_10_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_10_mmx
2
;
c
->
put_h264_chroma_pixels_tab
[
1
]
=
ff_put_h264_chroma_mc4_10_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_10_mmx
2
;
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_10_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_10_mmx
ext
;
c
->
put_h264_chroma_pixels_tab
[
1
]
=
ff_put_h264_chroma_mc4_10_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_10_mmx
ext
;
}
/* slower than cmov version on AMD */
if
(
!
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
c
->
add_hfyu_median_prediction
=
ff_add_hfyu_median_prediction_mmx
2
;
c
->
add_hfyu_median_prediction
=
ff_add_hfyu_median_prediction_mmx
ext
;
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_mmx
2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_mmx
2
;
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_mmx
ext
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_mmx
ext
;
if
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_mmxext_ba
;
...
...
libavcodec/x86/dsputilenc.asm
View file @
26301caa
...
...
@@ -265,7 +265,7 @@ HADAMARD8_DIFF_MMX mmx
%define
ABS1
ABS1_MMXEXT
%define
HSUM
HSUM_MMXEXT
HADAMARD8_DIFF_MMX
mmx
2
HADAMARD8_DIFF_MMX
mmx
ext
INIT_XMM
%define
ABS2
ABS2_MMXEXT
...
...
libavcodec/x86/dsputilenc_mmx.c
View file @
26301caa
...
...
@@ -1104,7 +1104,7 @@ int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \
int stride, int h);
hadamard_func
(
mmx
)
hadamard_func
(
mmx
2
)
hadamard_func
(
mmx
ext
)
hadamard_func
(
sse2
)
hadamard_func
(
ssse3
)
...
...
@@ -1195,8 +1195,8 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
hadamard8_diff
[
1
]
=
ff_hadamard8_diff_mmx
;
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
hadamard8_diff
[
0
]
=
ff_hadamard8_diff16_mmx
2
;
c
->
hadamard8_diff
[
1
]
=
ff_hadamard8_diff_mmx
2
;
c
->
hadamard8_diff
[
0
]
=
ff_hadamard8_diff16_mmx
ext
;
c
->
hadamard8_diff
[
1
]
=
ff_hadamard8_diff_mmx
ext
;
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
...
...
libavcodec/x86/h264_chromamc.asm
View file @
26301caa
...
...
@@ -442,17 +442,17 @@ chroma_mc8_mmx_func put, vc1, nornd_mmx
chroma_mc8_mmx_func
put
,
rv40
,
mmx
chroma_mc4_mmx_func
put
,
h264
,
mmx
chroma_mc4_mmx_func
put
,
rv40
,
mmx
chroma_mc2_mmx_func
put
,
h264
,
mmx
2
chroma_mc2_mmx_func
put
,
h264
,
mmx
ext
%define
CHROMAMC_AVG
DIRECT_AVG
%define
CHROMAMC_AVG4
COPY_AVG
%define
PAVG
pavgb
chroma_mc8_mmx_func
avg
,
h264
,
rnd_mmx
2
chroma_mc8_mmx_func
avg
,
vc1
,
nornd_mmx
2
chroma_mc8_mmx_func
avg
,
rv40
,
mmx
2
chroma_mc4_mmx_func
avg
,
h264
,
mmx
2
chroma_mc4_mmx_func
avg
,
rv40
,
mmx
2
chroma_mc2_mmx_func
avg
,
h264
,
mmx
2
chroma_mc8_mmx_func
avg
,
h264
,
rnd_mmx
ext
chroma_mc8_mmx_func
avg
,
vc1
,
nornd_mmx
ext
chroma_mc8_mmx_func
avg
,
rv40
,
mmx
ext
chroma_mc4_mmx_func
avg
,
h264
,
mmx
ext
chroma_mc4_mmx_func
avg
,
rv40
,
mmx
ext
chroma_mc2_mmx_func
avg
,
h264
,
mmx
ext
%define
PAVG
pavgusb
chroma_mc8_mmx_func
avg
,
h264
,
rnd_3dnow
...
...
libavcodec/x86/h264_chromamc_10bit.asm
View file @
26301caa
...
...
@@ -253,7 +253,7 @@ INIT_XMM sse2
CHROMA_MC8
put
INIT_XMM
avx
CHROMA_MC8
put
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
CHROMA_MC4
put
CHROMA_MC2
put
...
...
@@ -262,6 +262,6 @@ INIT_XMM sse2
CHROMA_MC8
avg
INIT_XMM
avx
CHROMA_MC8
avg
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
CHROMA_MC4
avg
CHROMA_MC2
avg
libavcodec/x86/h264_deblock.asm
View file @
26301caa
...
...
@@ -504,7 +504,7 @@ cglobal deblock_h_luma_8, 0,5
RET
%endmacro
; DEBLOCK_LUMA
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_LUMA
v8
,
8
INIT_XMM
sse2
DEBLOCK_LUMA
v
,
16
...
...
@@ -783,11 +783,11 @@ DEBLOCK_LUMA_INTRA v
INIT_XMM
avx
DEBLOCK_LUMA_INTRA
v
%if
ARCH_X86_64
==
0
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_LUMA_INTRA
v8
%endif
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
%macro
CHROMA_V_START
0
dec
r2d
; alpha-1
...
...
@@ -818,7 +818,7 @@ cglobal deblock_v_chroma_8, 5,6
movq
m1
,
[
t5
+
r1
]
movq
m2
,
[r0]
movq
m3
,
[
r0
+
r1
]
call
ff_chroma_inter_body_mmx
2
call
ff_chroma_inter_body_mmx
ext
movq
[
t5
+
r1
]
,
m1
movq
[r0],
m2
RET
...
...
@@ -842,7 +842,7 @@ cglobal deblock_h_chroma_8, 5,7
TRANSPOSE4x8_LOAD
bw
,
wd
,
dq
,
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
movq
buf0
,
m0
movq
buf1
,
m3
call
ff_chroma_inter_body_mmx
2
call
ff_chroma_inter_body_mmx
ext
movq
m0
,
buf0
movq
m3
,
buf1
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
...
...
@@ -852,7 +852,7 @@ cglobal deblock_h_chroma_8, 5,7
RET
ALIGN
16
ff_chroma_inter_body_mmx
2
:
ff_chroma_inter_body_mmx
ext
:
LOAD_MASK
r2d
,
r3d
movd
m6
,
[r4]
; tc0
punpcklbw
m6
,
m6
...
...
@@ -885,7 +885,7 @@ cglobal deblock_v_chroma_intra_8, 4,5
movq
m1
,
[
t5
+
r1
]
movq
m2
,
[r0]
movq
m3
,
[
r0
+
r1
]
call
ff_chroma_intra_body_mmx
2
call
ff_chroma_intra_body_mmx
ext
movq
[
t5
+
r1
]
,
m1
movq
[r0],
m2
RET
...
...
@@ -896,12 +896,12 @@ cglobal deblock_v_chroma_intra_8, 4,5
cglobal
deblock_h_chroma_intra_8
,
4
,
6
CHROMA_H_START
TRANSPOSE4x8_LOAD
bw
,
wd
,
dq
,
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
call
ff_chroma_intra_body_mmx
2
call
ff_chroma_intra_body_mmx
ext
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
RET
ALIGN
16
ff_chroma_intra_body_mmx
2
:
ff_chroma_intra_body_mmx
ext
:
LOAD_MASK
r2d
,
r3d
movq
m5
,
m1
movq
m6
,
m2
...
...
@@ -1025,7 +1025,7 @@ ff_chroma_intra_body_mmx2:
jl
%%
.
b_idx_loop
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
h264_loop_filter_strength
,
9
,
9
,
0
,
bs
,
nnz
,
ref
,
mv
,
bidir
,
edges
,
\
step
,
mask_mv0
,
mask_mv1
,
field
%define
b_idxq
bidirq
...
...
libavcodec/x86/h264_deblock_10bit.asm
View file @
26301caa
...
...
@@ -791,7 +791,7 @@ cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
%endmacro
%if
ARCH_X86_64
==
0
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
INIT_XMM
sse2
...
...
@@ -906,7 +906,7 @@ cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
%endmacro
%if
ARCH_X86_64
==
0
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_CHROMA
%endif
INIT_XMM
sse2
...
...
libavcodec/x86/h264_idct.asm
View file @
26301caa
...
...
@@ -286,14 +286,14 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10
%endmacro
INIT_MMX
; ff_h264_idct_dc_add_mmx
2
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_dc_add_8_mmx
2
,
3
,
3
,
0
; ff_h264_idct_dc_add_mmx
ext
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_dc_add_8_mmx
ext
,
3
,
3
,
0
DC_ADD_MMXEXT_INIT
r1
,
r2
DC_ADD_MMXEXT_OP
movh
,
r0
,
r2
,
r1
RET
; ff_h264_idct8_dc_add_mmx
2
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8_mmx
2
,
3
,
3
,
0
; ff_h264_idct8_dc_add_mmx
ext
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8_mmx
ext
,
3
,
3
,
0
DC_ADD_MMXEXT_INIT
r1
,
r2
DC_ADD_MMXEXT_OP
mova
,
r0
,
r2
,
r1
lea
r0
,
[
r0
+
r2
*
4
]
...
...
@@ -354,9 +354,9 @@ cglobal h264_idct8_add4_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, str
ADD
rsp
,
pad
RET
; ff_h264_idct_add16_mmx
2
(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_8_mmx
2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct_add16_mmx
ext
(uint8_t *dst, const int *block_offset,
;
DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_8_mmx
ext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -421,9 +421,10 @@ cglobal h264_idct_add16intra_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block
jl
.
nextblock
REP_RET
; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_8_mmx2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct_add16intra_mmxext(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride,
; const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_8_mmxext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -463,9 +464,10 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo
jl
.
nextblock
REP_RET
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct8_add4_8_mmx2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct8_add4_mmxext(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride,
; const uint8_t nnzc[6*8])
cglobal
h264_idct8_add4_8_mmxext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -620,7 +622,7 @@ cglobal h264_idct_add8_8_mmx, 5, 8 + npicregs, 0, dst1, block_offset, block, str
call
h264_idct_add8_mmx_plane
RET
h264_idct_add8_mmx
2
_plane
:
h264_idct_add8_mmx
ext
_plane
:
.
nextblock
:
movzx
r6
,
byte
[
scan8
+
r5
]
movzx
r6
,
byte
[
r4
+
r6
]
...
...
@@ -661,9 +663,9 @@ h264_idct_add8_mmx2_plane:
jnz
.
nextblock
rep
ret
; ff_h264_idct_add8_mmx
2
(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_8_mmx
2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct_add8_mmx
ext
(uint8_t **dest, const int *block_offset,
;
DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_8_mmx
ext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
mov
r5
,
16
add
r2
,
512
%if
ARCH_X86_64
...
...
@@ -672,7 +674,7 @@ cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, st
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
%endif
call
h264_idct_add8_mmx
2
_plane
call
h264_idct_add8_mmx
ext
_plane
mov
r5
,
32
add
r2
,
384
%if
ARCH_X86_64
...
...
@@ -680,12 +682,12 @@ cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, st
%else
add
r0mp
,
gprsize
%endif
call
h264_idct_add8_mmx
2
_plane
call
h264_idct_add8_mmx
ext
_plane
RET
INIT_MMX
; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered
h264_idct_dc_add8_mmx
2
:
h264_idct_dc_add8_mmx
ext
:
movd
m0
,
[
r2
]
; 0 0 X D
punpcklwd
m0
,
[
r2
+
32
]
; x X d D
paddsw
m0
,
[
pw_32
]
...
...
@@ -779,7 +781,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5 + ARCH_X86_64, 8
%else
add
r0
,
r0m
%endif
call
h264_idct_dc_add8_mmx
2
call
h264_idct_dc_add8_mmx
ext
.
cycle%1
end
:
%if
%1
<
7
add
r2
,
64
...
...
@@ -828,7 +830,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7 + ARCH_X86_64, 8
mov
r0
,
[r0]
add
r0
,
dword
[
r1
+
(
%1
&
1
)
*
8
+
64
*
(
1
+
(
%1
>>
1
))
]
%endif
call
h264_idct_dc_add8_mmx
2
call
h264_idct_dc_add8_mmx
ext
.
cycle%1
end
:
%if
%1
==
1
add
r2
,
384
+
64
...
...
libavcodec/x86/h264_idct_10bit.asm
View file @
26301caa
...
...
@@ -178,7 +178,7 @@ IDCT_ADD16_10
mova
[
%1
+
%3
]
,
m4
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
h264_idct_dc_add_10
,
3
,
3
movd
m0
,
[r1]
paddd
m0
,
[
pd_32
]
...
...
libavcodec/x86/h264_intrapred.asm
View file @
26301caa
...
...
@@ -120,7 +120,7 @@ cglobal pred16x16_horizontal_8, 2,3
INIT_MMX
mmx
PRED16x16_H
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_H
INIT_XMM
ssse3
PRED16x16_H
...
...
@@ -180,7 +180,7 @@ cglobal pred16x16_dc_8, 2,7
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_DC
INIT_XMM
sse2
PRED16x16_DC
...
...
@@ -229,7 +229,7 @@ cglobal pred16x16_tm_vp8_8, 2,5
INIT_MMX
mmx
PRED16x16_TM
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_TM
INIT_XMM
sse2
...
...
@@ -309,14 +309,14 @@ cglobal pred16x16_plane_%1_8, 2,9,7
movhlps
m1
,
m0
%endif
paddw
m0
,
m1
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0xE
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
psrlq
m1
,
32
%endif
paddw
m0
,
m1
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0x1
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
...
...
@@ -536,7 +536,7 @@ INIT_MMX mmx
H264_PRED16x16_PLANE
h264
H264_PRED16x16_PLANE
rv40
H264_PRED16x16_PLANE
svq3
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
H264_PRED16x16_PLANE
h264
H264_PRED16x16_PLANE
rv40
H264_PRED16x16_PLANE
svq3
...
...
@@ -582,7 +582,7 @@ cglobal pred8x8_plane_8, 2,9,7
paddw
m0
,
m1
%if
notcpuflag
(
ssse3
)
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0xE
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
...
...
@@ -591,7 +591,7 @@ cglobal pred8x8_plane_8, 2,9,7
paddw
m0
,
m1
%endif
; !ssse3
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0x1
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
...
...
@@ -716,7 +716,7 @@ ALIGN 16
INIT_MMX
mmx
H264_PRED8x8_PLANE
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
H264_PRED8x8_PLANE
INIT_XMM
sse2
H264_PRED8x8_PLANE
...
...
@@ -763,7 +763,7 @@ cglobal pred8x8_horizontal_8, 2,3
INIT_MMX
mmx
PRED8x8_H
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8_H
INIT_MMX
ssse3
PRED8x8_H
...
...
@@ -941,7 +941,7 @@ cglobal pred8x8_tm_vp8_8, 2,6
INIT_MMX
mmx
PRED8x8_TM
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8_TM
INIT_XMM
sse2
...
...
@@ -2442,7 +2442,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
sub
r3d
,
r4d
movd
mm2
,
r1d
movd
mm4
,
r3d
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
pshufw
mm2
,
mm2
,
0
pshufw
mm4
,
mm4
,
0
%else
...
...
@@ -2465,7 +2465,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
INIT_MMX
mmx
PRED4x4_TM
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED4x4_TM
INIT_XMM
ssse3
...
...
libavcodec/x86/h264_intrapred_10bit.asm
View file @
26301caa
...
...
@@ -182,7 +182,7 @@ PRED4x4_HD
HADDD
%1
,
%2
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
pred4x4_dc_10
,
3
,
3
sub
r0
,
r2
lea
r1
,
[
r0
+
r2
*
2
]
...
...
@@ -261,7 +261,7 @@ PRED4x4_VL
;-----------------------------------------------------------------------------
; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
;-----------------------------------------------------------------------------
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
pred4x4_horizontal_up_10
,
3
,
3
sub
r0
,
r2
lea
r1
,
[
r0
+
r2
*
2
]
...
...
@@ -410,7 +410,7 @@ cglobal pred8x8_dc_10, 2, 6
RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8_DC
pshufw
INIT_XMM
sse2
PRED8x8_DC
pshuflw
...
...
@@ -524,7 +524,7 @@ cglobal pred8x8l_128_dc_10, 4, 4
RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8L_128_DC
INIT_XMM
sse2
PRED8x8L_128_DC
...
...
@@ -1007,7 +1007,7 @@ cglobal pred16x16_vertical_10, 2, 3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_VERTICAL
INIT_XMM
sse2
PRED16x16_VERTICAL
...
...
@@ -1031,7 +1031,7 @@ cglobal pred16x16_horizontal_10, 2, 3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_HORIZONTAL
INIT_XMM
sse2
PRED16x16_HORIZONTAL
...
...
@@ -1077,7 +1077,7 @@ cglobal pred16x16_dc_10, 2, 6
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_DC
INIT_XMM
sse2
PRED16x16_DC
...
...
@@ -1109,7 +1109,7 @@ cglobal pred16x16_top_dc_10, 2, 3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_TOP_DC
INIT_XMM
sse2
PRED16x16_TOP_DC
...
...
@@ -1146,7 +1146,7 @@ cglobal pred16x16_left_dc_10, 2, 6
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_LEFT_DC
INIT_XMM
sse2
PRED16x16_LEFT_DC
...
...
@@ -1167,7 +1167,7 @@ cglobal pred16x16_128_dc_10, 2,3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_128_DC
INIT_XMM
sse2
PRED16x16_128_DC
libavcodec/x86/h264_intrapred_init.c
View file @
26301caa
...
...
@@ -27,7 +27,7 @@ void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
const uint8_t *topright, \
ptrdiff_t stride);
PRED4x4
(
dc
,
10
,
mmx
2
)
PRED4x4
(
dc
,
10
,
mmx
ext
)
PRED4x4
(
down_left
,
10
,
sse2
)
PRED4x4
(
down_left
,
10
,
avx
)
PRED4x4
(
down_right
,
10
,
sse2
)
...
...
@@ -38,7 +38,7 @@ PRED4x4(vertical_left, 10, avx)
PRED4x4
(
vertical_right
,
10
,
sse2
)
PRED4x4
(
vertical_right
,
10
,
ssse3
)
PRED4x4
(
vertical_right
,
10
,
avx
)
PRED4x4
(
horizontal_up
,
10
,
mmx
2
)
PRED4x4
(
horizontal_up
,
10
,
mmx
ext
)
PRED4x4
(
horizontal_down
,
10
,
sse2
)
PRED4x4
(
horizontal_down
,
10
,
ssse3
)
PRED4x4
(
horizontal_down
,
10
,
avx
)
...
...
@@ -47,7 +47,7 @@ PRED4x4(horizontal_down, 10, avx)
void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED8x8
(
dc
,
10
,
mmx
2
)
PRED8x8
(
dc
,
10
,
mmx
ext
)
PRED8x8
(
dc
,
10
,
sse2
)
PRED8x8
(
top_dc
,
10
,
sse2
)
PRED8x8
(
plane
,
10
,
sse2
)
...
...
@@ -62,7 +62,7 @@ void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
PRED8x8L
(
dc
,
10
,
sse2
)
PRED8x8L
(
dc
,
10
,
avx
)
PRED8x8L
(
128
_dc
,
10
,
mmx
2
)
PRED8x8L
(
128
_dc
,
10
,
mmx
ext
)
PRED8x8L
(
128
_dc
,
10
,
sse2
)
PRED8x8L
(
top_dc
,
10
,
sse2
)
PRED8x8L
(
top_dc
,
10
,
avx
)
...
...
@@ -88,42 +88,42 @@ PRED8x8L(horizontal_up, 10, avx)
void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED16x16
(
dc
,
10
,
mmx
2
)
PRED16x16
(
dc
,
10
,
mmx
ext
)
PRED16x16
(
dc
,
10
,
sse2
)
PRED16x16
(
top_dc
,
10
,
mmx
2
)
PRED16x16
(
top_dc
,
10
,
mmx
ext
)
PRED16x16
(
top_dc
,
10
,
sse2
)
PRED16x16
(
128
_dc
,
10
,
mmx
2
)
PRED16x16
(
128
_dc
,
10
,
mmx
ext
)
PRED16x16
(
128
_dc
,
10
,
sse2
)
PRED16x16
(
left_dc
,
10
,
mmx
2
)
PRED16x16
(
left_dc
,
10
,
mmx
ext
)
PRED16x16
(
left_dc
,
10
,
sse2
)
PRED16x16
(
vertical
,
10
,
mmx
2
)
PRED16x16
(
vertical
,
10
,
mmx
ext
)
PRED16x16
(
vertical
,
10
,
sse2
)
PRED16x16
(
horizontal
,
10
,
mmx
2
)
PRED16x16
(
horizontal
,
10
,
mmx
ext
)
PRED16x16
(
horizontal
,
10
,
sse2
)
/* 8-bit versions */
PRED16x16
(
vertical
,
8
,
mmx
)
PRED16x16
(
vertical
,
8
,
sse
)
PRED16x16
(
horizontal
,
8
,
mmx
)
PRED16x16
(
horizontal
,
8
,
mmx
2
)
PRED16x16
(
horizontal
,
8
,
mmx
ext
)
PRED16x16
(
horizontal
,
8
,
ssse3
)
PRED16x16
(
dc
,
8
,
mmx
2
)
PRED16x16
(
dc
,
8
,
mmx
ext
)
PRED16x16
(
dc
,
8
,
sse2
)
PRED16x16
(
dc
,
8
,
ssse3
)
PRED16x16
(
plane_h264
,
8
,
mmx
)
PRED16x16
(
plane_h264
,
8
,
mmx
2
)
PRED16x16
(
plane_h264
,
8
,
mmx
ext
)
PRED16x16
(
plane_h264
,
8
,
sse2
)
PRED16x16
(
plane_h264
,
8
,
ssse3
)
PRED16x16
(
plane_rv40
,
8
,
mmx
)
PRED16x16
(
plane_rv40
,
8
,
mmx
2
)
PRED16x16
(
plane_rv40
,
8
,
mmx
ext
)
PRED16x16
(
plane_rv40
,
8
,
sse2
)
PRED16x16
(
plane_rv40
,
8
,
ssse3
)
PRED16x16
(
plane_svq3
,
8
,
mmx
)
PRED16x16
(
plane_svq3
,
8
,
mmx
2
)
PRED16x16
(
plane_svq3
,
8
,
mmx
ext
)
PRED16x16
(
plane_svq3
,
8
,
sse2
)
PRED16x16
(
plane_svq3
,
8
,
ssse3
)
PRED16x16
(
tm_vp8
,
8
,
mmx
)
PRED16x16
(
tm_vp8
,
8
,
mmx
2
)
PRED16x16
(
tm_vp8
,
8
,
mmx
ext
)
PRED16x16
(
tm_vp8
,
8
,
sse2
)
PRED8x8
(
top_dc
,
8
,
mmxext
)
...
...
@@ -131,14 +131,14 @@ PRED8x8(dc_rv40, 8, mmxext)
PRED8x8
(
dc
,
8
,
mmxext
)
PRED8x8
(
vertical
,
8
,
mmx
)
PRED8x8
(
horizontal
,
8
,
mmx
)
PRED8x8
(
horizontal
,
8
,
mmx
2
)
PRED8x8
(
horizontal
,
8
,
mmx
ext
)
PRED8x8
(
horizontal
,
8
,
ssse3
)
PRED8x8
(
plane
,
8
,
mmx
)
PRED8x8
(
plane
,
8
,
mmx
2
)
PRED8x8
(
plane
,
8
,
mmx
ext
)
PRED8x8
(
plane
,
8
,
sse2
)
PRED8x8
(
plane
,
8
,
ssse3
)
PRED8x8
(
tm_vp8
,
8
,
mmx
)
PRED8x8
(
tm_vp8
,
8
,
mmx
2
)
PRED8x8
(
tm_vp8
,
8
,
mmx
ext
)
PRED8x8
(
tm_vp8
,
8
,
sse2
)
PRED8x8
(
tm_vp8
,
8
,
ssse3
)
...
...
@@ -175,7 +175,7 @@ PRED4x4(vertical_right, 8, mmxext)
PRED4x4
(
horizontal_up
,
8
,
mmxext
)
PRED4x4
(
horizontal_down
,
8
,
mmxext
)
PRED4x4
(
tm_vp8
,
8
,
mmx
)
PRED4x4
(
tm_vp8
,
8
,
mmx
2
)
PRED4x4
(
tm_vp8
,
8
,
mmx
ext
)
PRED4x4
(
tm_vp8
,
8
,
ssse3
)
PRED4x4
(
vertical_vp8
,
8
,
mmxext
)
...
...
@@ -210,10 +210,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_8_mmx
2
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_8_mmx
2
;
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_8_mmx
ext
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_8_mmx
ext
;
if
(
chroma_format_idc
==
1
)
h
->
pred8x8
[
HOR_PRED8x8
]
=
ff_pred8x8_horizontal_8_mmx
2
;
h
->
pred8x8
[
HOR_PRED8x8
]
=
ff_pred8x8_horizontal_8_mmx
ext
;
h
->
pred8x8l
[
TOP_DC_PRED
]
=
ff_pred8x8l_top_dc_8_mmxext
;
h
->
pred8x8l
[
DC_PRED
]
=
ff_pred8x8l_dc_8_mmxext
;
h
->
pred8x8l
[
HOR_PRED
]
=
ff_pred8x8l_horizontal_8_mmxext
;
...
...
@@ -243,20 +243,20 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
}
if
(
codec_id
==
AV_CODEC_ID_VP8
)
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_tm_vp8_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_tm_vp8_8_mmx
ext
;
h
->
pred8x8
[
DC_PRED8x8
]
=
ff_pred8x8_dc_rv40_8_mmxext
;
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_tm_vp8_8_mmx
2
;
h
->
pred4x4
[
TM_VP8_PRED
]
=
ff_pred4x4_tm_vp8_8_mmx
2
;
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_tm_vp8_8_mmx
ext
;
h
->
pred4x4
[
TM_VP8_PRED
]
=
ff_pred4x4_tm_vp8_8_mmx
ext
;
h
->
pred4x4
[
VERT_PRED
]
=
ff_pred4x4_vertical_vp8_8_mmxext
;
}
else
{
if
(
chroma_format_idc
==
1
)
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_plane_8_mmx
2
;
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_plane_8_mmx
ext
;
if
(
codec_id
==
AV_CODEC_ID_SVQ3
)
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_svq3_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_svq3_8_mmx
ext
;
}
else
if
(
codec_id
==
AV_CODEC_ID_RV40
)
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_rv40_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_rv40_8_mmx
ext
;
}
else
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_h264_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_h264_8_mmx
ext
;
}
}
}
...
...
@@ -320,20 +320,20 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
}
else
if
(
bit_depth
==
10
)
{
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
h
->
pred4x4
[
DC_PRED
]
=
ff_pred4x4_dc_10_mmx
2
;
h
->
pred4x4
[
HOR_UP_PRED
]
=
ff_pred4x4_horizontal_up_10_mmx
2
;
h
->
pred4x4
[
DC_PRED
]
=
ff_pred4x4_dc_10_mmx
ext
;
h
->
pred4x4
[
HOR_UP_PRED
]
=
ff_pred4x4_horizontal_up_10_mmx
ext
;
if
(
chroma_format_idc
==
1
)
h
->
pred8x8
[
DC_PRED8x8
]
=
ff_pred8x8_dc_10_mmx
2
;
h
->
pred8x8
[
DC_PRED8x8
]
=
ff_pred8x8_dc_10_mmx
ext
;
h
->
pred8x8l
[
DC_128_PRED
]
=
ff_pred8x8l_128_dc_10_mmx
2
;
h
->
pred8x8l
[
DC_128_PRED
]
=
ff_pred8x8l_128_dc_10_mmx
ext
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_10_mmx
2
;
h
->
pred16x16
[
TOP_DC_PRED8x8
]
=
ff_pred16x16_top_dc_10_mmx
2
;
h
->
pred16x16
[
DC_128_PRED8x8
]
=
ff_pred16x16_128_dc_10_mmx
2
;
h
->
pred16x16
[
LEFT_DC_PRED8x8
]
=
ff_pred16x16_left_dc_10_mmx
2
;
h
->
pred16x16
[
VERT_PRED8x8
]
=
ff_pred16x16_vertical_10_mmx
2
;
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_10_mmx
2
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_10_mmx
ext
;
h
->
pred16x16
[
TOP_DC_PRED8x8
]
=
ff_pred16x16_top_dc_10_mmx
ext
;
h
->
pred16x16
[
DC_128_PRED8x8
]
=
ff_pred16x16_128_dc_10_mmx
ext
;
h
->
pred16x16
[
LEFT_DC_PRED8x8
]
=
ff_pred16x16_left_dc_10_mmx
ext
;
h
->
pred16x16
[
VERT_PRED8x8
]
=
ff_pred16x16_vertical_10_mmx
ext
;
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_10_mmx
ext
;
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
h
->
pred4x4
[
DIAG_DOWN_LEFT_PRED
]
=
ff_pred4x4_down_left_10_sse2
;
...
...
libavcodec/x86/h264_weight.asm
View file @
26301caa
...
...
@@ -71,7 +71,7 @@ SECTION .text
%endmacro
INIT_MMX
cglobal
h264_weight_16_mmx
2
,
6
,
6
,
0
cglobal
h264_weight_16_mmx
ext
,
6
,
6
,
0
WEIGHT_SETUP
.
nextrow
:
WEIGHT_OP
0
,
4
...
...
@@ -96,7 +96,7 @@ cglobal h264_weight_%1_%3, 6, 6, %2
%endmacro
INIT_MMX
WEIGHT_FUNC_MM
8
,
0
,
mmx
2
WEIGHT_FUNC_MM
8
,
0
,
mmx
ext
INIT_XMM
WEIGHT_FUNC_MM
16
,
8
,
sse2
...
...
@@ -121,7 +121,7 @@ cglobal h264_weight_%1_%3, 6, 6, %2
%endmacro
INIT_MMX
WEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
2
WEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
ext
INIT_XMM
WEIGHT_FUNC_HALF_MM
8
,
8
,
sse2
...
...
@@ -175,7 +175,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
%endmacro
INIT_MMX
cglobal
h264_biweight_16_mmx
2
,
7
,
8
,
0
cglobal
h264_biweight_16_mmx
ext
,
7
,
8
,
0
BIWEIGHT_SETUP
movifnidn
r3d
,
r3m
.
nextrow
:
...
...
@@ -210,7 +210,7 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
%endmacro
INIT_MMX
BIWEIGHT_FUNC_MM
8
,
0
,
mmx
2
BIWEIGHT_FUNC_MM
8
,
0
,
mmx
ext
INIT_XMM
BIWEIGHT_FUNC_MM
16
,
8
,
sse2
...
...
@@ -239,7 +239,7 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
%endmacro
INIT_MMX
BIWEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
2
BIWEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
ext
INIT_XMM
BIWEIGHT_FUNC_HALF_MM
8
,
8
,
sse2
...
...
libavcodec/x86/h264dsp_init.c
View file @
26301caa
This diff is collapsed.
Click to expand it.
libavcodec/x86/pngdsp.asm
View file @
26301caa
...
...
@@ -166,7 +166,7 @@ cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
ADD_PAETH_PRED_FN
0
INIT_MMX
ssse3
...
...
libavcodec/x86/pngdsp_init.c
View file @
26301caa
...
...
@@ -23,8 +23,8 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/pngdsp.h"
void
ff_add_png_paeth_prediction_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_png_paeth_prediction_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_png_paeth_prediction_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_bytes_l2_mmx
(
uint8_t
*
dst
,
uint8_t
*
src1
,
...
...
@@ -41,7 +41,7 @@ void ff_pngdsp_init_x86(PNGDSPContext *dsp)
dsp
->
add_bytes_l2
=
ff_add_bytes_l2_mmx
;
#endif
if
(
EXTERNAL_MMXEXT
(
flags
))
dsp
->
add_paeth_prediction
=
ff_add_png_paeth_prediction_mmx
2
;
dsp
->
add_paeth_prediction
=
ff_add_png_paeth_prediction_mmx
ext
;
if
(
EXTERNAL_SSE2
(
flags
))
dsp
->
add_bytes_l2
=
ff_add_bytes_l2_sse2
;
if
(
EXTERNAL_SSSE3
(
flags
))
...
...
libavcodec/x86/rv34dsp.asm
View file @
26301caa
...
...
@@ -57,7 +57,7 @@ cglobal rv34_idct_%1, 1, 2, 0
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
%define
IDCT_DC
IDCT_DC_ROUND
rv34_idct
dc
%define
IDCT_DC
IDCT_DC_NOROUND
...
...
@@ -133,7 +133,7 @@ cglobal rv34_idct_dc_add, 3, 3
mova
mm5
,
[
pd_512
]
; 0x200
%endmacro
; ff_rv34_idct_add_mmx
2
(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
; ff_rv34_idct_add_mmx
ext
(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
%macro
COL_TRANSFORM
4
pshufw
mm3
,
%2
,
0xDD
; col. 1,3,1,3
pshufw
%2
,
%2
,
0x88
; col. 0,2,0,2
...
...
@@ -154,7 +154,7 @@ cglobal rv34_idct_dc_add, 3, 3
packuswb
%2
,
%2
movd
%1
,
%2
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
rv34_idct_add
,
3
,
3
,
0
,
d
,
s
,
b
ROW_TRANSFORM
bq
COL_TRANSFORM
[dq],
mm0
,
[
pw_col_coeffs
+
0
]
,
[
pw_col_coeffs
+
8
]
...
...
libavcodec/x86/rv34dsp_init.c
View file @
26301caa
...
...
@@ -25,11 +25,11 @@
#include "libavcodec/dsputil.h"
#include "libavcodec/rv34dsp.h"
void
ff_rv34_idct_dc_mmx
2
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_noround_mmx
2
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_mmx
ext
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_noround_mmx
ext
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_add_mmx
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
int
dc
);
void
ff_rv34_idct_dc_add_sse4
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
int
dc
);
void
ff_rv34_idct_add_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
DCTELEM
*
block
);
void
ff_rv34_idct_add_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
DCTELEM
*
block
);
av_cold
void
ff_rv34dsp_init_x86
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
)
{
...
...
@@ -38,8 +38,8 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
if
(
EXTERNAL_MMX
(
mm_flags
))
c
->
rv34_idct_dc_add
=
ff_rv34_idct_dc_add_mmx
;
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
rv34_inv_transform_dc
=
ff_rv34_idct_dc_noround_mmx
2
;
c
->
rv34_idct_add
=
ff_rv34_idct_add_mmx
2
;
c
->
rv34_inv_transform_dc
=
ff_rv34_idct_dc_noround_mmx
ext
;
c
->
rv34_idct_add
=
ff_rv34_idct_add_mmx
ext
;
}
if
(
EXTERNAL_SSE4
(
mm_flags
))
c
->
rv34_idct_dc_add
=
ff_rv34_idct_dc_add_sse4
;
...
...
libavcodec/x86/rv40dsp.asm
View file @
26301caa
...
...
@@ -240,7 +240,7 @@ INIT_MMX mmx
FILTER_V
put
FILTER_H
put
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
FILTER_V
avg
FILTER_H
avg
...
...
@@ -486,7 +486,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
RV40_WEIGHT
rnd
,
8
,
3
RV40_WEIGHT
rnd
,
16
,
4
RV40_WEIGHT
nornd
,
8
,
3
...
...
libavcodec/x86/rv40dsp_init.c
View file @
26301caa
...
...
@@ -34,15 +34,15 @@
#if HAVE_YASM
void
ff_put_rv40_chroma_mc8_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc8_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc8_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc8_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_rv40_chroma_mc4_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc4_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc4_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc4_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
...
...
@@ -55,7 +55,7 @@ void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *sr
int w1, int w2, ptrdiff_t stride); \
void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
int w1, int w2, ptrdiff_t stride);
DECLARE_WEIGHT
(
mmx
2
)
DECLARE_WEIGHT
(
mmx
ext
)
DECLARE_WEIGHT
(
sse2
)
DECLARE_WEIGHT
(
ssse3
)
...
...
@@ -150,9 +150,9 @@ QPEL_MC_DECL(avg_, _sse2)
QPEL_MC_DECL
(
put_
,
_mmx
)
#define ff_put_rv40_qpel_h_mmx
2
ff_put_rv40_qpel_h_mmx
#define ff_put_rv40_qpel_v_mmx
2
ff_put_rv40_qpel_v_mmx
QPEL_MC_DECL
(
avg_
,
_mmx
2
)
#define ff_put_rv40_qpel_h_mmx
ext
ff_put_rv40_qpel_h_mmx
#define ff_put_rv40_qpel_v_mmx
ext
ff_put_rv40_qpel_v_mmx
QPEL_MC_DECL
(
avg_
,
_mmx
ext
)
#define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx
#define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx
...
...
@@ -206,14 +206,14 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
#endif
}
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
avg_chroma_pixels_tab
[
0
]
=
ff_avg_rv40_chroma_mc8_mmx2
;
c
->
avg_chroma_pixels_tab
[
1
]
=
ff_avg_rv40_chroma_mc4_mmx2
;
c
->
rv40_weight_pixels_tab
[
0
][
0
]
=
ff_rv40_weight_func_rnd_16_mmx
2
;
c
->
rv40_weight_pixels_tab
[
0
][
1
]
=
ff_rv40_weight_func_rnd_8_mmx
2
;
c
->
rv40_weight_pixels_tab
[
1
][
0
]
=
ff_rv40_weight_func_nornd_16_mmx
2
;
c
->
rv40_weight_pixels_tab
[
1
][
1
]
=
ff_rv40_weight_func_nornd_8_mmx
2
;
c
->
avg_chroma_pixels_tab
[
0
]
=
ff_avg_rv40_chroma_mc8_mmxext
;
c
->
avg_chroma_pixels_tab
[
1
]
=
ff_avg_rv40_chroma_mc4_mmxext
;
c
->
rv40_weight_pixels_tab
[
0
][
0
]
=
ff_rv40_weight_func_rnd_16_mmx
ext
;
c
->
rv40_weight_pixels_tab
[
0
][
1
]
=
ff_rv40_weight_func_rnd_8_mmx
ext
;
c
->
rv40_weight_pixels_tab
[
1
][
0
]
=
ff_rv40_weight_func_nornd_16_mmx
ext
;
c
->
rv40_weight_pixels_tab
[
1
][
1
]
=
ff_rv40_weight_func_nornd_8_mmx
ext
;
#if ARCH_X86_32
QPEL_MC_SET
(
avg_
,
_mmx
2
)
QPEL_MC_SET
(
avg_
,
_mmx
ext
)
#endif
}
else
if
(
EXTERNAL_AMD3DNOW
(
mm_flags
))
{
c
->
avg_chroma_pixels_tab
[
0
]
=
ff_avg_rv40_chroma_mc8_3dnow
;
...
...
libavcodec/x86/vc1dsp_init.c
View file @
26301caa
...
...
@@ -64,8 +64,8 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
void
ff_put_vc1_chroma_mc8_nornd_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_vc1_chroma_mc8_nornd_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
...
...
@@ -99,7 +99,7 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
{
ASSIGN_LF
(
mmxext
);
dsp
->
avg_no_rnd_vc1_chroma_pixels_tab
[
0
]
=
ff_avg_vc1_chroma_mc8_nornd_mmx
2
;
dsp
->
avg_no_rnd_vc1_chroma_pixels_tab
[
0
]
=
ff_avg_vc1_chroma_mc8_nornd_mmx
ext
;
}
else
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
)
{
dsp
->
avg_no_rnd_vc1_chroma_pixels_tab
[
0
]
=
ff_avg_vc1_chroma_mc8_nornd_3dnow
;
}
...
...
libavcodec/x86/vp3dsp.asm
View file @
26301caa
...
...
@@ -101,7 +101,7 @@ SECTION .text
mov
[
r0
+
r3
-
1
]
,
r2w
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
vp3_v_loop_filter
,
3
,
4
%if
ARCH_X86_64
movsxd
r1
,
r1d
...
...
@@ -633,7 +633,7 @@ vp3_idct_funcs
movq
[
r0
+
r3
]
,
m5
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
vp3_idct_dc_add
,
3
,
4
%if
ARCH_X86_64
movsxd
r1
,
r1d
...
...
libavcodec/x86/vp3dsp_init.c
View file @
26301caa
...
...
@@ -31,11 +31,13 @@ void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
void
ff_vp3_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_vp3_idct_add_sse2
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_vp3_idct_dc_add_mmx
2
(
uint8_t
*
dest
,
int
line_size
,
const
DCTELEM
*
block
);
void
ff_vp3_idct_dc_add_mmx
ext
(
uint8_t
*
dest
,
int
line_size
,
const
DCTELEM
*
block
);
void
ff_vp3_v_loop_filter_mmx2
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
void
ff_vp3_h_loop_filter_mmx2
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
void
ff_vp3_v_loop_filter_mmxext
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
void
ff_vp3_h_loop_filter_mmxext
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
av_cold
void
ff_vp3dsp_init_x86
(
VP3DSPContext
*
c
,
int
flags
)
{
...
...
@@ -50,11 +52,11 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
#endif
if
(
EXTERNAL_MMXEXT
(
cpuflags
))
{
c
->
idct_dc_add
=
ff_vp3_idct_dc_add_mmx
2
;
c
->
idct_dc_add
=
ff_vp3_idct_dc_add_mmx
ext
;
if
(
!
(
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
v_loop_filter
=
ff_vp3_v_loop_filter_mmx
2
;
c
->
h_loop_filter
=
ff_vp3_h_loop_filter_mmx
2
;
c
->
v_loop_filter
=
ff_vp3_v_loop_filter_mmx
ext
;
c
->
h_loop_filter
=
ff_vp3_h_loop_filter_mmx
ext
;
}
}
...
...
libavcodec/x86/vp8dsp.asm
View file @
26301caa
...
...
@@ -338,7 +338,7 @@ INIT_XMM ssse3
FILTER_SSSE3
8
; 4x4 block, H-only 4-tap filter
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
put_vp8_epel4_h4
,
6
,
6
+
npicregs
,
0
,
dst
,
dststride
,
src
,
srcstride
,
height
,
mx
,
picreg
shl
mxd
,
4
%ifdef
PIC
...
...
@@ -386,7 +386,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
REP_RET
; 4x4 block, H-only 6-tap filter
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
put_vp8_epel4_h6
,
6
,
6
+
npicregs
,
0
,
dst
,
dststride
,
src
,
srcstride
,
height
,
mx
,
picreg
lea
mxd
,
[
mxq
*
3
]
%ifdef
PIC
...
...
@@ -673,7 +673,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
FILTER_V
4
INIT_XMM
sse2
FILTER_V
8
...
...
@@ -769,7 +769,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
FILTER_BILINEAR
4
INIT_XMM
sse2
FILTER_BILINEAR
8
...
...
@@ -1611,7 +1611,7 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
INIT_MMX
mmx
SIMPLE_LOOPFILTER
v
,
4
SIMPLE_LOOPFILTER
h
,
5
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
SIMPLE_LOOPFILTER
v
,
4
SIMPLE_LOOPFILTER
h
,
5
%endif
...
...
@@ -1835,7 +1835,7 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
psubusb
m6
,
m5
; q2-q1
por
m6
,
m4
; abs(q2-q1)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m4
,
m_flimI
pxor
m3
,
m3
psubusb
m0
,
m4
...
...
@@ -1875,7 +1875,7 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
psubusb
m1
,
m3
; p1-p0
psubusb
m6
,
m2
; p0-p1
por
m1
,
m6
; abs(p1-p0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m6
,
m1
psubusb
m1
,
m4
psubusb
m6
,
m_hevthr
...
...
@@ -1906,7 +1906,7 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
psubusb
m1
,
m5
; q0-q1
psubusb
m7
,
m4
; q1-q0
por
m1
,
m7
; abs(q1-q0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m7
,
m1
psubusb
m1
,
m6
psubusb
m7
,
m_hevthr
...
...
@@ -2014,14 +2014,14 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
%else
mova
m6
,
m_maskres
%endif
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m7
,
[
pb_1
]
%else
; mmxext/sse2
pxor
m7
,
m7
%endif
pand
m0
,
m6
pand
m1
,
m6
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
paddusb
m0
,
m7
pand
m1
,
[
pb_FE
]
pandn
m7
,
m0
...
...
@@ -2097,7 +2097,7 @@ INNER_LOOPFILTER h, 16
INNER_LOOPFILTER
v
,
8
INNER_LOOPFILTER
h
,
8
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
INNER_LOOPFILTER
v
,
16
INNER_LOOPFILTER
h
,
16
INNER_LOOPFILTER
v
,
8
...
...
@@ -2343,7 +2343,7 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevt
psubusb
m6
,
m5
; q2-q1
por
m6
,
m4
; abs(q2-q1)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m4
,
m_flimI
pxor
m3
,
m3
psubusb
m0
,
m4
...
...
@@ -2383,7 +2383,7 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevt
psubusb
m1
,
m3
; p1-p0
psubusb
m6
,
m2
; p0-p1
por
m1
,
m6
; abs(p1-p0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m6
,
m1
psubusb
m1
,
m4
psubusb
m6
,
m_hevthr
...
...
@@ -2414,7 +2414,7 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevt
psubusb
m1
,
m5
; q0-q1
psubusb
m7
,
m4
; q1-q0
por
m1
,
m7
; abs(q1-q0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m7
,
m1
psubusb
m1
,
m6
psubusb
m7
,
m_hevthr
...
...
@@ -2755,7 +2755,7 @@ MBEDGE_LOOPFILTER h, 16
MBEDGE_LOOPFILTER
v
,
8
MBEDGE_LOOPFILTER
h
,
8
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
MBEDGE_LOOPFILTER
v
,
16
MBEDGE_LOOPFILTER
h
,
16
MBEDGE_LOOPFILTER
v
,
8
...
...
libavcodec/x86/vp8dsp_init.c
View file @
26301caa
...
...
@@ -30,16 +30,16 @@
/*
* MC functions
*/
extern
void
ff_put_vp8_epel4_h4_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_h4_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_h6_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_h6_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_v4_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_v4_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_v6_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_v6_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
...
...
@@ -81,7 +81,7 @@ extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear4_h_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_bilinear4_h_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear8_h_sse2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
...
...
@@ -94,7 +94,7 @@ extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear4_v_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_bilinear4_v_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear8_v_sse2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
...
...
@@ -140,16 +140,16 @@ static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
}
#if ARCH_X86_32
TAP_W8
(
mmx
2
,
epel
,
h4
)
TAP_W8
(
mmx
2
,
epel
,
h6
)
TAP_W16
(
mmx
2
,
epel
,
h6
)
TAP_W8
(
mmx
2
,
epel
,
v4
)
TAP_W8
(
mmx
2
,
epel
,
v6
)
TAP_W16
(
mmx
2
,
epel
,
v6
)
TAP_W8
(
mmx
2
,
bilinear
,
h
)
TAP_W16
(
mmx
2
,
bilinear
,
h
)
TAP_W8
(
mmx
2
,
bilinear
,
v
)
TAP_W16
(
mmx
2
,
bilinear
,
v
)
TAP_W8
(
mmx
ext
,
epel
,
h4
)
TAP_W8
(
mmx
ext
,
epel
,
h6
)
TAP_W16
(
mmx
ext
,
epel
,
h6
)
TAP_W8
(
mmx
ext
,
epel
,
v4
)
TAP_W8
(
mmx
ext
,
epel
,
v6
)
TAP_W16
(
mmx
ext
,
epel
,
v6
)
TAP_W8
(
mmx
ext
,
bilinear
,
h
)
TAP_W16
(
mmx
ext
,
bilinear
,
h
)
TAP_W8
(
mmx
ext
,
bilinear
,
v
)
TAP_W16
(
mmx
ext
,
bilinear
,
v
)
#endif
TAP_W16
(
sse2
,
epel
,
h6
)
...
...
@@ -178,13 +178,13 @@ static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT
#if ARCH_X86_32
#define HVTAPMMX(x, y) \
HVTAP(mmx
2
, 8, x, y, 4, 8) \
HVTAP(mmx
2
, 8, x, y, 8, 16)
HVTAP(mmx
ext
, 8, x, y, 4, 8) \
HVTAP(mmx
ext
, 8, x, y, 8, 16)
HVTAP
(
mmx
2
,
8
,
6
,
6
,
16
,
16
)
HVTAP
(
mmx
ext
,
8
,
6
,
6
,
16
,
16
)
#else
#define HVTAPMMX(x, y) \
HVTAP(mmx
2
, 8, x, y, 4, 8)
HVTAP(mmx
ext
, 8, x, y, 4, 8)
#endif
HVTAPMMX
(
4
,
4
)
...
...
@@ -219,10 +219,10 @@ static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
dst, dststride, tmp, SIZE, height, mx, my); \
}
HVBILIN
(
mmx
2
,
8
,
4
,
8
)
HVBILIN
(
mmx
ext
,
8
,
4
,
8
)
#if ARCH_X86_32
HVBILIN
(
mmx
2
,
8
,
8
,
16
)
HVBILIN
(
mmx
2
,
8
,
16
,
16
)
HVBILIN
(
mmx
ext
,
8
,
8
,
16
)
HVBILIN
(
mmx
ext
,
8
,
16
,
16
)
#endif
HVBILIN
(
sse2
,
8
,
8
,
16
)
HVBILIN
(
sse2
,
8
,
16
,
16
)
...
...
@@ -284,7 +284,7 @@ extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \
int e, int i, int hvt);
DECLARE_LOOP_FILTER
(
mmx
)
DECLARE_LOOP_FILTER
(
mmx
2
)
DECLARE_LOOP_FILTER
(
mmx
ext
)
DECLARE_LOOP_FILTER
(
sse2
)
DECLARE_LOOP_FILTER
(
ssse3
)
DECLARE_LOOP_FILTER
(
sse4
)
...
...
@@ -352,26 +352,26 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
/* note that 4-tap width=16 functions are missing because w=16
* is only used for luma, and luma is always a copy or sixtap. */
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
{
VP8_MC_FUNC
(
2
,
4
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
2
,
4
,
mmx
2
);
VP8_MC_FUNC
(
2
,
4
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
2
,
4
,
mmx
ext
);
#if ARCH_X86_32
VP8_LUMA_MC_FUNC
(
0
,
16
,
mmx
2
);
VP8_MC_FUNC
(
1
,
8
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
0
,
16
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
1
,
8
,
mmx2
);
c
->
vp8_v_loop_filter_simple
=
ff_vp8_v_loop_filter_simple_mmx2
;
c
->
vp8_h_loop_filter_simple
=
ff_vp8_h_loop_filter_simple_mmx2
;
c
->
vp8_v_loop_filter16y_inner
=
ff_vp8_v_loop_filter16y_inner_mmx
2
;
c
->
vp8_h_loop_filter16y_inner
=
ff_vp8_h_loop_filter16y_inner_mmx
2
;
c
->
vp8_v_loop_filter8uv_inner
=
ff_vp8_v_loop_filter8uv_inner_mmx
2
;
c
->
vp8_h_loop_filter8uv_inner
=
ff_vp8_h_loop_filter8uv_inner_mmx
2
;
c
->
vp8_v_loop_filter16y
=
ff_vp8_v_loop_filter16y_mbedge_mmx
2
;
c
->
vp8_h_loop_filter16y
=
ff_vp8_h_loop_filter16y_mbedge_mmx
2
;
c
->
vp8_v_loop_filter8uv
=
ff_vp8_v_loop_filter8uv_mbedge_mmx
2
;
c
->
vp8_h_loop_filter8uv
=
ff_vp8_h_loop_filter8uv_mbedge_mmx
2
;
VP8_LUMA_MC_FUNC
(
0
,
16
,
mmx
ext
);
VP8_MC_FUNC
(
1
,
8
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
0
,
16
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
1
,
8
,
mmxext
);
c
->
vp8_v_loop_filter_simple
=
ff_vp8_v_loop_filter_simple_mmxext
;
c
->
vp8_h_loop_filter_simple
=
ff_vp8_h_loop_filter_simple_mmxext
;
c
->
vp8_v_loop_filter16y_inner
=
ff_vp8_v_loop_filter16y_inner_mmx
ext
;
c
->
vp8_h_loop_filter16y_inner
=
ff_vp8_h_loop_filter16y_inner_mmx
ext
;
c
->
vp8_v_loop_filter8uv_inner
=
ff_vp8_v_loop_filter8uv_inner_mmx
ext
;
c
->
vp8_h_loop_filter8uv_inner
=
ff_vp8_h_loop_filter8uv_inner_mmx
ext
;
c
->
vp8_v_loop_filter16y
=
ff_vp8_v_loop_filter16y_mbedge_mmx
ext
;
c
->
vp8_h_loop_filter16y
=
ff_vp8_h_loop_filter16y_mbedge_mmx
ext
;
c
->
vp8_v_loop_filter8uv
=
ff_vp8_v_loop_filter8uv_mbedge_mmx
ext
;
c
->
vp8_h_loop_filter8uv
=
ff_vp8_h_loop_filter8uv_mbedge_mmx
ext
;
#endif
}
...
...
libavutil/x86/x86util.asm
View file @
26301caa
...
...
@@ -555,7 +555,7 @@
%if
mmsize
==
16
pshuflw
%1
,
%2
,
(
%3
)
*
0x55
punpcklqdq
%1
,
%1
%elif
cpuflag
(
mmx
2
)
%elif
cpuflag
(
mmx
ext
)
pshufw
%1
,
%2
,
(
%3
)
*
0x55
%else
%
ifnidn
%1
,
%2
...
...
libswscale/x86/output.asm
View file @
26301caa
...
...
@@ -247,7 +247,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
%endmacro
%if
ARCH_X86_32
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
yuv2planeX_fn
8
,
0
,
7
yuv2planeX_fn
9
,
0
,
5
yuv2planeX_fn
10
,
0
,
5
...
...
@@ -388,7 +388,7 @@ INIT_MMX mmx
yuv2plane1_fn
8
,
0
,
5
yuv2plane1_fn
16
,
0
,
3
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
yuv2plane1_fn
9
,
0
,
3
yuv2plane1_fn
10
,
0
,
3
%endif
...
...
libswscale/x86/swscale.c
View file @
26301caa
...
...
@@ -250,7 +250,7 @@ extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filter
VSCALEX_FUNC(10, opt)
#if ARCH_X86_32
VSCALEX_FUNCS
(
mmx
2
);
VSCALEX_FUNCS
(
mmx
ext
);
#endif
VSCALEX_FUNCS
(
sse2
);
VSCALEX_FUNCS
(
sse4
);
...
...
@@ -267,7 +267,7 @@ extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst,
VSCALE_FUNC(16, opt1)
#if ARCH_X86_32
VSCALE_FUNCS
(
mmx
,
mmx
2
);
VSCALE_FUNCS
(
mmx
,
mmx
ext
);
#endif
VSCALE_FUNCS
(
sse2
,
sse2
);
VSCALE_FUNC
(
16
,
sse4
);
...
...
@@ -360,7 +360,7 @@ switch(c->dstBpc){ \
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
ASSIGN_MMX_SCALE_FUNC
(
c
->
hyScale
,
c
->
hLumFilterSize
,
mmx
,
mmx
);
ASSIGN_MMX_SCALE_FUNC
(
c
->
hcScale
,
c
->
hChrFilterSize
,
mmx
,
mmx
);
ASSIGN_VSCALE_FUNC
(
c
->
yuv2plane1
,
mmx
,
mmx
2
,
cpu_flags
&
AV_CPU_FLAG_MMXEXT
);
ASSIGN_VSCALE_FUNC
(
c
->
yuv2plane1
,
mmx
,
mmx
ext
,
cpu_flags
&
AV_CPU_FLAG_MMXEXT
);
switch
(
c
->
srcFormat
)
{
case
AV_PIX_FMT_Y400A
:
...
...
@@ -393,7 +393,7 @@ switch(c->dstBpc){ \
}
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
ASSIGN_VSCALEX_FUNC
(
c
->
yuv2planeX
,
mmx
2
,
,
1
);
ASSIGN_VSCALEX_FUNC
(
c
->
yuv2planeX
,
mmx
ext
,
,
1
);
}
#endif
/* ARCH_X86_32 */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment