Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
d8eda370
Commit
d8eda370
authored
Jul 08, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: mmx2 ---> mmxext in function names
parent
38fdf725
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
170 additions
and
145 deletions
+170
-145
dct-test.c
libavcodec/dct-test.c
+2
-2
dsputil.h
libavcodec/dsputil.h
+1
-1
cavsdsp.c
libavcodec/x86/cavsdsp.c
+15
-14
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-0
dsputil_mmx.h
libavcodec/x86/dsputil_mmx.h
+5
-5
dsputilenc_mmx.c
libavcodec/x86/dsputilenc_mmx.c
+16
-9
fdct.c
libavcodec/x86/fdct.c
+4
-3
h264_qpel.c
libavcodec/x86/h264_qpel.c
+27
-27
h264dsp_init.c
libavcodec/x86/h264dsp_init.c
+6
-7
idct_mmx_xvid.c
libavcodec/x86/idct_mmx_xvid.c
+6
-5
idct_xvid.h
libavcodec/x86/idct_xvid.h
+3
-3
motion_est.c
libavcodec/x86/motion_est.c
+20
-16
mpegvideoenc.c
libavcodec/x86/mpegvideoenc.c
+3
-3
vc1dsp_mmx.c
libavcodec/x86/vc1dsp_mmx.c
+36
-29
gradfun.c
libavfilter/x86/gradfun.c
+4
-2
yadif.c
libavfilter/x86/yadif.c
+2
-2
utils.c
libswscale/utils.c
+11
-10
rgb2rgb.c
libswscale/x86/rgb2rgb.c
+2
-2
swscale.c
libswscale/x86/swscale.c
+2
-2
yuv2rgb.c
libswscale/x86/yuv2rgb.c
+5
-3
No files found.
libavcodec/dct-test.c
View file @
d8eda370
...
...
@@ -83,7 +83,7 @@ static const struct algo fdct_tab[] = {
#if HAVE_MMX_INLINE
{
"MMX"
,
ff_fdct_mmx
,
NO_PERM
,
AV_CPU_FLAG_MMX
},
{
"MMXEXT"
,
ff_fdct_mmx
2
,
NO_PERM
,
AV_CPU_FLAG_MMXEXT
},
{
"MMXEXT"
,
ff_fdct_mmx
ext
,
NO_PERM
,
AV_CPU_FLAG_MMXEXT
},
{
"SSE2"
,
ff_fdct_sse2
,
NO_PERM
,
AV_CPU_FLAG_SSE2
},
#endif
...
...
@@ -107,7 +107,7 @@ static const struct algo idct_tab[] = {
#if HAVE_MMX_INLINE
{
"SIMPLE-MMX"
,
ff_simple_idct_mmx
,
MMX_SIMPLE_PERM
,
AV_CPU_FLAG_MMX
},
{
"XVID-MMX"
,
ff_idct_xvid_mmx
,
NO_PERM
,
AV_CPU_FLAG_MMX
,
1
},
{
"XVID-MMXEXT"
,
ff_idct_xvid_mmx
2
,
NO_PERM
,
AV_CPU_FLAG_MMXEXT
,
1
},
{
"XVID-MMXEXT"
,
ff_idct_xvid_mmx
ext
,
NO_PERM
,
AV_CPU_FLAG_MMXEXT
,
1
},
{
"XVID-SSE2"
,
ff_idct_xvid_sse2
,
SSE2_PERM
,
AV_CPU_FLAG_SSE2
,
1
},
#endif
...
...
libavcodec/dsputil.h
View file @
d8eda370
...
...
@@ -49,7 +49,7 @@ void ff_j_rev_dct (DCTELEM *data);
void
ff_wmv2_idct_c
(
DCTELEM
*
data
);
void
ff_fdct_mmx
(
DCTELEM
*
block
);
void
ff_fdct_mmx
2
(
DCTELEM
*
block
);
void
ff_fdct_mmx
ext
(
DCTELEM
*
block
);
void
ff_fdct_sse2
(
DCTELEM
*
block
);
#define H264_IDCT(depth) \
...
...
libavcodec/x86/cavsdsp.c
View file @
d8eda370
...
...
@@ -438,21 +438,22 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui
#endif
/* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
#if HAVE_MMXEXT_INLINE
QPEL_CAVS
(
put_
,
PUT_OP
,
mmx2
)
QPEL_CAVS
(
avg_
,
AVG_MMXEXT_OP
,
mmx2
)
QPEL_CAVS
(
put_
,
PUT_OP
,
mmxext
)
QPEL_CAVS
(
avg_
,
AVG_MMXEXT_OP
,
mmxext
)
CAVS_MC
(
put_
,
8
,
mmx2
)
CAVS_MC
(
put_
,
16
,
mmx2
)
CAVS_MC
(
avg_
,
8
,
mmx2
)
CAVS_MC
(
avg_
,
16
,
mmx2
)
CAVS_MC
(
put_
,
8
,
mmxext
)
CAVS_MC
(
put_
,
16
,
mmxext
)
CAVS_MC
(
avg_
,
8
,
mmxext
)
CAVS_MC
(
avg_
,
16
,
mmxext
)
static
void
ff_cavsdsp_init_mmx2
(
CAVSDSPContext
*
c
,
AVCodecContext
*
avctx
)
{
static
void
ff_cavsdsp_init_mmxext
(
CAVSDSPContext
*
c
,
AVCodecContext
*
avctx
)
{
#define dspfunc(PFX, IDX, NUM) \
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx
2
; \
c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx
2
; \
c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx
2
; \
c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx
2
; \
c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx
2
; \
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx
ext
; \
c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx
ext
; \
c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx
ext
; \
c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx
ext
; \
c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx
ext
; \
dspfunc
(
put_cavs_qpel
,
0
,
16
);
dspfunc
(
put_cavs_qpel
,
1
,
8
);
...
...
@@ -475,7 +476,7 @@ CAVS_MC(avg_, 16,3dnow)
static
void
ff_cavsdsp_init_3dnow
(
CAVSDSPContext
*
c
,
AVCodecContext
*
avctx
)
{
#define dspfunc(PFX, IDX, NUM) \
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx
2
; \
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx
ext
; \
c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
...
...
@@ -496,7 +497,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
int
mm_flags
=
av_get_cpu_flags
();
#if HAVE_MMXEXT_INLINE
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
ff_cavsdsp_init_mmx
2
(
c
,
avctx
);
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
ff_cavsdsp_init_mmx
ext
(
c
,
avctx
);
#endif
/* HAVE_MMXEXT_INLINE */
#if HAVE_AMD3DNOW_INLINE
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
)
ff_cavsdsp_init_3dnow
(
c
,
avctx
);
...
...
libavcodec/x86/dsputil_mmx.c
View file @
d8eda370
This diff is collapsed.
Click to expand it.
libavcodec/x86/dsputil_mmx.h
View file @
d8eda370
...
...
@@ -89,13 +89,13 @@ void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_s
void
ff_put_pixels_clamped_mmx
(
const
DCTELEM
*
block
,
uint8_t
*
pixels
,
int
line_size
);
void
ff_put_signed_pixels_clamped_mmx
(
const
DCTELEM
*
block
,
uint8_t
*
pixels
,
int
line_size
);
void
ff_put_cavs_qpel8_mc00_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_avg_cavs_qpel8_mc00_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_put_cavs_qpel16_mc00_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_avg_cavs_qpel16_mc00_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_put_cavs_qpel8_mc00_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_avg_cavs_qpel8_mc00_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_put_cavs_qpel16_mc00_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_avg_cavs_qpel16_mc00_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
);
void
ff_put_vc1_mspel_mc00_mmx
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
stride
,
int
rnd
);
void
ff_avg_vc1_mspel_mc00_mmx
2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
stride
,
int
rnd
);
void
ff_avg_vc1_mspel_mc00_mmx
ext
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
stride
,
int
rnd
);
void
ff_put_rv40_qpel8_mc33_mmx
(
uint8_t
*
block
,
uint8_t
*
pixels
,
int
line_size
);
void
ff_put_rv40_qpel16_mc33_mmx
(
uint8_t
*
block
,
uint8_t
*
pixels
,
int
line_size
);
...
...
libavcodec/x86/dsputilenc_mmx.c
View file @
d8eda370
...
...
@@ -647,7 +647,9 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
}
#undef SUM
static
int
vsad_intra16_mmx2
(
void
*
v
,
uint8_t
*
pix
,
uint8_t
*
dummy
,
int
line_size
,
int
h
)
{
static
int
vsad_intra16_mmxext
(
void
*
v
,
uint8_t
*
pix
,
uint8_t
*
dummy
,
int
line_size
,
int
h
)
{
int
tmp
;
assert
(
(((
int
)
pix
)
&
7
)
==
0
);
...
...
@@ -765,7 +767,9 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
}
#undef SUM
static
int
vsad16_mmx2
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
static
int
vsad16_mmxext
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
tmp
;
assert
(
(((
int
)
pix1
)
&
7
)
==
0
);
...
...
@@ -844,7 +848,10 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
dst
[
i
+
0
]
=
src1
[
i
+
0
]
-
src2
[
i
+
0
];
}
static
void
sub_hfyu_median_prediction_mmx2
(
uint8_t
*
dst
,
const
uint8_t
*
src1
,
const
uint8_t
*
src2
,
int
w
,
int
*
left
,
int
*
left_top
){
static
void
sub_hfyu_median_prediction_mmxext
(
uint8_t
*
dst
,
const
uint8_t
*
src1
,
const
uint8_t
*
src2
,
int
w
,
int
*
left
,
int
*
left_top
)
{
x86_reg
i
=
0
;
uint8_t
l
,
lt
;
...
...
@@ -976,7 +983,7 @@ DCT_SAD_FUNC(mmx)
#define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst)
#define MMABS(a,z) MMABS_MMXEXT(a,z)
DCT_SAD_FUNC
(
mmx
2
)
DCT_SAD_FUNC
(
mmx
ext
)
#undef HSUM
#undef DCT_SAD
...
...
@@ -1115,7 +1122,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
){
c
->
fdct
=
ff_fdct_sse2
;
}
else
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
{
c
->
fdct
=
ff_fdct_mmx
2
;
c
->
fdct
=
ff_fdct_mmx
ext
;
}
else
{
c
->
fdct
=
ff_fdct_mmx
;
}
...
...
@@ -1148,14 +1155,14 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
ssd_int8_vs_int16
=
ssd_int8_vs_int16_mmx
;
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
{
c
->
sum_abs_dctelem
=
sum_abs_dctelem_mmx2
;
c
->
vsad
[
4
]
=
vsad_intra16_mmx2
;
c
->
sum_abs_dctelem
=
sum_abs_dctelem_mmxext
;
c
->
vsad
[
4
]
=
vsad_intra16_mmxext
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)){
c
->
vsad
[
0
]
=
vsad16_mmx
2
;
c
->
vsad
[
0
]
=
vsad16_mmx
ext
;
}
c
->
sub_hfyu_median_prediction
=
sub_hfyu_median_prediction_mmx2
;
c
->
sub_hfyu_median_prediction
=
sub_hfyu_median_prediction_mmxext
;
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
){
...
...
libavcodec/x86/fdct.c
View file @
d8eda370
...
...
@@ -440,7 +440,8 @@ static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
);
}
static
av_always_inline
void
fdct_row_mmx2
(
const
int16_t
*
in
,
int16_t
*
out
,
const
int16_t
*
table
)
static
av_always_inline
void
fdct_row_mmxext
(
const
int16_t
*
in
,
int16_t
*
out
,
const
int16_t
*
table
)
{
__asm__
volatile
(
"pshufw $0x1B, 8(%0), %%mm5
\n\t
"
...
...
@@ -555,7 +556,7 @@ void ff_fdct_mmx(int16_t *block)
}
}
void
ff_fdct_mmx
2
(
int16_t
*
block
)
void
ff_fdct_mmx
ext
(
int16_t
*
block
)
{
DECLARE_ALIGNED
(
8
,
int64_t
,
align_tmp
)[
16
];
int16_t
*
block1
=
(
int16_t
*
)
align_tmp
;
...
...
@@ -566,7 +567,7 @@ void ff_fdct_mmx2(int16_t *block)
fdct_col_mmx
(
block
,
block1
,
4
);
for
(
i
=
8
;
i
>
0
;
i
--
)
{
fdct_row_mmx
2
(
block1
,
block
,
table
);
fdct_row_mmx
ext
(
block1
,
block
,
table
);
block1
+=
8
;
table
+=
32
;
block
+=
8
;
...
...
libavcodec/x86/h264_qpel.c
View file @
d8eda370
...
...
@@ -1002,36 +1002,36 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
}\
#define put_pixels8_l2_sse2 put_pixels8_l2_mmx
2
#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx
2
#define put_pixels16_l2_sse2 put_pixels16_l2_mmx
2
#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx
2
#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx
2
#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx
2
#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx
2
#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx
2
#define put_pixels8_l2_sse2 put_pixels8_l2_mmx
ext
#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx
ext
#define put_pixels16_l2_sse2 put_pixels16_l2_mmx
ext
#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx
ext
#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx
ext
#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx
ext
#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx
ext
#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx
ext
#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx
2
#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx
2
#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx
2
#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx
2
#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx
2
#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx
2
#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx
2
#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx
2
#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx
ext
#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx
ext
#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx
ext
#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx
ext
#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx
ext
#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx
ext
#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx
ext
#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx
ext
#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx
2
#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx
2
#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx
2
#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx
2
#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx
ext
#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx
ext
#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx
ext
#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx
ext
#define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
#define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
#define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
#define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx
2
#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx
2
#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx
ext
#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx
ext
#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
...
...
@@ -1045,8 +1045,8 @@ static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
static
void
avg_h264_qpel16_mc00_sse2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
){
avg_pixels16_sse2
(
dst
,
src
,
stride
,
16
);
}
#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx
2
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx
2
#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx
ext
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx
ext
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
...
...
@@ -1168,8 +1168,8 @@ QPEL_H264(put_, PUT_OP, 3dnow)
QPEL_H264
(
avg_
,
AVG_3DNOW_OP
,
3
dnow
)
#undef PAVGB
#define PAVGB "pavgb"
QPEL_H264
(
put_
,
PUT_OP
,
mmx2
)
QPEL_H264
(
avg_
,
AVG_MMXEXT_OP
,
mmx2
)
QPEL_H264
(
put_
,
PUT_OP
,
mmxext
)
QPEL_H264
(
avg_
,
AVG_MMXEXT_OP
,
mmxext
)
QPEL_H264_V_XMM
(
put_
,
PUT_OP
,
sse2
)
QPEL_H264_V_XMM
(
avg_
,
AVG_MMXEXT_OP
,
sse2
)
QPEL_H264_HV_XMM
(
put_
,
PUT_OP
,
sse2
)
...
...
@@ -1185,7 +1185,7 @@ QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
#undef PAVGB
H264_MC_4816
(
3
dnow
)
H264_MC_4816
(
mmx
2
)
H264_MC_4816
(
mmx
ext
)
H264_MC_816
(
H264_MC_V
,
sse2
)
H264_MC_816
(
H264_MC_HV
,
sse2
)
#if HAVE_SSSE3_INLINE
...
...
libavcodec/x86/h264dsp_init.c
View file @
d8eda370
...
...
@@ -130,18 +130,17 @@ LF_FUNCS(uint16_t, 10)
#if ARCH_X86_32
LF_FUNC
(
v8
,
luma
,
8
,
mmx2
)
static
void
ff_deblock_v_luma_8_mmx
2
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
)
static
void
ff_deblock_v_luma_8_mmx
ext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
)
{
if
((
tc0
[
0
]
&
tc0
[
1
])
>=
0
)
ff_deblock_v8_luma_8_mmx2
(
pix
+
0
,
stride
,
alpha
,
beta
,
tc0
);
if
((
tc0
[
2
]
&
tc0
[
3
])
>=
0
)
ff_deblock_v8_luma_8_mmx2
(
pix
+
8
,
stride
,
alpha
,
beta
,
tc0
+
2
);
}
LF_IFUNC
(
v8
,
luma_intra
,
8
,
mmx2
)
static
void
ff_deblock_v_luma_intra_8_mmx
2
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
)
static
void
ff_deblock_v_luma_intra_8_mmx
ext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
)
{
ff_deblock_v8_luma_intra_8_mmx2
(
pix
+
0
,
stride
,
alpha
,
beta
);
ff_deblock_v8_luma_intra_8_mmx2
(
pix
+
8
,
stride
,
alpha
,
beta
);
...
...
@@ -246,9 +245,9 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_8_mmx2
;
}
#if ARCH_X86_32
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_8_mmx
2
;
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_8_mmx
ext
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_8_mmx2
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_8_mmx
2
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_8_mmx
ext
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_8_mmx2
;
#endif
/* ARCH_X86_32 */
c
->
weight_h264_pixels_tab
[
0
]
=
ff_h264_weight_16_mmx2
;
...
...
libavcodec/x86/idct_mmx_xvid.c
View file @
d8eda370
...
...
@@ -512,7 +512,8 @@ __asm__ volatile(
//-----------------------------------------------------------------------------
void
ff_idct_xvid_mmx2
(
short
*
block
){
void
ff_idct_xvid_mmxext
(
short
*
block
)
{
__asm__
volatile
(
//# Process each row
DCT_8_INV_ROW_XMM
(
0
*
16
(
%
0
),
0
*
16
(
%
0
),
64
*
0
(
%
2
),
8
*
0
(
%
1
))
...
...
@@ -542,15 +543,15 @@ void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
ff_add_pixels_clamped_mmx
(
block
,
dest
,
line_size
);
}
void
ff_idct_xvid_mmx
2
_put
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
)
void
ff_idct_xvid_mmx
ext
_put
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
)
{
ff_idct_xvid_mmx
2
(
block
);
ff_idct_xvid_mmx
ext
(
block
);
ff_put_pixels_clamped_mmx
(
block
,
dest
,
line_size
);
}
void
ff_idct_xvid_mmx
2
_add
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
)
void
ff_idct_xvid_mmx
ext
_add
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
)
{
ff_idct_xvid_mmx
2
(
block
);
ff_idct_xvid_mmx
ext
(
block
);
ff_add_pixels_clamped_mmx
(
block
,
dest
,
line_size
);
}
...
...
libavcodec/x86/idct_xvid.h
View file @
d8eda370
...
...
@@ -34,9 +34,9 @@ void ff_idct_xvid_mmx(short *block);
void
ff_idct_xvid_mmx_put
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_idct_xvid_mmx_add
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_idct_xvid_mmx
2
(
short
*
block
);
void
ff_idct_xvid_mmx
2
_put
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_idct_xvid_mmx
2
_add
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_idct_xvid_mmx
ext
(
short
*
block
);
void
ff_idct_xvid_mmx
ext
_put
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_idct_xvid_mmx
ext
_add
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_idct_xvid_sse2
(
short
*
block
);
void
ff_idct_xvid_sse2_put
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
...
...
libavcodec/x86/motion_est.c
View file @
d8eda370
...
...
@@ -74,7 +74,8 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
);
}
static
inline
void
sad8_1_mmx2
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
static
inline
void
sad8_1_mmxext
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
{
__asm__
volatile
(
".p2align 4
\n\t
"
...
...
@@ -120,7 +121,8 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
return
ret
;
}
static
inline
void
sad8_x2a_mmx2
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
static
inline
void
sad8_x2a_mmxext
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
{
__asm__
volatile
(
".p2align 4
\n\t
"
...
...
@@ -142,7 +144,8 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
);
}
static
inline
void
sad8_y2a_mmx2
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
static
inline
void
sad8_y2a_mmxext
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
{
__asm__
volatile
(
"movq (%1), %%mm0
\n\t
"
...
...
@@ -167,7 +170,8 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
);
}
static
inline
void
sad8_4_mmx2
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
static
inline
void
sad8_4_mmxext
(
uint8_t
*
blk1
,
uint8_t
*
blk2
,
int
stride
,
int
h
)
{
__asm__
volatile
(
"movq "
MANGLE
(
bone
)
", %%mm5
\n\t
"
...
...
@@ -304,7 +308,7 @@ static inline int sum_mmx(void)
return
ret
&
0xFFFF
;
}
static
inline
int
sum_mmx
2
(
void
)
static
inline
int
sum_mmx
ext
(
void
)
{
int
ret
;
__asm__
volatile
(
...
...
@@ -424,7 +428,7 @@ static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride,
}\
PIX_SAD
(
mmx
)
PIX_SAD
(
mmx
2
)
PIX_SAD
(
mmx
ext
)
#endif
/* HAVE_INLINE_ASM */
...
...
@@ -447,19 +451,19 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
sad
[
1
]
=
sad8_mmx
;
}
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
{
c
->
pix_abs
[
0
][
0
]
=
sad16_mmx
2
;
c
->
pix_abs
[
1
][
0
]
=
sad8_mmx
2
;
c
->
pix_abs
[
0
][
0
]
=
sad16_mmx
ext
;
c
->
pix_abs
[
1
][
0
]
=
sad8_mmx
ext
;
c
->
sad
[
0
]
=
sad16_mmx2
;
c
->
sad
[
1
]
=
sad8_mmx2
;
c
->
sad
[
0
]
=
sad16_mmxext
;
c
->
sad
[
1
]
=
sad8_mmxext
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)){
c
->
pix_abs
[
0
][
1
]
=
sad16_x2_mmx
2
;
c
->
pix_abs
[
0
][
2
]
=
sad16_y2_mmx
2
;
c
->
pix_abs
[
0
][
3
]
=
sad16_xy2_mmx
2
;
c
->
pix_abs
[
1
][
1
]
=
sad8_x2_mmx
2
;
c
->
pix_abs
[
1
][
2
]
=
sad8_y2_mmx
2
;
c
->
pix_abs
[
1
][
3
]
=
sad8_xy2_mmx
2
;
c
->
pix_abs
[
0
][
1
]
=
sad16_x2_mmx
ext
;
c
->
pix_abs
[
0
][
2
]
=
sad16_y2_mmx
ext
;
c
->
pix_abs
[
0
][
3
]
=
sad16_xy2_mmx
ext
;
c
->
pix_abs
[
1
][
1
]
=
sad8_x2_mmx
ext
;
c
->
pix_abs
[
1
][
2
]
=
sad8_y2_mmx
ext
;
c
->
pix_abs
[
1
][
3
]
=
sad8_xy2_mmx
ext
;
}
}
if
((
mm_flags
&
AV_CPU_FLAG_SSE2
)
&&
!
(
mm_flags
&
AV_CPU_FLAG_3DNOW
)
&&
avctx
->
codec_id
!=
AV_CODEC_ID_SNOW
)
{
...
...
libavcodec/x86/mpegvideoenc.c
View file @
d8eda370
...
...
@@ -47,8 +47,8 @@ extern uint16_t ff_inv_zigzag_direct16[64];
#define COMPILE_TEMPLATE_SSSE3 0
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _MMX
2
#define RENAMEl(a) a ## _mmx
2
#define RENAME(a) a ## _MMX
EXT
#define RENAMEl(a) a ## _mmx
ext
#include "mpegvideoenc_template.c"
#endif
/* HAVE_MMXEXT_INLINE */
...
...
@@ -92,7 +92,7 @@ void ff_MPV_encode_init_x86(MpegEncContext *s)
#endif
#if HAVE_MMXEXT_INLINE
if
(
INLINE_MMXEXT
(
mm_flags
))
s
->
dct_quantize
=
dct_quantize_MMX
2
;
s
->
dct_quantize
=
dct_quantize_MMX
EXT
;
#endif
#if HAVE_SSE2_INLINE
if
(
INLINE_SSE2
(
mm_flags
))
...
...
libavcodec/x86/vc1dsp_mmx.c
View file @
d8eda370
...
...
@@ -467,7 +467,10 @@ VC1_MSPEL_MC(avg_)
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}\
static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \
const uint8_t *src, \
int stride, int rnd) \
{ \
avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}
...
...
@@ -490,7 +493,8 @@ DECLARE_FUNCTION(3, 1)
DECLARE_FUNCTION
(
3
,
2
)
DECLARE_FUNCTION
(
3
,
3
)
static
void
vc1_inv_trans_4x4_dc_mmx2
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
static
void
vc1_inv_trans_4x4_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
{
int
dc
=
block
[
0
];
dc
=
(
17
*
dc
+
4
)
>>
3
;
...
...
@@ -528,7 +532,8 @@ static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
);
}
static
void
vc1_inv_trans_4x8_dc_mmx2
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
static
void
vc1_inv_trans_4x8_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
{
int
dc
=
block
[
0
];
dc
=
(
17
*
dc
+
4
)
>>
3
;
...
...
@@ -589,7 +594,8 @@ static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
);
}
static
void
vc1_inv_trans_8x4_dc_mmx2
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
static
void
vc1_inv_trans_8x4_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
{
int
dc
=
block
[
0
];
dc
=
(
3
*
dc
+
1
)
>>
1
;
...
...
@@ -627,7 +633,8 @@ static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
);
}
static
void
vc1_inv_trans_8x8_dc_mmx2
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
static
void
vc1_inv_trans_8x8_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
DCTELEM
*
block
)
{
int
dc
=
block
[
0
];
dc
=
(
3
*
dc
+
1
)
>>
1
;
...
...
@@ -713,29 +720,29 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
av_cold
void
ff_vc1dsp_init_mmxext
(
VC1DSPContext
*
dsp
)
{
dsp
->
avg_vc1_mspel_pixels_tab
[
0
]
=
ff_avg_vc1_mspel_mc00_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
4
]
=
avg_vc1_mspel_mc01_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
8
]
=
avg_vc1_mspel_mc02_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
12
]
=
avg_vc1_mspel_mc03_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
1
]
=
avg_vc1_mspel_mc10_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
5
]
=
avg_vc1_mspel_mc11_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
9
]
=
avg_vc1_mspel_mc12_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
13
]
=
avg_vc1_mspel_mc13_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
2
]
=
avg_vc1_mspel_mc20_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
6
]
=
avg_vc1_mspel_mc21_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
10
]
=
avg_vc1_mspel_mc22_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
14
]
=
avg_vc1_mspel_mc23_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
3
]
=
avg_vc1_mspel_mc30_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
7
]
=
avg_vc1_mspel_mc31_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
11
]
=
avg_vc1_mspel_mc32_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
15
]
=
avg_vc1_mspel_mc33_mmx
2
;
dsp
->
vc1_inv_trans_8x8_dc
=
vc1_inv_trans_8x8_dc_mmx
2
;
dsp
->
vc1_inv_trans_4x8_dc
=
vc1_inv_trans_4x8_dc_mmx
2
;
dsp
->
vc1_inv_trans_8x4_dc
=
vc1_inv_trans_8x4_dc_mmx
2
;
dsp
->
vc1_inv_trans_4x4_dc
=
vc1_inv_trans_4x4_dc_mmx
2
;
dsp
->
avg_vc1_mspel_pixels_tab
[
0
]
=
ff_avg_vc1_mspel_mc00_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
4
]
=
avg_vc1_mspel_mc01_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
8
]
=
avg_vc1_mspel_mc02_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
12
]
=
avg_vc1_mspel_mc03_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
1
]
=
avg_vc1_mspel_mc10_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
5
]
=
avg_vc1_mspel_mc11_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
9
]
=
avg_vc1_mspel_mc12_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
13
]
=
avg_vc1_mspel_mc13_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
2
]
=
avg_vc1_mspel_mc20_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
6
]
=
avg_vc1_mspel_mc21_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
10
]
=
avg_vc1_mspel_mc22_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
14
]
=
avg_vc1_mspel_mc23_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
3
]
=
avg_vc1_mspel_mc30_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
7
]
=
avg_vc1_mspel_mc31_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
11
]
=
avg_vc1_mspel_mc32_mmx
ext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
15
]
=
avg_vc1_mspel_mc33_mmx
ext
;
dsp
->
vc1_inv_trans_8x8_dc
=
vc1_inv_trans_8x8_dc_mmx
ext
;
dsp
->
vc1_inv_trans_4x8_dc
=
vc1_inv_trans_4x8_dc_mmx
ext
;
dsp
->
vc1_inv_trans_8x4_dc
=
vc1_inv_trans_8x4_dc_mmx
ext
;
dsp
->
vc1_inv_trans_4x4_dc
=
vc1_inv_trans_4x4_dc_mmx
ext
;
}
#endif
/* HAVE_INLINE_ASM */
libavfilter/x86/gradfun.c
View file @
d8eda370
...
...
@@ -30,7 +30,9 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F
DECLARE_ALIGNED
(
16
,
static
const
uint16_t
,
pw_ff
)[
8
]
=
{
0xFF
,
0xFF
,
0xFF
,
0xFF
,
0xFF
,
0xFF
,
0xFF
,
0xFF
};
#if HAVE_MMXEXT_INLINE
static
void
gradfun_filter_line_mmx2
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint16_t
*
dc
,
int
width
,
int
thresh
,
const
uint16_t
*
dithers
)
static
void
gradfun_filter_line_mmxext
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint16_t
*
dc
,
int
width
,
int
thresh
,
const
uint16_t
*
dithers
)
{
intptr_t
x
;
if
(
width
&
3
)
{
...
...
@@ -175,7 +177,7 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
#if HAVE_MMXEXT_INLINE
if
(
cpu_flags
&
AV_CPU_FLAG_MMXEXT
)
gf
->
filter_line
=
gradfun_filter_line_mmx
2
;
gf
->
filter_line
=
gradfun_filter_line_mmx
ext
;
#endif
#if HAVE_SSSE3_INLINE
if
(
cpu_flags
&
AV_CPU_FLAG_SSSE3
)
...
...
libavfilter/x86/yadif.c
View file @
d8eda370
...
...
@@ -49,7 +49,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
#if HAVE_MMXEXT_INLINE
#undef RENAME
#define RENAME(a) a ## _mmx
2
#define RENAME(a) a ## _mmx
ext
#include "yadif_template.c"
#endif
...
...
@@ -61,7 +61,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
#if HAVE_MMXEXT_INLINE
if
(
cpu_flags
&
AV_CPU_FLAG_MMXEXT
)
yadif
->
filter_line
=
yadif_filter_line_mmx
2
;
yadif
->
filter_line
=
yadif_filter_line_mmx
ext
;
#endif
#if HAVE_SSE2_INLINE
if
(
cpu_flags
&
AV_CPU_FLAG_SSE2
)
...
...
libswscale/utils.c
View file @
d8eda370
...
...
@@ -600,8 +600,9 @@ fail:
}
#if HAVE_MMXEXT_INLINE
static
int
initMMX2HScaler
(
int
dstW
,
int
xInc
,
uint8_t
*
filterCode
,
int16_t
*
filter
,
int32_t
*
filterPos
,
int
numSplits
)
static
int
init_hscaler_mmxext
(
int
dstW
,
int
xInc
,
uint8_t
*
filterCode
,
int16_t
*
filter
,
int32_t
*
filterPos
,
int
numSplits
)
{
uint8_t
*
fragmentA
;
x86_reg
imm8OfPShufW1A
;
...
...
@@ -1043,10 +1044,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
#if HAVE_MMXEXT_INLINE
// can't downscale !!!
if
(
c
->
canMMXEXTBeUsed
&&
(
flags
&
SWS_FAST_BILINEAR
))
{
c
->
lumMmxextFilterCodeSize
=
init
MMX2HScaler
(
dstW
,
c
->
lumXInc
,
NULL
,
NULL
,
NULL
,
8
);
c
->
chrMmxextFilterCodeSize
=
init
MMX2HScaler
(
c
->
chrDstW
,
c
->
chrXInc
,
NULL
,
NULL
,
NULL
,
4
);
c
->
lumMmxextFilterCodeSize
=
init
_hscaler_mmxext
(
dstW
,
c
->
lumXInc
,
NULL
,
NULL
,
NULL
,
8
);
c
->
chrMmxextFilterCodeSize
=
init
_hscaler_mmxext
(
c
->
chrDstW
,
c
->
chrXInc
,
NULL
,
NULL
,
NULL
,
4
);
#if USE_MMAP
c
->
lumMmxextFilterCode
=
mmap
(
NULL
,
c
->
lumMmxextFilterCodeSize
,
...
...
@@ -1078,10 +1079,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
FF_ALLOCZ_OR_GOTO
(
c
,
c
->
hLumFilterPos
,
(
dstW
/
2
/
8
+
8
)
*
sizeof
(
int32_t
),
fail
);
FF_ALLOCZ_OR_GOTO
(
c
,
c
->
hChrFilterPos
,
(
c
->
chrDstW
/
2
/
4
+
8
)
*
sizeof
(
int32_t
),
fail
);
init
MMX2HScaler
(
dstW
,
c
->
lumXInc
,
c
->
lumMmxextFilterCode
,
c
->
hLumFilter
,
c
->
hLumFilterPos
,
8
);
init
MMX2HScaler
(
c
->
chrDstW
,
c
->
chrXInc
,
c
->
chrMmxextFilterCode
,
c
->
hChrFilter
,
c
->
hChrFilterPos
,
4
);
init
_hscaler_mmxext
(
dstW
,
c
->
lumXInc
,
c
->
lumMmxextFilterCode
,
c
->
hLumFilter
,
c
->
hLumFilterPos
,
8
);
init
_hscaler_mmxext
(
c
->
chrDstW
,
c
->
chrXInc
,
c
->
chrMmxextFilterCode
,
c
->
hChrFilter
,
c
->
hChrFilterPos
,
4
);
#if USE_MMAP
mprotect
(
c
->
lumMmxextFilterCode
,
c
->
lumMmxextFilterCodeSize
,
PROT_EXEC
|
PROT_READ
);
...
...
libswscale/x86/rgb2rgb.c
View file @
d8eda370
...
...
@@ -99,7 +99,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _MMX
2
#define RENAME(a) a ## _MMX
EXT
#include "rgb2rgb_template.c"
//SSE2 versions
...
...
@@ -139,7 +139,7 @@ av_cold void rgb2rgb_init_x86(void)
if
(
INLINE_AMD3DNOW
(
cpu_flags
))
rgb2rgb_init_3DNOW
();
if
(
INLINE_MMXEXT
(
cpu_flags
))
rgb2rgb_init_MMX
2
();
rgb2rgb_init_MMX
EXT
();
if
(
INLINE_SSE2
(
cpu_flags
))
rgb2rgb_init_SSE2
();
#endif
/* HAVE_INLINE_ASM */
...
...
libswscale/x86/swscale.c
View file @
d8eda370
...
...
@@ -83,7 +83,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _MMX
2
#define RENAME(a) a ## _MMX
EXT
#include "swscale_template.c"
#endif
...
...
@@ -311,7 +311,7 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
sws_init_swScale_MMX
(
c
);
#if HAVE_MMXEXT_INLINE
if
(
cpu_flags
&
AV_CPU_FLAG_MMXEXT
)
sws_init_swScale_MMX
2
(
c
);
sws_init_swScale_MMX
EXT
(
c
);
#endif
#endif
/* HAVE_INLINE_ASM */
...
...
libswscale/x86/yuv2rgb.c
View file @
d8eda370
...
...
@@ -63,7 +63,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _MMX
2
#define RENAME(a) a ## _MMX
EXT
#include "yuv2rgb_template.c"
#endif
/* HAVE_MMXEXT_INLINE */
...
...
@@ -81,8 +81,10 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
#if HAVE_MMXEXT_INLINE
if
(
cpu_flags
&
AV_CPU_FLAG_MMXEXT
)
{
switch
(
c
->
dstFormat
)
{
case
AV_PIX_FMT_RGB24
:
return
yuv420_rgb24_MMX2
;
case
AV_PIX_FMT_BGR24
:
return
yuv420_bgr24_MMX2
;
case
AV_PIX_FMT_RGB24
:
return
yuv420_rgb24_MMXEXT
;
case
AV_PIX_FMT_BGR24
:
return
yuv420_bgr24_MMXEXT
;
}
}
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment