Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
e66149e7
Commit
e66149e7
authored
May 24, 2011
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: force --enable-runtime-cpudetect and remove SWS_CPU_CAPS_*.
parent
75abcdb3
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
171 additions
and
340 deletions
+171
-340
swscale_bfin.c
libswscale/bfin/swscale_bfin.c
+9
-11
colorspace-test.c
libswscale/colorspace-test.c
+1
-28
options.c
libswscale/options.c
+0
-6
yuv2rgb_altivec.c
libswscale/ppc/yuv2rgb_altivec.c
+2
-1
rgb2rgb.c
libswscale/rgb2rgb.c
+3
-4
rgb2rgb.h
libswscale/rgb2rgb.h
+2
-2
rgb2rgb_template.c
libswscale/rgb2rgb_template.c
+0
-19
swscale.c
libswscale/swscale.c
+25
-109
swscale.h
libswscale/swscale.h
+0
-7
swscale_internal.h
libswscale/swscale_internal.h
+0
-5
utils.c
libswscale/utils.c
+54
-66
rgb2rgb.c
libswscale/x86/rgb2rgb.c
+11
-10
swscale_template.c
libswscale/x86/swscale_template.c
+4
-3
yuv2rgb_mmx.c
libswscale/x86/yuv2rgb_mmx.c
+36
-34
yuv2rgb_template.c
libswscale/x86/yuv2rgb_template.c
+13
-18
yuv2rgb.c
libswscale/yuv2rgb.c
+11
-17
No files found.
libswscale/bfin/swscale_bfin.c
View file @
e66149e7
...
...
@@ -79,15 +79,13 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i
void
ff_bfin_get_unscaled_swscale
(
SwsContext
*
c
)
{
SwsFunc
swScale
=
c
->
swScale
;
if
(
c
->
flags
&
SWS_CPU_CAPS_BFIN
)
if
(
c
->
dstFormat
==
PIX_FMT_YUV420P
)
if
(
c
->
srcFormat
==
PIX_FMT_UYVY422
)
{
av_log
(
NULL
,
AV_LOG_VERBOSE
,
"selecting Blackfin optimized uyvytoyv12_unscaled
\n
"
);
c
->
swScale
=
uyvytoyv12_unscaled
;
}
if
(
c
->
dstFormat
==
PIX_FMT_YUV420P
)
if
(
c
->
srcFormat
==
PIX_FMT_YUYV422
)
{
av_log
(
NULL
,
AV_LOG_VERBOSE
,
"selecting Blackfin optimized yuyvtoyv12_unscaled
\n
"
);
c
->
swScale
=
yuyvtoyv12_unscaled
;
}
if
(
c
->
dstFormat
==
PIX_FMT_YUV420P
&&
c
->
srcFormat
==
PIX_FMT_UYVY422
)
{
av_log
(
NULL
,
AV_LOG_VERBOSE
,
"selecting Blackfin optimized uyvytoyv12_unscaled
\n
"
);
c
->
swScale
=
uyvytoyv12_unscaled
;
}
if
(
c
->
dstFormat
==
PIX_FMT_YUV420P
&&
c
->
srcFormat
==
PIX_FMT_YUYV422
)
{
av_log
(
NULL
,
AV_LOG_VERBOSE
,
"selecting Blackfin optimized yuyvtoyv12_unscaled
\n
"
);
c
->
swScale
=
yuyvtoyv12_unscaled
;
}
}
libswscale/colorspace-test.c
View file @
e66149e7
...
...
@@ -33,31 +33,6 @@
#define FUNC(s,d,n) {s,d,#n,n}
static
int
cpu_caps
;
static
char
*
args_parse
(
int
argc
,
char
*
argv
[])
{
int
o
;
while
((
o
=
getopt
(
argc
,
argv
,
"m23"
))
!=
-
1
)
{
switch
(
o
)
{
case
'm'
:
cpu_caps
|=
SWS_CPU_CAPS_MMX
;
break
;
case
'2'
:
cpu_caps
|=
SWS_CPU_CAPS_MMX2
;
break
;
case
'3'
:
cpu_caps
|=
SWS_CPU_CAPS_3DNOW
;
break
;
default:
av_log
(
NULL
,
AV_LOG_ERROR
,
"Unknown option %c
\n
"
,
o
);
}
}
return
argv
[
optind
];
}
int
main
(
int
argc
,
char
**
argv
)
{
int
i
,
funcNum
;
...
...
@@ -70,9 +45,7 @@ int main(int argc, char **argv)
return
-
1
;
av_log
(
NULL
,
AV_LOG_INFO
,
"memory corruption test ...
\n
"
);
args_parse
(
argc
,
argv
);
av_log
(
NULL
,
AV_LOG_INFO
,
"CPU capabilities forced to %x
\n
"
,
cpu_caps
);
sws_rgb2rgb_init
(
cpu_caps
);
sws_rgb2rgb_init
();
for
(
funcNum
=
0
;
;
funcNum
++
)
{
struct
func_info_s
{
...
...
libswscale/options.c
View file @
e66149e7
...
...
@@ -48,12 +48,6 @@ static const AVOption options[] = {
{
"spline"
,
"natural bicubic spline"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_SPLINE
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"print_info"
,
"print info"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_PRINT_INFO
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"accurate_rnd"
,
"accurate rounding"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_ACCURATE_RND
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"mmx"
,
"MMX SIMD acceleration"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_CPU_CAPS_MMX
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"mmx2"
,
"MMX2 SIMD acceleration"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_CPU_CAPS_MMX2
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"sse2"
,
"SSE2 SIMD acceleration"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_CPU_CAPS_SSE2
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"3dnow"
,
"3DNOW SIMD acceleration"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_CPU_CAPS_3DNOW
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"altivec"
,
"AltiVec SIMD acceleration"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_CPU_CAPS_ALTIVEC
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"bfin"
,
"Blackfin SIMD acceleration"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_CPU_CAPS_BFIN
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"full_chroma_int"
,
"full chroma interpolation"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_FULL_CHR_H_INT
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"full_chroma_inp"
,
"full chroma input"
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_FULL_CHR_H_INP
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
{
"bitexact"
,
""
,
0
,
FF_OPT_TYPE_CONST
,
{.
dbl
=
SWS_BITEXACT
},
INT_MIN
,
INT_MAX
,
VE
,
"sws_flags"
},
...
...
libswscale/ppc/yuv2rgb_altivec.c
View file @
e66149e7
...
...
@@ -94,6 +94,7 @@ adjustment.
#include "libswscale/rgb2rgb.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/cpu.h"
#undef PROFILE_THE_BEAST
#undef INC_SCALING
...
...
@@ -692,7 +693,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
*/
SwsFunc
ff_yuv2rgb_init_altivec
(
SwsContext
*
c
)
{
if
(
!
(
c
->
flags
&
SWS_CPU_CAPS
_ALTIVEC
))
if
(
!
(
av_get_cpu_flags
()
&
AV_CPU_FLAG
_ALTIVEC
))
return
NULL
;
/*
...
...
libswscale/rgb2rgb.c
View file @
e66149e7
...
...
@@ -116,12 +116,11 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t
32-bit C version, and and&add trick by Michael Niedermayer
*/
void
sws_rgb2rgb_init
(
int
flags
)
void
sws_rgb2rgb_init
(
void
)
{
rgb2rgb_init_c
();
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
rgb2rgb_init_x86
(
flags
);
#endif
/* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
if
(
HAVE_MMX
)
rgb2rgb_init_x86
();
}
void
rgb32to24
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
long
src_size
)
...
...
libswscale/rgb2rgb.h
View file @
e66149e7
...
...
@@ -156,8 +156,8 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
long
width
,
long
height
,
long
lumStride
,
long
chromStride
,
long
srcStride
);
void
sws_rgb2rgb_init
(
int
flags
);
void
sws_rgb2rgb_init
(
void
);
void
rgb2rgb_init_x86
(
int
flags
);
void
rgb2rgb_init_x86
(
void
);
#endif
/* SWSCALE_RGB2RGB_H */
libswscale/rgb2rgb_template.c
View file @
e66149e7
...
...
@@ -278,25 +278,6 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
}
}
/*
* mm0 = 00 B3 00 B2 00 B1 00 B0
* mm1 = 00 G3 00 G2 00 G1 00 G0
* mm2 = 00 R3 00 R2 00 R1 00 R0
* mm6 = FF FF FF FF FF FF FF FF
* mm7 = 00 00 00 00 00 00 00 00
*/
#define PACK_RGB32 \
"packuswb %%mm7, %%mm0 \n\t"
/* 00 00 00 00 B3 B2 B1 B0 */
\
"packuswb %%mm7, %%mm1 \n\t"
/* 00 00 00 00 G3 G2 G1 G0 */
\
"packuswb %%mm7, %%mm2 \n\t"
/* 00 00 00 00 R3 R2 R1 R0 */
\
"punpcklbw %%mm1, %%mm0 \n\t"
/* G3 B3 G2 B2 G1 B1 G0 B0 */
\
"punpcklbw %%mm6, %%mm2 \n\t"
/* FF R3 FF R2 FF R1 FF R0 */
\
"movq %%mm0, %%mm3 \n\t" \
"punpcklwd %%mm2, %%mm0 \n\t"
/* FF R1 G1 B1 FF R0 G0 B0 */
\
"punpckhwd %%mm2, %%mm3 \n\t"
/* FF R3 G3 B3 FF R2 G2 B2 */
\
MOVNTQ" %%mm0, %0 \n\t" \
MOVNTQ" %%mm3, 8%0 \n\t" \
static
inline
void
rgb15to32_c
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
long
src_size
)
{
const
uint16_t
*
end
;
...
...
libswscale/swscale.c
View file @
e66149e7
...
...
@@ -62,6 +62,7 @@ untested special converters
#include "rgb2rgb.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h"
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
...
...
@@ -70,10 +71,6 @@ untested special converters
#undef MOVNTQ
#undef PAVGB
//#undef HAVE_MMX2
//#define HAVE_AMD3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
#define DITHER1XBPP
#define isPacked(x) ( \
...
...
@@ -1179,57 +1176,14 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
//Plain C versions
#if CONFIG_RUNTIME_CPUDETECT
# define COMPILE_C 1
# if ARCH_X86
# define COMPILE_MMX HAVE_MMX
# define COMPILE_MMX2 HAVE_MMX2
# define COMPILE_3DNOW HAVE_AMD3DNOW
# elif ARCH_PPC
# define COMPILE_ALTIVEC HAVE_ALTIVEC
# endif
#else
/* CONFIG_RUNTIME_CPUDETECT */
# if ARCH_X86
# if HAVE_MMX2
# define COMPILE_MMX2 1
# elif HAVE_AMD3DNOW
# define COMPILE_3DNOW 1
# elif HAVE_MMX
# define COMPILE_MMX 1
# else
# define COMPILE_C 1
# endif
# elif ARCH_PPC && HAVE_ALTIVEC
# define COMPILE_ALTIVEC 1
# else
# define COMPILE_C 1
# endif
#endif
#ifndef COMPILE_C
# define COMPILE_C 0
#endif
#ifndef COMPILE_MMX
# define COMPILE_MMX 0
#endif
#ifndef COMPILE_MMX2
# define COMPILE_MMX2 0
#endif
#ifndef COMPILE_3DNOW
# define COMPILE_3DNOW 0
#endif
#ifndef COMPILE_ALTIVEC
# define COMPILE_ALTIVEC 0
#endif
#define COMPILE_TEMPLATE_MMX 0
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_ALTIVEC 0
#include "swscale_template.c"
#if
COMPIL
E_ALTIVEC
#if
HAV
E_ALTIVEC
#undef RENAME
#undef COMPILE_TEMPLATE_ALTIVEC
#define COMPILE_TEMPLATE_ALTIVEC 1
...
...
@@ -1237,15 +1191,11 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#include "ppc/swscale_template.c"
#endif
#if ARCH_X86
//MMX versions
#if
COMPIL
E_MMX
#if
HAV
E_MMX
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
...
...
@@ -1253,12 +1203,10 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#endif
//MMX2 versions
#if
COMPIL
E_MMX2
#if
HAV
E_MMX2
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 1
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
...
...
@@ -1266,61 +1214,47 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#endif
//3DNOW versions
#if
COMPILE_
3DNOW
#if
HAVE_AMD
3DNOW
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "x86/swscale_template.c"
#endif
#endif //ARCH_X86
SwsFunc
ff_getSwsFunc
(
SwsContext
*
c
)
{
int
cpu_flags
=
av_get_cpu_flags
();
sws_init_swScale_c
(
c
);
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
#if HAVE_MMX2
// ordered per speed fastest first
if
(
c
->
flags
&
SWS_CPU_CAPS
_MMX2
)
{
if
(
c
pu_flags
&
AV_CPU_FLAG
_MMX2
)
{
sws_init_swScale_MMX2
(
c
);
return
swScale_MMX2
;
}
else
if
(
c
->
flags
&
SWS_CPU_CAPS_3DNOW
)
{
}
else
#endif
#if HAVE_AMD3DNOW
if
(
cpu_flags
&
AV_CPU_FLAG_3DNOW
)
{
sws_init_swScale_3DNow
(
c
);
return
swScale_3DNow
;
}
else
if
(
c
->
flags
&
SWS_CPU_CAPS_MMX
)
{
}
else
#endif
#if HAVE_MMX
if
(
cpu_flags
&
AV_CPU_FLAG_MMX
)
{
sws_init_swScale_MMX
(
c
);
return
swScale_MMX
;
}
#else
#if COMPILE_ALTIVEC
if
(
c
->
flags
&
SWS_CPU_CAPS_ALTIVEC
)
{
}
else
#endif
#if HAVE_ALTIVEC
if
(
cpu_flags
&
AV_CPU_FLAG_ALTIVEC
)
{
sws_init_swScale_altivec
(
c
);
return
swScale_altivec
;
}
#endif
#endif
/* ARCH_X86 */
#else //CONFIG_RUNTIME_CPUDETECT
#if COMPILE_TEMPLATE_MMX2
sws_init_swScale_MMX2
(
c
);
return
swScale_MMX2
;
#elif COMPILE_TEMPLATE_AMD3DNOW
sws_init_swScale_3DNow
(
c
);
return
swScale_3DNow
;
#elif COMPILE_TEMPLATE_MMX
sws_init_swScale_MMX
(
c
);
return
swScale_MMX
;
#elif COMPILE_TEMPLATE_ALTIVEC
sws_init_swScale_altivec
(
c
);
return
swScale_altivec
;
}
else
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
return
swScale_c
;
}
...
...
@@ -1864,23 +1798,6 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
return
srcSliceH
;
}
int
ff_hardcodedcpuflags
(
void
)
{
int
flags
=
0
;
#if COMPILE_TEMPLATE_MMX2
flags
|=
SWS_CPU_CAPS_MMX
|
SWS_CPU_CAPS_MMX2
;
#elif COMPILE_TEMPLATE_AMD3DNOW
flags
|=
SWS_CPU_CAPS_MMX
|
SWS_CPU_CAPS_3DNOW
;
#elif COMPILE_TEMPLATE_MMX
flags
|=
SWS_CPU_CAPS_MMX
;
#elif COMPILE_TEMPLATE_ALTIVEC
flags
|=
SWS_CPU_CAPS_ALTIVEC
;
#elif ARCH_BFIN
flags
|=
SWS_CPU_CAPS_BFIN
;
#endif
return
flags
;
}
void
ff_get_unscaled_swscale
(
SwsContext
*
c
)
{
const
enum
PixelFormat
srcFormat
=
c
->
srcFormat
;
...
...
@@ -1964,8 +1881,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
if
(
srcFormat
==
PIX_FMT_UYVY422
&&
dstFormat
==
PIX_FMT_YUV422P
)
c
->
swScale
=
uyvyToYuv422Wrapper
;
#if
COMPIL
E_ALTIVEC
if
((
c
->
flags
&
SWS_CPU_CAPS
_ALTIVEC
)
&&
#if
HAV
E_ALTIVEC
if
((
av_get_cpu_flags
()
&
AV_CPU_FLAG
_ALTIVEC
)
&&
!
(
c
->
flags
&
SWS_BITEXACT
)
&&
srcFormat
==
PIX_FMT_YUV420P
)
{
// unscaled YV12 -> packed YUV, we want speed
...
...
@@ -1995,8 +1912,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
c
->
swScale
=
planarCopyWrapper
;
}
#if ARCH_BFIN
if
(
flags
&
SWS_CPU_CAPS_BFIN
)
ff_bfin_get_unscaled_swscale
(
c
);
ff_bfin_get_unscaled_swscale
(
c
);
#endif
}
...
...
libswscale/swscale.h
View file @
e66149e7
...
...
@@ -95,13 +95,6 @@ const char *swscale_license(void);
#define SWS_ACCURATE_RND 0x40000
#define SWS_BITEXACT 0x80000
#define SWS_CPU_CAPS_MMX 0x80000000
#define SWS_CPU_CAPS_MMX2 0x20000000
#define SWS_CPU_CAPS_3DNOW 0x40000000
#define SWS_CPU_CAPS_ALTIVEC 0x10000000
#define SWS_CPU_CAPS_BFIN 0x01000000
#define SWS_CPU_CAPS_SSE2 0x02000000
#define SWS_MAX_REDUCE_CUTOFF 0.002
#define SWS_CS_ITU709 1
...
...
libswscale/swscale_internal.h
View file @
e66149e7
...
...
@@ -475,11 +475,6 @@ extern const AVClass sws_context_class;
*/
void
ff_get_unscaled_swscale
(
SwsContext
*
c
);
/**
* Returns the SWS_CPU_CAPS for the optimized code compiled into swscale.
*/
int
ff_hardcodedcpuflags
(
void
);
/**
* Returns function pointer to fastest main scaler path function depending
* on architecture and available optimizations.
...
...
libswscale/utils.c
View file @
e66149e7
...
...
@@ -39,6 +39,7 @@
#include "rgb2rgb.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/opt.h"
...
...
@@ -180,7 +181,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
}
static
int
initFilter
(
int16_t
**
outFilter
,
int16_t
**
filterPos
,
int
*
outFilterSize
,
int
xInc
,
int
srcW
,
int
dstW
,
int
filterAlign
,
int
one
,
int
flags
,
int
srcW
,
int
dstW
,
int
filterAlign
,
int
one
,
int
flags
,
int
cpu_flags
,
SwsVector
*
srcFilter
,
SwsVector
*
dstFilter
,
double
param
[
2
])
{
int
i
;
...
...
@@ -191,10 +192,9 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
int64_t
*
filter2
=
NULL
;
const
int64_t
fone
=
1LL
<<
54
;
int
ret
=
-
1
;
#if ARCH_X86
if
(
flags
&
SWS_CPU_CAPS
_MMX
)
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
__asm__
volatile
(
"emms
\n\t
"
:::
"memory"
);
//FIXME this should not be required but it IS (even for non-MMX versions)
#endif
// NOTE: the +1 is for the MMX scaler which reads over the end
FF_ALLOC_OR_GOTO
(
NULL
,
*
filterPos
,
(
dstW
+
1
)
*
sizeof
(
int16_t
),
fail
);
...
...
@@ -411,7 +411,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
if
(
min
>
minFilterSize
)
minFilterSize
=
min
;
}
if
(
flags
&
SWS_CPU_CAPS
_ALTIVEC
)
{
if
(
HAVE_ALTIVEC
&&
cpu_flags
&
AV_CPU_FLAG
_ALTIVEC
)
{
// we can handle the special case 4,
// so we don't want to go to the full 8
if
(
minFilterSize
<
5
)
...
...
@@ -426,7 +426,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
filterAlign
=
1
;
}
if
(
flags
&
SWS_CPU_CAPS
_MMX
)
{
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
{
// special case for unscaled vertical filtering
if
(
minFilterSize
==
1
&&
filterAlign
==
2
)
filterAlign
=
1
;
...
...
@@ -516,7 +516,7 @@ fail:
return
ret
;
}
#if
ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
#if
HAVE_MMX2
static
int
initMMX2HScaler
(
int
dstW
,
int
xInc
,
uint8_t
*
filterCode
,
int16_t
*
filter
,
int32_t
*
filterPos
,
int
numSplits
)
{
uint8_t
*
fragmentA
;
...
...
@@ -674,7 +674,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
return
fragmentPos
+
1
;
}
#endif
/*
ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
*/
#endif
/*
HAVE_MMX2
*/
static
void
getSubSampleFactors
(
int
*
h
,
int
*
v
,
enum
PixelFormat
format
)
{
...
...
@@ -682,8 +682,6 @@ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
*
v
=
av_pix_fmt_descriptors
[
format
].
log2_chroma_h
;
}
static
int
update_flags_cpu
(
int
flags
);
int
sws_setColorspaceDetails
(
SwsContext
*
c
,
const
int
inv_table
[
4
],
int
srcRange
,
const
int
table
[
4
],
int
dstRange
,
int
brightness
,
int
contrast
,
int
saturation
)
{
memcpy
(
c
->
srcColorspaceTable
,
inv_table
,
sizeof
(
int
)
*
4
);
...
...
@@ -698,15 +696,12 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange
c
->
dstFormatBpp
=
av_get_bits_per_pixel
(
&
av_pix_fmt_descriptors
[
c
->
dstFormat
]);
c
->
srcFormatBpp
=
av_get_bits_per_pixel
(
&
av_pix_fmt_descriptors
[
c
->
srcFormat
]);
c
->
flags
=
update_flags_cpu
(
c
->
flags
);
ff_yuv2rgb_c_init_tables
(
c
,
inv_table
,
srcRange
,
brightness
,
contrast
,
saturation
);
//FIXME factorize
#if HAVE_ALTIVEC
if
(
c
->
flags
&
SWS_CPU_CAPS_ALTIVEC
)
if
(
HAVE_ALTIVEC
&&
av_get_cpu_flags
()
&
AV_CPU_FLAG_ALTIVEC
)
ff_yuv2rgb_init_tables_altivec
(
c
,
inv_table
,
brightness
,
contrast
,
saturation
);
#endif
return
0
;
}
...
...
@@ -736,20 +731,6 @@ static int handle_jpeg(enum PixelFormat *format)
}
}
static
int
update_flags_cpu
(
int
flags
)
{
#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
flags
&=
~
(
SWS_CPU_CAPS_MMX
|
SWS_CPU_CAPS_MMX2
|
SWS_CPU_CAPS_3DNOW
|
SWS_CPU_CAPS_SSE2
|
SWS_CPU_CAPS_ALTIVEC
|
SWS_CPU_CAPS_BFIN
);
flags
|=
ff_hardcodedcpuflags
();
#endif
/* CONFIG_RUNTIME_CPUDETECT */
return
flags
;
}
SwsContext
*
sws_alloc_context
(
void
)
{
SwsContext
*
c
=
av_mallocz
(
sizeof
(
SwsContext
));
...
...
@@ -770,16 +751,15 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
int
srcH
=
c
->
srcH
;
int
dstW
=
c
->
dstW
;
int
dstH
=
c
->
dstH
;
int
flags
;
int
flags
,
cpu_flags
;
enum
PixelFormat
srcFormat
=
c
->
srcFormat
;
enum
PixelFormat
dstFormat
=
c
->
dstFormat
;
flags
=
c
->
flags
=
update_flags_cpu
(
c
->
flags
);
#if ARCH_X86
if
(
flags
&
SWS_CPU_CAPS
_MMX
)
cpu_flags
=
av_get_cpu_flags
(
);
flags
=
c
->
flags
;
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
__asm__
volatile
(
"emms
\n\t
"
:::
"memory"
);
#endif
if
(
!
rgb15to16
)
sws_rgb2rgb_init
(
flags
);
if
(
!
rgb15to16
)
sws_rgb2rgb_init
();
unscaled
=
(
srcW
==
dstW
&&
srcH
==
dstH
);
...
...
@@ -872,7 +852,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
}
}
if
(
flags
&
SWS_CPU_CAPS
_MMX2
)
{
if
(
HAVE_MMX2
&&
cpu_flags
&
AV_CPU_FLAG
_MMX2
)
{
c
->
canMMX2BeUsed
=
(
dstW
>=
srcW
&&
(
dstW
&
31
)
==
0
&&
(
srcW
&
15
)
==
0
)
?
1
:
0
;
if
(
!
c
->
canMMX2BeUsed
&&
dstW
>=
srcW
&&
(
srcW
&
15
)
==
0
&&
(
flags
&
SWS_FAST_BILINEAR
))
{
if
(
flags
&
SWS_PRINT_INFO
)
...
...
@@ -898,7 +878,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
c
->
chrXInc
+=
20
;
}
//we don't use the x86 asm scaler if MMX is available
else
if
(
flags
&
SWS_CPU_CAPS
_MMX
)
{
else
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
{
c
->
lumXInc
=
((
srcW
-
2
)
<<
16
)
/
(
dstW
-
2
)
-
20
;
c
->
chrXInc
=
((
c
->
chrSrcW
-
2
)
<<
16
)
/
(
c
->
chrDstW
-
2
)
-
20
;
}
...
...
@@ -906,7 +886,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
/* precalculate horizontal scaler filter coefficients */
{
#if
ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
#if
HAVE_MMX2
// can't downscale !!!
if
(
c
->
canMMX2BeUsed
&&
(
flags
&
SWS_FAST_BILINEAR
))
{
c
->
lumMmx2FilterCodeSize
=
initMMX2HScaler
(
dstW
,
c
->
lumXInc
,
NULL
,
NULL
,
NULL
,
8
);
...
...
@@ -938,21 +918,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
mprotect
(
c
->
chrMmx2FilterCode
,
c
->
chrMmx2FilterCodeSize
,
PROT_EXEC
|
PROT_READ
);
#endif
}
else
#endif
/*
ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
*/
#endif
/*
HAVE_MMX2
*/
{
const
int
filterAlign
=
(
flags
&
SWS_CPU_CAPS
_MMX
)
?
4
:
(
flags
&
SWS_CPU_CAPS
_ALTIVEC
)
?
8
:
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
?
4
:
(
HAVE_ALTIVEC
&&
cpu_flags
&
AV_CPU_FLAG
_ALTIVEC
)
?
8
:
1
;
if
(
initFilter
(
&
c
->
hLumFilter
,
&
c
->
hLumFilterPos
,
&
c
->
hLumFilterSize
,
c
->
lumXInc
,
srcW
,
dstW
,
filterAlign
,
1
<<
14
,
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BICUBIC
)
:
flags
,
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BICUBIC
)
:
flags
,
cpu_flags
,
srcFilter
->
lumH
,
dstFilter
->
lumH
,
c
->
param
)
<
0
)
goto
fail
;
if
(
initFilter
(
&
c
->
hChrFilter
,
&
c
->
hChrFilterPos
,
&
c
->
hChrFilterSize
,
c
->
chrXInc
,
c
->
chrSrcW
,
c
->
chrDstW
,
filterAlign
,
1
<<
14
,
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BILINEAR
)
:
flags
,
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BILINEAR
)
:
flags
,
cpu_flags
,
srcFilter
->
chrH
,
dstFilter
->
chrH
,
c
->
param
)
<
0
)
goto
fail
;
}
...
...
@@ -961,18 +941,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
/* precalculate vertical scaler filter coefficients */
{
const
int
filterAlign
=
(
flags
&
SWS_CPU_CAPS
_MMX
)
&&
(
flags
&
SWS_ACCURATE_RND
)
?
2
:
(
flags
&
SWS_CPU_CAPS
_ALTIVEC
)
?
8
:
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
&&
(
flags
&
SWS_ACCURATE_RND
)
?
2
:
(
HAVE_ALTIVEC
&&
cpu_flags
&
AV_CPU_FLAG
_ALTIVEC
)
?
8
:
1
;
if
(
initFilter
(
&
c
->
vLumFilter
,
&
c
->
vLumFilterPos
,
&
c
->
vLumFilterSize
,
c
->
lumYInc
,
srcH
,
dstH
,
filterAlign
,
(
1
<<
12
),
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BICUBIC
)
:
flags
,
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BICUBIC
)
:
flags
,
cpu_flags
,
srcFilter
->
lumV
,
dstFilter
->
lumV
,
c
->
param
)
<
0
)
goto
fail
;
if
(
initFilter
(
&
c
->
vChrFilter
,
&
c
->
vChrFilterPos
,
&
c
->
vChrFilterSize
,
c
->
chrYInc
,
c
->
chrSrcH
,
c
->
chrDstH
,
filterAlign
,
(
1
<<
12
),
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BILINEAR
)
:
flags
,
(
flags
&
SWS_BICUBLIN
)
?
(
flags
|
SWS_BILINEAR
)
:
flags
,
cpu_flags
,
srcFilter
->
chrV
,
dstFilter
->
chrV
,
c
->
param
)
<
0
)
goto
fail
;
...
...
@@ -1066,13 +1046,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
#endif
sws_format_name
(
dstFormat
));
if
(
flags
&
SWS_CPU_CAPS
_MMX2
)
av_log
(
c
,
AV_LOG_INFO
,
"using MMX2
\n
"
);
else
if
(
flags
&
SWS_CPU_CAPS
_3DNOW
)
av_log
(
c
,
AV_LOG_INFO
,
"using 3DNOW
\n
"
);
else
if
(
flags
&
SWS_CPU_CAPS
_MMX
)
av_log
(
c
,
AV_LOG_INFO
,
"using MMX
\n
"
);
else
if
(
flags
&
SWS_CPU_CAPS
_ALTIVEC
)
av_log
(
c
,
AV_LOG_INFO
,
"using AltiVec
\n
"
);
if
(
HAVE_MMX2
&&
cpu_flags
&
AV_CPU_FLAG
_MMX2
)
av_log
(
c
,
AV_LOG_INFO
,
"using MMX2
\n
"
);
else
if
(
HAVE_AMD3DNOW
&&
cpu_flags
&
AV_CPU_FLAG
_3DNOW
)
av_log
(
c
,
AV_LOG_INFO
,
"using 3DNOW
\n
"
);
else
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
av_log
(
c
,
AV_LOG_INFO
,
"using MMX
\n
"
);
else
if
(
HAVE_ALTIVEC
&&
cpu_flags
&
AV_CPU_FLAG
_ALTIVEC
)
av_log
(
c
,
AV_LOG_INFO
,
"using AltiVec
\n
"
);
else
av_log
(
c
,
AV_LOG_INFO
,
"using C
\n
"
);
if
(
flags
&
SWS_CPU_CAPS
_MMX
)
{
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG
_MMX
)
{
if
(
c
->
canMMX2BeUsed
&&
(
flags
&
SWS_FAST_BILINEAR
))
av_log
(
c
,
AV_LOG_VERBOSE
,
"using FAST_BILINEAR MMX2 scaler for horizontal scaling
\n
"
);
else
{
...
...
@@ -1091,7 +1071,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using n-tap MMX scaler for horizontal chrominance scaling
\n
"
);
}
}
else
{
#if
ARCH_X86
#if
HAVE_MMX
av_log
(
c
,
AV_LOG_VERBOSE
,
"using x86 asm scaler for horizontal scaling
\n
"
);
#else
if
(
flags
&
SWS_FAST_BILINEAR
)
...
...
@@ -1102,31 +1082,41 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
}
if
(
isPlanarYUV
(
dstFormat
))
{
if
(
c
->
vLumFilterSize
==
1
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using 1-tap %s
\"
scaler
\"
for vertical scaling (YV12 like)
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using 1-tap %s
\"
scaler
\"
for vertical scaling (YV12 like)
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
else
av_log
(
c
,
AV_LOG_VERBOSE
,
"using n-tap %s scaler for vertical scaling (YV12 like)
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using n-tap %s scaler for vertical scaling (YV12 like)
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
}
else
{
if
(
c
->
vLumFilterSize
==
1
&&
c
->
vChrFilterSize
==
2
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using 1-tap %s
\"
scaler
\"
for vertical luminance scaling (BGR)
\n
"
" 2-tap scaler for vertical chrominance scaling (BGR)
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
" 2-tap scaler for vertical chrominance scaling (BGR)
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
else
if
(
c
->
vLumFilterSize
==
2
&&
c
->
vChrFilterSize
==
2
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using 2-tap linear %s scaler for vertical scaling (BGR)
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using 2-tap linear %s scaler for vertical scaling (BGR)
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
else
av_log
(
c
,
AV_LOG_VERBOSE
,
"using n-tap %s scaler for vertical scaling (BGR)
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using n-tap %s scaler for vertical scaling (BGR)
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
}
if
(
dstFormat
==
PIX_FMT_BGR24
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR24 converter
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX2
)
?
"MMX2"
:
((
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
));
(
HAVE_MMX2
&&
cpu_flags
&
AV_CPU_FLAG_MMX2
)
?
"MMX2"
:
((
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
));
else
if
(
dstFormat
==
PIX_FMT_RGB32
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR32 converter
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR32 converter
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
else
if
(
dstFormat
==
PIX_FMT_BGR565
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR16 converter
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR16 converter
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
else
if
(
dstFormat
==
PIX_FMT_BGR555
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR15 converter
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR15 converter
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
else
if
(
dstFormat
==
PIX_FMT_RGB444BE
||
dstFormat
==
PIX_FMT_RGB444LE
||
dstFormat
==
PIX_FMT_BGR444BE
||
dstFormat
==
PIX_FMT_BGR444LE
)
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR12 converter
\n
"
,
(
flags
&
SWS_CPU_CAPS_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"using %s YV12->BGR12 converter
\n
"
,
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
?
"MMX"
:
"C"
);
av_log
(
c
,
AV_LOG_VERBOSE
,
"%dx%d -> %dx%d
\n
"
,
srcW
,
srcH
,
dstW
,
dstH
);
av_log
(
c
,
AV_LOG_DEBUG
,
"lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d
\n
"
,
...
...
@@ -1504,7 +1494,7 @@ void sws_freeContext(SwsContext *c)
av_freep
(
&
c
->
hLumFilterPos
);
av_freep
(
&
c
->
hChrFilterPos
);
#if
ARCH_X86
#if
HAVE_MMX
#ifdef MAP_ANONYMOUS
if
(
c
->
lumMmx2FilterCode
)
munmap
(
c
->
lumMmx2FilterCode
,
c
->
lumMmx2FilterCodeSize
);
if
(
c
->
chrMmx2FilterCode
)
munmap
(
c
->
chrMmx2FilterCode
,
c
->
chrMmx2FilterCodeSize
);
...
...
@@ -1517,7 +1507,7 @@ void sws_freeContext(SwsContext *c)
#endif
c
->
lumMmx2FilterCode
=
NULL
;
c
->
chrMmx2FilterCode
=
NULL
;
#endif
/*
ARCH_X86
*/
#endif
/*
HAVE_MMX
*/
av_freep
(
&
c
->
yuvTable
);
...
...
@@ -1534,8 +1524,6 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
if
(
!
param
)
param
=
default_param
;
flags
=
update_flags_cpu
(
flags
);
if
(
context
&&
(
context
->
srcW
!=
srcW
||
context
->
srcH
!=
srcH
||
...
...
libswscale/x86/rgb2rgb.c
View file @
e66149e7
...
...
@@ -27,6 +27,7 @@
#include "config.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/bswap.h"
#include "libswscale/rgb2rgb.h"
#include "libswscale/swscale.h"
...
...
@@ -122,16 +123,16 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
32-bit C version, and and&add trick by Michael Niedermayer
*/
void
rgb2rgb_init_x86
(
int
flags
)
void
rgb2rgb_init_x86
(
void
)
{
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
if
(
flags
&
SWS_CPU_CAPS_SSE2
)
rgb2rgb_init_SSE2
();
else
if
(
flags
&
SWS_CPU_CAPS_MMX2
)
rgb2rgb_init_MMX2
();
else
if
(
flags
&
SWS_CPU_CAPS_3DNOW
)
rgb2rgb_init_3DNOW
();
else
if
(
flags
&
SWS_CPU_CAPS_MMX
)
int
cpu_flags
=
av_get_cpu_flags
();
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
rgb2rgb_init_MMX
();
#endif
/* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
if
(
HAVE_AMD3DNOW
&&
cpu_flags
&
AV_CPU_FLAG_3DNOW
)
rgb2rgb_init_3DNOW
();
if
(
HAVE_MMX2
&&
cpu_flags
&
AV_CPU_FLAG_MMX2
)
rgb2rgb_init_MMX2
();
if
(
HAVE_SSE
&&
cpu_flags
&
AV_CPU_FLAG_SSE2
)
rgb2rgb_init_SSE2
();
}
libswscale/x86/swscale_template.c
View file @
e66149e7
...
...
@@ -2721,10 +2721,11 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
if
((
dstFormat
==
PIX_FMT_YUVA420P
)
&&
!
alpPixBuf
)
fillPlane
(
dst
[
3
],
dstStride
[
3
],
dstW
,
dstY
-
lastDstY
,
lastDstY
,
255
);
if
(
flags
&
SWS_CPU_CAPS_MMX2
)
__asm__
volatile
(
"sfence"
:::
"memory"
);
if
(
COMPILE_TEMPLATE_MMX2
)
__asm__
volatile
(
"sfence"
:::
"memory"
);
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
if
(
flags
&
SWS_CPU_CAPS_3DNOW
)
__asm__
volatile
(
"femms"
:::
"memory"
);
else
__asm__
volatile
(
"emms"
:::
"memory"
);
if
(
COMPILE_TEMPLATE_AMD3DNOW
)
__asm__
volatile
(
"femms"
:::
"memory"
);
else
__asm__
volatile
(
"emms"
:::
"memory"
);
/* store changed local vars back in the context */
c
->
dstY
=
dstY
;
c
->
lumBufIndex
=
lumBufIndex
;
...
...
libswscale/x86/yuv2rgb_mmx.c
View file @
e66149e7
...
...
@@ -34,6 +34,7 @@
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#define DITHER1XBPP // only for MMX
...
...
@@ -46,57 +47,58 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
DECLARE_ASM_CONST
(
8
,
uint64_t
,
pb_07
)
=
0x0707070707070707ULL
;
//MMX versions
#if HAVE_MMX
#undef RENAME
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 0
#define RENAME(a) a ## _MMX
#include "yuv2rgb_template.c"
#endif
/* HAVE_MMX */
//MMX2 versions
#if HAVE_MMX2
#undef RENAME
#undef
HAV
E_MMX2
#define
HAV
E_MMX2 1
#undef
COMPILE_TEMPLAT
E_MMX2
#define
COMPILE_TEMPLAT
E_MMX2 1
#define RENAME(a) a ## _MMX2
#include "yuv2rgb_template.c"
#endif
/* HAVE_MMX2 */
SwsFunc
ff_yuv2rgb_init_mmx
(
SwsContext
*
c
)
{
if
(
c
->
flags
&
SWS_CPU_CAPS_MMX2
)
{
int
cpu_flags
=
av_get_cpu_flags
();
if
(
c
->
srcFormat
!=
PIX_FMT_YUV420P
&&
c
->
srcFormat
!=
PIX_FMT_YUVA420P
)
return
NULL
;
if
(
HAVE_MMX2
&&
cpu_flags
&
AV_CPU_FLAG_MMX2
)
{
switch
(
c
->
dstFormat
)
{
case
PIX_FMT_RGB32
:
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
srcFormat
==
PIX_FMT_YUVA420P
)
{
if
(
HAVE_7REGS
)
return
yuva420_rgb32_MMX2
;
break
;
}
else
return
yuv420_rgb32_MMX2
;
case
PIX_FMT_BGR32
:
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
srcFormat
==
PIX_FMT_YUVA420P
)
{
if
(
HAVE_7REGS
)
return
yuva420_bgr32_MMX2
;
break
;
}
else
return
yuv420_bgr32_MMX2
;
case
PIX_FMT_RGB24
:
return
yuv420_rgb24_MMX2
;
case
PIX_FMT_BGR24
:
return
yuv420_bgr24_MMX2
;
case
PIX_FMT_RGB565
:
return
yuv420_rgb16_MMX2
;
case
PIX_FMT_RGB555
:
return
yuv420_rgb15_MMX2
;
}
}
if
(
c
->
flags
&
SWS_CPU_CAPS_MMX
)
{
if
(
HAVE_MMX
&&
cpu_flags
&
AV_CPU_FLAG_MMX
)
{
switch
(
c
->
dstFormat
)
{
case
PIX_FMT_RGB32
:
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
srcFormat
==
PIX_FMT_YUVA420P
)
{
if
(
HAVE_7REGS
)
return
yuva420_rgb32_MMX
;
break
;
}
else
return
yuv420_rgb32_MMX
;
case
PIX_FMT_BGR32
:
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
srcFormat
==
PIX_FMT_YUVA420P
)
{
if
(
HAVE_7REGS
)
return
yuva420_bgr32_MMX
;
break
;
}
else
return
yuv420_bgr32_MMX
;
case
PIX_FMT_RGB24
:
return
yuv420_rgb24_MMX
;
case
PIX_FMT_BGR24
:
return
yuv420_bgr24_MMX
;
case
PIX_FMT_RGB565
:
return
yuv420_rgb16_MMX
;
case
PIX_FMT_RGB555
:
return
yuv420_rgb15_MMX
;
case
PIX_FMT_RGB32
:
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
srcFormat
==
PIX_FMT_YUVA420P
)
{
#if HAVE_7REGS
return
yuva420_rgb32_MMX
;
#endif
break
;
}
else
return
yuv420_rgb32_MMX
;
case
PIX_FMT_BGR32
:
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
srcFormat
==
PIX_FMT_YUVA420P
)
{
#if HAVE_7REGS
return
yuva420_bgr32_MMX
;
#endif
break
;
}
else
return
yuv420_bgr32_MMX
;
case
PIX_FMT_RGB24
:
return
yuv420_rgb24_MMX
;
case
PIX_FMT_BGR24
:
return
yuv420_bgr24_MMX
;
case
PIX_FMT_RGB565
:
return
yuv420_rgb16_MMX
;
case
PIX_FMT_RGB555
:
return
yuv420_rgb15_MMX
;
}
}
...
...
libswscale/x86/yuv2rgb_template.c
View file @
e66149e7
...
...
@@ -25,14 +25,7 @@
#undef EMMS
#undef SFENCE
#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif
#if HAVE_MMX2
#if COMPILE_TEMPLATE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
...
...
@@ -159,7 +152,8 @@
} \
#define YUV2RGB_ENDFUNC \
__asm__ volatile (SFENCE"\n\t"EMMS); \
__asm__ volatile (SFENCE"\n\t" \
"emms \n\t"); \
return srcSliceH; \
#define IF0(x)
...
...
@@ -188,6 +182,7 @@
"paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \
"paddusb "RED_DITHER"(%4), %%mm1\n\t" \
#if !COMPILE_TEMPLATE_MMX2
static
inline
int
RENAME
(
yuv420_rgb15
)(
SwsContext
*
c
,
const
uint8_t
*
src
[],
int
srcStride
[],
int
srcSliceY
,
int
srcSliceH
,
...
...
@@ -243,6 +238,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
YUV2RGB_OPERANDS
YUV2RGB_ENDFUNC
}
#endif
/* !COMPILE_TEMPLATE_MMX2 */
#define RGB_PACK24(blue, red)\
"packuswb %%mm3, %%mm0 \n"
/* R0 R2 R4 R6 R1 R3 R5 R7 */
\
...
...
@@ -259,7 +255,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
"punpckhwd %%mm6, %%mm5 \n"
/* R4 G4 B4 R5 R6 G6 B6 R7 */
\
RGB_PACK24_B
#if
HAV
E_MMX2
#if
COMPILE_TEMPLAT
E_MMX2
DECLARE_ASM_CONST
(
8
,
int16_t
,
mask1101
[
4
])
=
{
-
1
,
-
1
,
0
,
-
1
};
DECLARE_ASM_CONST
(
8
,
int16_t
,
mask0010
[
4
])
=
{
0
,
0
,
-
1
,
0
};
DECLARE_ASM_CONST
(
8
,
int16_t
,
mask0110
[
4
])
=
{
0
,
-
1
,
-
1
,
0
};
...
...
@@ -366,6 +362,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
MOVNTQ " %%mm5, 16(%1)\n\t" \
MOVNTQ " %%mm"alpha", 24(%1)\n\t" \
#if !COMPILE_TEMPLATE_MMX2
static
inline
int
RENAME
(
yuv420_rgb32
)(
SwsContext
*
c
,
const
uint8_t
*
src
[],
int
srcStride
[],
int
srcSliceY
,
int
srcSliceH
,
...
...
@@ -386,12 +383,12 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDFUNC
}
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
static
inline
int
RENAME
(
yuva420_rgb32
)(
SwsContext
*
c
,
const
uint8_t
*
src
[],
int
srcStride
[],
int
srcSliceY
,
int
srcSliceH
,
uint8_t
*
dst
[],
int
dstStride
[])
{
#if HAVE_7REGS
int
y
,
h_size
;
YUV2RGB_LOOP
(
4
)
...
...
@@ -406,10 +403,8 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDLOOP
(
4
)
YUV2RGB_OPERANDS_ALPHA
YUV2RGB_ENDFUNC
#else
return
0
;
#endif
}
#endif
static
inline
int
RENAME
(
yuv420_bgr32
)(
SwsContext
*
c
,
const
uint8_t
*
src
[],
int
srcStride
[],
...
...
@@ -431,12 +426,12 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDFUNC
}
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
static
inline
int
RENAME
(
yuva420_bgr32
)(
SwsContext
*
c
,
const
uint8_t
*
src
[],
int
srcStride
[],
int
srcSliceY
,
int
srcSliceH
,
uint8_t
*
dst
[],
int
dstStride
[])
{
#if HAVE_7REGS
int
y
,
h_size
;
YUV2RGB_LOOP
(
4
)
...
...
@@ -451,7 +446,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDLOOP
(
4
)
YUV2RGB_OPERANDS_ALPHA
YUV2RGB_ENDFUNC
#else
return
0
;
#endif
}
#endif
#endif
/* !COMPILE_TEMPLATE_MMX2 */
libswscale/yuv2rgb.c
View file @
e66149e7
...
...
@@ -32,7 +32,7 @@
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
#include "libavutil/
x86_
cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/bswap.h"
extern
const
uint8_t
dither_4x4_16
[
4
][
8
];
...
...
@@ -579,24 +579,18 @@ CLOSEYUV2RGBFUNC(1)
SwsFunc
ff_yuv2rgb_get_func_ptr
(
SwsContext
*
c
)
{
SwsFunc
t
=
NULL
;
#if HAVE_MMX
t
=
ff_yuv2rgb_init_mmx
(
c
);
#endif
#if HAVE_VIS
t
=
ff_yuv2rgb_init_vis
(
c
);
#endif
#if CONFIG_MLIB
t
=
ff_yuv2rgb_init_mlib
(
c
);
#endif
#if HAVE_ALTIVEC
if
(
c
->
flags
&
SWS_CPU_CAPS_ALTIVEC
)
t
=
ff_yuv2rgb_init_altivec
(
c
);
#endif
#if ARCH_BFIN
if
(
c
->
flags
&
SWS_CPU_CAPS_BFIN
)
if
(
HAVE_MMX
)
{
t
=
ff_yuv2rgb_init_mmx
(
c
);
}
else
if
(
HAVE_VIS
)
{
t
=
ff_yuv2rgb_init_vis
(
c
);
}
else
if
(
CONFIG_MLIB
)
{
t
=
ff_yuv2rgb_init_mlib
(
c
);
}
else
if
(
HAVE_ALTIVEC
)
{
t
=
ff_yuv2rgb_init_altivec
(
c
);
}
else
if
(
ARCH_BFIN
)
{
t
=
ff_yuv2rgb_get_func_ptr_bfin
(
c
);
#endif
}
if
(
t
)
return
t
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment