Commit 367d9b29 authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  swscale: K&R formatting cosmetics (part II)
  tiffdec: Add a malloc check and refactor another.
  faxcompr: Check malloc results and unify return path
  configure: escape colons in values written to config.fate
  ac3dsp: call femms/emms at the end of float_to_fixed24() for 3DNow and SSE
  matroska: Fix leaking memory allocated for laces.
  pthread: Fix crash due to fctx->delaying not being cleared.
  vp3: Assert on invalid filter_limit values.
  h264: fix 10bit biweight functions after recent x86inc.asm fixes.
  ffv1: Fix size mismatch in encode_line.
  movenc: Remove a dead initialization
  git-howto: Explain how to avoid Windows line endings in git checkouts.
  build: Move all arch OBJS declarations into arch subdirectory Makefiles.

Conflicts:
	configure
	libavcodec/vp3.c
	libavformat/matroskadec.c
	libavutil/Makefile
	libswscale/Makefile
	libswscale/swscale.c
	libswscale/swscale_internal.h
	libswscale/utils.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 62e5ef95 ef0ee7f6
...@@ -2803,7 +2803,11 @@ case $target_os in ...@@ -2803,7 +2803,11 @@ case $target_os in
;; ;;
esac esac
echo "config:$arch:$subarch:$cpu:$target_os:$cc_ident:$FFMPEG_CONFIGURATION" >config.fate esc(){
echo "$*" | sed 's/%/%25/g;s/:/%3a/g'
}
echo "config:$arch:$subarch:$cpu:$target_os:$(esc $cc_ident):$(esc $FFMPEG_CONFIGURATION)" >config.fate
check_cpp_condition stdlib.h "defined(__PIC__) || defined(__pic__) || defined(PIC)" && enable pic check_cpp_condition stdlib.h "defined(__PIC__) || defined(__pic__) || defined(PIC)" && enable pic
......
...@@ -65,6 +65,14 @@ git clone git@@source.ffmpeg.org:ffmpeg <target> ...@@ -65,6 +65,14 @@ git clone git@@source.ffmpeg.org:ffmpeg <target>
This will put the FFmpeg sources into the directory @var{<target>} and let This will put the FFmpeg sources into the directory @var{<target>} and let
you push back your changes to the remote repository. you push back your changes to the remote repository.
Make sure that you do not have Windows line endings in your checkouts,
otherwise you may experience spurious compilation failures. One way to
achieve this is to run
@example
git config --global core.autocrlf false
@end example
@section Updating the source tree to the latest revision @section Updating the source tree to the latest revision
......
...@@ -275,12 +275,17 @@ int ff_ccitt_unpack(AVCodecContext *avctx, ...@@ -275,12 +275,17 @@ int ff_ccitt_unpack(AVCodecContext *avctx,
{ {
int j; int j;
GetBitContext gb; GetBitContext gb;
int *runs, *ref, *runend; int *runs, *ref = NULL, *runend;
int ret; int ret;
int runsize= avctx->width + 2; int runsize= avctx->width + 2;
int err = 0;
runs = av_malloc(runsize * sizeof(runs[0])); runs = av_malloc(runsize * sizeof(runs[0]));
ref = av_malloc(runsize * sizeof(ref[0])); ref = av_malloc(runsize * sizeof(ref[0]));
if (!runs || ! ref) {
err = AVERROR(ENOMEM);
goto fail;
}
ref[0] = avctx->width; ref[0] = avctx->width;
ref[1] = 0; ref[1] = 0;
ref[2] = 0; ref[2] = 0;
...@@ -290,9 +295,8 @@ int ff_ccitt_unpack(AVCodecContext *avctx, ...@@ -290,9 +295,8 @@ int ff_ccitt_unpack(AVCodecContext *avctx,
if(compr == TIFF_G4){ if(compr == TIFF_G4){
ret = decode_group3_2d_line(avctx, &gb, avctx->width, runs, runend, ref); ret = decode_group3_2d_line(avctx, &gb, avctx->width, runs, runend, ref);
if(ret < 0){ if(ret < 0){
av_free(runs); err = -1;
av_free(ref); goto fail;
return -1;
} }
}else{ }else{
int g3d1 = (compr == TIFF_G3) && !(opts & 1); int g3d1 = (compr == TIFF_G3) && !(opts & 1);
...@@ -313,7 +317,8 @@ int ff_ccitt_unpack(AVCodecContext *avctx, ...@@ -313,7 +317,8 @@ int ff_ccitt_unpack(AVCodecContext *avctx,
} }
dst += stride; dst += stride;
} }
fail:
av_free(runs); av_free(runs);
av_free(ref); av_free(ref);
return 0; return err;
} }
...@@ -445,7 +445,7 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int ...@@ -445,7 +445,7 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int
#if CONFIG_FFV1_ENCODER #if CONFIG_FFV1_ENCODER
static av_always_inline int encode_line(FFV1Context *s, int w, static av_always_inline int encode_line(FFV1Context *s, int w,
int16_t *sample[2], int16_t *sample[3],
int plane_index, int bits) int plane_index, int bits)
{ {
PlaneContext * const p= &s->plane[plane_index]; PlaneContext * const p= &s->plane[plane_index];
......
...@@ -145,13 +145,18 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin ...@@ -145,13 +145,18 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin
int c, line, pixels, code; int c, line, pixels, code;
const uint8_t *ssrc = src; const uint8_t *ssrc = src;
int width = ((s->width * s->bpp) + 7) >> 3; int width = ((s->width * s->bpp) + 7) >> 3;
#if CONFIG_ZLIB
uint8_t *zbuf; unsigned long outlen;
if (size <= 0)
return AVERROR_INVALIDDATA;
#if CONFIG_ZLIB
if(s->compr == TIFF_DEFLATE || s->compr == TIFF_ADOBE_DEFLATE){ if(s->compr == TIFF_DEFLATE || s->compr == TIFF_ADOBE_DEFLATE){
uint8_t *zbuf; unsigned long outlen;
int ret; int ret;
outlen = width * lines; outlen = width * lines;
zbuf = av_malloc(outlen); zbuf = av_malloc(outlen);
if (!zbuf)
return AVERROR(ENOMEM);
ret = tiff_uncompress(zbuf, &outlen, src, size); ret = tiff_uncompress(zbuf, &outlen, src, size);
if(ret != Z_OK){ if(ret != Z_OK){
av_log(s->avctx, AV_LOG_ERROR, "Uncompressing failed (%lu of %lu) with error %d\n", outlen, (unsigned long)width * lines, ret); av_log(s->avctx, AV_LOG_ERROR, "Uncompressing failed (%lu of %lu) with error %d\n", outlen, (unsigned long)width * lines, ret);
...@@ -180,11 +185,11 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin ...@@ -180,11 +185,11 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin
} }
if(s->compr == TIFF_CCITT_RLE || s->compr == TIFF_G3 || s->compr == TIFF_G4){ if(s->compr == TIFF_CCITT_RLE || s->compr == TIFF_G3 || s->compr == TIFF_G4){
int i, ret = 0; int i, ret = 0;
uint8_t *src2 = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE); uint8_t *src2 = av_malloc((unsigned)size + FF_INPUT_BUFFER_PADDING_SIZE);
if(!src2 || (unsigned)size + FF_INPUT_BUFFER_PADDING_SIZE < (unsigned)size){ if (!src2) {
av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n"); av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n");
return -1; return AVERROR(ENOMEM);
} }
if(s->fax_opts & 2){ if(s->fax_opts & 2){
av_log(s->avctx, AV_LOG_ERROR, "Uncompressed fax mode is not supported (yet)\n"); av_log(s->avctx, AV_LOG_ERROR, "Uncompressed fax mode is not supported (yet)\n");
......
...@@ -223,7 +223,8 @@ cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len ...@@ -223,7 +223,8 @@ cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len
add dstq, 32 add dstq, 32
sub lend, 8 sub lend, 8
ja .loop ja .loop
REP_RET femms
RET
INIT_XMM INIT_XMM
cglobal float_to_fixed24_sse, 3,3,3, dst, src, len cglobal float_to_fixed24_sse, 3,3,3, dst, src, len
...@@ -247,7 +248,8 @@ cglobal float_to_fixed24_sse, 3,3,3, dst, src, len ...@@ -247,7 +248,8 @@ cglobal float_to_fixed24_sse, 3,3,3, dst, src, len
add dstq, 32 add dstq, 32
sub lend, 8 sub lend, 8
ja .loop ja .loop
REP_RET emms
RET
INIT_XMM INIT_XMM
cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
......
...@@ -155,7 +155,7 @@ WEIGHT_FUNC_HALF_MM sse4 ...@@ -155,7 +155,7 @@ WEIGHT_FUNC_HALF_MM sse4
%if ARCH_X86_32 %if ARCH_X86_32
DECLARE_REG_TMP 3 DECLARE_REG_TMP 3
%else %else
DECLARE_REG_TMP 10 DECLARE_REG_TMP 7
%endif %endif
%macro BIWEIGHT_PROLOGUE 0 %macro BIWEIGHT_PROLOGUE 0
...@@ -218,7 +218,7 @@ DECLARE_REG_TMP 10 ...@@ -218,7 +218,7 @@ DECLARE_REG_TMP 10
%endmacro %endmacro
%macro BIWEIGHT_FUNC_DBL 1 %macro BIWEIGHT_FUNC_DBL 1
cglobal h264_biweight_16_10_%1 cglobal h264_biweight_16_10_%1, 0, 8, 8
BIWEIGHT_PROLOGUE BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
...@@ -238,7 +238,7 @@ BIWEIGHT_FUNC_DBL sse2 ...@@ -238,7 +238,7 @@ BIWEIGHT_FUNC_DBL sse2
BIWEIGHT_FUNC_DBL sse4 BIWEIGHT_FUNC_DBL sse4
%macro BIWEIGHT_FUNC 1 %macro BIWEIGHT_FUNC 1
cglobal h264_biweight_8_10_%1 cglobal h264_biweight_8_10_%1, 0, 8, 8
BIWEIGHT_PROLOGUE BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
...@@ -256,7 +256,7 @@ BIWEIGHT_FUNC sse2 ...@@ -256,7 +256,7 @@ BIWEIGHT_FUNC sse2
BIWEIGHT_FUNC sse4 BIWEIGHT_FUNC sse4
%macro BIWEIGHT_FUNC_HALF 1 %macro BIWEIGHT_FUNC_HALF 1
cglobal h264_biweight_4_10_%1 cglobal h264_biweight_4_10_%1, 0, 8, 8
BIWEIGHT_PROLOGUE BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
sar r3d, 1 sar r3d, 1
......
...@@ -1940,8 +1940,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, ...@@ -1940,8 +1940,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
if (size < cfs * h / 2) { if (size < cfs * h / 2) {
av_log(matroska->ctx, AV_LOG_ERROR, av_log(matroska->ctx, AV_LOG_ERROR,
"Corrupt int4 RM-style audio packet size\n"); "Corrupt int4 RM-style audio packet size\n");
av_free(lace_size); res = AVERROR_INVALIDDATA;
return AVERROR_INVALIDDATA; goto end;
} }
for (x=0; x<h/2; x++) for (x=0; x<h/2; x++)
memcpy(track->audio.buf+x*2*w+y*cfs, memcpy(track->audio.buf+x*2*w+y*cfs,
...@@ -1950,16 +1950,16 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, ...@@ -1950,16 +1950,16 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
if (size < w) { if (size < w) {
av_log(matroska->ctx, AV_LOG_ERROR, av_log(matroska->ctx, AV_LOG_ERROR,
"Corrupt sipr RM-style audio packet size\n"); "Corrupt sipr RM-style audio packet size\n");
av_free(lace_size); res = AVERROR_INVALIDDATA;
return AVERROR_INVALIDDATA; goto end;
} }
memcpy(track->audio.buf + y*w, data, w); memcpy(track->audio.buf + y*w, data, w);
} else { } else {
if (size < sps * w / sps) { if (size < sps * w / sps) {
av_log(matroska->ctx, AV_LOG_ERROR, av_log(matroska->ctx, AV_LOG_ERROR,
"Corrupt generic RM-style audio packet size\n"); "Corrupt generic RM-style audio packet size\n");
av_free(lace_size); res = AVERROR_INVALIDDATA;
return AVERROR_INVALIDDATA; goto end;
} }
for (x=0; x<w/sps; x++) for (x=0; x<w/sps; x++)
memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps); memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);
...@@ -2049,6 +2049,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, ...@@ -2049,6 +2049,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
} }
} }
end:
av_free(lace_size); av_free(lace_size);
return res; return res;
} }
......
...@@ -939,7 +939,7 @@ static const AVCodecTag codec_3gp_tags[] = { ...@@ -939,7 +939,7 @@ static const AVCodecTag codec_3gp_tags[] = {
static int mov_find_codec_tag(AVFormatContext *s, MOVTrack *track) static int mov_find_codec_tag(AVFormatContext *s, MOVTrack *track)
{ {
int tag = track->enc->codec_tag; int tag;
if (track->mode == MODE_MP4 || track->mode == MODE_PSP) if (track->mode == MODE_MP4 || track->mode == MODE_PSP)
tag = mp4_get_codec_tag(s, track); tag = mp4_get_codec_tag(s, track);
......
...@@ -79,9 +79,6 @@ OBJS = adler32.o \ ...@@ -79,9 +79,6 @@ OBJS = adler32.o \
tree.o \ tree.o \
utils.o \ utils.o \
OBJS-$(ARCH_PPC) += ppc/cpu.o
OBJS-$(ARCH_X86) += x86/cpu.o
TESTPROGS = adler32 aes avstring base64 bprint cpu crc des eval file fifo \ TESTPROGS = adler32 aes avstring base64 bprint cpu crc des eval file fifo \
lfg lls md5 opt pca parseutils random_seed rational sha tree lfg lls md5 opt pca parseutils random_seed rational sha tree
TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
......
OBJS += ppc/cpu.o \
OBJS += x86/cpu.o \
...@@ -14,22 +14,4 @@ OBJS = input.o \ ...@@ -14,22 +14,4 @@ OBJS = input.o \
utils.o \ utils.o \
yuv2rgb.o \ yuv2rgb.o \
OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \
bfin/swscale_bfin.o \
bfin/yuv2rgb_bfin.o
ALTIVEC-OBJS += ppc/swscale_altivec.o \
ppc/yuv2rgb_altivec.o \
ppc/yuv2yuv_altivec.o
MMX-OBJS += x86/rgb2rgb.o \
x86/swscale_mmx.o \
x86/yuv2rgb_mmx.o
VIS-OBJS += sparc/yuv2rgb_vis.o
YASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
TESTPROGS = colorspace swscale TESTPROGS = colorspace swscale
OBJS += bfin/internal_bfin.o \
bfin/swscale_bfin.o \
bfin/yuv2rgb_bfin.o \
ALTIVEC-OBJS += ppc/swscale_altivec.o \
ppc/yuv2rgb_altivec.o \
ppc/yuv2yuv_altivec.o \
VIS-OBJS += sparc/yuv2rgb_vis.o \
...@@ -18,51 +18,52 @@ ...@@ -18,51 +18,52 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include <assert.h>
#include <inttypes.h> #include <inttypes.h>
#include <string.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include "config.h" #include <string.h>
#include <assert.h>
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
#include "libavutil/avassert.h" #include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h" #include "libavutil/avutil.h"
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h" #include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "config.h"
#include "rgb2rgb.h"
#include "swscale_internal.h"
#include "swscale.h"
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = { DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
{ 36, 68, 60, 92, 34, 66, 58, 90,}, { 36, 68, 60, 92, 34, 66, 58, 90, },
{ 100, 4,124, 28, 98, 2,122, 26,}, { 100, 4, 124, 28, 98, 2, 122, 26, },
{ 52, 84, 44, 76, 50, 82, 42, 74,}, { 52, 84, 44, 76, 50, 82, 42, 74, },
{ 116, 20,108, 12,114, 18,106, 10,}, { 116, 20, 108, 12, 114, 18, 106, 10, },
{ 32, 64, 56, 88, 38, 70, 62, 94,}, { 32, 64, 56, 88, 38, 70, 62, 94, },
{ 96, 0,120, 24,102, 6,126, 30,}, { 96, 0, 120, 24, 102, 6, 126, 30, },
{ 48, 80, 40, 72, 54, 86, 46, 78,}, { 48, 80, 40, 72, 54, 86, 46, 78, },
{ 112, 16,104, 8,118, 22,110, 14,}, { 112, 16, 104, 8, 118, 22, 110, 14, },
}; };
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
64, 64, 64, 64, 64, 64, 64, 64
};
static av_always_inline void fillPlane(uint8_t* plane, int stride, static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
int width, int height, int height, int y, uint8_t val)
int y, uint8_t val)
{ {
int i; int i;
uint8_t *ptr = plane + stride*y; uint8_t *ptr = plane + stride * y;
for (i=0; i<height; i++) { for (i = 0; i < height; i++) {
memset(ptr, val, width); memset(ptr, val, width);
ptr += stride; ptr += stride;
} }
} }
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
const int16_t *filter, const uint8_t *_src, const int16_t *filter,
const int32_t *filterPos, int filterSize) const int32_t *filterPos, int filterSize)
{ {
int i; int i;
...@@ -87,8 +88,8 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t ...@@ -87,8 +88,8 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
} }
} }
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
const int16_t *filter, const uint8_t *_src, const int16_t *filter,
const int32_t *filterPos, int filterSize) const int32_t *filterPos, int filterSize)
{ {
int i; int i;
...@@ -112,72 +113,71 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t ...@@ -112,72 +113,71 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
} }
// bilinear / bicubic scaling // bilinear / bicubic scaling
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
const int16_t *filter, const int32_t *filterPos, const uint8_t *src, const int16_t *filter,
int filterSize) const int32_t *filterPos, int filterSize)
{ {
int i; int i;
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int j; int j;
int srcPos= filterPos[i]; int srcPos = filterPos[i];
int val=0; int val = 0;
for (j=0; j<filterSize; j++) { for (j = 0; j < filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j]; val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
} }
//filter += hFilterSize; dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//dst[i] = val>>7;
} }
} }
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
const int16_t *filter, const int32_t *filterPos, const uint8_t *src, const int16_t *filter,
int filterSize) const int32_t *filterPos, int filterSize)
{ {
int i; int i;
int32_t *dst = (int32_t *) _dst; int32_t *dst = (int32_t *) _dst;
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int j; int j;
int srcPos= filterPos[i]; int srcPos = filterPos[i];
int val=0; int val = 0;
for (j=0; j<filterSize; j++) { for (j = 0; j < filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j]; val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
} }
//filter += hFilterSize; dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//dst[i] = val>>7;
} }
} }
//FIXME all pal and rgb srcFormats could do this convertion as well // FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform // FIXME all scalers more complex than bilinear could do half of this transform
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{ {
int i; int i;
for (i = 0; i < width; i++) { for (i = 0; i < width; i++) {
dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264 dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264
dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264 dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264
} }
} }
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width) static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{ {
int i; int i;
for (i = 0; i < width; i++) { for (i = 0; i < width; i++) {
dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469 dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469
dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469 dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469
} }
} }
static void lumRangeToJpeg_c(int16_t *dst, int width) static void lumRangeToJpeg_c(int16_t *dst, int width)
{ {
int i; int i;
for (i = 0; i < width; i++) for (i = 0; i < width; i++)
dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14; dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14;
} }
static void lumRangeFromJpeg_c(int16_t *dst, int width) static void lumRangeFromJpeg_c(int16_t *dst, int width)
{ {
int i; int i;
for (i = 0; i < width; i++) for (i = 0; i < width; i++)
dst[i] = (dst[i]*14071 + 33561947)>>14; dst[i] = (dst[i] * 14071 + 33561947) >> 14;
} }
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
...@@ -186,27 +186,30 @@ static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) ...@@ -186,27 +186,30 @@ static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
int32_t *dstU = (int32_t *) _dstU; int32_t *dstU = (int32_t *) _dstU;
int32_t *dstV = (int32_t *) _dstV; int32_t *dstV = (int32_t *) _dstV;
for (i = 0; i < width; i++) { for (i = 0; i < width; i++) {
dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264 dstU[i] = (FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264 dstV[i] = (FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
} }
} }
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{ {
int i; int i;
int32_t *dstU = (int32_t *) _dstU; int32_t *dstU = (int32_t *) _dstU;
int32_t *dstV = (int32_t *) _dstV; int32_t *dstV = (int32_t *) _dstV;
for (i = 0; i < width; i++) { for (i = 0; i < width; i++) {
dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469 dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469
dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469 dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469
} }
} }
static void lumRangeToJpeg16_c(int16_t *_dst, int width) static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{ {
int i; int i;
int32_t *dst = (int32_t *) _dst; int32_t *dst = (int32_t *) _dst;
for (i = 0; i < width; i++) for (i = 0; i < width; i++)
dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12; dst[i] = (FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
} }
static void lumRangeFromJpeg16_c(int16_t *_dst, int width) static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{ {
int i; int i;
...@@ -219,12 +222,12 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, ...@@ -219,12 +222,12 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc) const uint8_t *src, int srcW, int xInc)
{ {
int i; int i;
unsigned int xpos=0; unsigned int xpos = 0;
for (i=0;i<dstWidth;i++) { for (i = 0; i < dstWidth; i++) {
register unsigned int xx=xpos>>16; register unsigned int xx = xpos >> 16;
register unsigned int xalpha=(xpos&0xFFFF)>>9; register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
xpos+=xInc; xpos += xInc;
} }
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
dst[i] = src[srcW-1]*128; dst[i] = src[srcW-1]*128;
...@@ -232,26 +235,30 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, ...@@ -232,26 +235,30 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
// *** horizontal scale Y line to temp buffer // *** horizontal scale Y line to temp buffer
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
const uint8_t *src_in[4], int srcW, int xInc, const uint8_t *src_in[4],
int srcW, int xInc,
const int16_t *hLumFilter, const int16_t *hLumFilter,
const int32_t *hLumFilterPos, int hLumFilterSize, const int32_t *hLumFilterPos,
int hLumFilterSize,
uint8_t *formatConvBuffer, uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha) uint32_t *pal, int isAlpha)
{ {
void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
const uint8_t *src = src_in[isAlpha ? 3 : 0]; const uint8_t *src = src_in[isAlpha ? 3 : 0];
if (toYV12) { if (toYV12) {
toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
src= formatConvBuffer; src = formatConvBuffer;
} else if (c->readLumPlanar && !isAlpha) { } else if (c->readLumPlanar && !isAlpha) {
c->readLumPlanar(formatConvBuffer, src_in, srcW); c->readLumPlanar(formatConvBuffer, src_in, srcW);
src = formatConvBuffer; src = formatConvBuffer;
} }
if (!c->hyscale_fast) { if (!c->hyscale_fast) {
c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize); c->hyScale(c, dst, dstWidth, src, hLumFilter,
hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale } else { // fast bilinear upscale / crap downscale
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
} }
...@@ -265,13 +272,13 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, ...@@ -265,13 +272,13 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
const uint8_t *src2, int srcW, int xInc) const uint8_t *src2, int srcW, int xInc)
{ {
int i; int i;
unsigned int xpos=0; unsigned int xpos = 0;
for (i=0;i<dstWidth;i++) { for (i = 0; i < dstWidth; i++) {
register unsigned int xx=xpos>>16; register unsigned int xx = xpos >> 16;
register unsigned int xalpha=(xpos&0xFFFF)>>9; register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
xpos+=xInc; xpos += xInc;
} }
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
dst1[i] = src1[srcW-1]*128; dst1[i] = src1[srcW-1]*128;
...@@ -279,23 +286,28 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, ...@@ -279,23 +286,28 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
} }
} }
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
int16_t *dst2, int dstWidth,
const uint8_t *src_in[4], const uint8_t *src_in[4],
int srcW, int xInc, const int16_t *hChrFilter, int srcW, int xInc,
const int32_t *hChrFilterPos, int hChrFilterSize, const int16_t *hChrFilter,
const int32_t *hChrFilterPos,
int hChrFilterSize,
uint8_t *formatConvBuffer, uint32_t *pal) uint8_t *formatConvBuffer, uint32_t *pal)
{ {
const uint8_t *src1 = src_in[1], *src2 = src_in[2]; const uint8_t *src1 = src_in[1], *src2 = src_in[2];
if (c->chrToYV12) { if (c->chrToYV12) {
uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); uint8_t *buf2 = formatConvBuffer +
FFALIGN(srcW*2+78, 16);
c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
src1= formatConvBuffer; src1= formatConvBuffer;
src2= buf2; src2= buf2;
} else if (c->readChrPlanar) { } else if (c->readChrPlanar) {
uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); uint8_t *buf2 = formatConvBuffer +
FFALIGN(srcW*2+78, 16);
c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
src1= formatConvBuffer; src1 = formatConvBuffer;
src2= buf2; src2 = buf2;
} }
if (!c->hcscale_fast) { if (!c->hcscale_fast) {
...@@ -310,88 +322,97 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 ...@@ -310,88 +322,97 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
} }
#define DEBUG_SWSCALE_BUFFERS 0 #define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) #define DEBUG_BUFFERS(...) \
if (DEBUG_SWSCALE_BUFFERS) \
av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
static int swScale(SwsContext *c, const uint8_t* src[], static int swScale(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]) int srcSliceH, uint8_t *dst[], int dstStride[])
{ {
/* load a few things into local vars to make the code more readable? and faster */ /* load a few things into local vars to make the code more readable?
const int srcW= c->srcW; * and faster */
const int dstW= c->dstW; const int srcW = c->srcW;
const int dstH= c->dstH; const int dstW = c->dstW;
const int chrDstW= c->chrDstW; const int dstH = c->dstH;
const int chrSrcW= c->chrSrcW; const int chrDstW = c->chrDstW;
const int lumXInc= c->lumXInc; const int chrSrcW = c->chrSrcW;
const int chrXInc= c->chrXInc; const int lumXInc = c->lumXInc;
const enum PixelFormat dstFormat= c->dstFormat; const int chrXInc = c->chrXInc;
const int flags= c->flags; const enum PixelFormat dstFormat = c->dstFormat;
int32_t *vLumFilterPos= c->vLumFilterPos; const int flags = c->flags;
int32_t *vChrFilterPos= c->vChrFilterPos; int32_t *vLumFilterPos = c->vLumFilterPos;
int32_t *hLumFilterPos= c->hLumFilterPos; int32_t *vChrFilterPos = c->vChrFilterPos;
int32_t *hChrFilterPos= c->hChrFilterPos; int32_t *hLumFilterPos = c->hLumFilterPos;
int16_t *hLumFilter= c->hLumFilter; int32_t *hChrFilterPos = c->hChrFilterPos;
int16_t *hChrFilter= c->hChrFilter; int16_t *vLumFilter = c->vLumFilter;
int32_t *lumMmxFilter= c->lumMmxFilter; int16_t *vChrFilter = c->vChrFilter;
int32_t *chrMmxFilter= c->chrMmxFilter; int16_t *hLumFilter = c->hLumFilter;
const int vLumFilterSize= c->vLumFilterSize; int16_t *hChrFilter = c->hChrFilter;
const int vChrFilterSize= c->vChrFilterSize; int32_t *lumMmxFilter = c->lumMmxFilter;
const int hLumFilterSize= c->hLumFilterSize; int32_t *chrMmxFilter = c->chrMmxFilter;
const int hChrFilterSize= c->hChrFilterSize; const int vLumFilterSize = c->vLumFilterSize;
int16_t **lumPixBuf= c->lumPixBuf; const int vChrFilterSize = c->vChrFilterSize;
int16_t **chrUPixBuf= c->chrUPixBuf; const int hLumFilterSize = c->hLumFilterSize;
int16_t **chrVPixBuf= c->chrVPixBuf; const int hChrFilterSize = c->hChrFilterSize;
int16_t **alpPixBuf= c->alpPixBuf; int16_t **lumPixBuf = c->lumPixBuf;
const int vLumBufSize= c->vLumBufSize; int16_t **chrUPixBuf = c->chrUPixBuf;
const int vChrBufSize= c->vChrBufSize; int16_t **chrVPixBuf = c->chrVPixBuf;
uint8_t *formatConvBuffer= c->formatConvBuffer; int16_t **alpPixBuf = c->alpPixBuf;
const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; const int vLumBufSize = c->vLumBufSize;
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); const int vChrBufSize = c->vChrBufSize;
int lastDstY; uint8_t *formatConvBuffer = c->formatConvBuffer;
uint32_t *pal=c->pal_yuv; uint32_t *pal = c->pal_yuv;
int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
yuv2planar1_fn yuv2plane1 = c->yuv2plane1; yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
yuv2planarX_fn yuv2planeX = c->yuv2planeX; yuv2planarX_fn yuv2planeX = c->yuv2planeX;
yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX; yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
yuv2packed1_fn yuv2packed1 = c->yuv2packed1; yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
yuv2packed2_fn yuv2packed2 = c->yuv2packed2; yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
yuv2packedX_fn yuv2packedX = c->yuv2packedX; yuv2packedX_fn yuv2packedX = c->yuv2packedX;
const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH = -((-srcSliceH) >> c->chrSrcVSubSample);
int should_dither = is9_OR_10BPS(c->srcFormat) ||
is16BPS(c->srcFormat);
int lastDstY;
/* vars which will change and which we need to store back in the context */ /* vars which will change and which we need to store back in the context */
int dstY= c->dstY; int dstY = c->dstY;
int lumBufIndex= c->lumBufIndex; int lumBufIndex = c->lumBufIndex;
int chrBufIndex= c->chrBufIndex; int chrBufIndex = c->chrBufIndex;
int lastInLumBuf= c->lastInLumBuf; int lastInLumBuf = c->lastInLumBuf;
int lastInChrBuf= c->lastInChrBuf; int lastInChrBuf = c->lastInChrBuf;
if (isPacked(c->srcFormat)) { if (isPacked(c->srcFormat)) {
src[0]= src[0] =
src[1]= src[1] =
src[2]= src[2] =
src[3]= src[0]; src[3] = src[0];
srcStride[0]= srcStride[0] =
srcStride[1]= srcStride[1] =
srcStride[2]= srcStride[2] =
srcStride[3]= srcStride[0]; srcStride[3] = srcStride[0];
} }
srcStride[1]<<= c->vChrDrop; srcStride[1] <<= c->vChrDrop;
srcStride[2]<<= c->vChrDrop; srcStride[2] <<= c->vChrDrop;
DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3], src[0], srcStride[0], src[1], srcStride[1],
dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]); src[2], srcStride[2], src[3], srcStride[3],
dst[0], dstStride[0], dst[1], dstStride[1],
dst[2], dstStride[2], dst[3], dstStride[3]);
DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n", DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
srcSliceY, srcSliceH, dstY, dstH); srcSliceY, srcSliceH, dstY, dstH);
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) { if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 ||
static int warnedAlready=0; //FIXME move this into the context perhaps dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
static int warnedAlready = 0; // FIXME maybe move this into the context
if (flags & SWS_PRINT_INFO && !warnedAlready) { if (flags & SWS_PRINT_INFO && !warnedAlready) {
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" av_log(c, AV_LOG_WARNING,
"Warning: dstStride is not aligned!\n"
" ->cannot do aligned memory accesses anymore\n"); " ->cannot do aligned memory accesses anymore\n");
warnedAlready=1; warnedAlready = 1;
} }
} }
...@@ -408,23 +429,23 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -408,23 +429,23 @@ static int swScale(SwsContext *c, const uint8_t* src[],
} }
/* Note the user might start scaling the picture in the middle so this /* Note the user might start scaling the picture in the middle so this
will not get executed. This is not really intended but works * will not get executed. This is not really intended but works
currently, so people might do it. */ * currently, so people might do it. */
if (srcSliceY ==0) { if (srcSliceY == 0) {
lumBufIndex=-1; lumBufIndex = -1;
chrBufIndex=-1; chrBufIndex = -1;
dstY=0; dstY = 0;
lastInLumBuf= -1; lastInLumBuf = -1;
lastInChrBuf= -1; lastInChrBuf = -1;
} }
if (!should_dither) { if (!should_dither) {
c->chrDither8 = c->lumDither8 = ff_sws_pb_64; c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
} }
lastDstY= dstY; lastDstY = dstY;
for (;dstY < dstH; dstY++) { for (; dstY < dstH; dstY++) {
const int chrDstY= dstY>>c->chrDstVSubSample; const int chrDstY = dstY >> c->chrDstVSubSample;
uint8_t *dest[4] = { uint8_t *dest[4] = {
dst[0] + dstStride[0] * dstY, dst[0] + dstStride[0] * dstY,
dst[1] + dstStride[1] * chrDstY, dst[1] + dstStride[1] * chrDstY,
...@@ -433,9 +454,11 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -433,9 +454,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
}; };
int use_mmx_vfilter= c->use_mmx_vfilter; int use_mmx_vfilter= c->use_mmx_vfilter;
const int firstLumSrcY= FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); //First line needed as input // First line needed as input
const int firstLumSrcY2= FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]); const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
const int firstChrSrcY= FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]); //First line needed as input const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
// First line needed as input
const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
// Last line needed as input // Last line needed as input
int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1; int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
...@@ -443,9 +466,11 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -443,9 +466,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1; int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
int enough_lines; int enough_lines;
//handle holes (FAST_BILINEAR & weird filters) // handle holes (FAST_BILINEAR & weird filters)
if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; if (firstLumSrcY > lastInLumBuf)
if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; lastInLumBuf = firstLumSrcY - 1;
if (firstChrSrcY > lastInChrBuf)
lastInChrBuf = firstChrSrcY - 1;
assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
...@@ -456,7 +481,8 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -456,7 +481,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
firstChrSrcY, lastChrSrcY, lastInChrBuf); firstChrSrcY, lastChrSrcY, lastInChrBuf);
// Do we have enough lines in this slice to output the dstY line // Do we have enough lines in this slice to output the dstY line
enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample); enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample);
if (!enough_lines) { if (!enough_lines) {
lastLumSrcY = srcSliceY + srcSliceH - 1; lastLumSrcY = srcSliceY + srcSliceH - 1;
...@@ -465,8 +491,8 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -465,8 +491,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
lastLumSrcY, lastChrSrcY); lastLumSrcY, lastChrSrcY);
} }
//Do horizontal scaling // Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) { while (lastInLumBuf < lastLumSrcY) {
const uint8_t *src1[4] = { const uint8_t *src1[4] = {
src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0], src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1], src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
...@@ -474,23 +500,21 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -474,23 +500,21 @@ static int swScale(SwsContext *c, const uint8_t* src[],
src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3], src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
}; };
lumBufIndex++; lumBufIndex++;
assert(lumBufIndex < 2*vLumBufSize); assert(lumBufIndex < 2 * vLumBufSize);
assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
assert(lastInLumBuf + 1 - srcSliceY >= 0); assert(lastInLumBuf + 1 - srcSliceY >= 0);
hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
hLumFilter, hLumFilterPos, hLumFilterSize, hLumFilter, hLumFilterPos, hLumFilterSize,
formatConvBuffer, formatConvBuffer, pal, 0);
pal, 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW, hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize, lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
formatConvBuffer, formatConvBuffer, pal, 1);
pal, 1);
lastInLumBuf++; lastInLumBuf++;
DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n", DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
lumBufIndex, lastInLumBuf); lumBufIndex, lastInLumBuf);
} }
while(lastInChrBuf < lastChrSrcY) { while (lastInChrBuf < lastChrSrcY) {
const uint8_t *src1[4] = { const uint8_t *src1[4] = {
src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0], src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1], src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
...@@ -498,10 +522,10 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -498,10 +522,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3], src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
}; };
chrBufIndex++; chrBufIndex++;
assert(chrBufIndex < 2*vChrBufSize); assert(chrBufIndex < 2 * vChrBufSize);
assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
//FIXME replace parameters through context struct (some at least) // FIXME replace parameters through context struct (some at least)
if (c->needs_hcscale) if (c->needs_hcscale)
hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
...@@ -512,36 +536,42 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -512,36 +536,42 @@ static int swScale(SwsContext *c, const uint8_t* src[],
DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
chrBufIndex, lastInChrBuf); chrBufIndex, lastInChrBuf);
} }
//wrap buf index around to stay inside the ring buffer // wrap buf index around to stay inside the ring buffer
if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; if (lumBufIndex >= vLumBufSize)
if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; lumBufIndex -= vLumBufSize;
if (chrBufIndex >= vChrBufSize)
chrBufIndex -= vChrBufSize;
if (!enough_lines) if (!enough_lines)
break; //we can't output a dstY line so let's try with the next slice break; // we can't output a dstY line so let's try with the next slice
#if HAVE_MMX #if HAVE_MMX
updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
lastInLumBuf, lastInChrBuf);
#endif #endif
if (should_dither) { if (should_dither) {
c->chrDither8 = dither_8x8_128[chrDstY & 7]; c->chrDither8 = dither_8x8_128[chrDstY & 7];
c->lumDither8 = dither_8x8_128[dstY & 7]; c->lumDither8 = dither_8x8_128[dstY & 7];
} }
if (dstY >= dstH-2) { if (dstY >= dstH - 2) {
// hmm looks like we can't use MMX here without overwriting this array's tail /* hmm looks like we can't use MMX here without overwriting
* this array's tail */
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
&yuv2packed1, &yuv2packed2, &yuv2packedX); &yuv2packed1, &yuv2packed2, &yuv2packedX);
use_mmx_vfilter= 0; use_mmx_vfilter= 0;
} }
{ {
const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
int16_t *vLumFilter= c->vLumFilter; (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
int16_t *vChrFilter= c->vChrFilter; int16_t *vLumFilter = c->vLumFilter;
int16_t *vChrFilter = c->vChrFilter;
if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if (isPlanarYUV(dstFormat) ||
(isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
vLumFilter += dstY * vLumFilterSize; vLumFilter += dstY * vLumFilterSize;
vChrFilter += chrDstY * vChrFilterSize; vChrFilter += chrDstY * vChrFilterSize;
...@@ -564,53 +594,62 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -564,53 +594,62 @@ static int swScale(SwsContext *c, const uint8_t* src[],
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
} else { } else {
yuv2planeX(vLumFilter, vLumFilterSize, yuv2planeX(vLumFilter, vLumFilterSize,
lumSrcPtr, dest[0], dstW, c->lumDither8, 0); lumSrcPtr, dest[0],
dstW, c->lumDither8, 0);
} }
if (!((dstY&chrSkipMask) || isGray(dstFormat))) { if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
if (yuv2nv12cX) { if (yuv2nv12cX) {
yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); yuv2nv12cX(c, vChrFilter,
vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
dest[1], chrDstW);
} else if (vChrFilterSize == 1) { } else if (vChrFilterSize == 1) {
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
} else { } else {
yuv2planeX(vChrFilter, vChrFilterSize, yuv2planeX(vChrFilter,
chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0); vChrFilterSize, chrUSrcPtr, dest[1],
yuv2planeX(vChrFilter, vChrFilterSize, chrDstW, c->chrDither8, 0);
chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3); yuv2planeX(vChrFilter,
vChrFilterSize, chrVSrcPtr, dest[2],
chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
} }
} }
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
if(use_mmx_vfilter){ if(use_mmx_vfilter){
vLumFilter= c->alpMmxFilter; vLumFilter= c->alpMmxFilter;
} }
if (vLumFilterSize == 1) { if (vLumFilterSize == 1) {
yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0); yuv2plane1(alpSrcPtr[0], dest[3], dstW,
c->lumDither8, 0);
} else { } else {
yuv2planeX(vLumFilter, vLumFilterSize, yuv2planeX(vLumFilter,
alpSrcPtr, dest[3], dstW, c->lumDither8, 0); vLumFilterSize, alpSrcPtr, dest[3],
dstW, c->lumDither8, 0);
} }
} }
} else { } else {
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize * 2);
assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize * 2);
if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize <= 2) { //unscaled RGB if (c->yuv2packed1 && vLumFilterSize == 1 &&
vChrFilterSize <= 2) { // unscaled RGB
int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1]; int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr, yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *alpSrcPtr : NULL,
dest[0], dstW, chrAlpha, dstY); dest[0], dstW, chrAlpha, dstY);
} else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB } else if (c->yuv2packed2 && vLumFilterSize == 2 &&
vChrFilterSize == 2) { // bilinear upscale RGB
int lumAlpha = vLumFilter[2 * dstY + 1]; int lumAlpha = vLumFilter[2 * dstY + 1];
int chrAlpha = vChrFilter[2 * dstY + 1]; int chrAlpha = vChrFilter[2 * dstY + 1];
lumMmxFilter[2] = lumMmxFilter[2] =
lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001; lumMmxFilter[3] = vLumFilter[2 * dstY] * 0x10001;
chrMmxFilter[2] = chrMmxFilter[2] =
chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001; chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr, yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
alpPixBuf ? alpSrcPtr : NULL, alpPixBuf ? alpSrcPtr : NULL,
dest[0], dstW, lumAlpha, chrAlpha, dstY); dest[0], dstW, lumAlpha, chrAlpha, dstY);
} else { //general RGB } else { // general RGB
yuv2packedX(c, vLumFilter + dstY * vLumFilterSize, yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
lumSrcPtr, vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter + dstY * vChrFilterSize, vChrFilter + dstY * vChrFilterSize,
...@@ -622,20 +661,20 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -622,20 +661,20 @@ static int swScale(SwsContext *c, const uint8_t* src[],
} }
if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
#if HAVE_MMX2 #if HAVE_MMX2
if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
__asm__ volatile("sfence":::"memory"); __asm__ volatile ("sfence" ::: "memory");
#endif #endif
emms_c(); emms_c();
/* store changed local vars back in the context */ /* store changed local vars back in the context */
c->dstY= dstY; c->dstY = dstY;
c->lumBufIndex= lumBufIndex; c->lumBufIndex = lumBufIndex;
c->chrBufIndex= chrBufIndex; c->chrBufIndex = chrBufIndex;
c->lastInLumBuf= lastInLumBuf; c->lastInLumBuf = lastInLumBuf;
c->lastInChrBuf= lastInChrBuf; c->lastInChrBuf = lastInChrBuf;
return dstY - lastDstY; return dstY - lastDstY;
} }
...@@ -662,7 +701,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ...@@ -662,7 +701,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
c->hyScale = c->hcScale = hScale8To19_c; c->hyScale = c->hcScale = hScale8To19_c;
} }
} else { } else {
c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c; c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
: hScale16To15_c;
} }
if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
......
...@@ -18,14 +18,15 @@ ...@@ -18,14 +18,15 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#define _SVID_SOURCE //needed for MAP_ANONYMOUS #include "config.h"
#define _SVID_SOURCE // needed for MAP_ANONYMOUS
#define _DARWIN_C_SOURCE // needed for MAP_ANON #define _DARWIN_C_SOURCE // needed for MAP_ANON
#include <assert.h>
#include <inttypes.h> #include <inttypes.h>
#include <string.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include "config.h" #include <string.h>
#include <assert.h>
#if HAVE_SYS_MMAN_H #if HAVE_SYS_MMAN_H
#include <sys/mman.h> #include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS) #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
...@@ -36,18 +37,19 @@ ...@@ -36,18 +37,19 @@
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
#endif #endif
#include "swscale.h"
#include "swscale_internal.h" #include "libavutil/avassert.h"
#include "rgb2rgb.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h" #include "libavutil/avutil.h"
#include "libavutil/bswap.h" #include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h" #include "libavutil/mathematics.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "libavutil/avassert.h" #include "libavutil/x86_cpu.h"
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
unsigned swscale_version(void) unsigned swscale_version(void)
{ {
...@@ -66,98 +68,98 @@ const char *swscale_license(void) ...@@ -66,98 +68,98 @@ const char *swscale_license(void)
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
} }
#define RET 0xC3 //near return opcode for x86 #define RET 0xC3 // near return opcode for x86
typedef struct FormatEntry { typedef struct FormatEntry {
int is_supported_in, is_supported_out; int is_supported_in, is_supported_out;
} FormatEntry; } FormatEntry;
static const FormatEntry format_entries[PIX_FMT_NB] = { static const FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_YUV420P] = { 1 , 1 }, [PIX_FMT_YUV420P] = { 1, 1 },
[PIX_FMT_YUYV422] = { 1 , 1 }, [PIX_FMT_YUYV422] = { 1, 1 },
[PIX_FMT_RGB24] = { 1 , 1 }, [PIX_FMT_RGB24] = { 1, 1 },
[PIX_FMT_BGR24] = { 1 , 1 }, [PIX_FMT_BGR24] = { 1, 1 },
[PIX_FMT_YUV422P] = { 1 , 1 }, [PIX_FMT_YUV422P] = { 1, 1 },
[PIX_FMT_YUV444P] = { 1 , 1 }, [PIX_FMT_YUV444P] = { 1, 1 },
[PIX_FMT_YUV410P] = { 1 , 1 }, [PIX_FMT_YUV410P] = { 1, 1 },
[PIX_FMT_YUV411P] = { 1 , 1 }, [PIX_FMT_YUV411P] = { 1, 1 },
[PIX_FMT_GRAY8] = { 1 , 1 }, [PIX_FMT_GRAY8] = { 1, 1 },
[PIX_FMT_MONOWHITE] = { 1 , 1 }, [PIX_FMT_MONOWHITE] = { 1, 1 },
[PIX_FMT_MONOBLACK] = { 1 , 1 }, [PIX_FMT_MONOBLACK] = { 1, 1 },
[PIX_FMT_PAL8] = { 1 , 0 }, [PIX_FMT_PAL8] = { 1, 0 },
[PIX_FMT_YUVJ420P] = { 1 , 1 }, [PIX_FMT_YUVJ420P] = { 1, 1 },
[PIX_FMT_YUVJ422P] = { 1 , 1 }, [PIX_FMT_YUVJ422P] = { 1, 1 },
[PIX_FMT_YUVJ444P] = { 1 , 1 }, [PIX_FMT_YUVJ444P] = { 1, 1 },
[PIX_FMT_UYVY422] = { 1 , 1 }, [PIX_FMT_UYVY422] = { 1, 1 },
[PIX_FMT_UYYVYY411] = { 0 , 0 }, [PIX_FMT_UYYVYY411] = { 0, 0 },
[PIX_FMT_BGR8] = { 1 , 1 }, [PIX_FMT_BGR8] = { 1, 1 },
[PIX_FMT_BGR4] = { 0 , 1 }, [PIX_FMT_BGR4] = { 0, 1 },
[PIX_FMT_BGR4_BYTE] = { 1 , 1 }, [PIX_FMT_BGR4_BYTE] = { 1, 1 },
[PIX_FMT_RGB8] = { 1 , 1 }, [PIX_FMT_RGB8] = { 1, 1 },
[PIX_FMT_RGB4] = { 0 , 1 }, [PIX_FMT_RGB4] = { 0, 1 },
[PIX_FMT_RGB4_BYTE] = { 1 , 1 }, [PIX_FMT_RGB4_BYTE] = { 1, 1 },
[PIX_FMT_NV12] = { 1 , 1 }, [PIX_FMT_NV12] = { 1, 1 },
[PIX_FMT_NV21] = { 1 , 1 }, [PIX_FMT_NV21] = { 1, 1 },
[PIX_FMT_ARGB] = { 1 , 1 }, [PIX_FMT_ARGB] = { 1, 1 },
[PIX_FMT_RGBA] = { 1 , 1 }, [PIX_FMT_RGBA] = { 1, 1 },
[PIX_FMT_ABGR] = { 1 , 1 }, [PIX_FMT_ABGR] = { 1, 1 },
[PIX_FMT_BGRA] = { 1 , 1 }, [PIX_FMT_BGRA] = { 1, 1 },
[PIX_FMT_0RGB] = { 1 , 1 }, [PIX_FMT_0RGB] = { 1, 1 },
[PIX_FMT_RGB0] = { 1 , 1 }, [PIX_FMT_RGB0] = { 1, 1 },
[PIX_FMT_0BGR] = { 1 , 1 }, [PIX_FMT_0BGR] = { 1, 1 },
[PIX_FMT_BGR0] = { 1 , 1 }, [PIX_FMT_BGR0] = { 1, 1 },
[PIX_FMT_GRAY16BE] = { 1 , 1 }, [PIX_FMT_GRAY16BE] = { 1, 1 },
[PIX_FMT_GRAY16LE] = { 1 , 1 }, [PIX_FMT_GRAY16LE] = { 1, 1 },
[PIX_FMT_YUV440P] = { 1 , 1 }, [PIX_FMT_YUV440P] = { 1, 1 },
[PIX_FMT_YUVJ440P] = { 1 , 1 }, [PIX_FMT_YUVJ440P] = { 1, 1 },
[PIX_FMT_YUVA420P] = { 1 , 1 }, [PIX_FMT_YUVA420P] = { 1, 1 },
[PIX_FMT_YUVA444P] = { 1 , 1 }, [PIX_FMT_YUVA444P] = { 1, 1 },
[PIX_FMT_RGB48BE] = { 1 , 1 }, [PIX_FMT_RGB48BE] = { 1, 1 },
[PIX_FMT_RGB48LE] = { 1 , 1 }, [PIX_FMT_RGB48LE] = { 1, 1 },
[PIX_FMT_RGBA64BE] = { 1 , 0 }, [PIX_FMT_RGBA64BE] = { 1, 0 },
[PIX_FMT_RGBA64LE] = { 1 , 0 }, [PIX_FMT_RGBA64LE] = { 1, 0 },
[PIX_FMT_RGB565BE] = { 1 , 1 }, [PIX_FMT_RGB565BE] = { 1, 1 },
[PIX_FMT_RGB565LE] = { 1 , 1 }, [PIX_FMT_RGB565LE] = { 1, 1 },
[PIX_FMT_RGB555BE] = { 1 , 1 }, [PIX_FMT_RGB555BE] = { 1, 1 },
[PIX_FMT_RGB555LE] = { 1 , 1 }, [PIX_FMT_RGB555LE] = { 1, 1 },
[PIX_FMT_BGR565BE] = { 1 , 1 }, [PIX_FMT_BGR565BE] = { 1, 1 },
[PIX_FMT_BGR565LE] = { 1 , 1 }, [PIX_FMT_BGR565LE] = { 1, 1 },
[PIX_FMT_BGR555BE] = { 1 , 1 }, [PIX_FMT_BGR555BE] = { 1, 1 },
[PIX_FMT_BGR555LE] = { 1 , 1 }, [PIX_FMT_BGR555LE] = { 1, 1 },
[PIX_FMT_YUV420P16LE] = { 1 , 1 }, [PIX_FMT_YUV420P16LE] = { 1, 1 },
[PIX_FMT_YUV420P16BE] = { 1 , 1 }, [PIX_FMT_YUV420P16BE] = { 1, 1 },
[PIX_FMT_YUV422P16LE] = { 1 , 1 }, [PIX_FMT_YUV422P16LE] = { 1, 1 },
[PIX_FMT_YUV422P16BE] = { 1 , 1 }, [PIX_FMT_YUV422P16BE] = { 1, 1 },
[PIX_FMT_YUV444P16LE] = { 1 , 1 }, [PIX_FMT_YUV444P16LE] = { 1, 1 },
[PIX_FMT_YUV444P16BE] = { 1 , 1 }, [PIX_FMT_YUV444P16BE] = { 1, 1 },
[PIX_FMT_RGB444LE] = { 1 , 1 }, [PIX_FMT_RGB444LE] = { 1, 1 },
[PIX_FMT_RGB444BE] = { 1 , 1 }, [PIX_FMT_RGB444BE] = { 1, 1 },
[PIX_FMT_BGR444LE] = { 1 , 1 }, [PIX_FMT_BGR444LE] = { 1, 1 },
[PIX_FMT_BGR444BE] = { 1 , 1 }, [PIX_FMT_BGR444BE] = { 1, 1 },
[PIX_FMT_Y400A] = { 1 , 0 }, [PIX_FMT_Y400A] = { 1, 0 },
[PIX_FMT_BGR48BE] = { 1 , 1 }, [PIX_FMT_BGR48BE] = { 1, 1 },
[PIX_FMT_BGR48LE] = { 1 , 1 }, [PIX_FMT_BGR48LE] = { 1, 1 },
[PIX_FMT_BGRA64BE] = { 0 , 0 }, [PIX_FMT_BGRA64BE] = { 0, 0 },
[PIX_FMT_BGRA64LE] = { 0 , 0 }, [PIX_FMT_BGRA64LE] = { 0, 0 },
[PIX_FMT_YUV420P9BE] = { 1 , 1 }, [PIX_FMT_YUV420P9BE] = { 1, 1 },
[PIX_FMT_YUV420P9LE] = { 1 , 1 }, [PIX_FMT_YUV420P9LE] = { 1, 1 },
[PIX_FMT_YUV420P10BE] = { 1 , 1 }, [PIX_FMT_YUV420P10BE] = { 1, 1 },
[PIX_FMT_YUV420P10LE] = { 1 , 1 }, [PIX_FMT_YUV420P10LE] = { 1, 1 },
[PIX_FMT_YUV422P9BE] = { 1 , 1 }, [PIX_FMT_YUV422P9BE] = { 1, 1 },
[PIX_FMT_YUV422P9LE] = { 1 , 1 }, [PIX_FMT_YUV422P9LE] = { 1, 1 },
[PIX_FMT_YUV422P10BE] = { 1 , 1 }, [PIX_FMT_YUV422P10BE] = { 1, 1 },
[PIX_FMT_YUV422P10LE] = { 1 , 1 }, [PIX_FMT_YUV422P10LE] = { 1, 1 },
[PIX_FMT_YUV444P9BE] = { 1 , 1 }, [PIX_FMT_YUV444P9BE] = { 1, 1 },
[PIX_FMT_YUV444P9LE] = { 1 , 1 }, [PIX_FMT_YUV444P9LE] = { 1, 1 },
[PIX_FMT_YUV444P10BE] = { 1 , 1 }, [PIX_FMT_YUV444P10BE] = { 1, 1 },
[PIX_FMT_YUV444P10LE] = { 1 , 1 }, [PIX_FMT_YUV444P10LE] = { 1, 1 },
[PIX_FMT_GBRP] = { 1 , 0 }, [PIX_FMT_GBRP] = { 1, 0 },
[PIX_FMT_GBRP9LE] = { 1 , 0 }, [PIX_FMT_GBRP9LE] = { 1, 0 },
[PIX_FMT_GBRP9BE] = { 1 , 0 }, [PIX_FMT_GBRP9BE] = { 1, 0 },
[PIX_FMT_GBRP10LE] = { 1 , 0 }, [PIX_FMT_GBRP10LE] = { 1, 0 },
[PIX_FMT_GBRP10BE] = { 1 , 0 }, [PIX_FMT_GBRP10BE] = { 1, 0 },
[PIX_FMT_GBRP16LE] = { 1 , 0 }, [PIX_FMT_GBRP16LE] = { 1, 0 },
[PIX_FMT_GBRP16BE] = { 1 , 0 }, [PIX_FMT_GBRP16BE] = { 1, 0 },
}; };
int sws_isSupportedInput(enum PixelFormat pix_fmt) int sws_isSupportedInput(enum PixelFormat pix_fmt)
...@@ -181,261 +183,301 @@ const char *sws_format_name(enum PixelFormat format) ...@@ -181,261 +183,301 @@ const char *sws_format_name(enum PixelFormat format)
} }
#endif #endif
static double getSplineCoeff(double a, double b, double c, double d, double dist) static double getSplineCoeff(double a, double b, double c, double d,
double dist)
{ {
if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a; if (dist <= 1.0)
else return getSplineCoeff( 0.0, return ((d * dist + c) * dist + b) * dist + a;
b+ 2.0*c + 3.0*d, else
c + 3.0*d, return getSplineCoeff(0.0,
-b- 3.0*c - 6.0*d, b + 2.0 * c + 3.0 * d,
dist-1.0); c + 3.0 * d,
-b - 3.0 * c - 6.0 * d,
dist - 1.0);
} }
static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc, static int initFilter(int16_t **outFilter, int32_t **filterPos,
int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, int *outFilterSize, int xInc, int srcW, int dstW,
SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) int filterAlign, int one, int flags, int cpu_flags,
SwsVector *srcFilter, SwsVector *dstFilter,
double param[2])
{ {
int i; int i;
int filterSize; int filterSize;
int filter2Size; int filter2Size;
int minFilterSize; int minFilterSize;
int64_t *filter=NULL; int64_t *filter = NULL;
int64_t *filter2=NULL; int64_t *filter2 = NULL;
const int64_t fone= 1LL<<54; const int64_t fone = 1LL << 54;
int ret= -1; int ret = -1;
emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
// NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail); FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail);
if (FFABS(xInc - 0x10000) <10) { // unscaled if (FFABS(xInc - 0x10000) < 10) { // unscaled
int i; int i;
filterSize= 1; filterSize = 1;
FF_ALLOCZ_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail); FF_ALLOCZ_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
filter[i*filterSize]= fone; filter[i * filterSize] = fone;
(*filterPos)[i]=i; (*filterPos)[i] = i;
} }
} else if (flags & SWS_POINT) { // lame looking point sampling mode
} else if (flags&SWS_POINT) { // lame looking point sampling mode
int i; int i;
int64_t xDstInSrc; int64_t xDstInSrc;
filterSize= 1; filterSize = 1;
FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail); FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
xDstInSrc= xInc/2 - 0x8000; xDstInSrc = xInc / 2 - 0x8000;
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
(*filterPos)[i]= xx; (*filterPos)[i] = xx;
filter[i]= fone; filter[i] = fone;
xDstInSrc+= xInc; xDstInSrc += xInc;
} }
} else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) { // bilinear upscale } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
(flags & SWS_FAST_BILINEAR)) { // bilinear upscale
int i; int i;
int64_t xDstInSrc; int64_t xDstInSrc;
filterSize= 2; filterSize = 2;
FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail); FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
xDstInSrc= xInc/2 - 0x8000; xDstInSrc = xInc / 2 - 0x8000;
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
int j; int j;
(*filterPos)[i]= xx; (*filterPos)[i] = xx;
//bilinear upscale / linear interpolate / area averaging // bilinear upscale / linear interpolate / area averaging
for (j=0; j<filterSize; j++) { for (j = 0; j < filterSize; j++) {
int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16); int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
if (coeff<0) coeff=0; if (coeff < 0)
filter[i*filterSize + j]= coeff; coeff = 0;
filter[i * filterSize + j] = coeff;
xx++; xx++;
} }
xDstInSrc+= xInc; xDstInSrc += xInc;
} }
} else { } else {
int64_t xDstInSrc; int64_t xDstInSrc;
int sizeFactor; int sizeFactor;
if (flags&SWS_BICUBIC) sizeFactor= 4; if (flags & SWS_BICUBIC)
else if (flags&SWS_X) sizeFactor= 8; sizeFactor = 4;
else if (flags&SWS_AREA) sizeFactor= 1; //downscale only, for upscale it is bilinear else if (flags & SWS_X)
else if (flags&SWS_GAUSS) sizeFactor= 8; // infinite ;) sizeFactor = 8;
else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6; else if (flags & SWS_AREA)
else if (flags&SWS_SINC) sizeFactor= 20; // infinite ;) sizeFactor = 1; // downscale only, for upscale it is bilinear
else if (flags&SWS_SPLINE) sizeFactor= 20; // infinite ;) else if (flags & SWS_GAUSS)
else if (flags&SWS_BILINEAR) sizeFactor= 2; sizeFactor = 8; // infinite ;)
else if (flags & SWS_LANCZOS)
sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
else if (flags & SWS_SINC)
sizeFactor = 20; // infinite ;)
else if (flags & SWS_SPLINE)
sizeFactor = 20; // infinite ;)
else if (flags & SWS_BILINEAR)
sizeFactor = 2;
else { else {
sizeFactor= 0; //GCC warning killer sizeFactor = 0; // GCC warning killer
assert(0); assert(0);
} }
if (xInc <= 1<<16) filterSize= 1 + sizeFactor; // upscale if (xInc <= 1 << 16)
else filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW; filterSize = 1 + sizeFactor; // upscale
else
filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
filterSize = FFMIN(filterSize, srcW - 2); filterSize = FFMIN(filterSize, srcW - 2);
filterSize = FFMAX(filterSize, 1); filterSize = FFMAX(filterSize, 1);
FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail); FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
xDstInSrc= xInc - 0x10000; xDstInSrc = xInc - 0x10000;
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17); int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
int j; int j;
(*filterPos)[i]= xx; (*filterPos)[i] = xx;
for (j=0; j<filterSize; j++) { for (j = 0; j < filterSize; j++) {
int64_t d= (FFABS(((int64_t)xx<<17) - xDstInSrc))<<13; int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
double floatd; double floatd;
int64_t coeff; int64_t coeff;
if (xInc > 1<<16) if (xInc > 1 << 16)
d= d*dstW/srcW; d = d * dstW / srcW;
floatd= d * (1.0/(1<<30)); floatd = d * (1.0 / (1 << 30));
if (flags & SWS_BICUBIC) { if (flags & SWS_BICUBIC) {
int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24); int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24);
int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24); int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
if (d >= 1LL<<31) { if (d >= 1LL << 31) {
coeff = 0.0; coeff = 0.0;
} else { } else {
int64_t dd = (d * d) >> 30; int64_t dd = (d * d) >> 30;
int64_t ddd = (dd * d) >> 30; int64_t ddd = (dd * d) >> 30;
if (d < 1LL<<30) if (d < 1LL << 30)
coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30); coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
(-18 * (1 << 24) + 12 * B + 6 * C) * dd +
(6 * (1 << 24) - 2 * B) * (1 << 30);
else else
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30); coeff = (-B - 6 * C) * ddd +
(6 * B + 30 * C) * dd +
(-12 * B - 48 * C) * d +
(8 * B + 24 * C) * (1 << 30);
} }
coeff *= fone>>(30+24); coeff *= fone >> (30 + 24);
} }
/* else if (flags & SWS_X) { #if 0
double p= param ? param*0.01 : 0.3;
coeff = d ? sin(d*M_PI)/(d*M_PI) : 1.0;
coeff*= pow(2.0, - p*d*d);
}*/
else if (flags & SWS_X) { else if (flags & SWS_X) {
double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; double p = param ? param * 0.01 : 0.3;
coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
coeff *= pow(2.0, -p * d * d);
}
#endif
else if (flags & SWS_X) {
double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
double c; double c;
if (floatd<1.0) if (floatd < 1.0)
c = cos(floatd*M_PI); c = cos(floatd * M_PI);
else
c = -1.0;
if (c < 0.0)
c = -pow(-c, A);
else else
c=-1.0; c = pow(c, A);
if (c<0.0) c= -pow(-c, A); coeff = (c * 0.5 + 0.5) * fone;
else c= pow( c, A);
coeff= (c*0.5 + 0.5)*fone;
} else if (flags & SWS_AREA) { } else if (flags & SWS_AREA) {
int64_t d2= d - (1<<29); int64_t d2 = d - (1 << 29);
if (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16)); if (d2 * xInc < -(1LL << (29 + 16)))
else if (d2*xInc < (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16)); coeff = 1.0 * (1LL << (30 + 16));
else coeff=0.0; else if (d2 * xInc < (1LL << (29 + 16)))
coeff *= fone>>(30+16); coeff = -d2 * xInc + (1LL << (29 + 16));
else
coeff = 0.0;
coeff *= fone >> (30 + 16);
} else if (flags & SWS_GAUSS) { } else if (flags & SWS_GAUSS) {
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = (pow(2.0, - p*floatd*floatd))*fone; coeff = (pow(2.0, -p * floatd * floatd)) * fone;
} else if (flags & SWS_SINC) { } else if (flags & SWS_SINC) {
coeff = (d ? sin(floatd*M_PI)/(floatd*M_PI) : 1.0)*fone; coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
} else if (flags & SWS_LANCZOS) { } else if (flags & SWS_LANCZOS) {
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = (d ? sin(floatd*M_PI)*sin(floatd*M_PI/p)/(floatd*floatd*M_PI*M_PI/p) : 1.0)*fone; coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
if (floatd>p) coeff=0; (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
if (floatd > p)
coeff = 0;
} else if (flags & SWS_BILINEAR) { } else if (flags & SWS_BILINEAR) {
coeff= (1<<30) - d; coeff = (1 << 30) - d;
if (coeff<0) coeff=0; if (coeff < 0)
coeff = 0;
coeff *= fone >> 30; coeff *= fone >> 30;
} else if (flags & SWS_SPLINE) { } else if (flags & SWS_SPLINE) {
double p=-2.196152422706632; double p = -2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone; coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
} else { } else {
coeff= 0.0; //GCC warning killer coeff = 0.0; // GCC warning killer
assert(0); assert(0);
} }
filter[i*filterSize + j]= coeff; filter[i * filterSize + j] = coeff;
xx++; xx++;
} }
xDstInSrc+= 2*xInc; xDstInSrc += 2 * xInc;
} }
} }
/* apply src & dst Filter to filter -> filter2 /* apply src & dst Filter to filter -> filter2
av_free(filter); * av_free(filter);
*/ */
assert(filterSize>0); assert(filterSize > 0);
filter2Size= filterSize; filter2Size = filterSize;
if (srcFilter) filter2Size+= srcFilter->length - 1; if (srcFilter)
if (dstFilter) filter2Size+= dstFilter->length - 1; filter2Size += srcFilter->length - 1;
assert(filter2Size>0); if (dstFilter)
FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size*dstW*sizeof(*filter2), fail); filter2Size += dstFilter->length - 1;
assert(filter2Size > 0);
for (i=0; i<dstW; i++) { FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
for (i = 0; i < dstW; i++) {
int j, k; int j, k;
if(srcFilter) { if (srcFilter) {
for (k=0; k<srcFilter->length; k++) { for (k = 0; k < srcFilter->length; k++) {
for (j=0; j<filterSize; j++) for (j = 0; j < filterSize; j++)
filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j]; filter2[i * filter2Size + k + j] +=
srcFilter->coeff[k] * filter[i * filterSize + j];
} }
} else { } else {
for (j=0; j<filterSize; j++) for (j = 0; j < filterSize; j++)
filter2[i*filter2Size + j]= filter[i*filterSize + j]; filter2[i * filter2Size + j] = filter[i * filterSize + j];
} }
//FIXME dstFilter // FIXME dstFilter
(*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2; (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
} }
av_freep(&filter); av_freep(&filter);
/* try to reduce the filter-size (step1 find size and shift left) */ /* try to reduce the filter-size (step1 find size and shift left) */
// Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not). // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
minFilterSize= 0; minFilterSize = 0;
for (i=dstW-1; i>=0; i--) { for (i = dstW - 1; i >= 0; i--) {
int min= filter2Size; int min = filter2Size;
int j; int j;
int64_t cutOff=0.0; int64_t cutOff = 0.0;
/* get rid of near zero elements on the left by shifting left */ /* get rid of near zero elements on the left by shifting left */
for (j=0; j<filter2Size; j++) { for (j = 0; j < filter2Size; j++) {
int k; int k;
cutOff += FFABS(filter2[i*filter2Size]); cutOff += FFABS(filter2[i * filter2Size]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break; if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
break;
/* preserve monotonicity because the core can't handle the filter otherwise */ /* preserve monotonicity because the core can't handle the
if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break; * filter otherwise */
if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
break;
// move filter coefficients left // move filter coefficients left
for (k=1; k<filter2Size; k++) for (k = 1; k < filter2Size; k++)
filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k]; filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
filter2[i*filter2Size + k - 1]= 0; filter2[i * filter2Size + k - 1] = 0;
(*filterPos)[i]++; (*filterPos)[i]++;
} }
cutOff=0; cutOff = 0;
/* count near zeros on the right */ /* count near zeros on the right */
for (j=filter2Size-1; j>0; j--) { for (j = filter2Size - 1; j > 0; j--) {
cutOff += FFABS(filter2[i*filter2Size + j]); cutOff += FFABS(filter2[i * filter2Size + j]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break; if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
break;
min--; min--;
} }
if (min>minFilterSize) minFilterSize= min; if (min > minFilterSize)
minFilterSize = min;
} }
if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) { if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) {
// we can handle the special case 4, // we can handle the special case 4, so we don't want to go the full 8
// so we don't want to go to the full 8
if (minFilterSize < 5) if (minFilterSize < 5)
filterAlign = 4; filterAlign = 4;
// We really don't want to waste our time /* We really don't want to waste our time doing useless computation, so
// doing useless computation, so fall back on * fall back on the scalar C code for very small filters.
// the scalar C code for very small filters. * Vectorizing is worth it only if you have a decent-sized vector. */
// Vectorizing is worth it only if you have a
// decent-sized vector.
if (minFilterSize < 3) if (minFilterSize < 3)
filterAlign = 1; filterAlign = 1;
} }
...@@ -443,53 +485,58 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSi ...@@ -443,53 +485,58 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSi
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
// special case for unscaled vertical filtering // special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2) if (minFilterSize == 1 && filterAlign == 2)
filterAlign= 1; filterAlign = 1;
} }
assert(minFilterSize > 0); assert(minFilterSize > 0);
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1)); filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
assert(filterSize > 0); assert(filterSize > 0);
filter= av_malloc(filterSize*dstW*sizeof(*filter)); filter = av_malloc(filterSize * dstW * sizeof(*filter));
if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) if (filterSize >= MAX_FILTER_SIZE * 16 /
((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
goto fail; goto fail;
*outFilterSize= filterSize; *outFilterSize = filterSize;
if (flags&SWS_PRINT_INFO) if (flags & SWS_PRINT_INFO)
av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); av_log(NULL, AV_LOG_VERBOSE,
"SwScaler: reducing / aligning filtersize %d -> %d\n",
filter2Size, filterSize);
/* try to reduce the filter-size (step2 reduce it) */ /* try to reduce the filter-size (step2 reduce it) */
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int j; int j;
for (j=0; j<filterSize; j++) { for (j = 0; j < filterSize; j++) {
if (j>=filter2Size) filter[i*filterSize + j]= 0; if (j >= filter2Size)
else filter[i*filterSize + j]= filter2[i*filter2Size + j]; filter[i * filterSize + j] = 0;
if((flags & SWS_BITEXACT) && j>=minFilterSize) else
filter[i*filterSize + j]= 0; filter[i * filterSize + j] = filter2[i * filter2Size + j];
if ((flags & SWS_BITEXACT) && j >= minFilterSize)
filter[i * filterSize + j] = 0;
} }
} }
//FIXME try to align filterPos if possible // FIXME try to align filterPos if possible
//fix borders // fix borders
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int j; int j;
if ((*filterPos)[i] < 0) { if ((*filterPos)[i] < 0) {
// move filter coefficients left to compensate for filterPos // move filter coefficients left to compensate for filterPos
for (j=1; j<filterSize; j++) { for (j = 1; j < filterSize; j++) {
int left= FFMAX(j + (*filterPos)[i], 0); int left = FFMAX(j + (*filterPos)[i], 0);
filter[i*filterSize + left] += filter[i*filterSize + j]; filter[i * filterSize + left] += filter[i * filterSize + j];
filter[i*filterSize + j]=0; filter[i * filterSize + j] = 0;
} }
(*filterPos)[i]= 0; (*filterPos)[i]= 0;
} }
if ((*filterPos)[i] + filterSize > srcW) { if ((*filterPos)[i] + filterSize > srcW) {
int shift= (*filterPos)[i] + filterSize - srcW; int shift = (*filterPos)[i] + filterSize - srcW;
// move filter coefficients right to compensate for filterPos // move filter coefficients right to compensate for filterPos
for (j=filterSize-2; j>=0; j--) { for (j = filterSize - 2; j >= 0; j--) {
int right= FFMIN(j + shift, filterSize-1); int right = FFMIN(j + shift, filterSize - 1);
filter[i*filterSize +right] += filter[i*filterSize +j]; filter[i * filterSize + right] += filter[i * filterSize + j];
filter[i*filterSize +j]=0; filter[i * filterSize + j] = 0;
} }
(*filterPos)[i]= srcW - filterSize; (*filterPos)[i]= srcW - filterSize;
} }
...@@ -497,37 +544,40 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSi ...@@ -497,37 +544,40 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSi
// Note the +1 is for the MMX scaler which reads over the end // Note the +1 is for the MMX scaler which reads over the end
/* align at 16 for AltiVec (needed by hScale_altivec_real) */ /* align at 16 for AltiVec (needed by hScale_altivec_real) */
FF_ALLOCZ_OR_GOTO(NULL, *outFilter, *outFilterSize*(dstW+3)*sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
*outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
/* normalize & store in outFilter */ /* normalize & store in outFilter */
for (i=0; i<dstW; i++) { for (i = 0; i < dstW; i++) {
int j; int j;
int64_t error=0; int64_t error = 0;
int64_t sum=0; int64_t sum = 0;
for (j=0; j<filterSize; j++) { for (j = 0; j < filterSize; j++) {
sum+= filter[i*filterSize + j]; sum += filter[i * filterSize + j];
} }
sum= (sum + one/2)/ one; sum = (sum + one / 2) / one;
for (j=0; j<*outFilterSize; j++) { for (j = 0; j < *outFilterSize; j++) {
int64_t v= filter[i*filterSize + j] + error; int64_t v = filter[i * filterSize + j] + error;
int intV= ROUNDED_DIV(v, sum); int intV = ROUNDED_DIV(v, sum);
(*outFilter)[i*(*outFilterSize) + j]= intV; (*outFilter)[i * (*outFilterSize) + j] = intV;
error= v - intV*sum; error = v - intV * sum;
} }
} }
(*filterPos)[dstW+0] = (*filterPos)[dstW + 0] =
(*filterPos)[dstW+1] = (*filterPos)[dstW + 1] =
(*filterPos)[dstW+2] = (*filterPos)[dstW-1]; // the MMX/SSE scaler will read over the end (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will
for (i=0; i<*outFilterSize; i++) { * read over the end */
int k= (dstW - 1) * (*outFilterSize) + i; for (i = 0; i < *outFilterSize; i++) {
int k = (dstW - 1) * (*outFilterSize) + i;
(*outFilter)[k + 1 * (*outFilterSize)] = (*outFilter)[k + 1 * (*outFilterSize)] =
(*outFilter)[k + 2 * (*outFilterSize)] = (*outFilter)[k + 2 * (*outFilterSize)] =
(*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k]; (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
} }
ret=0; ret = 0;
fail: fail:
av_free(filter); av_free(filter);
av_free(filter2); av_free(filter2);
...@@ -535,7 +585,8 @@ fail: ...@@ -535,7 +585,8 @@ fail:
} }
#if HAVE_MMX2 #if HAVE_MMX2
static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits) static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
int16_t *filter, int32_t *filterPos, int numSplits)
{ {
uint8_t *fragmentA; uint8_t *fragmentA;
x86_reg imm8OfPShufW1A; x86_reg imm8OfPShufW1A;
...@@ -550,16 +601,15 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -550,16 +601,15 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
int xpos, i; int xpos, i;
// create an optimized horizontal scaling routine // create an optimized horizontal scaling routine
/* This scaler is made of runtime-generated MMX2 code using specially /* This scaler is made of runtime-generated MMX2 code using specially tuned
* tuned pshufw instructions. For every four output pixels, if four * pshufw instructions. For every four output pixels, if four input pixels
* input pixels are enough for the fast bilinear scaling, then a chunk * are enough for the fast bilinear scaling, then a chunk of fragmentB is
* of fragmentB is used. If five input pixels are needed, then a chunk * used. If five input pixels are needed, then a chunk of fragmentA is used.
* of fragmentA is used.
*/ */
//code fragment // code fragment
__asm__ volatile( __asm__ volatile (
"jmp 9f \n\t" "jmp 9f \n\t"
// Begin // Begin
"0: \n\t" "0: \n\t"
...@@ -583,7 +633,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -583,7 +633,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
"add $8, %%"REG_a" \n\t" "add $8, %%"REG_a" \n\t"
// End // End
"9: \n\t" "9: \n\t"
// "int $3 \n\t" // "int $3 \n\t"
"lea " LOCAL_MANGLE(0b) ", %0 \n\t" "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
"lea " LOCAL_MANGLE(1b) ", %1 \n\t" "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
"lea " LOCAL_MANGLE(2b) ", %2 \n\t" "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
...@@ -595,11 +645,11 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -595,11 +645,11 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
"sub %0, %3 \n\t" "sub %0, %3 \n\t"
:"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A), : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
"=r" (fragmentLengthA) "=r" (fragmentLengthA)
); );
__asm__ volatile( __asm__ volatile (
"jmp 9f \n\t" "jmp 9f \n\t"
// Begin // Begin
"0: \n\t" "0: \n\t"
...@@ -621,7 +671,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -621,7 +671,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
"add $8, %%"REG_a" \n\t" "add $8, %%"REG_a" \n\t"
// End // End
"9: \n\t" "9: \n\t"
// "int $3 \n\t" // "int $3 \n\t"
"lea " LOCAL_MANGLE(0b) ", %0 \n\t" "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
"lea " LOCAL_MANGLE(1b) ", %1 \n\t" "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
"lea " LOCAL_MANGLE(2b) ", %2 \n\t" "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
...@@ -633,62 +683,67 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -633,62 +683,67 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
"sub %0, %3 \n\t" "sub %0, %3 \n\t"
:"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B), : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
"=r" (fragmentLengthB) "=r" (fragmentLengthB)
); );
xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
fragmentPos=0; fragmentPos = 0;
for (i=0; i<dstW/numSplits; i++) { for (i = 0; i < dstW / numSplits; i++) {
int xx=xpos>>16; int xx = xpos >> 16;
if ((i&3) == 0) { if ((i & 3) == 0) {
int a=0; int a = 0;
int b=((xpos+xInc)>>16) - xx; int b = ((xpos + xInc) >> 16) - xx;
int c=((xpos+xInc*2)>>16) - xx; int c = ((xpos + xInc * 2) >> 16) - xx;
int d=((xpos+xInc*3)>>16) - xx; int d = ((xpos + xInc * 3) >> 16) - xx;
int inc = (d+1<4); int inc = (d + 1 < 4);
uint8_t *fragment = (d+1<4) ? fragmentB : fragmentA; uint8_t *fragment = (d + 1 < 4) ? fragmentB : fragmentA;
x86_reg imm8OfPShufW1 = (d+1<4) ? imm8OfPShufW1B : imm8OfPShufW1A; x86_reg imm8OfPShufW1 = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A;
x86_reg imm8OfPShufW2 = (d+1<4) ? imm8OfPShufW2B : imm8OfPShufW2A; x86_reg imm8OfPShufW2 = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A;
x86_reg fragmentLength = (d+1<4) ? fragmentLengthB : fragmentLengthA; x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA;
int maxShift= 3-(d+inc); int maxShift = 3 - (d + inc);
int shift=0; int shift = 0;
if (filterCode) { if (filterCode) {
filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9; filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9; filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9; filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9; filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
filterPos[i/2]= xx; filterPos[i / 2] = xx;
memcpy(filterCode + fragmentPos, fragment, fragmentLength); memcpy(filterCode + fragmentPos, fragment, fragmentLength);
filterCode[fragmentPos + imm8OfPShufW1]= filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
(a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6); ((b + inc) << 2) |
filterCode[fragmentPos + imm8OfPShufW2]= ((c + inc) << 4) |
a | (b<<2) | (c<<4) | (d<<6); ((d + inc) << 6);
filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
(c << 4) |
(d << 6);
if (i+4-inc>=dstW) shift=maxShift; //avoid overread if (i + 4 - inc >= dstW)
else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align shift = maxShift; // avoid overread
else if ((filterPos[i / 2] & 3) <= maxShift)
shift = filterPos[i / 2] & 3; // align
if (shift && i>=shift) { if (shift && i >= shift) {
filterCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift; filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
filterCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift; filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
filterPos[i/2]-=shift; filterPos[i / 2] -= shift;
} }
} }
fragmentPos+= fragmentLength; fragmentPos += fragmentLength;
if (filterCode) if (filterCode)
filterCode[fragmentPos]= RET; filterCode[fragmentPos] = RET;
} }
xpos+=xInc; xpos += xInc;
} }
if (filterCode) if (filterCode)
filterPos[((i/2)+1)&(~1)]= xpos>>16; // needed to jump to the next part filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part
return fragmentPos + 1; return fragmentPos + 1;
} }
...@@ -704,24 +759,27 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], ...@@ -704,24 +759,27 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
int srcRange, const int table[4], int dstRange, int srcRange, const int table[4], int dstRange,
int brightness, int contrast, int saturation) int brightness, int contrast, int saturation)
{ {
memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4); memcpy(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
memcpy(c->dstColorspaceTable, table, sizeof(int)*4); memcpy(c->dstColorspaceTable, table, sizeof(int) * 4);
c->brightness= brightness; c->brightness = brightness;
c->contrast = contrast; c->contrast = contrast;
c->saturation= saturation; c->saturation = saturation;
c->srcRange = srcRange; c->srcRange = srcRange;
c->dstRange = dstRange; c->dstRange = dstRange;
if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; if (isYUV(c->dstFormat) || isGray(c->dstFormat))
return -1;
c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]); c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]);
c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]); c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]);
ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation); ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
//FIXME factorize contrast, saturation);
// FIXME factorize
if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)
ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation); ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness,
contrast, saturation);
return 0; return 0;
} }
...@@ -729,15 +787,16 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, ...@@ -729,15 +787,16 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
int *srcRange, int **table, int *dstRange, int *srcRange, int **table, int *dstRange,
int *brightness, int *contrast, int *saturation) int *brightness, int *contrast, int *saturation)
{ {
if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat))
return -1;
*inv_table = c->srcColorspaceTable; *inv_table = c->srcColorspaceTable;
*table = c->dstColorspaceTable; *table = c->dstColorspaceTable;
*srcRange = c->srcRange; *srcRange = c->srcRange;
*dstRange = c->dstRange; *dstRange = c->dstRange;
*brightness= c->brightness; *brightness = c->brightness;
*contrast = c->contrast; *contrast = c->contrast;
*saturation= c->saturation; *saturation = c->saturation;
return 0; return 0;
} }
...@@ -745,11 +804,20 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, ...@@ -745,11 +804,20 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
static int handle_jpeg(enum PixelFormat *format) static int handle_jpeg(enum PixelFormat *format)
{ {
switch (*format) { switch (*format) {
case PIX_FMT_YUVJ420P: *format = PIX_FMT_YUV420P; return 1; case PIX_FMT_YUVJ420P:
case PIX_FMT_YUVJ422P: *format = PIX_FMT_YUV422P; return 1; *format = PIX_FMT_YUV420P;
case PIX_FMT_YUVJ444P: *format = PIX_FMT_YUV444P; return 1; return 1;
case PIX_FMT_YUVJ440P: *format = PIX_FMT_YUV440P; return 1; case PIX_FMT_YUVJ422P:
default: return 0; *format = PIX_FMT_YUV422P;
return 1;
case PIX_FMT_YUVJ444P:
*format = PIX_FMT_YUV444P;
return 1;
case PIX_FMT_YUVJ440P:
*format = PIX_FMT_YUV440P;
return 1;
default:
return 0;
} }
} }
...@@ -766,7 +834,7 @@ static int handle_0alpha(enum PixelFormat *format) ...@@ -766,7 +834,7 @@ static int handle_0alpha(enum PixelFormat *format)
SwsContext *sws_alloc_context(void) SwsContext *sws_alloc_context(void)
{ {
SwsContext *c= av_mallocz(sizeof(SwsContext)); SwsContext *c = av_mallocz(sizeof(SwsContext));
c->av_class = &sws_context_class; c->av_class = &sws_context_class;
av_opt_set_defaults(c); av_opt_set_defaults(c);
...@@ -779,20 +847,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -779,20 +847,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
int i, j; int i, j;
int usesVFilter, usesHFilter; int usesVFilter, usesHFilter;
int unscaled; int unscaled;
SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
int srcW= c->srcW; int srcW = c->srcW;
int srcH= c->srcH; int srcH = c->srcH;
int dstW= c->dstW; int dstW = c->dstW;
int dstH= c->dstH; int dstH = c->dstH;
int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16); int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
int flags, cpu_flags; int flags, cpu_flags;
enum PixelFormat srcFormat= c->srcFormat; enum PixelFormat srcFormat = c->srcFormat;
enum PixelFormat dstFormat= c->dstFormat; enum PixelFormat dstFormat = c->dstFormat;
cpu_flags = av_get_cpu_flags(); cpu_flags = av_get_cpu_flags();
flags = c->flags; flags = c->flags;
emms_c(); emms_c();
if (!rgb15to16) sws_rgb2rgb_init(); if (!rgb15to16)
sws_rgb2rgb_init();
unscaled = (srcW == dstW && srcH == dstH); unscaled = (srcW == dstW && srcH == dstH);
...@@ -808,53 +877,59 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -808,53 +877,59 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
} }
if (!sws_isSupportedInput(srcFormat)) { if (!sws_isSupportedInput(srcFormat)) {
av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat)); av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n",
av_get_pix_fmt_name(srcFormat));
return AVERROR(EINVAL); return AVERROR(EINVAL);
} }
if (!sws_isSupportedOutput(dstFormat)) { if (!sws_isSupportedOutput(dstFormat)) {
av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", av_get_pix_fmt_name(dstFormat)); av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n",
av_get_pix_fmt_name(dstFormat));
return AVERROR(EINVAL); return AVERROR(EINVAL);
} }
i= flags & ( SWS_POINT i = flags & (SWS_POINT |
|SWS_AREA SWS_AREA |
|SWS_BILINEAR SWS_BILINEAR |
|SWS_FAST_BILINEAR SWS_FAST_BILINEAR |
|SWS_BICUBIC SWS_BICUBIC |
|SWS_X SWS_X |
|SWS_GAUSS SWS_GAUSS |
|SWS_LANCZOS SWS_LANCZOS |
|SWS_SINC SWS_SINC |
|SWS_SPLINE SWS_SPLINE |
|SWS_BICUBLIN); SWS_BICUBLIN);
if(!i || (i & (i-1))) { if (!i || (i & (i - 1))) {
av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen, got %X\n", i); av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen, got %X\n", i);
return AVERROR(EINVAL); return AVERROR(EINVAL);
} }
/* sanity check */ /* sanity check */
if (srcW<4 || srcH<1 || dstW<8 || dstH<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) {
/* FIXME check if these are enough and try to lower them after
* fixing the relevant parts of the code */
av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n", av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n",
srcW, srcH, dstW, dstH); srcW, srcH, dstW, dstH);
return AVERROR(EINVAL); return AVERROR(EINVAL);
} }
if (!dstFilter) dstFilter= &dummyFilter; if (!dstFilter)
if (!srcFilter) srcFilter= &dummyFilter; dstFilter = &dummyFilter;
if (!srcFilter)
srcFilter = &dummyFilter;
c->lumXInc= (((int64_t)srcW<<16) + (dstW>>1))/dstW; c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
c->lumYInc= (((int64_t)srcH<<16) + (dstH>>1))/dstH; c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[dstFormat]); c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[dstFormat]);
c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[srcFormat]); c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[srcFormat]);
c->vRounder= 4* 0x0001000100010001ULL; c->vRounder = 4 * 0x0001000100010001ULL;
usesVFilter = (srcFilter->lumV && srcFilter->lumV->length>1) || usesVFilter = (srcFilter->lumV && srcFilter->lumV->length > 1) ||
(srcFilter->chrV && srcFilter->chrV->length>1) || (srcFilter->chrV && srcFilter->chrV->length > 1) ||
(dstFilter->lumV && dstFilter->lumV->length>1) || (dstFilter->lumV && dstFilter->lumV->length > 1) ||
(dstFilter->chrV && dstFilter->chrV->length>1); (dstFilter->chrV && dstFilter->chrV->length > 1);
usesHFilter = (srcFilter->lumH && srcFilter->lumH->length>1) || usesHFilter = (srcFilter->lumH && srcFilter->lumH->length > 1) ||
(srcFilter->chrH && srcFilter->chrH->length>1) || (srcFilter->chrH && srcFilter->chrH->length > 1) ||
(dstFilter->lumH && dstFilter->lumH->length>1) || (dstFilter->lumH && dstFilter->lumH->length > 1) ||
(dstFilter->chrH && dstFilter->chrH->length>1); (dstFilter->chrH && dstFilter->chrH->length > 1);
getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat); getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
...@@ -867,7 +942,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -867,7 +942,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
c->flags = flags; c->flags = flags;
} }
} }
// reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation /* reuse chroma for 2 pixels RGB/BGR unless user wants full
* chroma interpolation */
if (flags & SWS_FULL_CHR_H_INT && if (flags & SWS_FULL_CHR_H_INT &&
isAnyRGB(dstFormat) && isAnyRGB(dstFormat) &&
dstFormat != PIX_FMT_RGBA && dstFormat != PIX_FMT_RGBA &&
...@@ -878,37 +954,43 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -878,37 +954,43 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
dstFormat != PIX_FMT_BGR24) { dstFormat != PIX_FMT_BGR24) {
av_log(c, AV_LOG_WARNING, av_log(c, AV_LOG_WARNING,
"full chroma interpolation for destination format '%s' not yet implemented\n", "full chroma interpolation for destination format '%s' not yet implemented\n",
sws_format_name(dstFormat)); av_get_pix_fmt_name(dstFormat));
flags &= ~SWS_FULL_CHR_H_INT; flags &= ~SWS_FULL_CHR_H_INT;
c->flags = flags; c->flags = flags;
} }
if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1; if (isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
c->chrDstHSubSample = 1;
// drop some chroma lines if the user wants it // drop some chroma lines if the user wants it
c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT; c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >>
c->chrSrcVSubSample+= c->vChrDrop; SWS_SRC_V_CHR_DROP_SHIFT;
c->chrSrcVSubSample += c->vChrDrop;
// drop every other pixel for chroma calculation unless user wants full chroma
if (isAnyRGB(srcFormat) && !(flags&SWS_FULL_CHR_H_INP) /* drop every other pixel for chroma calculation unless user
&& srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8 * wants full chroma */
&& srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4 if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) &&
&& srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE srcFormat != PIX_FMT_RGB8 && srcFormat != PIX_FMT_BGR8 &&
&& ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&SWS_FAST_BILINEAR))) srcFormat != PIX_FMT_RGB4 && srcFormat != PIX_FMT_BGR4 &&
c->chrSrcHSubSample=1; srcFormat != PIX_FMT_RGB4_BYTE && srcFormat != PIX_FMT_BGR4_BYTE &&
((dstW >> c->chrDstHSubSample) <= (srcW >> 1) ||
(flags & SWS_FAST_BILINEAR)))
c->chrSrcHSubSample = 1;
// Note the -((-x)>>y) is so that we always round toward +inf. // Note the -((-x)>>y) is so that we always round toward +inf.
c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample); c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample);
c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample); c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample);
c->chrDstW= -((-dstW) >> c->chrDstHSubSample); c->chrDstW = -((-dstW) >> c->chrDstHSubSample);
c->chrDstH= -((-dstH) >> c->chrDstVSubSample); c->chrDstH = -((-dstH) >> c->chrDstVSubSample);
/* unscaled special cases */ /* unscaled special cases */
if (unscaled && !usesHFilter && !usesVFilter && (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) { if (unscaled && !usesHFilter && !usesVFilter &&
(c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
ff_get_unscaled_swscale(c); ff_get_unscaled_swscale(c);
if (c->swScale) { if (c->swScale) {
if (flags&SWS_PRINT_INFO) if (flags & SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", av_log(c, AV_LOG_INFO,
"using unscaled %s -> %s special converter\n",
av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
return 0; return 0;
} }
...@@ -925,35 +1007,40 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -925,35 +1007,40 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
if (c->dstBpc == 16) if (c->dstBpc == 16)
dst_stride <<= 1; dst_stride <<= 1;
FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->srcBpc == 8 && c->dstBpc <= 10) { if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 &&
c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; c->srcBpc == 8 && c->dstBpc <= 10) {
if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
if (flags&SWS_PRINT_INFO) (srcW & 15) == 0) ? 1 : 0;
av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0
} && (flags & SWS_FAST_BILINEAR)) {
if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0; if (flags & SWS_PRINT_INFO)
} av_log(c, AV_LOG_INFO,
else "output width is not a multiple of 32 -> no MMX2 scaler\n");
}
if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat))
c->canMMX2BeUsed=0; c->canMMX2BeUsed=0;
} else
c->chrXInc= (((int64_t)c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW; c->canMMX2BeUsed = 0;
c->chrYInc= (((int64_t)c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
// match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;
// but only for the FAST_BILINEAR mode otherwise do correct scaling
// n-2 is the last chrominance sample available /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
// this is not perfect, but no one should notice the difference, the more correct variant * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
// would be like the vertical one, but that would require some special code for the * correct scaling.
// first and last pixel * n-2 is the last chrominance sample available.
if (flags&SWS_FAST_BILINEAR) { * This is not perfect, but no one should notice the difference, the more
* correct variant would be like the vertical one, but that would require
* some special code for the first and last pixel */
if (flags & SWS_FAST_BILINEAR) {
if (c->canMMX2BeUsed) { if (c->canMMX2BeUsed) {
c->lumXInc+= 20; c->lumXInc += 20;
c->chrXInc+= 20; c->chrXInc += 20;
} }
//we don't use the x86 asm scaler if MMX is available // we don't use the x86 asm scaler if MMX is available
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 10) { else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 10) {
c->lumXInc = ((int64_t)(srcW-2)<<16)/(dstW-2) - 20; c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
c->chrXInc = ((int64_t)(c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
} }
} }
...@@ -962,8 +1049,10 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -962,8 +1049,10 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
#if HAVE_MMX2 #if HAVE_MMX2
// can't downscale !!! // can't downscale !!!
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) { if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
c->lumMmx2FilterCodeSize = initMMX2HScaler( dstW, c->lumXInc, NULL, NULL, NULL, 8); c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
c->chrMmx2FilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc, NULL, NULL, NULL, 4); NULL, NULL, 8);
c->chrMmx2FilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc,
NULL, NULL, NULL, 4);
#ifdef MAP_ANONYMOUS #ifdef MAP_ANONYMOUS
c->lumMmx2FilterCode = mmap(NULL, c->lumMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); c->lumMmx2FilterCode = mmap(NULL, c->lumMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
...@@ -985,13 +1074,16 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -985,13 +1074,16 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n"); av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n");
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
FF_ALLOCZ_OR_GOTO(c, c->hLumFilter , (dstW /8+8)*sizeof(int16_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilter , (c->chrDstW /4+8)*sizeof(int16_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW /2/8+8)*sizeof(int32_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW/2/4+8)*sizeof(int32_t), fail);
initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); FF_ALLOCZ_OR_GOTO(c, c->hLumFilter, (dstW / 8 + 8) * sizeof(int16_t), fail);
initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); FF_ALLOCZ_OR_GOTO(c, c->hChrFilter, (c->chrDstW / 4 + 8) * sizeof(int16_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode,
c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode,
c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
#ifdef MAP_ANONYMOUS #ifdef MAP_ANONYMOUS
mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ); mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
...@@ -1000,107 +1092,122 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -1000,107 +1092,122 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
} else } else
#endif /* HAVE_MMX2 */ #endif /* HAVE_MMX2 */
{ {
const int filterAlign= const int filterAlign =
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
1; 1;
if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, if (initFilter(&c->hLumFilter, &c->hLumFilterPos,
srcW , dstW, filterAlign, 1<<14, &c->hLumFilterSize, c->lumXInc,
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, srcW, dstW, filterAlign, 1 << 14,
srcFilter->lumH, dstFilter->lumH, c->param) < 0) (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
cpu_flags, srcFilter->lumH, dstFilter->lumH,
c->param) < 0)
goto fail; goto fail;
if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, if (initFilter(&c->hChrFilter, &c->hChrFilterPos,
c->chrSrcW, c->chrDstW, filterAlign, 1<<14, &c->hChrFilterSize, c->chrXInc,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
srcFilter->chrH, dstFilter->chrH, c->param) < 0) (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
cpu_flags, srcFilter->chrH, dstFilter->chrH,
c->param) < 0)
goto fail; goto fail;
} }
} // initialize horizontal stuff } // initialize horizontal stuff
/* precalculate vertical scaler filter coefficients */ /* precalculate vertical scaler filter coefficients */
{ {
const int filterAlign= const int filterAlign =
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 2 : (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 2 :
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
1; 1;
if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
srcH , dstH, filterAlign, (1<<12), c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
srcFilter->lumV, dstFilter->lumV, c->param) < 0) cpu_flags, srcFilter->lumV, dstFilter->lumV,
c->param) < 0)
goto fail; goto fail;
if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
c->chrSrcH, c->chrDstH, filterAlign, (1<<12), c->chrYInc, c->chrSrcH, c->chrDstH,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, filterAlign, (1 << 12),
srcFilter->chrV, dstFilter->chrV, c->param) < 0) (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
cpu_flags, srcFilter->chrV, dstFilter->chrV,
c->param) < 0)
goto fail; goto fail;
#if HAVE_ALTIVEC #if HAVE_ALTIVEC
FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof (vector signed short)*c->vLumFilterSize*c->dstH, fail); FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH, fail);
FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH, fail); FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);
for (i=0;i<c->vLumFilterSize*c->dstH;i++) { for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
int j; int j;
short *p = (short *)&c->vYCoeffsBank[i]; short *p = (short *)&c->vYCoeffsBank[i];
for (j=0;j<8;j++) for (j = 0; j < 8; j++)
p[j] = c->vLumFilter[i]; p[j] = c->vLumFilter[i];
} }
for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) { for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
int j; int j;
short *p = (short *)&c->vCCoeffsBank[i]; short *p = (short *)&c->vCCoeffsBank[i];
for (j=0;j<8;j++) for (j = 0; j < 8; j++)
p[j] = c->vChrFilter[i]; p[j] = c->vChrFilter[i];
} }
#endif #endif
} }
// calculate buffer sizes so that they won't run out while handling these damn slices // calculate buffer sizes so that they won't run out while handling these damn slices
c->vLumBufSize= c->vLumFilterSize; c->vLumBufSize = c->vLumFilterSize;
c->vChrBufSize= c->vChrFilterSize; c->vChrBufSize = c->vChrFilterSize;
for (i=0; i<dstH; i++) { for (i = 0; i < dstH; i++) {
int chrI = (int64_t) i * c->chrDstH / dstH; int chrI = (int64_t)i * c->chrDstH / dstH;
int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1,
((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample)); ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)
<< c->chrSrcVSubSample));
nextSlice>>= c->chrSrcVSubSample;
nextSlice<<= c->chrSrcVSubSample; nextSlice >>= c->chrSrcVSubSample;
if (c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice) nextSlice <<= c->chrSrcVSubSample;
c->vLumBufSize= nextSlice - c->vLumFilterPos[i]; if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice)
if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample)) c->vLumBufSize = nextSlice - c->vLumFilterPos[i];
c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI]; if (c->vChrFilterPos[chrI] + c->vChrBufSize <
} (nextSlice >> c->chrSrcVSubSample))
c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) -
// allocate pixbufs (we use dynamic allocation because otherwise we would need to c->vChrFilterPos[chrI];
// allocate several megabytes to handle all possible cases) }
FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail);
FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); /* Allocate pixbufs (we use dynamic allocation because otherwise we would
FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); * need to allocate several megabytes to handle all possible cases) */
FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
//Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000) /* Note we need at least one pixel more at the end because of the MMX code
* (just in case someone wants to replace the 4000/8000). */
/* align at 16 bytes for AltiVec */ /* align at 16 bytes for AltiVec */
for (i=0; i<c->vLumBufSize; i++) { for (i = 0; i < c->vLumBufSize; i++) {
FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+16, fail); FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i + c->vLumBufSize],
c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; dst_stride + 16, fail);
c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
} }
// 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7); c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
c->uv_offx2 = dst_stride + 16; c->uv_offx2 = dst_stride + 16;
for (i=0; i<c->vChrBufSize; i++) { for (i = 0; i < c->vChrBufSize; i++) {
FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize],
c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize]; dst_stride * 2 + 32, fail);
c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + (dst_stride >> 1) + 8; c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize];
c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize]
= c->chrUPixBuf[i] + (dst_stride >> 1) + 8;
} }
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
for (i=0; i<c->vLumBufSize; i++) { for (i = 0; i < c->vLumBufSize; i++) {
FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], dst_stride+16, fail); FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i + c->vLumBufSize],
c->alpPixBuf[i] = c->alpPixBuf[i+c->vLumBufSize]; dst_stride + 16, fail);
c->alpPixBuf[i] = c->alpPixBuf[i + c->vLumBufSize];
} }
//try to avoid drawing green stuff between the right end and the stride end // try to avoid drawing green stuff between the right end and the stride end
for (i=0; i<c->vChrBufSize; i++) for (i = 0; i < c->vChrBufSize; i++)
if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){ if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){
av_assert0(c->dstBpc > 10); av_assert0(c->dstBpc > 10);
for(j=0; j<dst_stride/2+1; j++) for(j=0; j<dst_stride/2+1; j++)
...@@ -1111,79 +1218,103 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) ...@@ -1111,79 +1218,103 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
assert(c->chrDstH <= dstH); assert(c->chrDstH <= dstH);
if (flags&SWS_PRINT_INFO) { if (flags & SWS_PRINT_INFO) {
if (flags&SWS_FAST_BILINEAR) av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, "); if (flags & SWS_FAST_BILINEAR)
else if (flags&SWS_BILINEAR) av_log(c, AV_LOG_INFO, "BILINEAR scaler, "); av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
else if (flags&SWS_BICUBIC) av_log(c, AV_LOG_INFO, "BICUBIC scaler, "); else if (flags & SWS_BILINEAR)
else if (flags&SWS_X) av_log(c, AV_LOG_INFO, "Experimental scaler, "); av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
else if (flags&SWS_POINT) av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, "); else if (flags & SWS_BICUBIC)
else if (flags&SWS_AREA) av_log(c, AV_LOG_INFO, "Area Averaging scaler, "); av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
else if (flags&SWS_BICUBLIN) av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, "); else if (flags & SWS_X)
else if (flags&SWS_GAUSS) av_log(c, AV_LOG_INFO, "Gaussian scaler, "); av_log(c, AV_LOG_INFO, "Experimental scaler, ");
else if (flags&SWS_SINC) av_log(c, AV_LOG_INFO, "Sinc scaler, "); else if (flags & SWS_POINT)
else if (flags&SWS_LANCZOS) av_log(c, AV_LOG_INFO, "Lanczos scaler, "); av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
else if (flags&SWS_SPLINE) av_log(c, AV_LOG_INFO, "Bicubic spline scaler, "); else if (flags & SWS_AREA)
else av_log(c, AV_LOG_INFO, "ehh flags invalid?! "); av_log(c, AV_LOG_INFO, "Area Averaging scaler, ");
else if (flags & SWS_BICUBLIN)
av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
else if (flags & SWS_GAUSS)
av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
else if (flags & SWS_SINC)
av_log(c, AV_LOG_INFO, "Sinc scaler, ");
else if (flags & SWS_LANCZOS)
av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
else if (flags & SWS_SPLINE)
av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
else
av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
av_log(c, AV_LOG_INFO, "from %s to %s%s ", av_log(c, AV_LOG_INFO, "from %s to %s%s ",
av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(srcFormat),
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 || dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 ||
dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE ||
dstFormat == PIX_FMT_BGR444BE || dstFormat == PIX_FMT_BGR444LE ? "dithered " : "", dstFormat == PIX_FMT_BGR444BE || dstFormat == PIX_FMT_BGR444LE ?
"dithered " : "",
#else #else
"", "",
#endif #endif
av_get_pix_fmt_name(dstFormat)); av_get_pix_fmt_name(dstFormat));
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); av_log(c, AV_LOG_INFO, "using MMX2\n");
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) av_log(c, AV_LOG_INFO, "using MMX\n"); else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n"); av_log(c, AV_LOG_INFO, "using 3DNOW\n");
else av_log(c, AV_LOG_INFO, "using C\n"); else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
av_log(c, AV_LOG_INFO, "using MMX\n");
else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC)
av_log(c, AV_LOG_INFO, "using AltiVec\n");
else
av_log(c, AV_LOG_INFO, "using C\n");
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", av_log(c, AV_LOG_DEBUG,
"lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", av_log(c, AV_LOG_DEBUG,
c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH,
c->chrXInc, c->chrYInc);
} }
c->swScale= ff_getSwsFunc(c); c->swScale = ff_getSwsFunc(c);
return 0; return 0;
fail: //FIXME replace things by appropriate error codes fail: // FIXME replace things by appropriate error codes
return -1; return -1;
} }
#if FF_API_SWS_GETCONTEXT #if FF_API_SWS_GETCONTEXT
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
int dstW, int dstH, enum PixelFormat dstFormat, int flags, int dstW, int dstH, enum PixelFormat dstFormat,
SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param) int flags, SwsFilter *srcFilter,
SwsFilter *dstFilter, const double *param)
{ {
SwsContext *c; SwsContext *c;
if(!(c=sws_alloc_context())) if (!(c = sws_alloc_context()))
return NULL; return NULL;
c->flags= flags; c->flags = flags;
c->srcW= srcW; c->srcW = srcW;
c->srcH= srcH; c->srcH = srcH;
c->dstW= dstW; c->dstW = dstW;
c->dstH= dstH; c->dstH = dstH;
c->srcRange = handle_jpeg(&srcFormat); c->srcRange = handle_jpeg(&srcFormat);
c->dstRange = handle_jpeg(&dstFormat); c->dstRange = handle_jpeg(&dstFormat);
c->src0Alpha = handle_0alpha(&srcFormat); c->src0Alpha = handle_0alpha(&srcFormat);
c->dst0Alpha = handle_0alpha(&dstFormat); c->dst0Alpha = handle_0alpha(&dstFormat);
c->srcFormat= srcFormat; c->srcFormat = srcFormat;
c->dstFormat= dstFormat; c->dstFormat = dstFormat;
if (param) { if (param) {
c->param[0] = param[0]; c->param[0] = param[0];
c->param[1] = param[1]; c->param[1] = param[1];
} }
sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, c->dstRange, 0, 1<<16, 1<<16); sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange,
ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/,
c->dstRange, 0, 1 << 16, 1 << 16);
if(sws_init_context(c, srcFilter, dstFilter) < 0){ if (sws_init_context(c, srcFilter, dstFilter) < 0) {
sws_freeContext(c); sws_freeContext(c);
return NULL; return NULL;
} }
...@@ -1197,28 +1328,28 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, ...@@ -1197,28 +1328,28 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
float chromaHShift, float chromaVShift, float chromaHShift, float chromaVShift,
int verbose) int verbose)
{ {
SwsFilter *filter= av_malloc(sizeof(SwsFilter)); SwsFilter *filter = av_malloc(sizeof(SwsFilter));
if (!filter) if (!filter)
return NULL; return NULL;
if (lumaGBlur!=0.0) { if (lumaGBlur != 0.0) {
filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0); filter->lumH = sws_getGaussianVec(lumaGBlur, 3.0);
filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0); filter->lumV = sws_getGaussianVec(lumaGBlur, 3.0);
} else { } else {
filter->lumH= sws_getIdentityVec(); filter->lumH = sws_getIdentityVec();
filter->lumV= sws_getIdentityVec(); filter->lumV = sws_getIdentityVec();
} }
if (chromaGBlur!=0.0) { if (chromaGBlur != 0.0) {
filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0); filter->chrH = sws_getGaussianVec(chromaGBlur, 3.0);
filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0); filter->chrV = sws_getGaussianVec(chromaGBlur, 3.0);
} else { } else {
filter->chrH= sws_getIdentityVec(); filter->chrH = sws_getIdentityVec();
filter->chrV= sws_getIdentityVec(); filter->chrV = sws_getIdentityVec();
} }
if (chromaSharpen!=0.0) { if (chromaSharpen != 0.0) {
SwsVector *id= sws_getIdentityVec(); SwsVector *id = sws_getIdentityVec();
sws_scaleVec(filter->chrH, -chromaSharpen); sws_scaleVec(filter->chrH, -chromaSharpen);
sws_scaleVec(filter->chrV, -chromaSharpen); sws_scaleVec(filter->chrV, -chromaSharpen);
sws_addVec(filter->chrH, id); sws_addVec(filter->chrH, id);
...@@ -1226,8 +1357,8 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, ...@@ -1226,8 +1357,8 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
sws_freeVec(id); sws_freeVec(id);
} }
if (lumaSharpen!=0.0) { if (lumaSharpen != 0.0) {
SwsVector *id= sws_getIdentityVec(); SwsVector *id = sws_getIdentityVec();
sws_scaleVec(filter->lumH, -lumaSharpen); sws_scaleVec(filter->lumH, -lumaSharpen);
sws_scaleVec(filter->lumV, -lumaSharpen); sws_scaleVec(filter->lumV, -lumaSharpen);
sws_addVec(filter->lumH, id); sws_addVec(filter->lumH, id);
...@@ -1236,18 +1367,20 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, ...@@ -1236,18 +1367,20 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
} }
if (chromaHShift != 0.0) if (chromaHShift != 0.0)
sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5)); sws_shiftVec(filter->chrH, (int)(chromaHShift + 0.5));
if (chromaVShift != 0.0) if (chromaVShift != 0.0)
sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5)); sws_shiftVec(filter->chrV, (int)(chromaVShift + 0.5));
sws_normalizeVec(filter->chrH, 1.0); sws_normalizeVec(filter->chrH, 1.0);
sws_normalizeVec(filter->chrV, 1.0); sws_normalizeVec(filter->chrV, 1.0);
sws_normalizeVec(filter->lumH, 1.0); sws_normalizeVec(filter->lumH, 1.0);
sws_normalizeVec(filter->lumV, 1.0); sws_normalizeVec(filter->lumV, 1.0);
if (verbose) sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG); if (verbose)
if (verbose) sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG); sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
if (verbose)
sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
return filter; return filter;
} }
...@@ -1266,17 +1399,18 @@ SwsVector *sws_allocVec(int length) ...@@ -1266,17 +1399,18 @@ SwsVector *sws_allocVec(int length)
SwsVector *sws_getGaussianVec(double variance, double quality) SwsVector *sws_getGaussianVec(double variance, double quality)
{ {
const int length= (int)(variance*quality + 0.5) | 1; const int length = (int)(variance * quality + 0.5) | 1;
int i; int i;
double middle= (length-1)*0.5; double middle = (length - 1) * 0.5;
SwsVector *vec= sws_allocVec(length); SwsVector *vec = sws_allocVec(length);
if (!vec) if (!vec)
return NULL; return NULL;
for (i=0; i<length; i++) { for (i = 0; i < length; i++) {
double dist= i-middle; double dist = i - middle;
vec->coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*M_PI); vec->coeff[i] = exp(-dist * dist / (2 * variance * variance)) /
sqrt(2 * variance * M_PI);
} }
sws_normalizeVec(vec, 1.0); sws_normalizeVec(vec, 1.0);
...@@ -1287,13 +1421,13 @@ SwsVector *sws_getGaussianVec(double variance, double quality) ...@@ -1287,13 +1421,13 @@ SwsVector *sws_getGaussianVec(double variance, double quality)
SwsVector *sws_getConstVec(double c, int length) SwsVector *sws_getConstVec(double c, int length)
{ {
int i; int i;
SwsVector *vec= sws_allocVec(length); SwsVector *vec = sws_allocVec(length);
if (!vec) if (!vec)
return NULL; return NULL;
for (i=0; i<length; i++) for (i = 0; i < length; i++)
vec->coeff[i]= c; vec->coeff[i] = c;
return vec; return vec;
} }
...@@ -1306,10 +1440,10 @@ SwsVector *sws_getIdentityVec(void) ...@@ -1306,10 +1440,10 @@ SwsVector *sws_getIdentityVec(void)
/**
 * Sum all coefficients of a vector.
 *
 * @param a vector to sum; must not be NULL (it is dereferenced unchecked)
 * @return the sum of a->coeff[0] .. a->coeff[a->length - 1], accumulated in
 *         index order; 0 when a->length is not positive
 */
static double sws_dcVec(SwsVector *a)
{
    double total = 0;
    int idx      = 0;

    while (idx < a->length) {
        total += a->coeff[idx];
        idx++;
    }

    return total;
}
...@@ -1318,27 +1452,27 @@ void sws_scaleVec(SwsVector *a, double scalar) ...@@ -1318,27 +1452,27 @@ void sws_scaleVec(SwsVector *a, double scalar)
{ {
int i; int i;
for (i=0; i<a->length; i++) for (i = 0; i < a->length; i++)
a->coeff[i]*= scalar; a->coeff[i] *= scalar;
} }
/**
 * Rescale a vector in place so that its coefficients sum to height.
 *
 * Every coefficient is multiplied by height divided by the current
 * coefficient sum. The sum is not checked against zero before dividing.
 *
 * @param a      vector to rescale
 * @param height desired sum of the coefficients
 */
void sws_normalizeVec(SwsVector *a, double height)
{
    const double dc = sws_dcVec(a);

    sws_scaleVec(a, height / dc);
}
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b) static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b)
{ {
int length= a->length + b->length - 1; int length = a->length + b->length - 1;
int i, j; int i, j;
SwsVector *vec= sws_getConstVec(0.0, length); SwsVector *vec = sws_getConstVec(0.0, length);
if (!vec) if (!vec)
return NULL; return NULL;
for (i=0; i<a->length; i++) { for (i = 0; i < a->length; i++) {
for (j=0; j<b->length; j++) { for (j = 0; j < b->length; j++) {
vec->coeff[i+j]+= a->coeff[i]*b->coeff[j]; vec->coeff[i + j] += a->coeff[i] * b->coeff[j];
} }
} }
...@@ -1347,30 +1481,34 @@ static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b) ...@@ -1347,30 +1481,34 @@ static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b)
/**
 * Build a new vector holding the element-wise sum of a and b.
 *
 * The result is as long as the longer input. Each input is positioned so
 * that its element at index (len - 1) / 2 lands on index
 * (result length - 1) / 2 of the result.
 *
 * @param a first operand (not modified)
 * @param b second operand (not modified)
 * @return the newly allocated sum, or NULL if sws_getConstVec() failed
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b)
{
    const int out_len = FFMAX(a->length, b->length);
    const int centre  = (out_len - 1) / 2;
    /* index in the result where element 0 of each input is accumulated */
    const int a_off   = centre - (a->length - 1) / 2;
    const int b_off   = centre - (b->length - 1) / 2;
    SwsVector *result = sws_getConstVec(0.0, out_len);
    int k;

    if (!result)
        return NULL;

    for (k = 0; k < a->length; k++)
        result->coeff[a_off + k] += a->coeff[k];
    for (k = 0; k < b->length; k++)
        result->coeff[b_off + k] += b->coeff[k];

    return result;
}
/**
 * Build a new vector holding the element-wise difference a - b.
 *
 * The result is as long as the longer input. Each input is positioned so
 * that its element at index (len - 1) / 2 lands on index
 * (result length - 1) / 2 of the result; a is added and b is subtracted.
 *
 * @param a minuend (not modified)
 * @param b subtrahend (not modified)
 * @return the newly allocated difference, or NULL if sws_getConstVec() failed
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b)
{
    const int out_len = FFMAX(a->length, b->length);
    const int centre  = (out_len - 1) / 2;
    /* index in the result where element 0 of each input is applied */
    const int a_off   = centre - (a->length - 1) / 2;
    const int b_off   = centre - (b->length - 1) / 2;
    SwsVector *result = sws_getConstVec(0.0, out_len);
    int k;

    if (!result)
        return NULL;

    for (k = 0; k < a->length; k++)
        result->coeff[a_off + k] += a->coeff[k];
    for (k = 0; k < b->length; k++)
        result->coeff[b_off + k] -= b->coeff[k];

    return result;
}
...@@ -1378,15 +1516,16 @@ static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b) ...@@ -1378,15 +1516,16 @@ static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b)
/**
 * Build a shifted copy of a: shift left, or right if "shift" is negative.
 *
 * The result has a->length + 2 * FFABS(shift) coefficients. The input is
 * copied starting at index (length - 1) / 2 - (a->length - 1) / 2 - shift,
 * i.e. centred in the longer vector and then displaced by shift; the
 * remaining entries keep the constant 0.0 requested from sws_getConstVec().
 *
 * @return the new vector, or NULL if sws_getConstVec() returned NULL
 */
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift)
{
    const int len  = a->length + FFABS(shift) * 2;
    const int base = (len - 1) / 2 - (a->length - 1) / 2 - shift;
    SwsVector *out = sws_getConstVec(0.0, len);
    int k;

    if (!out)
        return NULL;

    for (k = 0; k < a->length; k++)
        out->coeff[base + k] = a->coeff[k];

    return out;
}
...@@ -1394,49 +1533,50 @@ static SwsVector *sws_getShiftedVec(SwsVector *a, int shift) ...@@ -1394,49 +1533,50 @@ static SwsVector *sws_getShiftedVec(SwsVector *a, int shift)
/**
 * Shift the coefficients of a in place.
 *
 * A shifted copy is built with sws_getShiftedVec(); its coefficient buffer
 * and length then replace the ones held by a.
 *
 * If sws_getShiftedVec() returns NULL, a is left unmodified rather than
 * freeing its coefficients and dereferencing the NULL result.
 */
void sws_shiftVec(SwsVector *a, int shift)
{
    SwsVector *shifted = sws_getShiftedVec(a, shift);

    if (!shifted)
        return;

    av_free(a->coeff);
    a->coeff  = shifted->coeff;
    a->length = shifted->length;
    /* Release only the temporary struct; its coeff buffer now belongs to a. */
    av_free(shifted);
}
/**
 * Add b to a in place.
 *
 * The sum is built with sws_sumVec(); its coefficient buffer and length then
 * replace the ones held by a.
 *
 * If sws_sumVec() returns NULL, a is left unmodified rather than freeing its
 * coefficients and dereferencing the NULL result.
 */
void sws_addVec(SwsVector *a, SwsVector *b)
{
    SwsVector *sum = sws_sumVec(a, b);

    if (!sum)
        return;

    av_free(a->coeff);
    a->coeff  = sum->coeff;
    a->length = sum->length;
    /* Release only the temporary struct; its coeff buffer now belongs to a. */
    av_free(sum);
}
/**
 * Subtract b from a in place.
 *
 * The difference is built with sws_diffVec(); its coefficient buffer and
 * length then replace the ones held by a.
 *
 * If sws_diffVec() returns NULL, a is left unmodified rather than freeing its
 * coefficients and dereferencing the NULL result.
 */
void sws_subVec(SwsVector *a, SwsVector *b)
{
    SwsVector *diff = sws_diffVec(a, b);

    if (!diff)
        return;

    av_free(a->coeff);
    a->coeff  = diff->coeff;
    a->length = diff->length;
    /* Release only the temporary struct; its coeff buffer now belongs to a. */
    av_free(diff);
}
/**
 * Replace a in place by the result of sws_getConvVec(a, b).
 *
 * The coefficient buffer and length of the returned vector replace the ones
 * held by a.
 *
 * NOTE(review): sws_getConvVec() is defined outside this excerpt; it is
 * assumed to return NULL on failure like the other vector constructors here.
 * In that case a is left unmodified rather than freeing its coefficients and
 * dereferencing the NULL result.
 */
void sws_convVec(SwsVector *a, SwsVector *b)
{
    SwsVector *conv = sws_getConvVec(a, b);

    if (!conv)
        return;

    av_free(a->coeff);
    a->coeff  = conv->coeff;
    a->length = conv->length;
    /* Release only the temporary struct; its coeff buffer now belongs to a. */
    av_free(conv);
}
/**
 * Duplicate a vector.
 *
 * A vector of a->length coefficients is requested from sws_allocVec() and
 * every coefficient of a is copied into it.
 *
 * @return the copy, or NULL if sws_allocVec() returned NULL
 */
SwsVector *sws_cloneVec(SwsVector *a)
{
    SwsVector *copy = sws_allocVec(a->length);
    int k = 0;

    if (!copy)
        return NULL;

    while (k < a->length) {
        copy->coeff[k] = a->coeff[k];
        k++;
    }

    return copy;
}
...@@ -1444,65 +1584,75 @@ SwsVector *sws_cloneVec(SwsVector *a) ...@@ -1444,65 +1584,75 @@ SwsVector *sws_cloneVec(SwsVector *a)
/**
 * Log a vector as one line per coefficient: the value printed with "%1.3f",
 * followed by a run of spaces and a '|' marker.
 *
 * The number of spaces is (coeff - min) * 60 / (max - min), rounded to the
 * nearest integer. min and max both start at 0, so the plotted range always
 * includes 0.
 *
 * When max == min (e.g. every coefficient is 0) the range is 0; the marker
 * is then printed with no leading spaces instead of dividing by zero and
 * converting the resulting NaN to int, which is undefined behaviour.
 *
 * @param a         vector to print
 * @param log_ctx   context handed to av_log()
 * @param log_level level handed to av_log()
 */
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level)
{
    double max = 0;
    double min = 0;
    double range;
    int i;

    /* Single pass for both extremes. */
    for (i = 0; i < a->length; i++) {
        if (a->coeff[i] > max)
            max = a->coeff[i];
        if (a->coeff[i] < min)
            min = a->coeff[i];
    }

    range = max - min;

    for (i = 0; i < a->length; i++) {
        int x = 0;

        if (range > 0)
            x = (int)((a->coeff[i] - min) * 60.0 / range + 0.5);

        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
        for (; x > 0; x--)
            av_log(log_ctx, log_level, " ");
        av_log(log_ctx, log_level, "|\n");
    }
}
/**
 * Release a vector together with its coefficient buffer.
 *
 * Passing NULL is allowed and does nothing.
 */
void sws_freeVec(SwsVector *a)
{
    if (a) {
        a->length = 0;
        av_freep(&a->coeff);
        av_free(a);
    }
}
/**
 * Release a filter and the four vectors it holds (lumH, lumV, chrH, chrV).
 *
 * Passing NULL is allowed and does nothing. Individual vectors may be NULL
 * as well: sws_freeVec() ignores NULL, so no per-member check is needed.
 */
void sws_freeFilter(SwsFilter *filter)
{
    if (!filter)
        return;

    sws_freeVec(filter->lumH);
    sws_freeVec(filter->lumV);
    sws_freeVec(filter->chrH);
    sws_freeVec(filter->chrV);
    av_free(filter);
}
void sws_freeContext(SwsContext *c) void sws_freeContext(SwsContext *c)
{ {
int i; int i;
if (!c) return; if (!c)
return;
if (c->lumPixBuf) { if (c->lumPixBuf) {
for (i=0; i<c->vLumBufSize; i++) for (i = 0; i < c->vLumBufSize; i++)
av_freep(&c->lumPixBuf[i]); av_freep(&c->lumPixBuf[i]);
av_freep(&c->lumPixBuf); av_freep(&c->lumPixBuf);
} }
if (c->chrUPixBuf) { if (c->chrUPixBuf) {
for (i=0; i<c->vChrBufSize; i++) for (i = 0; i < c->vChrBufSize; i++)
av_freep(&c->chrUPixBuf[i]); av_freep(&c->chrUPixBuf[i]);
av_freep(&c->chrUPixBuf); av_freep(&c->chrUPixBuf);
av_freep(&c->chrVPixBuf); av_freep(&c->chrVPixBuf);
} }
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
for (i=0; i<c->vLumBufSize; i++) for (i = 0; i < c->vLumBufSize; i++)
av_freep(&c->alpPixBuf[i]); av_freep(&c->alpPixBuf[i]);
av_freep(&c->alpPixBuf); av_freep(&c->alpPixBuf);
} }
...@@ -1523,17 +1673,21 @@ void sws_freeContext(SwsContext *c) ...@@ -1523,17 +1673,21 @@ void sws_freeContext(SwsContext *c)
#if HAVE_MMX #if HAVE_MMX
#ifdef MAP_ANONYMOUS #ifdef MAP_ANONYMOUS
if (c->lumMmx2FilterCode) munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize); if (c->lumMmx2FilterCode)
if (c->chrMmx2FilterCode) munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize); munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize);
if (c->chrMmx2FilterCode)
munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize);
#elif HAVE_VIRTUALALLOC #elif HAVE_VIRTUALALLOC
if (c->lumMmx2FilterCode) VirtualFree(c->lumMmx2FilterCode, 0, MEM_RELEASE); if (c->lumMmx2FilterCode)
if (c->chrMmx2FilterCode) VirtualFree(c->chrMmx2FilterCode, 0, MEM_RELEASE); VirtualFree(c->lumMmx2FilterCode, 0, MEM_RELEASE);
if (c->chrMmx2FilterCode)
VirtualFree(c->chrMmx2FilterCode, 0, MEM_RELEASE);
#else #else
av_free(c->lumMmx2FilterCode); av_free(c->lumMmx2FilterCode);
av_free(c->chrMmx2FilterCode); av_free(c->chrMmx2FilterCode);
#endif #endif
c->lumMmx2FilterCode=NULL; c->lumMmx2FilterCode = NULL;
c->chrMmx2FilterCode=NULL; c->chrMmx2FilterCode = NULL;
#endif /* HAVE_MMX */ #endif /* HAVE_MMX */
av_freep(&c->yuvTable); av_freep(&c->yuvTable);
...@@ -1542,12 +1696,16 @@ void sws_freeContext(SwsContext *c) ...@@ -1542,12 +1696,16 @@ void sws_freeContext(SwsContext *c)
av_free(c); av_free(c);
} }
struct SwsContext *sws_getCachedContext(struct SwsContext *context, struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
int srcW, int srcH, enum PixelFormat srcFormat, int srcH, enum PixelFormat srcFormat,
int dstW, int dstH, enum PixelFormat dstFormat, int flags, int dstW, int dstH,
SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param) enum PixelFormat dstFormat, int flags,
SwsFilter *srcFilter,
SwsFilter *dstFilter,
const double *param)
{ {
static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT}; static const double default_param[2] = { SWS_PARAM_DEFAULT,
SWS_PARAM_DEFAULT };
if (!param) if (!param)
param = default_param; param = default_param;
...@@ -1582,7 +1740,10 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, ...@@ -1582,7 +1740,10 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
context->flags = flags; context->flags = flags;
context->param[0] = param[0]; context->param[0] = param[0];
context->param[1] = param[1]; context->param[1] = param[1];
sws_setColorspaceDetails(context, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], context->srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, context->dstRange, 0, 1<<16, 1<<16); sws_setColorspaceDetails(context, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT],
context->srcRange,
ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/,
context->dstRange, 0, 1 << 16, 1 << 16);
if (sws_init_context(context, srcFilter, dstFilter) < 0) { if (sws_init_context(context, srcFilter, dstFilter) < 0) {
sws_freeContext(context); sws_freeContext(context);
return NULL; return NULL;
......
# Extra compiler flags for the swscale_mmx object only.
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)

OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o

# The last entry of each list carries no trailing backslash, so the following
# line can never be spliced into the variable by line continuation.
MMX-OBJS += x86/rgb2rgb.o \
            x86/swscale_mmx.o \
            x86/yuv2rgb_mmx.o

YASM-OBJS += x86/input.o \
             x86/output.o \
             x86/scale.o
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment