Commit 466e32bf authored by James Almer

x86/vf_fspp: port inline asm to yasm

Reviewed-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 9224c7f0
...@@ -151,11 +151,11 @@ static void store_slice2_c(uint8_t *dst, int16_t *src, ...@@ -151,11 +151,11 @@ static void store_slice2_c(uint8_t *dst, int16_t *src,
} }
} }
static void mul_thrmat_c(FSPPContext *p, int q) static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{ {
int a; int a;
for (a = 0; a < 64; a++) for (a = 0; a < 64; a++)
((int16_t *)p->threshold_mtx)[a] = q * ((int16_t *)p->threshold_mtx_noq)[a];//ints faster in C thr_adr[a] = q * thr_adr_noq[a];
} }
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
...@@ -220,7 +220,7 @@ static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, ...@@ -220,7 +220,7 @@ static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
t = qp_store[qy + (t >> qpsh)]; t = qp_store[qy + (t >> qpsh)];
t = norm_qscale(t, p->qscale_type); t = norm_qscale(t, p->qscale_type);
if (t != p->prev_q) p->prev_q = t, p->mul_thrmat(p, t); if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
} }
p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1)); p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
...@@ -378,7 +378,7 @@ static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int ...@@ -378,7 +378,7 @@ static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int
} }
} }
static void row_idct_c(int16_t *workspace, int16_t *output_adr, int output_stride, int cnt) static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
{ {
int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_simd16_t tmp10, tmp11, tmp12, tmp13; int_simd16_t tmp10, tmp11, tmp12, tmp13;
...@@ -440,7 +440,7 @@ static void row_idct_c(int16_t *workspace, int16_t *output_adr, int output_strid ...@@ -440,7 +440,7 @@ static void row_idct_c(int16_t *workspace, int16_t *output_adr, int output_strid
} }
} }
static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt) static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
{ {
int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_simd16_t tmp10, tmp11, tmp12, tmp13; int_simd16_t tmp10, tmp11, tmp12, tmp13;
...@@ -582,7 +582,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) ...@@ -582,7 +582,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
} }
if (fspp->qp) if (fspp->qp)
fspp->prev_q = fspp->qp, fspp->mul_thrmat(fspp, fspp->qp); fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
/* if we are not in a constant user quantizer mode and we don't want to use /* if we are not in a constant user quantizer mode and we don't want to use
* the quantizers from the B-frames (B-frames often have a higher QP), we * the quantizers from the B-frames (B-frames often have a higher QP), we
......
...@@ -79,16 +79,16 @@ typedef struct FSPPContext { ...@@ -79,16 +79,16 @@ typedef struct FSPPContext {
ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t dst_stride, ptrdiff_t src_stride,
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale); ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
void (*mul_thrmat)(struct FSPPContext *fspp, int q); void (*mul_thrmat)(int16_t *thr_adr_noq, int16_t *thr_adr, int q);
void (*column_fidct)(int16_t *thr_adr, int16_t *data, void (*column_fidct)(int16_t *thr_adr, int16_t *data,
int16_t *output, int cnt); int16_t *output, int cnt);
void (*row_idct)(int16_t *workspace, int16_t *output_adr, void (*row_idct)(int16_t *workspace, int16_t *output_adr,
int output_stride, int cnt); ptrdiff_t output_stride, int cnt);
void (*row_fdct)(int16_t *data, const uint8_t *pixels, void (*row_fdct)(int16_t *data, const uint8_t *pixels,
int line_size, int cnt); ptrdiff_t line_size, int cnt);
} FSPPContext; } FSPPContext;
......
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
...@@ -10,6 +10,7 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o ...@@ -10,6 +10,7 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
YASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
......
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
* Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "libavutil/attributes.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/vf_fspp.h"
/*
 * Prototypes for the MMX routines that ff_fspp_init_x86() installs into
 * FSPPContext. They are only declared in this file; the definitions live
 * elsewhere (presumably in the assembled x86/vf_fspp object -- confirm
 * against the build files).
 */

/* Candidates for FSPPContext.store_slice / FSPPContext.store_slice2. */
void ff_store_slice_mmx(uint8_t *dst, int16_t *src,
ptrdiff_t dst_stride, ptrdiff_t src_stride,
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
void ff_store_slice2_mmx(uint8_t *dst, int16_t *src,
ptrdiff_t dst_stride, ptrdiff_t src_stride,
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
/* Candidate for FSPPContext.mul_thrmat. */
void ff_mul_thrmat_mmx(int16_t *thr_adr_noq, int16_t *thr_adr, int q);
/* Candidates for FSPPContext.column_fidct, .row_idct and .row_fdct. */
void ff_column_fidct_mmx(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt);
void ff_row_idct_mmx(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt);
void ff_row_fdct_mmx(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt);
/**
 * Hook the x86 MMX routines into an FSPP filter context.
 *
 * The CPU flags are queried once via av_get_cpu_flags(). When
 * EXTERNAL_MMX() rejects them the context is left exactly as it was;
 * otherwise all six function pointers assigned below are redirected to
 * their ff_*_mmx counterparts.
 *
 * @param s context whose function pointers may be overridden
 */
av_cold void ff_fspp_init_x86(FSPPContext *s)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to install unless the external MMX code path is usable. */
    if (!EXTERNAL_MMX(flags)) {
        return;
    }

    s->store_slice  = ff_store_slice_mmx;
    s->store_slice2 = ff_store_slice2_mmx;
    s->mul_thrmat   = ff_mul_thrmat_mmx;
    s->column_fidct = ff_column_fidct_mmx;
    s->row_idct     = ff_row_idct_mmx;
    s->row_fdct     = ff_row_fdct_mmx;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment