Commit 2e55e26b authored by Martin Storsjö

vp9: Flip the order of arguments in MC functions

This makes it match the pattern already used for VP8 MC functions.

This also makes the signature match ffmpeg's version of these
functions, easing porting of code in both directions.
Signed-off-by: Martin Storsjö <martin@martin.st>
parent e3fb74f7
...@@ -127,9 +127,8 @@ typedef struct ProbContext { ...@@ -127,9 +127,8 @@ typedef struct ProbContext {
uint8_t partition[4][4][3]; uint8_t partition[4][4][3];
} ProbContext; } ProbContext;
typedef void (*vp9_mc_func)(uint8_t *dst, const uint8_t *ref, typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride,
ptrdiff_t ref_stride,
int h, int mx, int my); int h, int mx, int my);
typedef struct VP9DSPContext { typedef struct VP9DSPContext {
......
...@@ -1187,7 +1187,7 @@ static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func(*mc)[2], ...@@ -1187,7 +1187,7 @@ static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
ref_stride = 80; ref_stride = 80;
} }
mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1); mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
} }
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2], static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
...@@ -1227,7 +1227,7 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2], ...@@ -1227,7 +1227,7 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
mc[!!mx][!!my](dst_u, ref_u, dst_stride, 80, bh, mx, my); mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - !!my * 3 * src_stride_v - !!mx * 3, ref_v - !!my * 3 * src_stride_v - !!mx * 3,
...@@ -1236,10 +1236,10 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2], ...@@ -1236,10 +1236,10 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
mc[!!mx][!!my](dst_v, ref_v, dst_stride, 80, bh, mx, my); mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
} else { } else {
mc[!!mx][!!my](dst_u, ref_u, dst_stride, src_stride_u, bh, mx, my); mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, ref_v, dst_stride, src_stride_v, bh, mx, my); mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
} }
} }
...@@ -1668,8 +1668,8 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col, ...@@ -1668,8 +1668,8 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
av_assert2(n <= 4); av_assert2(n <= 4);
if (w & bw) { if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o,
s->tmp_y + o,
f->linesize[0], f->linesize[0],
s->tmp_y + o,
64, h, 0, 0); 64, h, 0, 0);
o += bw; o += bw;
} }
...@@ -1686,12 +1686,12 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col, ...@@ -1686,12 +1686,12 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
av_assert2(n <= 4); av_assert2(n <= 4);
if (w & bw) { if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o,
s->tmp_uv[0] + o,
f->linesize[1], f->linesize[1],
s->tmp_uv[0] + o,
32, h, 0, 0); 32, h, 0, 0);
s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o,
s->tmp_uv[1] + o,
f->linesize[2], f->linesize[2],
s->tmp_uv[1] + o,
32, h, 0, 0); 32, h, 0, 0);
o += bw; o += bw;
} }
......
...@@ -1738,9 +1738,8 @@ static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp) ...@@ -1738,9 +1738,8 @@ static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c; dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
} }
static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src, static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
ptrdiff_t src_stride,
int w, int h) int w, int h)
{ {
do { do {
...@@ -1751,9 +1750,8 @@ static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src, ...@@ -1751,9 +1750,8 @@ static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src,
} while (--h); } while (--h);
} }
static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src, static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride,
ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
ptrdiff_t src_stride,
int w, int h) int w, int h)
{ {
do { do {
...@@ -1767,13 +1765,12 @@ static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src, ...@@ -1767,13 +1765,12 @@ static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src,
} while (--h); } while (--h);
} }
#define fpel_fn(type, sz) \ #define fpel_fn(type, sz) \
static void type ## sz ## _c(uint8_t *dst, const uint8_t *src, \ static void type ## sz ## _c(uint8_t *dst, ptrdiff_t dst_stride, \
ptrdiff_t dst_stride, \ const uint8_t *src, ptrdiff_t src_stride, \
ptrdiff_t src_stride, \ int h, int mx, int my) \
int h, int mx, int my) \ { \
{ \ type ## _c(dst, dst_stride, src, src_stride, sz, h); \
type ## _c(dst, src, dst_stride, src_stride, sz, h); \
} }
#define copy_avg_fn(sz) \ #define copy_avg_fn(sz) \
...@@ -1851,9 +1848,8 @@ static const int8_t vp9_subpel_filters[3][15][8] = { ...@@ -1851,9 +1848,8 @@ static const int8_t vp9_subpel_filters[3][15][8] = {
F[6] * src[x + +3 * stride] + \ F[6] * src[x + +3 * stride] + \
F[7] * src[x + +4 * stride] + 64) >> 7) F[7] * src[x + +4 * stride] + 64) >> 7)
static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src, static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds, int w, int h, ptrdiff_t ds,
const int8_t *filter, int avg) const int8_t *filter, int avg)
{ {
...@@ -1873,13 +1869,13 @@ static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src, ...@@ -1873,13 +1869,13 @@ static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src,
#define filter_8tap_1d_fn(opn, opa, dir, ds) \ #define filter_8tap_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn ## _8tap_1d_ ## dir ## _c(uint8_t *dst, \ static av_noinline void opn ## _8tap_1d_ ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int w, int h, \ int w, int h, \
const int8_t *filter) \ const int8_t *filter) \
{ \ { \
do_8tap_1d_c(dst, src, dst_stride, src_stride, w, h, ds, filter, opa); \ do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
} }
filter_8tap_1d_fn(put, 0, v, src_stride) filter_8tap_1d_fn(put, 0, v, src_stride)
...@@ -1889,9 +1885,8 @@ filter_8tap_1d_fn(avg, 1, h, 1) ...@@ -1889,9 +1885,8 @@ filter_8tap_1d_fn(avg, 1, h, 1)
#undef filter_8tap_1d_fn #undef filter_8tap_1d_fn
static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src, static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
ptrdiff_t src_stride,
int w, int h, const int8_t *filterx, int w, int h, const int8_t *filterx,
const int8_t *filtery, int avg) const int8_t *filtery, int avg)
{ {
...@@ -1926,14 +1921,14 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src, ...@@ -1926,14 +1921,14 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src,
#define filter_8tap_2d_fn(opn, opa) \ #define filter_8tap_2d_fn(opn, opa) \
static av_noinline void opn ## _8tap_2d_hv_c(uint8_t *dst, \ static av_noinline void opn ## _8tap_2d_hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int w, int h, \ int w, int h, \
const int8_t *filterx, \ const int8_t *filterx, \
const int8_t *filtery) \ const int8_t *filtery) \
{ \ { \
do_8tap_2d_c(dst, src, dst_stride, src_stride, \ do_8tap_2d_c(dst, dst_stride, src, src_stride, \
w, h, filterx, filtery, opa); \ w, h, filterx, filtery, opa); \
} }
...@@ -1947,23 +1942,23 @@ filter_8tap_2d_fn(avg, 1) ...@@ -1947,23 +1942,23 @@ filter_8tap_2d_fn(avg, 1)
#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \ #define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
static void \ static void \
avg ## _8tap_ ## type ## _ ## sz ## dir ## _c(uint8_t *dst, \ avg ## _8tap_ ## type ## _ ## sz ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, int mx, int my) \ int h, int mx, int my) \
{ \ { \
avg ## _8tap_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, sz, h, \ avg ## _8tap_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, sz, h, \
vp9_subpel_filters[type_idx][dir_m - 1]); \ vp9_subpel_filters[type_idx][dir_m - 1]); \
} }
#define filter_fn_2d(sz, type, type_idx, avg) \ #define filter_fn_2d(sz, type, type_idx, avg) \
static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \ static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, int mx, int my) \ int h, int mx, int my) \
{ \ { \
avg ## _8tap_2d_hv_c(dst, src, dst_stride, src_stride, sz, h, \ avg ## _8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
vp9_subpel_filters[type_idx][mx - 1], \ vp9_subpel_filters[type_idx][mx - 1], \
vp9_subpel_filters[type_idx][my - 1]); \ vp9_subpel_filters[type_idx][my - 1]); \
} }
...@@ -1972,8 +1967,8 @@ static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \ ...@@ -1972,8 +1967,8 @@ static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \
(src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4)) (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
static av_always_inline void do_bilin_1d_c(uint8_t *dst, static av_always_inline void do_bilin_1d_c(uint8_t *dst,
const uint8_t *src,
ptrdiff_t dst_stride, ptrdiff_t dst_stride,
const uint8_t *src,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds, int w, int h, ptrdiff_t ds,
int mxy, int avg) int mxy, int avg)
...@@ -1994,12 +1989,12 @@ static av_always_inline void do_bilin_1d_c(uint8_t *dst, ...@@ -1994,12 +1989,12 @@ static av_always_inline void do_bilin_1d_c(uint8_t *dst,
#define bilin_1d_fn(opn, opa, dir, ds) \ #define bilin_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn ## _bilin_1d_ ## dir ## _c(uint8_t *dst, \ static av_noinline void opn ## _bilin_1d_ ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int w, int h, int mxy) \ int w, int h, int mxy) \
{ \ { \
do_bilin_1d_c(dst, src, dst_stride, src_stride, w, h, ds, mxy, opa); \ do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
} }
bilin_1d_fn(put, 0, v, src_stride) bilin_1d_fn(put, 0, v, src_stride)
...@@ -2010,8 +2005,8 @@ bilin_1d_fn(avg, 1, h, 1) ...@@ -2010,8 +2005,8 @@ bilin_1d_fn(avg, 1, h, 1)
#undef bilin_1d_fn #undef bilin_1d_fn
static av_always_inline void do_bilin_2d_c(uint8_t *dst, static av_always_inline void do_bilin_2d_c(uint8_t *dst,
const uint8_t *src,
ptrdiff_t dst_stride, ptrdiff_t dst_stride,
const uint8_t *src,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int w, int h, int mx, int my, int w, int h, int mx, int my,
int avg) int avg)
...@@ -2046,13 +2041,13 @@ static av_always_inline void do_bilin_2d_c(uint8_t *dst, ...@@ -2046,13 +2041,13 @@ static av_always_inline void do_bilin_2d_c(uint8_t *dst,
#define bilin_2d_fn(opn, opa) \ #define bilin_2d_fn(opn, opa) \
static av_noinline void opn ## _bilin_2d_hv_c(uint8_t *dst, \ static av_noinline void opn ## _bilin_2d_hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int w, int h, \ int w, int h, \
int mx, int my) \ int mx, int my) \
{ \ { \
do_bilin_2d_c(dst, src, dst_stride, src_stride, w, h, mx, my, opa); \ do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
} }
bilin_2d_fn(put, 0) bilin_2d_fn(put, 0)
...@@ -2064,23 +2059,23 @@ bilin_2d_fn(avg, 1) ...@@ -2064,23 +2059,23 @@ bilin_2d_fn(avg, 1)
#define bilinf_fn_1d(sz, dir, dir_m, avg) \ #define bilinf_fn_1d(sz, dir, dir_m, avg) \
static void avg ## _bilin_ ## sz ## dir ## _c(uint8_t *dst, \ static void avg ## _bilin_ ## sz ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, int mx, int my) \ int h, int mx, int my) \
{ \ { \
avg ## _bilin_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, \ avg ## _bilin_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, \
sz, h, dir_m); \ sz, h, dir_m); \
} }
#define bilinf_fn_2d(sz, avg) \ #define bilinf_fn_2d(sz, avg) \
static void avg ## _bilin_ ## sz ## hv_c(uint8_t *dst, \ static void avg ## _bilin_ ## sz ## hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, int mx, int my) \ int h, int mx, int my) \
{ \ { \
avg ## _bilin_2d_hv_c(dst, src, dst_stride, src_stride, \ avg ## _bilin_2d_hv_c(dst, dst_stride, src, src_stride, \
sz, h, mx, my); \ sz, h, mx, my); \
} }
......
...@@ -29,10 +29,9 @@ ...@@ -29,10 +29,9 @@
#if HAVE_YASM #if HAVE_YASM
#define fpel_func(avg, sz, opt) \ #define fpel_func(avg, sz, opt) \
void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, const uint8_t *src, \ void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, ptrdiff_t dst_stride, \
ptrdiff_t dst_stride, \ const uint8_t *src, ptrdiff_t src_stride, \
ptrdiff_t src_stride, \
int h, int mx, int my) int h, int mx, int my)
fpel_func(put, 4, mmx); fpel_func(put, 4, mmx);
...@@ -54,8 +53,8 @@ fpel_func(avg, 64, avx2); ...@@ -54,8 +53,8 @@ fpel_func(avg, 64, avx2);
#define mc_func(avg, sz, dir, opt, type, f_sz) \ #define mc_func(avg, sz, dir, opt, type, f_sz) \
void \ void \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, \ int h, \
const type (*filter)[f_sz]) const type (*filter)[f_sz])
...@@ -81,20 +80,21 @@ mc_funcs(32, avx2, int8_t, 32); ...@@ -81,20 +80,21 @@ mc_funcs(32, avx2, int8_t, 32);
#define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \ #define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \
static av_always_inline void \ static av_always_inline void \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, \ int h, \
const type (*filter)[f_sz]) \ const type (*filter)[f_sz]) \
{ \ { \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, src, \ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, \
dst_stride, \ dst_stride, \
src, \
src_stride, \ src_stride, \
h, \ h, \
filter); \ filter); \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst + hsz, \ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst + hsz, \
src + hsz, \
dst_stride, \ dst_stride, \
src + hsz, \
src_stride, \ src_stride, \
h, filter); \ h, filter); \
} }
...@@ -126,19 +126,18 @@ extern const int16_t ff_filters_sse2[3][15][8][8]; ...@@ -126,19 +126,18 @@ extern const int16_t ff_filters_sse2[3][15][8][8];
#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, opt) \ #define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, opt) \
static void \ static void \
op ## _8tap_ ## fname ## _ ## sz ## hv_ ## opt(uint8_t *dst, \ op ## _8tap_ ## fname ## _ ## sz ## hv_ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, int mx, int my) \ int h, int mx, int my) \
{ \ { \
LOCAL_ALIGNED_ ## align(uint8_t, temp, [71 * 64]); \ LOCAL_ALIGNED_ ## align(uint8_t, temp, [71 * 64]); \
ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, src - 3 * src_stride, \ ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, 64, \
64, src_stride, \ src - 3 * src_stride, \
h + 7, \ src_stride, h + 7, \
ff_filters_ ## f_opt[f][mx - 1]); \ ff_filters_ ## f_opt[f][mx - 1]); \
ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, temp + 3 * 64, \ ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, dst_stride, \
dst_stride, 64, \ temp + 3 * 64, 64, h, \
h, \
ff_filters_ ## f_opt[f][my - 1]); \ ff_filters_ ## f_opt[f][my - 1]); \
} }
...@@ -173,14 +172,15 @@ filters_8tap_2d_fn(avg, 32, 32, avx2, ssse3) ...@@ -173,14 +172,15 @@ filters_8tap_2d_fn(avg, 32, 32, avx2, ssse3)
#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, opt) \ #define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, opt) \
static void \ static void \
op ## _8tap_ ## fname ## _ ## sz ## dir ## _ ## opt(uint8_t *dst, \ op ## _8tap_ ## fname ## _ ## sz ## dir ## _ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \ ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \ ptrdiff_t src_stride, \
int h, int mx, \ int h, int mx, \
int my) \ int my) \
{ \ { \
ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, src, \ ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, \
dst_stride, \ dst_stride, \
src, \
src_stride, h,\ src_stride, h,\
ff_filters_ ## f_opt[f][dvar - 1]); \ ff_filters_ ## f_opt[f][dvar - 1]); \
} }
......
...@@ -107,7 +107,7 @@ SECTION .text ...@@ -107,7 +107,7 @@ SECTION .text
%macro filter_sse2_h_fn 1 %macro filter_sse2_h_fn 1
%assign %%px mmsize/2 %assign %%px mmsize/2
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, src, dstride, sstride, h, filtery cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, dstride, src, sstride, h, filtery
pxor m5, m5 pxor m5, m5
mova m6, [pw_64] mova m6, [pw_64]
mova m7, [filteryq+ 0] mova m7, [filteryq+ 0]
...@@ -192,7 +192,7 @@ filter_sse2_h_fn avg ...@@ -192,7 +192,7 @@ filter_sse2_h_fn avg
%macro filter_h_fn 1 %macro filter_h_fn 1
%assign %%px mmsize/2 %assign %%px mmsize/2
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, src, dstride, sstride, h, filtery cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, dstride, src, sstride, h, filtery
mova m6, [pw_256] mova m6, [pw_256]
mova m7, [filteryq+ 0] mova m7, [filteryq+ 0]
%if ARCH_X86_64 && mmsize > 8 %if ARCH_X86_64 && mmsize > 8
...@@ -253,7 +253,7 @@ filter_h_fn avg ...@@ -253,7 +253,7 @@ filter_h_fn avg
%if ARCH_X86_64 %if ARCH_X86_64
%macro filter_hx2_fn 1 %macro filter_hx2_fn 1
%assign %%px mmsize %assign %%px mmsize
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, src, dstride, sstride, h, filtery cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, dstride, src, sstride, h, filtery
mova m13, [pw_256] mova m13, [pw_256]
mova m8, [filteryq+ 0] mova m8, [filteryq+ 0]
mova m9, [filteryq+32] mova m9, [filteryq+32]
...@@ -315,9 +315,9 @@ filter_hx2_fn avg ...@@ -315,9 +315,9 @@ filter_hx2_fn avg
%macro filter_sse2_v_fn 1 %macro filter_sse2_v_fn 1
%assign %%px mmsize/2 %assign %%px mmsize/2
%if ARCH_X86_64 %if ARCH_X86_64
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, src, dstride, sstride, h, filtery, src4, sstride3 cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
%else %else
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, src, dstride, sstride, filtery, src4, sstride3 cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
mov filteryq, r5mp mov filteryq, r5mp
%define hd r4mp %define hd r4mp
%endif %endif
...@@ -413,9 +413,9 @@ filter_sse2_v_fn avg ...@@ -413,9 +413,9 @@ filter_sse2_v_fn avg
%macro filter_v_fn 1 %macro filter_v_fn 1
%assign %%px mmsize/2 %assign %%px mmsize/2
%if ARCH_X86_64 %if ARCH_X86_64
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, src, dstride, sstride, h, filtery, src4, sstride3 cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
%else %else
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, src, dstride, sstride, filtery, src4, sstride3 cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
mov filteryq, r5mp mov filteryq, r5mp
%define hd r4mp %define hd r4mp
%endif %endif
...@@ -486,7 +486,7 @@ filter_v_fn avg ...@@ -486,7 +486,7 @@ filter_v_fn avg
%macro filter_vx2_fn 1 %macro filter_vx2_fn 1
%assign %%px mmsize %assign %%px mmsize
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, src, dstride, sstride, h, filtery, src4, sstride3 cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
mova m13, [pw_256] mova m13, [pw_256]
lea sstride3q, [sstrideq*3] lea sstride3q, [sstrideq*3]
lea src4q, [srcq+sstrideq] lea src4q, [srcq+sstrideq]
...@@ -562,11 +562,11 @@ filter_vx2_fn avg ...@@ -562,11 +562,11 @@ filter_vx2_fn avg
%endif %endif
%if %2 <= mmsize %if %2 <= mmsize
cglobal vp9_%1%2, 5, 7, 4, dst, src, dstride, sstride, h, dstride3, sstride3 cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
lea sstride3q, [sstrideq*3] lea sstride3q, [sstrideq*3]
lea dstride3q, [dstrideq*3] lea dstride3q, [dstrideq*3]
%else %else
cglobal vp9_%1%2, 5, 5, 4, dst, src, dstride, sstride, h cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h
%endif %endif
.loop: .loop:
%%srcfn m0, [srcq] %%srcfn m0, [srcq]
......
...@@ -228,8 +228,8 @@ static void check_mc(void) ...@@ -228,8 +228,8 @@ static void check_mc(void)
int op, hsize, filter, dx, dy; int op, hsize, filter, dx, dy;
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
void, uint8_t *dst, const uint8_t *ref, void, uint8_t *dst, ptrdiff_t dst_stride,
ptrdiff_t dst_stride, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
int h, int mx, int my); int h, int mx, int my);
for (op = 0; op < 2; op++) { for (op = 0; op < 2; op++) {
...@@ -252,13 +252,11 @@ static void check_mc(void) ...@@ -252,13 +252,11 @@ static void check_mc(void)
int mx = dx ? 1 + (rnd() % 14) : 0; int mx = dx ? 1 + (rnd() % 14) : 0;
int my = dy ? 1 + (rnd() % 14) : 0; int my = dy ? 1 + (rnd() % 14) : 0;
randomize_buffers(); randomize_buffers();
call_ref(dst0, src, call_ref(dst0, size * SIZEOF_PIXEL,
size * SIZEOF_PIXEL, src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my); size, mx, my);
call_new(dst1, src, call_new(dst1, size * SIZEOF_PIXEL,
size * SIZEOF_PIXEL, src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my); size, mx, my);
if (memcmp(dst0, dst1, DST_BUF_SIZE)) if (memcmp(dst0, dst1, DST_BUF_SIZE))
fail(); fail();
...@@ -267,8 +265,8 @@ static void check_mc(void) ...@@ -267,8 +265,8 @@ static void check_mc(void)
// functions are identical // functions are identical
if (filter >= 1 && filter <= 2) continue; if (filter >= 1 && filter <= 2) continue;
bench_new(dst1, src, size * SIZEOF_PIXEL, bench_new(dst1, size * SIZEOF_PIXEL,
SRC_BUF_STRIDE * SIZEOF_PIXEL, src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my); size, mx, my);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment