x86: hpeldsp: Split off VP3-specific bits into a separate file

1dfc3cf8 · Diego Biurrun · 9833a406 · 1dfc3cf8 · 1dfc3cf8 · 1dfc3cf8
Commit 1dfc3cf8 authored Jan 11, 2016 by Diego Biurrun
6 changed files
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -51,6 +51,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
+OBJS-$(CONFIG_VP3_DECODER)             += x86/hpeldsp_vp3_init.o
 OBJS-$(CONFIG_VP6_DECODER)             += x86/vp6dsp_init.o
 OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o

@@ -122,5 +123,6 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv40dsp.o
 YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
 YASM-OBJS-$(CONFIG_VORBIS_DECODER)     += x86/vorbisdsp.o
+YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/hpeldsp_vp3.o
 YASM-OBJS-$(CONFIG_VP6_DECODER)        += x86/vp6dsp.o
 YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9dsp.o
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -142,53 +142,6 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_X2


-; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
-cglobal put_no_rnd_pixels8_x2_exact, 4,5
-    lea          r4, [r2*3]
-    pcmpeqb      m6, m6
-.loop:
-    mova         m0, [r1]
-    mova         m2, [r1+r2]
-    mova         m1, [r1+1]
-    mova         m3, [r1+r2+1]
-    pxor         m0, m6
-    pxor         m2, m6
-    pxor         m1, m6
-    pxor         m3, m6
-    PAVGB        m0, m1
-    PAVGB        m2, m3
-    pxor         m0, m6
-    pxor         m2, m6
-    mova       [r0], m0
-    mova    [r0+r2], m2
-    mova         m0, [r1+r2*2]
-    mova         m1, [r1+r2*2+1]
-    mova         m2, [r1+r4]
-    mova         m3, [r1+r4+1]
-    pxor         m0, m6
-    pxor         m1, m6
-    pxor         m2, m6
-    pxor         m3, m6
-    PAVGB        m0, m1
-    PAVGB        m2, m3
-    pxor         m0, m6
-    pxor         m2, m6
-    mova  [r0+r2*2], m0
-    mova    [r0+r4], m2
-    lea          r1, [r1+r2*4]
-    lea          r0, [r0+r2*4]
-    sub         r3d, 4
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_X2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_X2_EXACT
-
-
 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_Y2 0
 cglobal put_pixels8_y2, 4,5
@@ -260,48 +213,6 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_Y2


-; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
-cglobal put_no_rnd_pixels8_y2_exact, 4,5
-    lea          r4, [r2*3]
-    mova         m0, [r1]
-    pcmpeqb      m6, m6
-    add          r1, r2
-    pxor         m0, m6
-.loop:
-    mova         m1, [r1]
-    mova         m2, [r1+r2]
-    pxor         m1, m6
-    pxor         m2, m6
-    PAVGB        m0, m1
-    PAVGB        m1, m2
-    pxor         m0, m6
-    pxor         m1, m6
-    mova       [r0], m0
-    mova    [r0+r2], m1
-    mova         m1, [r1+r2*2]
-    mova         m0, [r1+r4]
-    pxor         m1, m6
-    pxor         m0, m6
-    PAVGB        m2, m1
-    PAVGB        m1, m0
-    pxor         m2, m6
-    pxor         m1, m6
-    mova  [r0+r2*2], m2
-    mova    [r0+r4], m1
-    lea          r1, [r1+r2*4]
-    lea          r0, [r0+r2*4]
-    sub         r3d, 4
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_Y2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_Y2_EXACT
-
-
 ; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8 0
 cglobal avg_pixels8, 4,5

--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@ -22,6 +22,8 @@
 #include <stddef.h>
 #include <stdint.h>

+#include "libavcodec/hpeldsp.h"
+
 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);

@@ -35,4 +37,6 @@ void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);

+void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+
 #endif /* AVCODEC_X86_HPELDSP_H */
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -43,12 +43,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                                    ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
-                                           const uint8_t *pixels,
-                                           ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
-                                          const uint8_t *pixels,
-                                          ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
@@ -57,12 +51,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
                                    ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
-                                           const uint8_t *pixels,
-                                           ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
-                                          const uint8_t *pixels,
-                                          ptrdiff_t line_size, int h);
 void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
 void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -209,11 +197,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
    }
-
-    if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
-        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
-        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
-    }
 #endif /* HAVE_MMXEXT_EXTERNAL */
 }

@@ -243,11 +226,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
    }
-
-    if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
-        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
-        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
-    }
 #endif /* HAVE_AMD3DNOW_EXTERNAL */
 }

@@ -278,4 +256,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)

    if (EXTERNAL_SSE2(cpu_flags))
        hpeldsp_init_sse2(c, flags, cpu_flags);
+
+    if (CONFIG_VP3_DECODER)
+        ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
 }
--- a/libavcodec/x86/hpeldsp_vp3.asm
+++ b/libavcodec/x86/hpeldsp_vp3.asm
+;******************************************************************************
+;* SIMD-optimized halfpel functions for VP3
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+;
+; Horizontal half-pel interpolation: every output row is the average of a
+; source row and the same row shifted by one byte ([r1] and [r1+1]).
+; Each operand is complemented before PAVGB and the result is complemented
+; again. Assuming PAVGB (macro defined outside this file) rounds up, the
+; double complement turns that into the rounded-down average (a + b) >> 1.
+;
+; Register use, following the prototype's argument order:
+;   r0 = block (destination)   r1 = pixels (source)   r2 = line_size
+;   r3d = h (rows remaining)   r4 = 3 * line_size     m6 = all-ones mask
+; Four rows are produced per iteration, so h is consumed in steps of 4 and
+; a full group of four rows is always written.
+%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+cglobal put_no_rnd_pixels8_x2_exact, 4,5
+    lea          r4, [r2*3]         ; offset of the fourth row in a group
+    pcmpeqb      m6, m6             ; m6 = all ones; pxor with it complements
+.loop:
+    ; rows 0 and 1: load each row together with its one-byte-shifted copy
+    mova         m0, [r1]
+    mova         m2, [r1+r2]
+    mova         m1, [r1+1]
+    mova         m3, [r1+r2+1]
+    ; complement all four operands
+    pxor         m0, m6
+    pxor         m2, m6
+    pxor         m1, m6
+    pxor         m3, m6
+    PAVGB        m0, m1
+    PAVGB        m2, m3
+    ; complement the averages back and store rows 0 and 1
+    pxor         m0, m6
+    pxor         m2, m6
+    mova       [r0], m0
+    mova    [r0+r2], m2
+    ; rows 2 and 3: same sequence using the 2*line_size and 3*line_size offsets
+    mova         m0, [r1+r2*2]
+    mova         m1, [r1+r2*2+1]
+    mova         m2, [r1+r4]
+    mova         m3, [r1+r4+1]
+    pxor         m0, m6
+    pxor         m1, m6
+    pxor         m2, m6
+    pxor         m3, m6
+    PAVGB        m0, m1
+    PAVGB        m2, m3
+    pxor         m0, m6
+    pxor         m2, m6
+    mova  [r0+r2*2], m0
+    mova    [r0+r4], m2
+    ; advance source and destination by four rows
+    lea          r1, [r1+r2*4]
+    lea          r0, [r0+r2*4]
+    sub         r3d, 4
+    jg .loop                        ; continue while rows remain
+    REP_RET
+%endmacro
+
+; Expand the macro once per instruction set; the C init code declares the
+; resulting functions with _mmxext and _3dnow suffixes.
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_X2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_X2_EXACT
+
+
+; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+;
+; Vertical half-pel interpolation: every output row is the average of a
+; source row and the row directly below it. As in the x2 variant, operands
+; are complemented before PAVGB and the result is complemented again;
+; assuming PAVGB (macro defined outside this file) rounds up, this gives
+; the rounded-down average (a + b) >> 1.
+;
+; Register use, following the prototype's argument order:
+;   r0 = block (destination)   r1 = pixels (source)   r2 = line_size
+;   r3d = h (rows remaining)   r4 = 3 * line_size     m6 = all-ones mask
+; Four rows are produced per iteration from five source rows. The
+; complemented last source row stays in m0 and is reused as the first
+; operand of the next iteration, so each source row is loaded only once.
+%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+cglobal put_no_rnd_pixels8_y2_exact, 4,5
+    lea          r4, [r2*3]         ; offset of the fourth row in a group
+    mova         m0, [r1]           ; source row 0
+    pcmpeqb      m6, m6             ; m6 = all ones; pxor with it complements
+    add          r1, r2             ; r1 now points at source row 1
+    pxor         m0, m6             ; m0 = complemented row 0
+.loop:
+    ; load and complement source rows 1 and 2
+    mova         m1, [r1]
+    mova         m2, [r1+r2]
+    pxor         m1, m6
+    pxor         m2, m6
+    PAVGB        m0, m1             ; rows 0 and 1
+    PAVGB        m1, m2             ; rows 1 and 2 (m2 is kept for the next pair)
+    ; complement the averages back and store output rows 0 and 1
+    pxor         m0, m6
+    pxor         m1, m6
+    mova       [r0], m0
+    mova    [r0+r2], m1
+    ; load and complement source rows 3 and 4
+    mova         m1, [r1+r2*2]
+    mova         m0, [r1+r4]
+    pxor         m1, m6
+    pxor         m0, m6
+    PAVGB        m2, m1             ; rows 2 and 3
+    PAVGB        m1, m0             ; rows 3 and 4; m0 stays complemented for the next pass
+    ; complement the averages back and store output rows 2 and 3
+    pxor         m2, m6
+    pxor         m1, m6
+    mova  [r0+r2*2], m2
+    mova    [r0+r4], m1
+    ; advance source and destination by four rows
+    lea          r1, [r1+r2*4]
+    lea          r0, [r0+r2*4]
+    sub         r3d, 4
+    jg .loop                        ; continue while rows remain
+    REP_RET
+%endmacro
+
+; Expand the macro once per instruction set; the C init code declares the
+; resulting functions with _mmxext and _3dnow suffixes.
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_Y2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_Y2_EXACT
--- a/libavcodec/x86/hpeldsp_vp3_init.c
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/hpeldsp.h"
+
+#include "hpeldsp.h"
+
+void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+                                          const uint8_t *pixels,
+                                          ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+                                          const uint8_t *pixels,
+                                          ptrdiff_t line_size, int h);
+
+/**
+ * Install the VP3-specific bit-exact half-pel routines.
+ *
+ * Nothing is changed unless AV_CODEC_FLAG_BITEXACT is set in flags. When it
+ * is, entries [1][1] and [1][2] of put_no_rnd_pixels_tab are pointed at the
+ * "exact" x2 and y2 routines for each instruction set reported in cpu_flags.
+ * 3DNow! is handled first, so the MMXEXT versions win when both are present.
+ *
+ * @param c         half-pel DSP context whose function table is updated
+ * @param cpu_flags CPU capability flags tested by the EXTERNAL_* macros
+ * @param flags     codec flags; only AV_CODEC_FLAG_BITEXACT is examined
+ */
+av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+{
+    /* The exact routines are only installed for bit-exact decoding. */
+    if (!(flags & AV_CODEC_FLAG_BITEXACT))
+        return;
+
+    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+    }
+
+    /* Tested last so that MMXEXT overrides 3DNow! when both are available. */
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+    }
+}