Commit 260e3b22 authored by fbarchard@google.com

now that libyuv requires newer nacl compiler, bundles can be assumed and bundle…

Now that libyuv requires a newer NaCl compiler, bundles can be assumed and the bundle align macro (BUNDLEALIGN) can be removed. No impact on code gen.
BUG=none
TESTED=validator still passes
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/30019004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1150 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ee4bc0d8
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1147 Version: 1148
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -507,7 +507,6 @@ typedef uint8 ulvec8[32]; ...@@ -507,7 +507,6 @@ typedef uint8 ulvec8[32];
#if defined(__native_client__) && defined(__x86_64__) #if defined(__native_client__) && defined(__x86_64__)
#define BUNDLELOCK ".bundle_lock\n" #define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n" #define BUNDLEUNLOCK ".bundle_unlock\n"
#define BUNDLEALIGN "\n"
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" #define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" #define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")" #define MEMLEA(offset, base) #offset "(%q" #base ")"
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1147 #define LIBYUV_VERSION 1148
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -386,7 +386,6 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { ...@@ -386,7 +386,6 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm1,%%xmm2 \n" "movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n" "punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n" "punpckhbw %%xmm0,%%xmm2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2) MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2) MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n" "lea " MEMLEA(0x10,0) ",%0 \n"
...@@ -443,7 +442,6 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { ...@@ -443,7 +442,6 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm1,%%xmm2 \n" "movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n" "punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n" "punpckhbw %%xmm0,%%xmm2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2) MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2) MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n" "lea " MEMLEA(0x10,0) ",%0 \n"
...@@ -487,7 +485,6 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { ...@@ -487,7 +485,6 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n" "punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n" "punpckhbw %%xmm2,%%xmm1 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2) MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2)
MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2) MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n" "lea " MEMLEA(0x10,0) ",%0 \n"
...@@ -842,7 +839,6 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -842,7 +839,6 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
"paddb %%xmm5,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n" "movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -916,7 +912,6 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -916,7 +912,6 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
"packsswb %%xmm1,%%xmm0 \n" "packsswb %%xmm1,%%xmm0 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n" "movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -983,7 +978,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, ...@@ -983,7 +978,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"packsswb %%xmm2,%%xmm0 \n" "packsswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n"
"lea " MEMLEA(0x40,0) ",%0 \n" "lea " MEMLEA(0x40,0) ",%0 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1) MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -1046,7 +1040,6 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0, ...@@ -1046,7 +1040,6 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
"paddb %%xmm5,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n" "movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -1154,7 +1147,6 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, ...@@ -1154,7 +1147,6 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
"paddb %%xmm5,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n" "movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -1298,7 +1290,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, ...@@ -1298,7 +1290,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
"paddb %%xmm5,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n" "movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -1368,7 +1359,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, ...@@ -1368,7 +1359,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
"paddb %%xmm5,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n" "movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -1433,7 +1423,6 @@ struct { ...@@ -1433,7 +1423,6 @@ struct {
// Read 8 UV from 444 // Read 8 UV from 444
#define READYUV444 \ #define READYUV444 \
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
BUNDLEALIGN \
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n" "punpcklbw %%xmm1,%%xmm0 \n"
...@@ -1441,7 +1430,6 @@ struct { ...@@ -1441,7 +1430,6 @@ struct {
// Read 4 UV from 422, upsample to 8 UV // Read 4 UV from 422, upsample to 8 UV
#define READYUV422 \ #define READYUV422 \
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
BUNDLEALIGN \
MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n" \ "punpcklbw %%xmm1,%%xmm0 \n" \
...@@ -1450,7 +1438,6 @@ struct { ...@@ -1450,7 +1438,6 @@ struct {
// Read 2 UV from 411, upsample to 8 UV // Read 2 UV from 411, upsample to 8 UV
#define READYUV411 \ #define READYUV411 \
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
BUNDLEALIGN \
MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n" \ "punpcklbw %%xmm1,%%xmm0 \n" \
...@@ -1982,7 +1969,6 @@ void I422ToBGRARow_AVX2(const uint8* y_buf, ...@@ -1982,7 +1969,6 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
*/ */
"vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n"
// ymm0 = xxxxxxxxxxxxxxxxUUUUUUUUUUUUUUUU, uint8 // ymm0 = xxxxxxxxxxxxxxxxUUUUUUUUUUUUUUUU, uint8
BUNDLEALIGN
MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1)
// ymm1 = xxxxxxxxxxxxxxxxVVVVVVVVVVVVVVVV, uint8 // ymm1 = xxxxxxxxxxxxxxxxVVVVVVVVVVVVVVVV, uint8
"lea " MEMLEA(0x10, [u_buf]) ", %[u_buf] \n" // u_buf += 16 "lea " MEMLEA(0x10, [u_buf]) ", %[u_buf] \n" // u_buf += 16
...@@ -2274,7 +2260,6 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, ...@@ -2274,7 +2260,6 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
"pshufb %%xmm1,%%xmm0 \n" "pshufb %%xmm1,%%xmm0 \n"
"sub $8,%3 \n" "sub $8,%3 \n"
"movlpd %%xmm0," MEMACCESS(1) " \n" "movlpd %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2) MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -2688,7 +2673,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, ...@@ -2688,7 +2673,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n" "lea " MEMLEA(0x20,0) ",%0 \n"
...@@ -2703,7 +2687,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, ...@@ -2703,7 +2687,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
"psrlw $0x8,%%xmm1 \n" "psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n" "movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
...@@ -2743,7 +2726,6 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, ...@@ -2743,7 +2726,6 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
"psrlw $0x8,%%xmm1 \n" "psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n" "movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
...@@ -2798,7 +2780,6 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, ...@@ -2798,7 +2780,6 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n" "lea " MEMLEA(0x20,0) ",%0 \n"
...@@ -2813,7 +2794,6 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, ...@@ -2813,7 +2794,6 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
"psrlw $0x8,%%xmm1 \n" "psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n" "movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
...@@ -2853,7 +2833,6 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy, ...@@ -2853,7 +2833,6 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
"psrlw $0x8,%%xmm1 \n" "psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n" "movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n" "lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
...@@ -3283,7 +3262,6 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -3283,7 +3262,6 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"movdqu " MEMACCESS(0) ",%%xmm1 \n" "movdqu " MEMACCESS(0) ",%%xmm1 \n"
"movzb " MEMACCESS2(0x0b,0) ",%3 \n" "movzb " MEMACCESS2(0x0b,0) ",%3 \n"
"punpckhbw %%xmm1,%%xmm1 \n" "punpckhbw %%xmm1,%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
"movzb " MEMACCESS2(0x0f,0) ",%3 \n" "movzb " MEMACCESS2(0x0f,0) ",%3 \n"
MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
...@@ -3718,13 +3696,11 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, ...@@ -3718,13 +3696,11 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"punpcklbw %%xmm5,%%xmm0 \n" "punpcklbw %%xmm5,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm1 \n"
"psubw %%xmm1,%%xmm0 \n" "psubw %%xmm1,%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1 MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2 MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
"punpcklbw %%xmm5,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm1 \n"
"punpcklbw %%xmm5,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm2 \n"
"psubw %%xmm2,%%xmm1 \n" "psubw %%xmm2,%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2 MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3 MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
"punpcklbw %%xmm5,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm2 \n"
...@@ -3738,7 +3714,6 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, ...@@ -3738,7 +3714,6 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"pmaxsw %%xmm1,%%xmm0 \n" "pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%4 \n" "sub $0x8,%4 \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1) MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1)
"lea " MEMLEA(0x8,0) ",%0 \n" "lea " MEMLEA(0x8,0) ",%0 \n"
"jg 1b \n" "jg 1b \n"
...@@ -3779,13 +3754,11 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, ...@@ -3779,13 +3754,11 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"punpcklbw %%xmm5,%%xmm0 \n" "punpcklbw %%xmm5,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm1 \n"
"psubw %%xmm1,%%xmm0 \n" "psubw %%xmm1,%%xmm0 \n"
BUNDLEALIGN
"movq " MEMACCESS2(0x1,0) ",%%xmm1 \n" "movq " MEMACCESS2(0x1,0) ",%%xmm1 \n"
MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2 MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
"punpcklbw %%xmm5,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm1 \n"
"punpcklbw %%xmm5,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm2 \n"
"psubw %%xmm2,%%xmm1 \n" "psubw %%xmm2,%%xmm1 \n"
BUNDLEALIGN
"movq " MEMACCESS2(0x2,0) ",%%xmm2 \n" "movq " MEMACCESS2(0x2,0) ",%%xmm2 \n"
MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3 MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
"punpcklbw %%xmm5,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm2 \n"
...@@ -3799,7 +3772,6 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, ...@@ -3799,7 +3772,6 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"pmaxsw %%xmm1,%%xmm0 \n" "pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%3 \n" "sub $0x8,%3 \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1) MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1)
"lea " MEMLEA(0x8,0) ",%0 \n" "lea " MEMLEA(0x8,0) ",%0 \n"
"jg 1b \n" "jg 1b \n"
...@@ -4078,7 +4050,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -4078,7 +4050,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
"movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
...@@ -4088,7 +4059,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -4088,7 +4059,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
"psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
"psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
...@@ -4112,7 +4082,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -4112,7 +4082,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
"movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
...@@ -4122,7 +4091,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -4122,7 +4091,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
"psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
"psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
...@@ -4159,7 +4127,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -4159,7 +4127,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
"lea " MEMLEA(0x10,0) ",%0 \n" "lea " MEMLEA(0x10,0) ",%0 \n"
"psubd " MEMACCESS(1) ",%%xmm0 \n" "psubd " MEMACCESS(1) ",%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"cvtdq2ps %%xmm0,%%xmm0 \n" "cvtdq2ps %%xmm0,%%xmm0 \n"
...@@ -4227,7 +4194,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4227,7 +4194,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n" "movd %%xmm0,%k5 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
"punpckldq %%xmm6,%%xmm1 \n" "punpckldq %%xmm6,%%xmm1 \n"
...@@ -4236,7 +4202,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4236,7 +4202,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"movd %%xmm0,%k1 \n" "movd %%xmm0,%k1 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n" "movd %%xmm0,%k5 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
"punpckldq %%xmm6,%%xmm0 \n" "punpckldq %%xmm6,%%xmm0 \n"
...@@ -4258,7 +4223,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4258,7 +4223,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"pmaddwd %%xmm5,%%xmm0 \n" "pmaddwd %%xmm5,%%xmm0 \n"
"addps %%xmm7,%%xmm2 \n" "addps %%xmm7,%%xmm2 \n"
"movd %%xmm0,%k1 \n" "movd %%xmm0,%k1 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
"sub $0x1,%4 \n" "sub $0x1,%4 \n"
"movd %%xmm0," MEMACCESS(2) " \n" "movd %%xmm0," MEMACCESS(2) " \n"
...@@ -4322,7 +4286,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4322,7 +4286,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"psrlw $0x7,%%xmm1 \n" "psrlw $0x7,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n" "packuswb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -4336,7 +4299,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4336,7 +4299,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 25b \n" "jg 25b \n"
...@@ -4349,7 +4311,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4349,7 +4311,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
MEMOPREG(movdqu,0x00,1,4,1,xmm1) MEMOPREG(movdqu,0x00,1,4,1,xmm1)
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 50b \n" "jg 50b \n"
...@@ -4363,7 +4324,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4363,7 +4324,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 75b \n" "jg 75b \n"
...@@ -4442,7 +4402,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4442,7 +4402,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
"paddw %%xmm3,%%xmm1 \n" "paddw %%xmm3,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n" "packuswb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 1b \n" "jg 1b \n"
...@@ -4456,7 +4415,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4456,7 +4415,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 25b \n" "jg 25b \n"
...@@ -4469,7 +4427,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4469,7 +4427,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 50b \n" "jg 50b \n"
...@@ -4483,7 +4440,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -4483,7 +4440,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n" "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 75b \n" "jg 75b \n"
...@@ -4664,7 +4620,6 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -4664,7 +4620,6 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"movzb " MEMACCESS2(0x1,4) ",%2 \n" "movzb " MEMACCESS2(0x1,4) ",%2 \n"
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
"mov %b2," MEMACCESS2(0x1,1) " \n" "mov %b2," MEMACCESS2(0x1,1) " \n"
BUNDLEALIGN
"movzb " MEMACCESS2(0x2,4) ",%2 \n" "movzb " MEMACCESS2(0x2,4) ",%2 \n"
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
"mov %b2," MEMACCESS2(0x2,1) " \n" "mov %b2," MEMACCESS2(0x2,1) " \n"
...@@ -5051,7 +5006,6 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, ...@@ -5051,7 +5006,6 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"movzb " MEMACCESS2(0x4,2) ",%0 \n" "movzb " MEMACCESS2(0x4,2) ",%0 \n"
MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
"mov %b0," MEMACCESS2(0x4,3) " \n" "mov %b0," MEMACCESS2(0x4,3) " \n"
BUNDLEALIGN
"movzb " MEMACCESS2(0x5,2) ",%0 \n" "movzb " MEMACCESS2(0x5,2) ",%0 \n"
MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
"mov %b0," MEMACCESS2(0x5,3) " \n" "mov %b0," MEMACCESS2(0x5,3) " \n"
......
...@@ -168,7 +168,6 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -168,7 +168,6 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n" "lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm2,%%xmm0 \n" "pavgb %%xmm2,%%xmm0 \n"
...@@ -245,12 +244,10 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -245,12 +244,10 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"pavgb %%xmm2,%%xmm0 \n" "pavgb %%xmm2,%%xmm0 \n"
"pavgb %%xmm3,%%xmm1 \n" "pavgb %%xmm3,%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2 MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3 MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
MEMOPREG(movdqu,0x00,0,3,1,xmm4) // movdqu (%0,%3,1),%%xmm4 MEMOPREG(movdqu,0x00,0,3,1,xmm4) // movdqu (%0,%3,1),%%xmm4
MEMOPREG(movdqu,0x10,0,3,1,xmm5) // movdqu 0x10(%0,%3,1),%%xmm5 MEMOPREG(movdqu,0x10,0,3,1,xmm5) // movdqu 0x10(%0,%3,1),%%xmm5
...@@ -374,7 +371,6 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, ...@@ -374,7 +371,6 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
"packuswb %%xmm6,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n"
"movq %%xmm6," MEMACCESS2(0x8,1) " \n" "movq %%xmm6," MEMACCESS2(0x8,1) " \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7 MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7
"lea " MEMLEA(0x20,0) ",%0 \n" "lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm7,%%xmm6 \n" "pavgb %%xmm7,%%xmm6 \n"
...@@ -708,7 +704,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -708,7 +704,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
"movd %k2,%%xmm0 \n" "movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n" "psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN
MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2 MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2
"movd %k2,%%xmm4 \n" "movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n"
...@@ -848,7 +843,6 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ...@@ -848,7 +843,6 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n" "lea " MEMLEA(0x20,0) ",%0 \n"
...@@ -891,7 +885,6 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, ...@@ -891,7 +885,6 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
"movd " MEMACCESS(0) ",%%xmm0 \n" "movd " MEMACCESS(0) ",%%xmm0 \n"
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
"punpckldq %%xmm1,%%xmm0 \n" "punpckldq %%xmm1,%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2 MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3 MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n" "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
...@@ -935,11 +928,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ...@@ -935,11 +928,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
"movq " MEMACCESS(0) ",%%xmm0 \n" "movq " MEMACCESS(0) ",%%xmm0 \n"
MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0 MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1 MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
BUNDLEALIGN
MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1 MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n" "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
"movq " MEMACCESS(5) ",%%xmm2 \n" "movq " MEMACCESS(5) ",%%xmm2 \n"
BUNDLEALIGN
MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2 MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3 MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3 MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
...@@ -1014,7 +1005,6 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -1014,7 +1005,6 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
"49: \n" "49: \n"
"test $0x2,%4 \n" "test $0x2,%4 \n"
"je 29f \n" "je 29f \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1 MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
"pextrw $0x5,%%xmm2,%k0 \n" "pextrw $0x5,%%xmm2,%k0 \n"
...@@ -1120,7 +1110,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -1120,7 +1110,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
"paddd %%xmm3,%%xmm2 \n" "paddd %%xmm3,%%xmm2 \n"
MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
"psrlw $0x9,%%xmm1 \n" "psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0 MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
"pshufb %%xmm5,%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n"
"pshufb %%xmm4,%%xmm0 \n" "pshufb %%xmm4,%%xmm0 \n"
...@@ -1140,7 +1129,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -1140,7 +1129,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
"add $0x1,%2 \n" "add $0x1,%2 \n"
"jl 99f \n" "jl 99f \n"
"psrlw $0x9,%%xmm2 \n" "psrlw $0x9,%%xmm2 \n"
BUNDLEALIGN
MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
"pshufb %%xmm5,%%xmm2 \n" "pshufb %%xmm5,%%xmm2 \n"
"pshufb %%xmm4,%%xmm0 \n" "pshufb %%xmm4,%%xmm0 \n"
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment