Commit 260e3b22 authored by fbarchard@google.com

Now that libyuv requires a newer NaCl compiler, bundles can be assumed and the BUNDLEALIGN macro can be removed; there is no impact on code generation. (An illustrative sketch of the bundling macros follows the commit metadata below.)
BUG=none
TESTED=validator still passes
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/30019004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1150 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ee4bc0d8
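
Background for the change: under the Native Client x86-64 sandbox, code is laid out in 32-byte bundles, and an instruction group that clamps an index and then dereferences it through %r15 must not be split across a bundle boundary. Assemblers without bundle support needed an explicit alignment directive before such groups; a bundle-aware assembler already guarantees this for anything wrapped in .bundle_lock/.bundle_unlock, so a separate alignment macro is dead weight. As the first hunk below suggests, BUNDLELOCK and BUNDLEUNLOCK stay while BUNDLEALIGN, already reduced to a bare "\n", is deleted along with its call sites, which is why code generation is unaffected. The following is only a rough, illustrative sketch of the kind of macro that relies on bundle locking; the real MEMOPMEM/MEMOPREG definitions live in include/libyuv/row.h and are not part of this diff.

/* Illustrative only: a hypothetical bundle-locked store macro in the
 * style of MEMOPMEM.  The lea clamps the scaled index into r14d (r14 is
 * assumed to be listed as a clobber by the caller), and the store then
 * goes through the r15 sandbox base.  .bundle_lock/.bundle_unlock make
 * the assembler keep the pair inside one 32-byte bundle, so no separate
 * alignment directive is needed before the group.
 */
#define STORE_SANDBOXED(opcode, reg, offset, base, index, scale)     \
  BUNDLELOCK                                                         \
  "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n"    \
  #opcode " %%" #reg ",(%%r15,%%r14)\n"                              \
  BUNDLEUNLOCK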
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1147
Version: 1148
License: BSD
License File: LICENSE
@@ -507,7 +507,6 @@ typedef uint8 ulvec8[32];
#if defined(__native_client__) && defined(__x86_64__)
#define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n"
#define BUNDLEALIGN "\n"
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
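
To make the hunks that follow easier to read: MEMACCESS(base) builds a sandbox-safe, %r15-relative memory operand for asm operand #base, MEMACCESS2 adds a constant displacement, and MEMLEA keeps plain addressing for lea, which only computes an address and needs no sandboxing. Below is a minimal sketch, not taken from libyuv, of how the macro pastes into a GCC extended-asm statement.

/* Hypothetical helper showing the string pasting.  Under the NaCl
 * definitions above,
 *   "movdqu " MEMACCESS(0) ",%%xmm0 \n"
 * becomes the template text
 *   "movdqu %%nacl:(%%r15,%q0),%%xmm0 \n"
 * which GCC prints as, e.g., movdqu %nacl:(%r15,%rdi),%xmm0 -- the
 * access is performed relative to the r15 sandbox base, and "%q0" asks
 * for the 64-bit register name of operand 0.
 */
static void Copy16(const unsigned char* src, unsigned char* dst) {
  asm volatile (
    "movdqu    " MEMACCESS(0) ",%%xmm0                  \n"
    "movdqu    %%xmm0," MEMACCESS(1) "                  \n"
    :
    : "r"(src),   // %0
      "r"(dst)    // %1
    : "memory", "xmm0");
}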
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1147
#define LIBYUV_VERSION 1148
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
@@ -386,7 +386,6 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n"
@@ -443,7 +442,6 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n"
@@ -487,7 +485,6 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2)
MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n"
@@ -842,7 +839,6 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
"paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n"
@@ -916,7 +912,6 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
"packsswb %%xmm1,%%xmm0 \n"
"sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n"
@@ -983,7 +978,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"packsswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
"lea " MEMLEA(0x40,0) ",%0 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 1b \n"
@@ -1046,7 +1040,6 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
"paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n"
@@ -1154,7 +1147,6 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
"paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n"
@@ -1298,7 +1290,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
"paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n"
@@ -1368,7 +1359,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
"paddb %%xmm5,%%xmm0 \n"
"sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n"
@@ -1433,7 +1423,6 @@ struct {
// Read 8 UV from 444
#define READYUV444 \
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
BUNDLEALIGN \
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n"
@@ -1441,7 +1430,6 @@ struct {
// Read 4 UV from 422, upsample to 8 UV
#define READYUV422 \
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
BUNDLEALIGN \
MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n" \
@@ -1450,7 +1438,6 @@ struct {
// Read 2 UV from 411, upsample to 8 UV
#define READYUV411 \
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
BUNDLEALIGN \
MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n" \
@@ -1982,7 +1969,6 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
*/
"vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n"
// ymm0 = xxxxxxxxxxxxxxxxUUUUUUUUUUUUUUUU, uint8
BUNDLEALIGN
MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1)
// ymm1 = xxxxxxxxxxxxxxxxVVVVVVVVVVVVVVVV, uint8
"lea " MEMLEA(0x10, [u_buf]) ", %[u_buf] \n" // u_buf += 16
@@ -2274,7 +2260,6 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
"pshufb %%xmm1,%%xmm0 \n"
"sub $8,%3 \n"
"movlpd %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"jg 1b \n"
@@ -2688,7 +2673,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
@@ -2703,7 +2687,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -2743,7 +2726,6 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -2798,7 +2780,6 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
@@ -2813,7 +2794,6 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -2853,7 +2833,6 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -3283,7 +3262,6 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"movzb " MEMACCESS2(0x0b,0) ",%3 \n"
"punpckhbw %%xmm1,%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
"movzb " MEMACCESS2(0x0f,0) ",%3 \n"
MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
@@ -3718,13 +3696,11 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"punpcklbw %%xmm5,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"psubw %%xmm1,%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
"punpcklbw %%xmm5,%%xmm1 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"psubw %%xmm2,%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
"punpcklbw %%xmm5,%%xmm2 \n"
@@ -3738,7 +3714,6 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%4 \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1)
"lea " MEMLEA(0x8,0) ",%0 \n"
"jg 1b \n"
@@ -3779,13 +3754,11 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"punpcklbw %%xmm5,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"psubw %%xmm1,%%xmm0 \n"
BUNDLEALIGN
"movq " MEMACCESS2(0x1,0) ",%%xmm1 \n"
MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
"punpcklbw %%xmm5,%%xmm1 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"psubw %%xmm2,%%xmm1 \n"
BUNDLEALIGN
"movq " MEMACCESS2(0x2,0) ",%%xmm2 \n"
MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
"punpcklbw %%xmm5,%%xmm2 \n"
@@ -3799,7 +3772,6 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%3 \n"
BUNDLEALIGN
MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1)
"lea " MEMLEA(0x8,0) ",%0 \n"
"jg 1b \n"
@@ -4078,7 +4050,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
"movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
@@ -4088,7 +4059,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
"psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
"psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
@@ -4112,7 +4082,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
"movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
@@ -4122,7 +4091,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
"psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
"psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
@@ -4159,7 +4127,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
"lea " MEMLEA(0x10,0) ",%0 \n"
"psubd " MEMACCESS(1) ",%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
"lea " MEMLEA(0x10,1) ",%1 \n"
"cvtdq2ps %%xmm0,%%xmm0 \n"
@@ -4227,7 +4194,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"pshufd $0x39,%%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
"punpckldq %%xmm6,%%xmm1 \n"
@@ -4236,7 +4202,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"movd %%xmm0,%k1 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
"punpckldq %%xmm6,%%xmm0 \n"
@@ -4258,7 +4223,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"pmaddwd %%xmm5,%%xmm0 \n"
"addps %%xmm7,%%xmm2 \n"
"movd %%xmm0,%k1 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
"sub $0x1,%4 \n"
"movd %%xmm0," MEMACCESS(2) " \n"
@@ -4322,7 +4286,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"psrlw $0x7,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 1b \n"
@@ -4336,7 +4299,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 25b \n"
@@ -4349,7 +4311,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
MEMOPREG(movdqu,0x00,1,4,1,xmm1)
"pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 50b \n"
@@ -4363,7 +4324,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 75b \n"
@@ -4442,7 +4402,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
"paddw %%xmm3,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 1b \n"
@@ -4456,7 +4415,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 25b \n"
@@ -4469,7 +4427,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
"pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 50b \n"
@@ -4483,7 +4440,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
"pavgb %%xmm1,%%xmm0 \n"
"pavgb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"jg 75b \n"
@@ -4664,7 +4620,6 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"movzb " MEMACCESS2(0x1,4) ",%2 \n"
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
"mov %b2," MEMACCESS2(0x1,1) " \n"
BUNDLEALIGN
"movzb " MEMACCESS2(0x2,4) ",%2 \n"
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
"mov %b2," MEMACCESS2(0x2,1) " \n"
@@ -5051,7 +5006,6 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"movzb " MEMACCESS2(0x4,2) ",%0 \n"
MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
"mov %b0," MEMACCESS2(0x4,3) " \n"
BUNDLEALIGN
"movzb " MEMACCESS2(0x5,2) ",%0 \n"
MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
"mov %b0," MEMACCESS2(0x5,3) " \n"
@@ -168,7 +168,6 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm2,%%xmm0 \n"
@@ -245,12 +244,10 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"pavgb %%xmm2,%%xmm0 \n"
"pavgb %%xmm3,%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
MEMOPREG(movdqu,0x00,0,3,1,xmm4) // movdqu (%0,%3,1),%%xmm4
MEMOPREG(movdqu,0x10,0,3,1,xmm5) // movdqu 0x10(%0,%3,1),%%xmm5
@@ -374,7 +371,6 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
"packuswb %%xmm6,%%xmm6 \n"
"movq %%xmm6," MEMACCESS2(0x8,1) " \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm7,%%xmm6 \n"
@@ -708,7 +704,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN
MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2
"movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n"
@@ -848,7 +843,6 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
@@ -891,7 +885,6 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
"movd " MEMACCESS(0) ",%%xmm0 \n"
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
"punpckldq %%xmm1,%%xmm0 \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
@@ -935,11 +928,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
"movq " MEMACCESS(0) ",%%xmm0 \n"
MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
BUNDLEALIGN
MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
"movq " MEMACCESS(5) ",%%xmm2 \n"
BUNDLEALIGN
MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
@@ -1014,7 +1005,6 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
"49: \n"
"test $0x2,%4 \n"
"je 29f \n"
BUNDLEALIGN
MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
"pextrw $0x5,%%xmm2,%k0 \n"
@@ -1120,7 +1110,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
"paddd %%xmm3,%%xmm2 \n"
MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
"psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN
MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
"pshufb %%xmm5,%%xmm1 \n"
"pshufb %%xmm4,%%xmm0 \n"
@@ -1140,7 +1129,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
"add $0x1,%2 \n"
"jl 99f \n"
"psrlw $0x9,%%xmm2 \n"
BUNDLEALIGN
MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
"pshufb %%xmm5,%%xmm2 \n"
"pshufb %%xmm4,%%xmm0 \n"