Commit f8a86cb0 authored by fbarchard@google.com

Enable AlphaBlend SSSE3 code for NaCL

BUG=253
TESTED=validator
R=nfullagar@chromium.org, ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/1968004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@744 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f2aa91a1
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 743
+Version: 744
 License: BSD
 License File: LICENSE

@@ -27,8 +27,7 @@ extern "C" {
 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
 #if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
-    defined(TARGET_IPHONE_SIMULATOR) || \
-    (defined(__native_client__) && defined(__x86_64__))
+    defined(TARGET_IPHONE_SIMULATOR)
 #define LIBYUV_DISABLE_X86
 #endif
 // True if compiling for SSSE3 as a requirement.
@@ -36,9 +35,17 @@ extern "C" {
 #define LIBYUV_SSSE3_ONLY
 #endif
-// The following are available on all x86 platforms:
+// The following are available on all x86 platforms, including NaCL:
 #if !defined(LIBYUV_DISABLE_X86) && \
     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_ARGBBLENDROW_SSSE3
+#endif
+// The following are available on all x86 platforms except NaCL x64:
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
+    !(defined(__native_client__) && defined(__x86_64__))
 // Conversions.
 #define HAS_ABGRTOUVROW_SSSE3
 #define HAS_ABGRTOYROW_SSSE3
@@ -108,7 +115,6 @@ extern "C" {
 #define HAS_ARGBADDROW_SSE2
 #define HAS_ARGBAFFINEROW_SSE2
 #define HAS_ARGBATTENUATEROW_SSSE3
-#define HAS_ARGBBLENDROW_SSSE3
 #define HAS_ARGBCOLORMATRIXROW_SSSE3
 #define HAS_ARGBGRAYROW_SSSE3
 #define HAS_ARGBMIRRORROW_SSSE3
@@ -180,10 +186,13 @@ extern "C" {
 #if !defined(LIBYUV_DISABLE_X86) && \
     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
     !defined(LIBYUV_SSSE3_ONLY)
-#define HAS_ARGBATTENUATEROW_SSE2
+// Available with NaCL:
 #define HAS_ARGBBLENDROW_SSE2
+#if !(defined(__native_client__) && defined(__x86_64__))
+#define HAS_ARGBATTENUATEROW_SSE2
 #define HAS_MIRRORROW_SSE2
 #endif
+#endif
 // The following are available on Neon platforms
 #if !defined(LIBYUV_DISABLE_NEON) && \

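Reviewer note, not part of the patch: a HAS_* define such as HAS_ARGBBLENDROW_SSSE3 only advertises that the row kernel compiles for this target; callers still select it at runtime from CPU features. A minimal sketch of that dispatch pattern, assuming libyuv's usual row-function signature and its portable ARGBBlendRow_C fallback; the GetARGBBlendRow helper below is hypothetical, not code from the library:

```cpp
#include "libyuv/row.h"  // declares ARGBBlendRow_C / ARGBBlendRow_SSSE3 and the HAS_* gates

// Hypothetical dispatch helper: compile-time gate (HAS_ARGBBLENDROW_SSSE3,
// now visible to NaCl builds as well) plus a runtime SSSE3 check.
typedef void (*ARGBBlendRowFn)(const uint8* src_argb0, const uint8* src_argb1,
                               uint8* dst_argb, int width);

static ARGBBlendRowFn GetARGBBlendRow(bool cpu_has_ssse3) {
  ARGBBlendRowFn fn = ARGBBlendRow_C;  // portable per-row fallback
#if defined(HAS_ARGBBLENDROW_SSSE3)
  if (cpu_has_ssse3) {
    fn = ARGBBlendRow_SSSE3;  // the kernel this commit enables under NaCl
  }
#endif
  return fn;
}
```
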
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 743
+#define LIBYUV_VERSION 744
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT

@@ -20,6 +20,14 @@ extern "C" {
 // This module is for GCC x86 and x64
 #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if defined(__native_client__) && defined(__x86_64__)
+#define MEMACCESS(x) "%%nacl:(%%r15,%q" #x ")"
+#define MEMLEA(x, y) #x "(%q" #y ")"
+#else
+#define MEMACCESS(x) "(%" #x ")"
+#define MEMLEA(x, y) #x "(%" #y ")"
+#endif
 #ifdef HAS_ARGBTOYROW_SSSE3
 // Constants for ARGB
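To make the two new macros concrete: the Native Client x86-64 sandbox requires memory operands to be addressed through the %r15 sandbox base register (which is what the validator checks), while every other x86 build keeps plain (reg) operands. A small sketch of how the macros expand inside a GCC inline-asm template; the wrapper function is illustrative, not code from the patch:

```cpp
#include <stdint.h>

// Copied from the patch: pick sandboxed vs. plain addressing at compile time.
#if defined(__native_client__) && defined(__x86_64__)
#define MEMACCESS(x) "%%nacl:(%%r15,%q" #x ")"
#define MEMLEA(x, y) #x "(%q" #y ")"
#else
#define MEMACCESS(x) "(%" #x ")"
#define MEMLEA(x, y) #x "(%" #y ")"
#endif

// Illustrative wrapper (not from the patch): load 4 bytes from *src into xmm0
// and advance src, the same pattern the blend loops use for operand %0.
//   non-NaCl template: "movd (%0),%%xmm0 \n"                "lea 0x4(%0),%0 \n"
//   NaCl x64 template: "movd %%nacl:(%%r15,%q0),%%xmm0 \n"  "lea 0x4(%q0),%0 \n"
static inline void LoadPixelAndAdvance(const uint8_t*& src) {
  asm volatile(
    "movd " MEMACCESS(0) ",%%xmm0 \n"
    "lea " MEMLEA(0x4, 0) ",%0 \n"
    : "+r"(src)
    :
    : "memory", "cc", "xmm0");
}
```
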
@@ -3494,19 +3502,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     "10: \n"
     "test $0xf,%2 \n"
     "je 19f \n"
-    "movd (%0),%%xmm3 \n"
-    "lea 0x4(%0),%0 \n"
+    "movd "MEMACCESS(0)",%%xmm3 \n"
+    "lea "MEMLEA(0x4,0)",%0 \n"
     "movdqa %%xmm3,%%xmm0 \n"
     "pxor %%xmm4,%%xmm3 \n"
-    "movd (%1),%%xmm2 \n"
+    "movd "MEMACCESS(1)",%%xmm2 \n"
     "psrlw $0x8,%%xmm3 \n"
     "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
     "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
     "pand %%xmm6,%%xmm2 \n"
     "paddw %%xmm7,%%xmm3 \n"
     "pmullw %%xmm3,%%xmm2 \n"
-    "movd (%1),%%xmm1 \n"
-    "lea 0x4(%1),%1 \n"
+    "movd "MEMACCESS(1)",%%xmm1 \n"
+    "lea "MEMLEA(0x4,1)",%1 \n"
     "psrlw $0x8,%%xmm1 \n"
     "por %%xmm4,%%xmm0 \n"
     "pmullw %%xmm3,%%xmm1 \n"
@@ -3515,8 +3523,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     "pand %%xmm5,%%xmm1 \n"
     "paddusb %%xmm1,%%xmm0 \n"
     "sub $0x1,%3 \n"
-    "movd %%xmm0,(%2) \n"
-    "lea 0x4(%2),%2 \n"
+    "movd %%xmm0,"MEMACCESS(2)" \n"
+    "lea "MEMLEA(0x4,2)",%2 \n"
     "jge 10b \n"
     "19: \n"
@@ -3526,19 +3534,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     // 4 pixel loop.
     ".p2align 2 \n"
     "41: \n"
-    "movdqu (%0),%%xmm3 \n"
-    "lea 0x10(%0),%0 \n"
+    "movdqu "MEMACCESS(0)",%%xmm3 \n"
+    "lea "MEMLEA(0x10,0)",%0 \n"
     "movdqa %%xmm3,%%xmm0 \n"
     "pxor %%xmm4,%%xmm3 \n"
-    "movdqu (%1),%%xmm2 \n"
+    "movdqu "MEMACCESS(1)",%%xmm2 \n"
     "psrlw $0x8,%%xmm3 \n"
     "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
     "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
     "pand %%xmm6,%%xmm2 \n"
     "paddw %%xmm7,%%xmm3 \n"
     "pmullw %%xmm3,%%xmm2 \n"
-    "movdqu (%1),%%xmm1 \n"
-    "lea 0x10(%1),%1 \n"
+    "movdqu "MEMACCESS(1)",%%xmm1 \n"
+    "lea "MEMLEA(0x10,1)",%1 \n"
     "psrlw $0x8,%%xmm1 \n"
     "por %%xmm4,%%xmm0 \n"
     "pmullw %%xmm3,%%xmm1 \n"
@@ -3547,8 +3555,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     "pand %%xmm5,%%xmm1 \n"
     "paddusb %%xmm1,%%xmm0 \n"
     "sub $0x4,%3 \n"
-    "movdqa %%xmm0,(%2) \n"
-    "lea 0x10(%2),%2 \n"
+    "movdqa %%xmm0,"MEMACCESS(2)" \n"
+    "lea "MEMLEA(0x10,2)",%2 \n"
     "jge 41b \n"
     "49: \n"
@@ -3557,19 +3565,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     // 1 pixel loop.
     "91: \n"
-    "movd (%0),%%xmm3 \n"
-    "lea 0x4(%0),%0 \n"
+    "movd "MEMACCESS(0)",%%xmm3 \n"
+    "lea "MEMLEA(0x4,0)",%0 \n"
     "movdqa %%xmm3,%%xmm0 \n"
     "pxor %%xmm4,%%xmm3 \n"
-    "movd (%1),%%xmm2 \n"
+    "movd "MEMACCESS(1)",%%xmm2 \n"
     "psrlw $0x8,%%xmm3 \n"
     "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
     "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
     "pand %%xmm6,%%xmm2 \n"
     "paddw %%xmm7,%%xmm3 \n"
     "pmullw %%xmm3,%%xmm2 \n"
-    "movd (%1),%%xmm1 \n"
-    "lea 0x4(%1),%1 \n"
+    "movd "MEMACCESS(1)",%%xmm1 \n"
+    "lea "MEMLEA(0x4,1)",%1 \n"
     "psrlw $0x8,%%xmm1 \n"
     "por %%xmm4,%%xmm0 \n"
     "pmullw %%xmm3,%%xmm1 \n"
@@ -3578,8 +3586,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     "pand %%xmm5,%%xmm1 \n"
     "paddusb %%xmm1,%%xmm0 \n"
     "sub $0x1,%3 \n"
-    "movd %%xmm0,(%2) \n"
-    "lea 0x4(%2),%2 \n"
+    "movd %%xmm0,"MEMACCESS(2)" \n"
+    "lea "MEMLEA(0x4,2)",%2 \n"
     "jge 91b \n"
     "99: \n"
   : "+r"(src_argb0), // %0
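Side note on why SSE2 and SSSE3 variants both exist (pre-existing code, not part of this change): each loop has to broadcast the source pixel's inverted alpha into every 16-bit lane before the pmullw. SSE2 needs three instructions (psrlw, pshufhw, pshuflw); SSSE3 does it with a single pshufb through a shuffle constant (operand %4 above). A rough intrinsics sketch of just that step; the shuffle-mask bytes are inferred for illustration, not quoted from the file:

```cpp
#include <emmintrin.h>  // SSE2
#include <tmmintrin.h>  // SSSE3

// xmm holds 4 ARGB pixels whose alpha byte was already XORed with 0xff,
// i.e. byte 3 of each pixel is (255 - alpha). Both helpers return a vector
// where both 16-bit lanes of each pixel equal that pixel's (255 - alpha).

// SSE2 path: shift each word so its high byte drops into the low byte, then
// duplicate word 1/3 of each half (0xf5 selects words 1,1,3,3).
static inline __m128i BroadcastInvAlpha_SSE2(__m128i xmm) {
  __m128i w = _mm_srli_epi16(xmm, 8);
  w = _mm_shufflehi_epi16(w, 0xf5);
  return _mm_shufflelo_epi16(w, 0xf5);
}

// SSSE3 path: one byte shuffle; lanes with the high bit set are zeroed.
static inline __m128i BroadcastInvAlpha_SSSE3(__m128i xmm) {
  const __m128i kShuffleAlpha = _mm_setr_epi8(
      3, -128, 3, -128, 7, -128, 7, -128,
      11, -128, 11, -128, 15, -128, 15, -128);
  return _mm_shuffle_epi8(xmm, kShuffleAlpha);
}
```
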
@@ -3631,17 +3639,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     "10: \n"
     "test $0xf,%2 \n"
     "je 19f \n"
-    "movd (%0),%%xmm3 \n"
-    "lea 0x4(%0),%0 \n"
+    "movd "MEMACCESS(0)",%%xmm3 \n"
+    "lea "MEMLEA(0x4,0)",%0 \n"
     "movdqa %%xmm3,%%xmm0 \n"
     "pxor %%xmm4,%%xmm3 \n"
-    "movd (%1),%%xmm2 \n"
+    "movd "MEMACCESS(1)",%%xmm2 \n"
     "pshufb %4,%%xmm3 \n"
     "pand %%xmm6,%%xmm2 \n"
     "paddw %%xmm7,%%xmm3 \n"
     "pmullw %%xmm3,%%xmm2 \n"
-    "movd (%1),%%xmm1 \n"
-    "lea 0x4(%1),%1 \n"
+    "movd "MEMACCESS(1)",%%xmm1 \n"
+    "lea "MEMLEA(0x4,1)",%1 \n"
     "psrlw $0x8,%%xmm1 \n"
     "por %%xmm4,%%xmm0 \n"
     "pmullw %%xmm3,%%xmm1 \n"
@@ -3650,8 +3658,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     "pand %%xmm5,%%xmm1 \n"
     "paddusb %%xmm1,%%xmm0 \n"
     "sub $0x1,%3 \n"
-    "movd %%xmm0,(%2) \n"
-    "lea 0x4(%2),%2 \n"
+    "movd %%xmm0,"MEMACCESS(2)" \n"
+    "lea "MEMLEA(0x4,2)",%2 \n"
     "jge 10b \n"
     "19: \n"
@@ -3665,17 +3673,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     // 4 pixel loop.
     ".p2align 2 \n"
     "40: \n"
-    "movdqa (%0),%%xmm3 \n"
-    "lea 0x10(%0),%0 \n"
+    "movdqa "MEMACCESS(0)",%%xmm3 \n"
+    "lea "MEMLEA(0x10,0)",%0 \n"
     "movdqa %%xmm3,%%xmm0 \n"
     "pxor %%xmm4,%%xmm3 \n"
-    "movdqa (%1),%%xmm2 \n"
+    "movdqa "MEMACCESS(1)",%%xmm2 \n"
     "pshufb %4,%%xmm3 \n"
     "pand %%xmm6,%%xmm2 \n"
     "paddw %%xmm7,%%xmm3 \n"
     "pmullw %%xmm3,%%xmm2 \n"
-    "movdqa (%1),%%xmm1 \n"
-    "lea 0x10(%1),%1 \n"
+    "movdqa "MEMACCESS(1)",%%xmm1 \n"
+    "lea "MEMLEA(0x10,1)",%1 \n"
     "psrlw $0x8,%%xmm1 \n"
     "por %%xmm4,%%xmm0 \n"
     "pmullw %%xmm3,%%xmm1 \n"
@@ -3684,25 +3692,25 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     "pand %%xmm5,%%xmm1 \n"
     "paddusb %%xmm1,%%xmm0 \n"
     "sub $0x4,%3 \n"
-    "movdqa %%xmm0,(%2) \n"
-    "lea 0x10(%2),%2 \n"
+    "movdqa %%xmm0,"MEMACCESS(2)" \n"
+    "lea "MEMLEA(0x10,2)",%2 \n"
     "jge 40b \n"
     "jmp 49f \n"
     // 4 pixel unaligned loop.
     ".p2align 2 \n"
     "41: \n"
-    "movdqu (%0),%%xmm3 \n"
-    "lea 0x10(%0),%0 \n"
+    "movdqu "MEMACCESS(0)",%%xmm3 \n"
+    "lea "MEMLEA(0x10,0)",%0 \n"
     "movdqa %%xmm3,%%xmm0 \n"
     "pxor %%xmm4,%%xmm3 \n"
-    "movdqu (%1),%%xmm2 \n"
+    "movdqu "MEMACCESS(1)",%%xmm2 \n"
     "pshufb %4,%%xmm3 \n"
     "pand %%xmm6,%%xmm2 \n"
     "paddw %%xmm7,%%xmm3 \n"
     "pmullw %%xmm3,%%xmm2 \n"
-    "movdqu (%1),%%xmm1 \n"
-    "lea 0x10(%1),%1 \n"
+    "movdqu "MEMACCESS(1)",%%xmm1 \n"
+    "lea "MEMLEA(0x10,1)",%1 \n"
     "psrlw $0x8,%%xmm1 \n"
     "por %%xmm4,%%xmm0 \n"
     "pmullw %%xmm3,%%xmm1 \n"
@@ -3711,8 +3719,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     "pand %%xmm5,%%xmm1 \n"
     "paddusb %%xmm1,%%xmm0 \n"
     "sub $0x4,%3 \n"
-    "movdqa %%xmm0,(%2) \n"
-    "lea 0x10(%2),%2 \n"
+    "movdqa %%xmm0,"MEMACCESS(2)" \n"
+    "lea "MEMLEA(0x10,2)",%2 \n"
     "jge 41b \n"
     "49: \n"
@@ -3721,17 +3729,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     // 1 pixel loop.
     "91: \n"
-    "movd (%0),%%xmm3 \n"
-    "lea 0x4(%0),%0 \n"
+    "movd "MEMACCESS(0)",%%xmm3 \n"
+    "lea "MEMLEA(0x4,0)",%0 \n"
     "movdqa %%xmm3,%%xmm0 \n"
     "pxor %%xmm4,%%xmm3 \n"
-    "movd (%1),%%xmm2 \n"
+    "movd "MEMACCESS(1)",%%xmm2 \n"
     "pshufb %4,%%xmm3 \n"
     "pand %%xmm6,%%xmm2 \n"
     "paddw %%xmm7,%%xmm3 \n"
     "pmullw %%xmm3,%%xmm2 \n"
-    "movd (%1),%%xmm1 \n"
-    "lea 0x4(%1),%1 \n"
+    "movd "MEMACCESS(1)",%%xmm1 \n"
+    "lea "MEMLEA(0x4,1)",%1 \n"
     "psrlw $0x8,%%xmm1 \n"
     "por %%xmm4,%%xmm0 \n"
     "pmullw %%xmm3,%%xmm1 \n"
@@ -3740,8 +3748,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     "pand %%xmm5,%%xmm1 \n"
     "paddusb %%xmm1,%%xmm0 \n"
     "sub $0x1,%3 \n"
-    "movd %%xmm0,(%2) \n"
-    "lea 0x4(%2),%2 \n"
+    "movd %%xmm0,"MEMACCESS(2)" \n"
+    "lea "MEMLEA(0x4,2)",%2 \n"
     "jge 91b \n"
     "99: \n"
   : "+r"(src_argb0), // %0
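For readers who do not want to step through the SIMD: both row functions compute the same per-channel blend, writing 255 into the destination alpha. A hedged scalar reading of the instructions above, assuming (as libyuv's blend path generally does) that the source is already alpha-premultiplied; this is a sketch of the math, not a claim of bit-exact parity with the assembly's rounding:

```cpp
#include <stdint.h>

// Rough scalar model of one blended ARGB pixel (bytes in B,G,R,A order),
// assuming src was premultiplied by its alpha beforehand:
//   out = saturate(src + (((256 - src_a) * dst) >> 8)), out alpha forced to 255.
static inline uint8_t BlendChannel(uint8_t f, uint8_t b, uint8_t a) {
  uint32_t v = f + (((256u - a) * b) >> 8);
  return v > 255u ? 255u : static_cast<uint8_t>(v);
}

static inline void BlendPixel(const uint8_t src[4], const uint8_t dst_in[4],
                              uint8_t dst_out[4]) {
  uint8_t a = src[3];
  dst_out[0] = BlendChannel(src[0], dst_in[0], a);  // B
  dst_out[1] = BlendChannel(src[1], dst_in[1], a);  // G
  dst_out[2] = BlendChannel(src[2], dst_in[2], a);  // R
  dst_out[3] = 255;                                 // A forced to opaque
}
```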