Commit f8a86cb0 authored by fbarchard@google.com

Enable AlphaBlend SSSE3 code for NaCL

BUG=253
TESTED=validator
R=nfullagar@chromium.org, ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/1968004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@744 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f2aa91a1
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 743
Version: 744
License: BSD
License File: LICENSE
......
......@@ -27,8 +27,7 @@ extern "C" {
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR) || \
(defined(__native_client__) && defined(__x86_64__))
defined(TARGET_IPHONE_SIMULATOR)
#define LIBYUV_DISABLE_X86
#endif
// True if compiling for SSSE3 as a requirement.
......@@ -36,9 +35,17 @@ extern "C" {
#define LIBYUV_SSSE3_ONLY
#endif
// The following are available on all x86 platforms:
// The following are available on all x86 platforms, including NaCL:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBBLENDROW_SSSE3
#endif
// The following are available on all x86 platforms except NaCL x64:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!(defined(__native_client__) && defined(__x86_64__))
// Conversions.
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
......@@ -108,7 +115,6 @@ extern "C" {
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3
......@@ -180,10 +186,13 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(LIBYUV_SSSE3_ONLY)
#define HAS_ARGBATTENUATEROW_SSE2
// Available with NaCL:
#define HAS_ARGBBLENDROW_SSE2
#if !(defined(__native_client__) && defined(__x86_64__))
#define HAS_ARGBATTENUATEROW_SSE2
#define HAS_MIRRORROW_SSE2
#endif
#endif
// The following are available on Neon platforms
#if !defined(LIBYUV_DISABLE_NEON) && \
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 743
#define LIBYUV_VERSION 744
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -20,6 +20,14 @@ extern "C" {
// This module is for GCC x86 and x64
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
// Helpers that build memory-operand text for the inline assembly templates in
// this file. Each argument is stringized and concatenated with the adjacent
// literals, so in the default case MEMACCESS(0) yields "(%0)" and
// MEMLEA(0x4, 0) yields "0x4(%0)".
#if !(defined(__native_client__) && defined(__x86_64__))
// Default: plain register-indirect addressing through asm operand `base`.
#define MEMACCESS(base) "(%" #base ")"
#define MEMLEA(offset, base) #offset "(%" #base ")"
#else
// NaCl x86-64: memory accesses are written as %nacl:(%r15,<operand>), and the
// `q` operand modifier is applied to the operand in both macros.
// MEMLEA carries no %nacl:/%r15 part; it only adds the displacement.
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
// Constants for ARGB
......@@ -3494,19 +3502,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"10: \n"
"test $0xf,%2 \n"
"je 19f \n"
"movd (%0),%%xmm3 \n"
"lea 0x4(%0),%0 \n"
"movd "MEMACCESS(0)",%%xmm3 \n"
"lea "MEMLEA(0x4,0)",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movd (%1),%%xmm2 \n"
"movd "MEMACCESS(1)",%%xmm2 \n"
"psrlw $0x8,%%xmm3 \n"
"pshufhw $0xf5,%%xmm3,%%xmm3 \n"
"pshuflw $0xf5,%%xmm3,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movd (%1),%%xmm1 \n"
"lea 0x4(%1),%1 \n"
"movd "MEMACCESS(1)",%%xmm1 \n"
"lea "MEMLEA(0x4,1)",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
......@@ -3515,8 +3523,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x1,%3 \n"
"movd %%xmm0,(%2) \n"
"lea 0x4(%2),%2 \n"
"movd %%xmm0,"MEMACCESS(2)" \n"
"lea "MEMLEA(0x4,2)",%2 \n"
"jge 10b \n"
"19: \n"
......@@ -3526,19 +3534,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
// 4 pixel loop.
".p2align 2 \n"
"41: \n"
"movdqu (%0),%%xmm3 \n"
"lea 0x10(%0),%0 \n"
"movdqu "MEMACCESS(0)",%%xmm3 \n"
"lea "MEMLEA(0x10,0)",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movdqu (%1),%%xmm2 \n"
"movdqu "MEMACCESS(1)",%%xmm2 \n"
"psrlw $0x8,%%xmm3 \n"
"pshufhw $0xf5,%%xmm3,%%xmm3 \n"
"pshuflw $0xf5,%%xmm3,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movdqu (%1),%%xmm1 \n"
"lea 0x10(%1),%1 \n"
"movdqu "MEMACCESS(1)",%%xmm1 \n"
"lea "MEMLEA(0x10,1)",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
......@@ -3547,8 +3555,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x4,%3 \n"
"movdqa %%xmm0,(%2) \n"
"lea 0x10(%2),%2 \n"
"movdqa %%xmm0,"MEMACCESS(2)" \n"
"lea "MEMLEA(0x10,2)",%2 \n"
"jge 41b \n"
"49: \n"
......@@ -3557,19 +3565,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
// 1 pixel loop.
"91: \n"
"movd (%0),%%xmm3 \n"
"lea 0x4(%0),%0 \n"
"movd "MEMACCESS(0)",%%xmm3 \n"
"lea "MEMLEA(0x4,0)",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movd (%1),%%xmm2 \n"
"movd "MEMACCESS(1)",%%xmm2 \n"
"psrlw $0x8,%%xmm3 \n"
"pshufhw $0xf5,%%xmm3,%%xmm3 \n"
"pshuflw $0xf5,%%xmm3,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movd (%1),%%xmm1 \n"
"lea 0x4(%1),%1 \n"
"movd "MEMACCESS(1)",%%xmm1 \n"
"lea "MEMLEA(0x4,1)",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
......@@ -3578,8 +3586,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x1,%3 \n"
"movd %%xmm0,(%2) \n"
"lea 0x4(%2),%2 \n"
"movd %%xmm0,"MEMACCESS(2)" \n"
"lea "MEMLEA(0x4,2)",%2 \n"
"jge 91b \n"
"99: \n"
: "+r"(src_argb0), // %0
......@@ -3631,17 +3639,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"10: \n"
"test $0xf,%2 \n"
"je 19f \n"
"movd (%0),%%xmm3 \n"
"lea 0x4(%0),%0 \n"
"movd "MEMACCESS(0)",%%xmm3 \n"
"lea "MEMLEA(0x4,0)",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movd (%1),%%xmm2 \n"
"movd "MEMACCESS(1)",%%xmm2 \n"
"pshufb %4,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movd (%1),%%xmm1 \n"
"lea 0x4(%1),%1 \n"
"movd "MEMACCESS(1)",%%xmm1 \n"
"lea "MEMLEA(0x4,1)",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
......@@ -3650,8 +3658,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x1,%3 \n"
"movd %%xmm0,(%2) \n"
"lea 0x4(%2),%2 \n"
"movd %%xmm0,"MEMACCESS(2)" \n"
"lea "MEMLEA(0x4,2)",%2 \n"
"jge 10b \n"
"19: \n"
......@@ -3665,17 +3673,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
// 4 pixel loop.
".p2align 2 \n"
"40: \n"
"movdqa (%0),%%xmm3 \n"
"lea 0x10(%0),%0 \n"
"movdqa "MEMACCESS(0)",%%xmm3 \n"
"lea "MEMLEA(0x10,0)",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movdqa (%1),%%xmm2 \n"
"movdqa "MEMACCESS(1)",%%xmm2 \n"
"pshufb %4,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movdqa (%1),%%xmm1 \n"
"lea 0x10(%1),%1 \n"
"movdqa "MEMACCESS(1)",%%xmm1 \n"
"lea "MEMLEA(0x10,1)",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
......@@ -3684,25 +3692,25 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x4,%3 \n"
"movdqa %%xmm0,(%2) \n"
"lea 0x10(%2),%2 \n"
"movdqa %%xmm0,"MEMACCESS(2)" \n"
"lea "MEMLEA(0x10,2)",%2 \n"
"jge 40b \n"
"jmp 49f \n"
// 4 pixel unaligned loop.
".p2align 2 \n"
"41: \n"
"movdqu (%0),%%xmm3 \n"
"lea 0x10(%0),%0 \n"
"movdqu "MEMACCESS(0)",%%xmm3 \n"
"lea "MEMLEA(0x10,0)",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movdqu (%1),%%xmm2 \n"
"movdqu "MEMACCESS(1)",%%xmm2 \n"
"pshufb %4,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movdqu (%1),%%xmm1 \n"
"lea 0x10(%1),%1 \n"
"movdqu "MEMACCESS(1)",%%xmm1 \n"
"lea "MEMLEA(0x10,1)",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
......@@ -3711,8 +3719,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x4,%3 \n"
"movdqa %%xmm0,(%2) \n"
"lea 0x10(%2),%2 \n"
"movdqa %%xmm0,"MEMACCESS(2)" \n"
"lea "MEMLEA(0x10,2)",%2 \n"
"jge 41b \n"
"49: \n"
......@@ -3721,17 +3729,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
// 1 pixel loop.
"91: \n"
"movd (%0),%%xmm3 \n"
"lea 0x4(%0),%0 \n"
"movd "MEMACCESS(0)",%%xmm3 \n"
"lea "MEMLEA(0x4,0)",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movd (%1),%%xmm2 \n"
"movd "MEMACCESS(1)",%%xmm2 \n"
"pshufb %4,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movd (%1),%%xmm1 \n"
"lea 0x4(%1),%1 \n"
"movd "MEMACCESS(1)",%%xmm1 \n"
"lea "MEMLEA(0x4,1)",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
......@@ -3740,8 +3748,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x1,%3 \n"
"movd %%xmm0,(%2) \n"
"lea 0x4(%2),%2 \n"
"movd %%xmm0,"MEMACCESS(2)" \n"
"lea "MEMLEA(0x4,2)",%2 \n"
"jge 91b \n"
"99: \n"
: "+r"(src_argb0), // %0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment