Commit e37aed6f authored by fbarchard@google.com

NaCl versions of color tables

BUG=none
TEST=none
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/3769004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@850 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f7eb04bc
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 849 Version: 850
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -44,7 +44,11 @@ extern "C" { ...@@ -44,7 +44,11 @@ extern "C" {
#define HAS_ARGBATTENUATEROW_SSSE3 #define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3 #define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3 #define HAS_ARGBMIRRORROW_SSSE3
#define HAS_ARGBMULTIPLYROW_SSE2 #define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2 #define HAS_ARGBPOLYNOMIALROW_SSE2
...@@ -58,21 +62,20 @@ extern "C" { ...@@ -58,21 +62,20 @@ extern "C" {
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#define HAS_INTERPOLATEROW_SSE2 #define HAS_INTERPOLATEROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3 #define HAS_INTERPOLATEROW_SSSE3
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_SOBELROW_SSE2 #define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2 #define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSE2 #define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2 #define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2 #define HAS_SOBELYROW_SSE2
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
// Conversions: // Conversions:
#define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGBSHUFFLEROW_SSE2 #define HAS_ARGBSHUFFLEROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBSHUFFLEROW_SSSE3
#define HAS_ARGBTOBAYERROW_SSSE3
#define HAS_ARGBTOBAYERGGROW_SSE2 #define HAS_ARGBTOBAYERGGROW_SSE2
#define HAS_ARGBTOBAYERROW_SSSE3
#define HAS_ARGBTOUV422ROW_SSSE3 #define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3
...@@ -140,11 +143,6 @@ extern "C" { ...@@ -140,11 +143,6 @@ extern "C" {
#define HAS_YTOARGBROW_SSE2 #define HAS_YTOARGBROW_SSE2
#define HAS_YUY2TOARGBROW_SSSE3 #define HAS_YUY2TOARGBROW_SSSE3
#define HAS_YUY2TOYROW_SSE2 #define HAS_YUY2TOYROW_SSE2
// Effects:
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#endif #endif
// The following are available on all x86 platforms, including NaCL, but // The following are available on all x86 platforms, including NaCL, but
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 849 #define LIBYUV_VERSION 850
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -6043,8 +6043,8 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -6043,8 +6043,8 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"cmp $0x2010003,%k2 \n" "cmp $0x2010003,%k2 \n"
"je 2103f \n" "je 2103f \n"
"1: \n"
BUNDLEALIGN BUNDLEALIGN
"1: \n"
"movzb " MEMACCESS(4) ",%2 \n" "movzb " MEMACCESS(4) ",%2 \n"
MEMOP(movzb,0x00,0,2,1) ",%2 \n" // movzb (%0,%2,1),%2 MEMOP(movzb,0x00,0,2,1) ",%2 \n" // movzb (%0,%2,1),%2
"mov %b2," MEMACCESS(1) " \n" "mov %b2," MEMACCESS(1) " \n"
...@@ -6352,20 +6352,21 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, ...@@ -6352,20 +6352,21 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
asm volatile ( asm volatile (
// 1 pixel loop. // 1 pixel loop.
".p2align 4 \n" ".p2align 4 \n"
BUNDLEALIGN
"1: \n" "1: \n"
"movzb (%0),%1 \n" "movzb " MEMACCESS(0) ",%1 \n"
"lea 0x4(%0),%0 \n" "lea " MEMLEA(0x4,0) ",%0 \n"
"movzb (%3,%1,4),%1 \n" MEMOP(movzb,0x00,3,1,4) ",%1 \n" // movzb (%3,%1,4),%1
"mov %b1,-0x4(%0) \n" "mov %b1," MEMACCESS2(-0x4,0) " \n"
"movzb -0x3(%0),%1 \n" "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
"movzb 0x1(%3,%1,4),%1 \n" MEMOP(movzb,0x01,3,1,4) ",%1 \n" // movzb 0x1(%3,%1,4),%1
"mov %b1,-0x3(%0) \n" "mov %b1," MEMACCESS2(-0x3,0) " \n"
"movzb -0x2(%0),%1 \n" "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
"movzb 0x2(%3,%1,4),%1 \n" MEMOP(movzb,0x02,3,1,4) ",%1 \n" // movzb 0x2(%3,%1,4),%1
"mov %b1,-0x2(%0) \n" "mov %b1," MEMACCESS2(-0x2,0) " \n"
"movzb -0x1(%0),%1 \n" "movzb " MEMACCESS2(-0x1,0) ",%1 \n"
"movzb 0x3(%3,%1,4),%1 \n" MEMOP(movzb,0x03,3,1,4) ",%1 \n" // movzb 0x3(%3,%1,4),%1
"mov %b1,-0x1(%0) \n" "mov %b1," MEMACCESS2(-0x1,0) " \n"
"dec %2 \n" "dec %2 \n"
"jg 1b \n" "jg 1b \n"
: "+r"(dst_argb), // %0 : "+r"(dst_argb), // %0
...@@ -6383,17 +6384,18 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { ...@@ -6383,17 +6384,18 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
asm volatile ( asm volatile (
// 1 pixel loop. // 1 pixel loop.
".p2align 4 \n" ".p2align 4 \n"
BUNDLEALIGN
"1: \n" "1: \n"
"movzb (%0),%1 \n" "movzb " MEMACCESS(0) ",%1 \n"
"lea 0x4(%0),%0 \n" "lea " MEMLEA(0x4,0) ",%0 \n"
"movzb (%3,%1,4),%1 \n" MEMOP(movzb,0x00,3,1,4) ",%1 \n" // movzb (%3,%1,4),%1
"mov %b1,-0x4(%0) \n" "mov %b1," MEMACCESS2(-0x4,0) " \n"
"movzb -0x3(%0),%1 \n" "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
"movzb 0x1(%3,%1,4),%1 \n" MEMOP(movzb,0x01,3,1,4) ",%1 \n" // movzb 0x1(%3,%1,4),%1
"mov %b1,-0x3(%0) \n" "mov %b1," MEMACCESS2(-0x3,0) " \n"
"movzb -0x2(%0),%1 \n" "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
"movzb 0x2(%3,%1,4),%1 \n" MEMOP(movzb,0x02,3,1,4) ",%1 \n" // movzb 0x2(%3,%1,4),%1
"mov %b1,-0x2(%0) \n" "mov %b1," MEMACCESS2(-0x2,0) " \n"
"dec %2 \n" "dec %2 \n"
"jg 1b \n" "jg 1b \n"
: "+r"(dst_argb), // %0 : "+r"(dst_argb), // %0
...@@ -6420,8 +6422,9 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, ...@@ -6420,8 +6422,9 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
// 4 pixel loop. // 4 pixel loop.
".p2align 4 \n" ".p2align 4 \n"
BUNDLEALIGN
"1: \n" "1: \n"
"movdqu (%2),%%xmm0 \n" "movdqu " MEMACCESS(2) ",%%xmm0 \n"
"pmaddubsw %%xmm3,%%xmm0 \n" "pmaddubsw %%xmm3,%%xmm0 \n"
"phaddw %%xmm0,%%xmm0 \n" "phaddw %%xmm0,%%xmm0 \n"
"pand %%xmm4,%%xmm0 \n" "pand %%xmm4,%%xmm0 \n"
...@@ -6430,67 +6433,68 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, ...@@ -6430,67 +6433,68 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"add %5,%1 \n" "add %5,%1 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
"movzb (%2),%0 \n" "movzb " MEMACCESS(2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,(%3) \n" "mov %b0," MEMACCESS(3) " \n"
"movzb 0x1(%2),%0 \n" "movzb " MEMACCESS2(0x1,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0x1(%3) \n" "mov %b0," MEMACCESS2(0x1,3) " \n"
"movzb 0x2(%2),%0 \n" "movzb " MEMACCESS2(0x2,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0x2(%3) \n" "mov %b0," MEMACCESS2(0x2,3) " \n"
"movzb 0x3(%2),%0 \n" "movzb " MEMACCESS2(0x3,2) ",%0 \n"
"mov %b0,0x3(%3) \n" "mov %b0," MEMACCESS2(0x3,3) " \n"
"movd %%xmm0,%k1 \n" // 32 bit offset "movd %%xmm0,%k1 \n" // 32 bit offset
"add %5,%1 \n" "add %5,%1 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
"movzb 0x4(%2),%0 \n" "movzb " MEMACCESS2(0x4,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0x4(%3) \n" "mov %b0," MEMACCESS2(0x4,3) " \n"
"movzb 0x5(%2),%0 \n" BUNDLEALIGN
"movzb (%1,%0,1),%0 \n" "movzb " MEMACCESS2(0x5,2) ",%0 \n"
"mov %b0,0x5(%3) \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"movzb 0x6(%2),%0 \n" "mov %b0," MEMACCESS2(0x5,3) " \n"
"movzb (%1,%0,1),%0 \n" "movzb " MEMACCESS2(0x6,2) ",%0 \n"
"mov %b0,0x6(%3) \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"movzb 0x7(%2),%0 \n" "mov %b0," MEMACCESS2(0x6,3) " \n"
"mov %b0,0x7(%3) \n" "movzb " MEMACCESS2(0x7,2) ",%0 \n"
"mov %b0," MEMACCESS2(0x7,3) " \n"
"movd %%xmm0,%k1 \n" // 32 bit offset "movd %%xmm0,%k1 \n" // 32 bit offset
"add %5,%1 \n" "add %5,%1 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
"movzb 0x8(%2),%0 \n" "movzb " MEMACCESS2(0x8,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0x8(%3) \n" "mov %b0," MEMACCESS2(0x8,3) " \n"
"movzb 0x9(%2),%0 \n" "movzb " MEMACCESS2(0x9,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0x9(%3) \n" "mov %b0," MEMACCESS2(0x9,3) " \n"
"movzb 0xa(%2),%0 \n" "movzb " MEMACCESS2(0xa,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0xa(%3) \n" "mov %b0," MEMACCESS2(0xa,3) " \n"
"movzb 0xb(%2),%0 \n" "movzb " MEMACCESS2(0xb,2) ",%0 \n"
"mov %b0,0xb(%3) \n" "mov %b0," MEMACCESS2(0xb,3) " \n"
"movd %%xmm0,%k1 \n" // 32 bit offset "movd %%xmm0,%k1 \n" // 32 bit offset
"add %5,%1 \n" "add %5,%1 \n"
"movzb 0xc(%2),%0 \n" "movzb " MEMACCESS2(0xc,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0xc(%3) \n" "mov %b0," MEMACCESS2(0xc,3) " \n"
"movzb 0xd(%2),%0 \n" "movzb " MEMACCESS2(0xd,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0xd(%3) \n" "mov %b0," MEMACCESS2(0xd,3) " \n"
"movzb 0xe(%2),%0 \n" "movzb " MEMACCESS2(0xe,2) ",%0 \n"
"movzb (%1,%0,1),%0 \n" MEMOP(movzb,0x00,1,0,1) ",%0 \n" // movzb (%1,%0,1),%0
"mov %b0,0xe(%3) \n" "mov %b0," MEMACCESS2(0xe,3) " \n"
"movzb 0xf(%2),%0 \n" "movzb " MEMACCESS2(0xf,2) ",%0 \n"
"mov %b0,0xf(%3) \n" "mov %b0," MEMACCESS2(0xf,3) " \n"
"sub $0x4,%4 \n" "sub $0x4,%4 \n"
"lea 0x10(%2),%2 \n" "lea " MEMLEA(0x10,2) ",%2 \n"
"lea 0x10(%3),%3 \n" "lea " MEMLEA(0x10,3) ",%3 \n"
"jg 1b \n" "jg 1b \n"
: "+d"(pixel_temp), // %0 : "+d"(pixel_temp), // %0
"+a"(table_temp), // %1 "+a"(table_temp), // %1
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment