Commit f1b6063f authored by fbarchard@google.com

port rgb to posix

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/395010

git-svn-id: http://libyuv.googlecode.com/svn/trunk@176 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 19932f8d
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 174
+Version: 176
License: BSD
License File: LICENSE
......
@@ -32,6 +32,14 @@ extern "C" {
#define HAS_BGRATOARGBROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORAWROW_SSSE3
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOARGB1555ROW_SSE2
#define HAS_ARGBTOARGB4444ROW_SSE2
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
@@ -55,18 +63,6 @@ extern "C" {
#define HAS_UYVYTOUVROW_SSE2
#endif
-// The following are available on Windows platforms
-#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
-#define HAS_RGB565TOARGBROW_SSE2
-#define HAS_ARGB1555TOARGBROW_SSE2
-#define HAS_ARGB4444TOARGBROW_SSE2
-#define HAS_ARGBTORGB24ROW_SSSE3
-#define HAS_ARGBTORAWROW_SSSE3
-#define HAS_ARGBTORGB565ROW_SSE2
-#define HAS_ARGBTOARGB1555ROW_SSE2
-#define HAS_ARGBTOARGB4444ROW_SSE2
-#endif
// The following are available on Neon platforms
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_MIRRORROW_NEON
......
@@ -72,6 +72,17 @@ CONST uvec8 kShuffleMaskBGRAToARGB = {
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
};
// Shuffle table for converting ARGB to RGB24.
CONST uvec8 kShuffleMaskARGBToRGB24 = {
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
};
// Shuffle table for converting ARGB to RAW.
CONST uvec8 kShuffleMaskARGBToRAW = {
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
};
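
// Editorial sketch (not part of the original patch): pshufb picks one source
// byte per destination byte, and any mask byte with its high bit set (the
// 128u entries above) writes zero instead. A scalar model of one 16-byte
// shuffle:
//   for (int i = 0; i < 16; ++i)
//     out[i] = (mask[i] & 0x80) ? 0u : in[mask[i] & 0x0f];
// The four zeroed tail bytes are what the pslldq/psrldq splicing in
// ARGBToRGB24Row_SSSE3 and ARGBToRAWRow_SSSE3 below relies on.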
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@@ -98,7 +109,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm5"
#endif
);
}
void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
@@ -120,8 +131,7 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
#if defined(__SSE2__)
, "xmm0", "xmm5"
#endif
);
}
void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
@@ -143,7 +153,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
#if defined(__SSE2__)
, "xmm0", "xmm5"
#endif
);
}
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
@@ -183,7 +193,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
@@ -223,9 +233,343 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
asm volatile (
"mov $0x1080108,%%eax \n"
"movd %%eax,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
"mov $0x20082008,%%eax \n"
"movd %%eax,%%xmm6 \n"
"pshufd $0x0,%%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm3,%%xmm3 \n"
"psllw $0xb,%%xmm3 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"psllw $0xa,%%xmm4 \n"
"psrlw $0x5,%%xmm4 \n"
"pcmpeqb %%xmm7,%%xmm7 \n"
"psllw $0x8,%%xmm7 \n"
"sub %0,%1 \n"
"sub %0,%1 \n"
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"pand %%xmm3,%%xmm1 \n"
"psllw $0xb,%%xmm2 \n"
"pmulhuw %%xmm5,%%xmm1 \n"
"pmulhuw %%xmm5,%%xmm2 \n"
"psllw $0x8,%%xmm1 \n"
"por %%xmm2,%%xmm1 \n"
"pand %%xmm4,%%xmm0 \n"
"pmulhuw %%xmm6,%%xmm0 \n"
"por %%xmm7,%%xmm0 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,(%1,%0,2) \n"
"movdqa %%xmm2,0x10(%1,%0,2) \n"
"lea 0x10(%0),%0 \n"
"sub $0x8,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
:
: "memory", "cc", "eax"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
);
}
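
// Illustrative scalar equivalent of the loop above (hypothetical helper,
// not part of this change). The pmulhuw by 0x0108 replicates a 5-bit
// channel into 8 bits, i.e. (c << 3) | (c >> 2), and 0x2008 does the same
// for the 6-bit green, i.e. (g << 2) | (g >> 4).
static inline void RGB565ToARGBPixel_C(uint16 rgb565, uint8* dst_argb) {
  uint8 b = rgb565 & 0x1f;
  uint8 g = (rgb565 >> 5) & 0x3f;
  uint8 r = (rgb565 >> 11) & 0x1f;
  dst_argb[0] = (uint8)((b << 3) | (b >> 2));  // B
  dst_argb[1] = (uint8)((g << 2) | (g >> 4));  // G
  dst_argb[2] = (uint8)((r << 3) | (r >> 2));  // R
  dst_argb[3] = 255u;                          // A: opaque
}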
void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
asm volatile (
"mov $0x1080108,%%eax \n"
"movd %%eax,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
"mov $0x42004200,%%eax \n"
"movd %%eax,%%xmm6 \n"
"pshufd $0x0,%%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm3,%%xmm3 \n"
"psllw $0xb,%%xmm3 \n"
"movdqa %%xmm3,%%xmm4 \n"
"psrlw $0x6,%%xmm4 \n"
"pcmpeqb %%xmm7,%%xmm7 \n"
"psllw $0x8,%%xmm7 \n"
"sub %0,%1 \n"
"sub %0,%1 \n"
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"psllw $0x1,%%xmm1 \n"
"psllw $0xb,%%xmm2 \n"
"pand %%xmm3,%%xmm1 \n"
"pmulhuw %%xmm5,%%xmm2 \n"
"pmulhuw %%xmm5,%%xmm1 \n"
"psllw $0x8,%%xmm1 \n"
"por %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"pand %%xmm4,%%xmm0 \n"
"psraw $0x8,%%xmm2 \n"
"pmulhuw %%xmm6,%%xmm0 \n"
"pand %%xmm7,%%xmm2 \n"
"por %%xmm2,%%xmm0 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,(%1,%0,2) \n"
"movdqa %%xmm2,0x10(%1,%0,2) \n"
"lea 0x10(%0),%0 \n"
"sub $0x8,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
:
: "memory", "cc", "eax"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
);
}
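
// Illustrative scalar equivalent (hypothetical helper, not part of this
// change). The psraw $0x8 / pand sequence above smears the single alpha
// bit into a full byte, which scalar code writes as (a ? 255 : 0).
static inline void ARGB1555ToARGBPixel_C(uint16 p, uint8* dst_argb) {
  uint8 b = p & 0x1f;
  uint8 g = (p >> 5) & 0x1f;
  uint8 r = (p >> 10) & 0x1f;
  dst_argb[0] = (uint8)((b << 3) | (b >> 2));     // B
  dst_argb[1] = (uint8)((g << 3) | (g >> 2));     // G
  dst_argb[2] = (uint8)((r << 3) | (r >> 2));     // R
  dst_argb[3] = (p & 0x8000) ? 255u : 0u;         // A: 1 bit -> 0x00 or 0xff
}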
void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
asm volatile (
"mov $0xf0f0f0f,%%eax \n"
"movd %%eax,%%xmm4 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
"movdqa %%xmm4,%%xmm5 \n"
"pslld $0x4,%%xmm5 \n"
"sub %0,%1 \n"
"sub %0,%1 \n"
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"pand %%xmm4,%%xmm0 \n"
"pand %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm2,%%xmm3 \n"
"psllw $0x4,%%xmm1 \n"
"psrlw $0x4,%%xmm3 \n"
"por %%xmm1,%%xmm0 \n"
"por %%xmm3,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,(%1,%0,2) \n"
"movdqa %%xmm1,0x10(%1,%0,2) \n"
"lea 0x10(%0),%0 \n"
"sub $0x8,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
:
: "memory", "cc", "eax"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
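
// Illustrative scalar equivalent (hypothetical helper, not part of this
// change): each 4-bit channel n expands to (n << 4) | n, which the loop
// above computes per half with the two nibble masks and a shift/or.
static inline void ARGB4444ToARGBPixel_C(uint16 p, uint8* dst_argb) {
  dst_argb[0] = (uint8)((p & 0x000f) * 0x11);          // B
  dst_argb[1] = (uint8)(((p >> 4) & 0x000f) * 0x11);   // G
  dst_argb[2] = (uint8)(((p >> 8) & 0x000f) * 0x11);   // R
  dst_argb[3] = (uint8)(((p >> 12) & 0x000f) * 0x11);  // A
}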
void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
asm volatile (
"movdqa %3,%%xmm6 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"movdqa 0x20(%0),%%xmm2 \n"
"movdqa 0x30(%0),%%xmm3 \n"
"lea 0x40(%0),%0 \n"
"pshufb %%xmm6,%%xmm0 \n"
"pshufb %%xmm6,%%xmm1 \n"
"pshufb %%xmm6,%%xmm2 \n"
"pshufb %%xmm6,%%xmm3 \n"
"movdqa %%xmm1,%%xmm4 \n"
"psrldq $0x4,%%xmm1 \n"
"pslldq $0xc,%%xmm4 \n"
"movdqa %%xmm2,%%xmm5 \n"
"por %%xmm4,%%xmm0 \n"
"pslldq $0x8,%%xmm5 \n"
"movdqa %%xmm0,(%1) \n"
"por %%xmm5,%%xmm1 \n"
"psrldq $0x8,%%xmm2 \n"
"pslldq $0x4,%%xmm3 \n"
"por %%xmm3,%%xmm2 \n"
"movdqa %%xmm1,0x10(%1) \n"
"movdqa %%xmm2,0x20(%1) \n"
"lea 0x30(%1),%1 \n"
"sub $0x10,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
: "m"(kShuffleMaskARGBToRGB24) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
);
}
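
// Per-pixel effect of the code above, in scalar form (hypothetical helper,
// not part of this change): pshufb compacts each 16-byte register to 12
// payload bytes, and the pslldq/psrldq pairs splice four such registers
// into three contiguous 16-byte stores (64 ARGB bytes in, 48 RGB24 out).
static inline void ARGBToRGB24Pixel_C(const uint8* src_argb, uint8* dst_rgb24) {
  dst_rgb24[0] = src_argb[0];  // B
  dst_rgb24[1] = src_argb[1];  // G
  dst_rgb24[2] = src_argb[2];  // R; the alpha byte src_argb[3] is dropped
}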
void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
asm volatile (
"movdqa %3,%%xmm6 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"movdqa 0x20(%0),%%xmm2 \n"
"movdqa 0x30(%0),%%xmm3 \n"
"lea 0x40(%0),%0 \n"
"pshufb %%xmm6,%%xmm0 \n"
"pshufb %%xmm6,%%xmm1 \n"
"pshufb %%xmm6,%%xmm2 \n"
"pshufb %%xmm6,%%xmm3 \n"
"movdqa %%xmm1,%%xmm4 \n"
"psrldq $0x4,%%xmm1 \n"
"pslldq $0xc,%%xmm4 \n"
"movdqa %%xmm2,%%xmm5 \n"
"por %%xmm4,%%xmm0 \n"
"pslldq $0x8,%%xmm5 \n"
"movdqa %%xmm0,(%1) \n"
"por %%xmm5,%%xmm1 \n"
"psrldq $0x8,%%xmm2 \n"
"pslldq $0x4,%%xmm3 \n"
"por %%xmm3,%%xmm2 \n"
"movdqa %%xmm1,0x10(%1) \n"
"movdqa %%xmm2,0x20(%1) \n"
"lea 0x30(%1),%1 \n"
"sub $0x10,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
: "m"(kShuffleMaskARGBToRAW) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
);
}
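
// The body above is identical to ARGBToRGB24Row_SSSE3; only the pshufb mask
// differs (kShuffleMaskARGBToRAW), so each output pixel is written R, G, B
// instead of B, G, R. Scalar form (illustrative, not part of this change):
//   dst_raw[0] = src_argb[2];
//   dst_raw[1] = src_argb[1];
//   dst_raw[2] = src_argb[0];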
void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
asm volatile (
"pcmpeqb %%xmm3,%%xmm3 \n"
"psrld $0x1b,%%xmm3 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrld $0x1a,%%xmm4 \n"
"pslld $0x5,%%xmm4 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pslld $0xb,%%xmm5 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"pslld $0x8,%%xmm0 \n"
"psrld $0x3,%%xmm1 \n"
"psrld $0x5,%%xmm2 \n"
"psrad $0x10,%%xmm0 \n"
"pand %%xmm3,%%xmm1 \n"
"pand %%xmm4,%%xmm2 \n"
"pand %%xmm5,%%xmm0 \n"
"por %%xmm2,%%xmm1 \n"
"por %%xmm1,%%xmm0 \n"
"packssdw %%xmm0,%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"movq %%xmm0,(%1) \n"
"lea 0x8(%1),%1 \n"
"sub $0x4,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
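
// Illustrative scalar equivalent (hypothetical helper, not part of this
// change): keep the top 5/6/5 bits of R/G/B. The psrad $0x10 / packssdw
// pair above narrows each 32-bit result to 16 bits without unwanted
// saturation, because the high word is a sign extension of bit 15.
static inline uint16 ARGBToRGB565Pixel_C(const uint8* src_argb) {
  return (uint16)(((src_argb[2] >> 3) << 11) |  // R
                  ((src_argb[1] >> 2) << 5) |   // G
                  (src_argb[0] >> 3));          // B
}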
void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
asm volatile (
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrld $0x1b,%%xmm4 \n"
"movdqa %%xmm4,%%xmm5 \n"
"pslld $0x5,%%xmm5 \n"
"movdqa %%xmm4,%%xmm6 \n"
"pslld $0xa,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n"
"pslld $0xf,%%xmm7 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm0,%%xmm3 \n"
"psrad $0x10,%%xmm0 \n"
"psrld $0x3,%%xmm1 \n"
"psrld $0x6,%%xmm2 \n"
"psrld $0x9,%%xmm3 \n"
"pand %%xmm7,%%xmm0 \n"
"pand %%xmm4,%%xmm1 \n"
"pand %%xmm5,%%xmm2 \n"
"pand %%xmm6,%%xmm3 \n"
"por %%xmm1,%%xmm0 \n"
"por %%xmm3,%%xmm2 \n"
"por %%xmm2,%%xmm0 \n"
"packssdw %%xmm0,%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"movq %%xmm0,(%1) \n"
"lea 0x8(%1),%1 \n"
"sub $0x4,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
);
}
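
// Illustrative scalar equivalent (hypothetical helper, not part of this
// change): 1 bit of alpha plus 5 bits each of R, G, B.
static inline uint16 ARGBToARGB1555Pixel_C(const uint8* src_argb) {
  return (uint16)(((src_argb[3] >> 7) << 15) |  // A: top bit only
                  ((src_argb[2] >> 3) << 10) |  // R
                  ((src_argb[1] >> 3) << 5) |   // G
                  (src_argb[0] >> 3));          // B
}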
void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
asm volatile (
"pcmpeqb %%xmm4,%%xmm4 \n"
"psllw $0xc,%%xmm4 \n"
"movdqa %%xmm4,%%xmm3 \n"
"psrlw $0x8,%%xmm3 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"pand %%xmm3,%%xmm0 \n"
"pand %%xmm4,%%xmm1 \n"
"psrlq $0x4,%%xmm0 \n"
"psrlq $0x8,%%xmm1 \n"
"por %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"movq %%xmm0,(%1) \n"
"lea 0x8(%1),%1 \n"
"sub $0x4,%2 \n"
"ja 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
#endif
);
}
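
// Illustrative scalar equivalent (hypothetical helper, not part of this
// change): keep the top nibble of each channel.
static inline uint16 ARGBToARGB4444Pixel_C(const uint8* src_argb) {
  return (uint16)(((src_argb[3] >> 4) << 12) |  // A
                  ((src_argb[2] >> 4) << 8) |   // R
                  ((src_argb[1] >> 4) << 4) |   // G
                  (src_argb[0] >> 4));          // B
}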
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
"movdqa %4,%%xmm5 \n"
@@ -259,8 +603,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
@@ -296,15 +639,14 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"movdqa %0,%%xmm4 \n"
"movdqa %1,%%xmm3 \n"
"movdqa %2,%%xmm5 \n"
@@ -316,8 +658,8 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
#if defined(__SSE2__)
"xmm3", "xmm4", "xmm5"
#endif
);
asm volatile (
"sub %1,%2 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
@@ -363,12 +705,12 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
#endif
);
}
void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                 uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"movdqa %0,%%xmm4 \n"
"movdqa %1,%%xmm3 \n"
"movdqa %2,%%xmm5 \n"
@@ -380,8 +722,8 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
#if defined(__SSE2__)
"xmm3", "xmm4", "xmm5"
#endif
);
asm volatile (
"sub %1,%2 \n"
"1: \n"
"movdqu (%0),%%xmm0 \n"
@@ -431,7 +773,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
#endif
);
}
#endif
@@ -507,16 +849,15 @@ struct {
"packuswb %%xmm1,%%xmm1 \n" \
"packuswb %%xmm2,%%xmm2 \n"
-void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,  // rdi
-                                const uint8* u_buf,  // rsi
-                                const uint8* v_buf,  // rdx
-                                uint8* rgb_buf,  // rcx
-                                int width) {  // r8
+void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
"1: \n"
YUVTORGB
"punpcklbw %%xmm1,%%xmm0 \n"
@@ -542,16 +883,15 @@ void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,
);
}
-void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf,  // rdi
-                                const uint8* u_buf,  // rsi
-                                const uint8* v_buf,  // rdx
-                                uint8* rgb_buf,  // rcx
-                                int width) {  // r8
+void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
"1: \n"
YUVTORGB
"pcmpeqb %%xmm5,%%xmm5 \n"
@@ -578,16 +918,15 @@ void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf,
);
}
-void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,  // rdi
-                                const uint8* u_buf,  // rsi
-                                const uint8* v_buf,  // rdx
-                                uint8* rgb_buf,  // rcx
-                                int width) {  // r8
+void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
"1: \n"
YUVTORGB
"punpcklbw %%xmm1,%%xmm2 \n"
@@ -613,16 +952,15 @@ void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,
);
}
-void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,  // rdi
-                                const uint8* u_buf,  // rsi
-                                const uint8* v_buf,  // rdx
-                                uint8* rgb_buf,  // rcx
-                                int width) {  // r8
+void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
"1: \n"
"movd (%1),%%xmm0 \n"
"movd (%1,%2,1),%%xmm1 \n"
@@ -672,10 +1010,9 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
#endif
#ifdef HAS_YTOARGBROW_SSE2
-void YToARGBRow_SSE2(const uint8* y_buf,  // rdi
-                     uint8* rgb_buf,  // rcx
-                     int width) {  // r8
+void YToARGBRow_SSE2(const uint8* y_buf,
+                     uint8* rgb_buf,
+                     int width) {
asm volatile (
"pcmpeqb %%xmm4,%%xmm4 \n"
"pslld $0x18,%%xmm4 \n"
@@ -685,7 +1022,6 @@ void YToARGBRow_SSE2(const uint8* y_buf,
"mov $0x012a012a,%%eax \n"
"movd %%eax,%%xmm2 \n"
"pshufd $0x0,%%xmm2,%%xmm2 \n"
"1: \n"
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
"movq (%0),%%xmm0 \n"
@@ -865,14 +1201,14 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
"lea 0x10(%1),%1 \n"
"sub $0x10,%3 \n"
"ja 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
@@ -881,7 +1217,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#ifdef HAS_COPYROW_SSE2
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
asm volatile (
"sub %0,%1 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
@@ -1048,7 +1384,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
......
@@ -90,12 +90,14 @@ static const uvec8 kShuffleMaskBGRAToARGB = {
// Shuffle table for converting ARGB to RGB24.
static const uvec8 kShuffleMaskARGBToRGB24 = {
-  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u };
+  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
+};
// Shuffle table for converting ARGB to RAW.
static const uvec8 kShuffleMaskARGBToRAW = {
-  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u };
+  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
+};
__declspec(naked)
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
@@ -297,7 +299,6 @@ __asm {
}
}
-// TODO(fbarchard): Port ARGB1555ToARGBRow_SSE2 to gcc
// 24 instructions
__declspec(naked)
void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
@@ -351,7 +352,6 @@ __asm {
}
}
-// TODO(fbarchard): Port ARGB4444ToARGBRow_SSE2 to gcc
// 18 instructions
__declspec(naked)
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
@@ -391,7 +391,6 @@ __asm {
}
}
-// TODO(fbarchard): Port to gcc
__declspec(naked)
void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
@@ -430,7 +429,6 @@ __asm {
}
}
-// TODO(fbarchard): Port to gcc
__declspec(naked)
void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
@@ -507,7 +505,6 @@ __asm {
}
}
-// TODO(fbarchard): Port to gcc
// TODO(fbarchard): Improve sign extension/packing
__declspec(naked)
void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
@@ -550,7 +547,6 @@ __asm {
}
}
-// TODO(fbarchard): Port to gcc
__declspec(naked)
void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
......