Commit 6334808d authored by fbarchard@google.com

Remove the _ prefix from SIMD constant names in row code.

BUG=none
TEST=none
Review URL: http://webrtc-codereview.appspot.com/270006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@71 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 75988ad3
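Background for the diff below: the old TALIGN16 macro pasted a leading underscore onto every constant (`t _ ## var`), so the MSVC inline assembly had to reference the mangled `_kConstant` names. Replacing it with a SIMD_ALIGNED-style macro that leaves the identifier untouched lets both the C declarations and the asm use the plain `kConstant` names. A minimal sketch of such a portable alignment macro follows; this is an illustration of the idea, not necessarily libyuv's exact definition:

    #if defined(_MSC_VER)
    // MSVC: the alignment attribute precedes the declaration.
    #define SIMD_ALIGNED(var) __declspec(align(16)) var
    #else
    // GCC/Clang: the alignment attribute follows the declarator.
    #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
    #endif

    // Usage: a 16-byte-aligned constant whose symbol matches its C name.
    SIMD_ALIGNED(const signed char kExample[16]) = {
      13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
    };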
@@ -13,74 +13,73 @@
 extern "C" {
 #ifdef HAS_ARGBTOYROW_SSSE3
-#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
 // Constant multiplication table for converting ARGB to I400.
-extern "C" TALIGN16(const int8, kARGBToY[16]) = {
+SIMD_ALIGNED(const int8 kARGBToY[16]) = {
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
 };
-extern "C" TALIGN16(const int8, kARGBToU[16]) = {
+SIMD_ALIGNED(const int8 kARGBToU[16]) = {
   112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
 };
-extern "C" TALIGN16(const int8, kARGBToV[16]) = {
+SIMD_ALIGNED(const int8 kARGBToV[16]) = {
   -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
 };
 // Constants for BGRA
-extern "C" TALIGN16(const int8, kBGRAToY[16]) = {
+SIMD_ALIGNED(const int8 kBGRAToY[16]) = {
   0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
 };
-extern "C" TALIGN16(const int8, kBGRAToU[16]) = {
+SIMD_ALIGNED(const int8 kBGRAToU[16]) = {
   0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
 };
-extern "C" TALIGN16(const int8, kBGRAToV[16]) = {
+SIMD_ALIGNED(const int8 kBGRAToV[16]) = {
   0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
 };
 // Constants for ABGR
-extern "C" TALIGN16(const int8, kABGRToY[16]) = {
+SIMD_ALIGNED(const int8 kABGRToY[16]) = {
   33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
 };
-extern "C" TALIGN16(const int8, kABGRToU[16]) = {
+SIMD_ALIGNED(const int8 kABGRToU[16]) = {
   -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
 };
-extern "C" TALIGN16(const int8, kABGRToV[16]) = {
+SIMD_ALIGNED(const int8 kABGRToV[16]) = {
   112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
 };
-extern "C" TALIGN16(const uint8, kAddY16[16]) = {
+SIMD_ALIGNED(const uint8 kAddY16[16]) = {
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
 };
-extern "C" TALIGN16(const uint8, kAddUV128[16]) = {
+SIMD_ALIGNED(const uint8 kAddUV128[16]) = {
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
 };
 // Shuffle table for converting BG24 to ARGB.
-extern "C" TALIGN16(const uint8, kShuffleMaskBG24ToARGB[16]) = {
+SIMD_ALIGNED(const uint8 kShuffleMaskBG24ToARGB[16]) = {
   0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
 };
 // Shuffle table for converting RAW to ARGB.
-extern "C" TALIGN16(const uint8, kShuffleMaskRAWToARGB[16]) = {
+SIMD_ALIGNED(const uint8 kShuffleMaskRAWToARGB[16]) = {
   2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
 };
 // Shuffle table for converting ABGR to ARGB.
-extern "C" TALIGN16(const uint8, kShuffleMaskABGRToARGB[16]) = {
+SIMD_ALIGNED(const uint8 kShuffleMaskABGRToARGB[16]) = {
   2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
 };
 // Shuffle table for converting BGRA to ARGB.
-extern "C" TALIGN16(const uint8, kShuffleMaskBGRAToARGB[16]) = {
+SIMD_ALIGNED(const uint8 kShuffleMaskBGRAToARGB[16]) = {
   3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
 };
@@ -117,7 +116,7 @@ __asm {
     mov eax, [esp + 4] // src_abgr
     mov edx, [esp + 8] // dst_argb
     mov ecx, [esp + 12] // pix
-    movdqa xmm5, _kShuffleMaskABGRToARGB
+    movdqa xmm5, kShuffleMaskABGRToARGB
   convertloop :
     movdqa xmm0, [eax]
@@ -137,7 +136,7 @@ __asm {
     mov eax, [esp + 4] // src_bgra
     mov edx, [esp + 8] // dst_argb
     mov ecx, [esp + 12] // pix
-    movdqa xmm5, _kShuffleMaskBGRAToARGB
+    movdqa xmm5, kShuffleMaskBGRAToARGB
  convertloop :
    movdqa xmm0, [eax]
@@ -159,7 +158,7 @@ __asm {
     mov ecx, [esp + 12] // pix
     pcmpeqb xmm5, xmm5 // generate mask 0xff000000
     pslld xmm5, 24
-    movdqa xmm4, _kShuffleMaskBG24ToARGB
+    movdqa xmm4, kShuffleMaskBG24ToARGB
  convertloop :
    movdqa xmm0, [eax]
@@ -198,7 +197,7 @@ __asm {
     mov ecx, [esp + 12] // pix
     pcmpeqb xmm5, xmm5 // generate mask 0xff000000
     pslld xmm5, 24
-    movdqa xmm4, _kShuffleMaskRAWToARGB
+    movdqa xmm4, kShuffleMaskRAWToARGB
  convertloop :
    movdqa xmm0, [eax]
@@ -235,8 +234,8 @@ __asm {
     mov eax, [esp + 4] /* src_argb */
     mov edx, [esp + 8] /* dst_y */
     mov ecx, [esp + 12] /* pix */
-    movdqa xmm5, _kAddY16
-    movdqa xmm4, _kARGBToY
+    movdqa xmm5, kAddY16
+    movdqa xmm4, kARGBToY
  convertloop :
    movdqa xmm0, [eax]
@@ -268,8 +267,8 @@ __asm {
     mov eax, [esp + 4] /* src_argb */
     mov edx, [esp + 8] /* dst_y */
     mov ecx, [esp + 12] /* pix */
-    movdqa xmm5, _kAddY16
-    movdqa xmm4, _kBGRAToY
+    movdqa xmm5, kAddY16
+    movdqa xmm4, kBGRAToY
  convertloop :
    movdqa xmm0, [eax]
@@ -301,8 +300,8 @@ __asm {
     mov eax, [esp + 4] /* src_argb */
     mov edx, [esp + 8] /* dst_y */
     mov ecx, [esp + 12] /* pix */
-    movdqa xmm5, _kAddY16
-    movdqa xmm4, _kABGRToY
+    movdqa xmm5, kAddY16
+    movdqa xmm4, kABGRToY
  convertloop :
    movdqa xmm0, [eax]
@@ -339,9 +338,9 @@ __asm {
     mov edx, [esp + 8 + 12] // dst_u
     mov edi, [esp + 8 + 16] // dst_v
     mov ecx, [esp + 8 + 20] // pix
-    movdqa xmm7, _kARGBToU
-    movdqa xmm6, _kARGBToV
-    movdqa xmm5, _kAddUV128
+    movdqa xmm7, kARGBToU
+    movdqa xmm6, kARGBToV
+    movdqa xmm5, kAddUV128
     sub edi, edx // stride from u to v
  convertloop :
@@ -403,9 +402,9 @@ __asm {
     mov edx, [esp + 8 + 12] // dst_u
     mov edi, [esp + 8 + 16] // dst_v
     mov ecx, [esp + 8 + 20] // pix
-    movdqa xmm7, _kBGRAToU
-    movdqa xmm6, _kBGRAToV
-    movdqa xmm5, _kAddUV128
+    movdqa xmm7, kBGRAToU
+    movdqa xmm6, kBGRAToV
+    movdqa xmm5, kAddUV128
     sub edi, edx // stride from u to v
  convertloop :
@@ -467,9 +466,9 @@ __asm {
     mov edx, [esp + 8 + 12] // dst_u
     mov edi, [esp + 8 + 16] // dst_v
     mov ecx, [esp + 8 + 20] // pix
-    movdqa xmm7, _kABGRToU
-    movdqa xmm6, _kABGRToV
-    movdqa xmm5, _kAddUV128
+    movdqa xmm7, kABGRToU
+    movdqa xmm6, kABGRToV
+    movdqa xmm5, kAddUV128
     sub edi, edx // stride from u to v
  convertloop :
@@ -713,35 +712,35 @@ void FastConvertYToARGBRow_MMX(const uint8* y_buf,
 #define BG UG * 128 + VG * 128
 #define BR UR * 128 + VR * 128
-extern "C" TALIGN16(const int8, kUVToB[16]) = {
+SIMD_ALIGNED(const int8 kUVToB[16]) = {
   UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
 };
-extern "C" TALIGN16(const int8, kUVToR[16]) = {
+SIMD_ALIGNED(const int8 kUVToR[16]) = {
   UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
 };
-extern "C" TALIGN16(const int8, kUVToG[16]) = {
+SIMD_ALIGNED(const int8 kUVToG[16]) = {
   UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
 };
-extern "C" TALIGN16(const int16, kYToRgb[8]) = {
+SIMD_ALIGNED(const int16 kYToRgb[8]) = {
   YG, YG, YG, YG, YG, YG, YG, YG
 };
-extern "C" TALIGN16(const int16, kYSub16[8]) = {
+SIMD_ALIGNED(const int16 kYSub16[8]) = {
   16, 16, 16, 16, 16, 16, 16, 16
 };
-extern "C" TALIGN16(const int16, kUVBiasB[8]) = {
+SIMD_ALIGNED(const int16 kUVBiasB[8]) = {
   BB, BB, BB, BB, BB, BB, BB, BB
 };
-extern "C" TALIGN16(const int16, kUVBiasG[8]) = {
+SIMD_ALIGNED(const int16 kUVBiasG[8]) = {
   BG, BG, BG, BG, BG, BG, BG, BG
 };
-extern "C" TALIGN16(const int16, kUVBiasR[8]) = {
+SIMD_ALIGNED(const int16 kUVBiasR[8]) = {
   BR, BR, BR, BR, BR, BR, BR, BR
 };
@@ -754,18 +753,18 @@ extern "C" TALIGN16(const int16, kUVBiasR[8]) = {
   __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
   __asm movdqa xmm1, xmm0 \
   __asm movdqa xmm2, xmm0 \
-  __asm pmaddubsw xmm0, _kUVToB /* scale B UV */ \
-  __asm pmaddubsw xmm1, _kUVToG /* scale G UV */ \
-  __asm pmaddubsw xmm2, _kUVToR /* scale R UV */ \
-  __asm psubw xmm0, _kUVBiasB /* unbias back to signed */ \
-  __asm psubw xmm1, _kUVBiasG \
-  __asm psubw xmm2, _kUVBiasR \
+  __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \
+  __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \
+  __asm pmaddubsw xmm2, kUVToR /* scale R UV */ \
+  __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
+  __asm psubw xmm1, kUVBiasG \
+  __asm psubw xmm2, kUVBiasR \
   /* Step 2: Find Y contribution to 8 R,G,B values */ \
   __asm movq xmm3, qword ptr [eax] \
   __asm lea eax, [eax + 8] \
   __asm punpcklbw xmm3, xmm4 \
-  __asm psubsw xmm3, _kYSub16 \
-  __asm pmullw xmm3, _kYToRgb \
+  __asm psubsw xmm3, kYSub16 \
+  __asm pmullw xmm3, kYToRgb \
   __asm paddw xmm0, xmm3 /* B += Y */ \
   __asm paddw xmm1, xmm3 /* G += Y */ \
   __asm paddw xmm2, xmm3 /* R += Y */ \
@@ -923,19 +922,19 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
     punpcklbw xmm0, xmm1 // UV
     movdqa xmm1, xmm0
     movdqa xmm2, xmm0
-    pmaddubsw xmm0, _kUVToB // scale B UV
-    pmaddubsw xmm1, _kUVToG // scale G UV
-    pmaddubsw xmm2, _kUVToR // scale R UV
-    psubw xmm0, _kUVBiasB // unbias back to signed
-    psubw xmm1, _kUVBiasG
-    psubw xmm2, _kUVBiasR
+    pmaddubsw xmm0, kUVToB // scale B UV
+    pmaddubsw xmm1, kUVToG // scale G UV
+    pmaddubsw xmm2, kUVToR // scale R UV
+    psubw xmm0, kUVBiasB // unbias back to signed
+    psubw xmm1, kUVBiasG
+    psubw xmm2, kUVBiasR
     // Step 2: Find Y contribution to 4 R,G,B values
     movd xmm3, [eax]
     lea eax, [eax + 4]
     punpcklbw xmm3, xmm4
-    psubsw xmm3, _kYSub16
-    pmullw xmm3, _kYToRgb
+    psubsw xmm3, kYSub16
+    pmullw xmm3, kYToRgb
     paddw xmm0, xmm3 // B += Y
     paddw xmm1, xmm3 // G += Y
     paddw xmm2, xmm3 // R += Y
@@ -976,8 +975,8 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
     pcmpeqb xmm5, xmm5 // generate mask 0xff000000
     pslld xmm5, 24
     pxor xmm4, xmm4
-    movdqa xmm3, _kYSub16
-    movdqa xmm2, _kYToRgb
+    movdqa xmm3, kYSub16
+    movdqa xmm2, kYToRgb
  convertloop :
    // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
...
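As an aside for readers of the hunks above: the renamed constants implement fixed-point BT.601 color conversion. pmaddubsw multiplies unsigned pixel bytes by signed 7-bit coefficients, so kARGBToY = {13, 65, 33, 0} weights the B, G, R bytes of a memory-order ARGB pixel (blue first), the shift by 7 rescales, and kAddY16 adds the video-range offset of 16. A scalar sketch of the per-pixel math, using a hypothetical helper rather than the library's reference C code:

    #include <stdint.h>

    // Mirrors the SSSE3 ARGB-to-Y row above (ignoring its lack of rounding):
    // ARGB pixels are laid out B,G,R,A in memory, hence the blue weight (13)
    // comes first in kARGBToY.
    static uint8_t ARGBPixelToY(const uint8_t* argb) {
      const uint8_t b = argb[0], g = argb[1], r = argb[2];
      return (uint8_t)(((13 * b + 65 * g + 33 * r) >> 7) + 16);
    }

The kUVBias* constants serve a related fixed-point purpose: because pmaddubsw treats the 128-biased U/V bytes as unsigned, every product carries a constant offset of coefficient * 128 (the BG/BR macros visible in the hunk at line 713), which the psubw instructions subtract to recenter the result as signed.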