Commit 5452cce4 authored by Frank Barchard

port row to clangcl

BUG=libyuv:487
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/53799005.
parent fa7ce4af
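
The port is largely mechanical, and the diff below repeats two patterns. First, MSVC's inline assembler infers the operand size of a named constant, while clang-cl demands an explicit `xmmword ptr` or `ymmword ptr` qualifier. Second, clang-cl cannot encode a `struct.member` expression as a memory operand, so references to `YuvConstants` members become a base address plus a literal byte offset. A minimal stand-alone sketch of the size-qualifier pattern (the constant and function names here are illustrative, not from the diff):

#include <stdint.h>

// 16-byte shuffle control, aligned like the kShuffleMask* constants in row_win.cc.
__declspec(align(16)) static const uint8_t kIdentityShuffle[16] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

void ShuffleCopy(const uint8_t* src, uint8_t* dst) {
  __asm {
    mov eax, src
    mov edx, dst
    movdqu xmm0, [eax]
    // movdqa xmm4, kIdentityShuffle            // cl accepts; clang-cl rejects
    movdqa xmm4, xmmword ptr kIdentityShuffle   // explicit size builds on both
    pshufb xmm0, xmm4  // SSSE3 byte shuffle; the identity mask makes this a copy
    movdqu [edx], xmm0
  }
}
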
@@ -23,18 +23,26 @@ extern "C" {
// This module is for Visual C 32/64 bit and clangcl 32 bit
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
struct YuvConstants {
lvec8 kUVToB; // 0
lvec8 kUVToG; // 32
lvec8 kUVToR; // 64
lvec16 kUVBiasB; // 96
lvec16 kUVBiasG; // 128
lvec16 kUVBiasR; // 160
lvec16 kYToRgb; // 192
lvec8 kUVToB;
lvec8 kUVToG;
lvec8 kUVToR;
lvec16 kUVBiasB;
lvec16 kUVBiasG;
lvec16 kUVBiasR;
lvec16 kYToRgb;
};
#define KUVTOB 0
#define KUVTOG 32
#define KUVTOR 64
#define KUVBIASB 96
#define KUVBIASG 128
#define KUVBIASR 160
#define KYTORGB 192
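
The #defines mirror the member offsets of YuvConstants (each lvec8/lvec16 field is 32 bytes) so the inline asm can address the table as base plus literal offset, which clang-cl can encode where `YuvConstants.kUVToG` cannot be. A compile-time guard along these lines (hypothetical, not part of this commit) would keep the two in sync:

#include <stddef.h>
// Hypothetical consistency checks: fail the build if the literal offsets
// used by the inline asm drift from the real struct layout.
static_assert(offsetof(YuvConstants, kUVToG) == KUVTOG, "KUVTOG out of sync");
static_assert(offsetof(YuvConstants, kUVBiasB) == KUVBIASB, "KUVBIASB out of sync");
static_assert(offsetof(YuvConstants, kYToRgb) == KYTORGB, "KYTORGB out of sync");
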
// BT.601 YUV to RGB reference
// R = (Y - 16) * 1.164 - V * -1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
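
For reference, a plain-C sketch of the floating-point conversion the fixed-point assembly approximates (the B row is cut off by the hunk above; 2.018 is the standard BT.601 blue coefficient, and U/V are centered at 128 before scaling):

#include <stdint.h>

static uint8_t Clamp(int v) { return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v); }

// One-pixel BT.601 reference conversion (sketch, not from the commit).
static void YuvToRgbReference(uint8_t y, uint8_t u, uint8_t v,
                              uint8_t* r, uint8_t* g, uint8_t* b) {
  float yf = (y - 16) * 1.164f;
  *r = Clamp((int)(yf + (v - 128) * 1.596f));
  *g = Clamp((int)(yf - (u - 128) * 0.391f - (v - 128) * 0.813f));
  *b = Clamp((int)(yf + (u - 128) * 2.018f));
}
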
@@ -389,7 +397,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
mov ecx, [esp + 12] // pix
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
movdqa xmm4, kShuffleMaskRGB24ToARGB
movdqa xmm4, xmmword ptr kShuffleMaskRGB24ToARGB
convertloop:
movdqu xmm0, [eax]
@@ -428,7 +436,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
mov ecx, [esp + 12] // pix
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
movdqa xmm4, kShuffleMaskRAWToARGB
movdqa xmm4, xmmword ptr kShuffleMaskRAWToARGB
convertloop:
movdqu xmm0, [eax]
@@ -759,7 +767,7 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
mov ecx, [esp + 12] // pix
movdqa xmm6, kShuffleMaskARGBToRGB24
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
convertloop:
movdqu xmm0, [eax] // fetch 16 pixels of argb
@@ -797,7 +805,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
mov ecx, [esp + 12] // pix
movdqa xmm6, kShuffleMaskARGBToRAW
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
convertloop:
movdqu xmm0, [eax] // fetch 16 pixels of argb
@@ -1142,8 +1150,8 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
movdqa xmm4, kARGBToY
movdqa xmm5, kAddY16
movdqa xmm4, xmmword ptr kARGBToY
movdqa xmm5, xmmword ptr kAddY16
convertloop:
movdqu xmm0, [eax]
@@ -1177,8 +1185,8 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
movdqa xmm4, kARGBToYJ
movdqa xmm5, kAddYJ64
movdqa xmm4, xmmword ptr kARGBToYJ
movdqa xmm5, xmmword ptr kAddYJ64
convertloop:
movdqu xmm0, [eax]
@@ -1218,9 +1226,9 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
vbroadcastf128 ymm4, kARGBToY
vbroadcastf128 ymm5, kAddY16
vmovdqu ymm6, kPermdARGBToY_AVX
vbroadcastf128 ymm4, xmmword ptr kARGBToY
vbroadcastf128 ymm5, xmmword ptr kAddY16
vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX
convertloop:
vmovdqu ymm0, [eax]
@@ -1257,9 +1265,9 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
vbroadcastf128 ymm4, kARGBToYJ
vbroadcastf128 ymm5, kAddYJ64
vmovdqu ymm6, kPermdARGBToY_AVX
vbroadcastf128 ymm4, xmmword ptr kARGBToYJ
vbroadcastf128 ymm5, xmmword ptr kAddYJ64
vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX
convertloop:
vmovdqu ymm0, [eax]
@@ -1296,8 +1304,8 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
movdqa xmm4, kBGRAToY
movdqa xmm5, kAddY16
movdqa xmm4, xmmword ptr kBGRAToY
movdqa xmm5, xmmword ptr kAddY16
convertloop:
movdqu xmm0, [eax]
@@ -1329,8 +1337,8 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
movdqa xmm4, kABGRToY
movdqa xmm5, kAddY16
movdqa xmm4, xmmword ptr kABGRToY
movdqa xmm5, xmmword ptr kAddY16
convertloop:
movdqu xmm0, [eax]
@@ -1362,8 +1370,8 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
movdqa xmm4, kRGBAToY
movdqa xmm5, kAddY16
movdqa xmm4, xmmword ptr kRGBAToY
movdqa xmm5, xmmword ptr kAddY16
convertloop:
movdqu xmm0, [eax]
@@ -1400,9 +1408,9 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
movdqa xmm5, xmmword ptr kAddUV128
movdqa xmm6, xmmword ptr kARGBToV
movdqa xmm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
convertloop:
@@ -1470,9 +1478,9 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
movdqa xmm5, kAddUVJ128
movdqa xmm6, kARGBToVJ
movdqa xmm7, kARGBToUJ
movdqa xmm5, xmmword ptr kAddUVJ128
movdqa xmm6, xmmword ptr kARGBToVJ
movdqa xmm7, xmmword ptr kARGBToUJ
sub edi, edx // stride from u to v
convertloop:
@@ -1542,9 +1550,9 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
vbroadcastf128 ymm5, kAddUV128
vbroadcastf128 ymm6, kARGBToV
vbroadcastf128 ymm7, kARGBToU
vbroadcastf128 ymm5, xmmword ptr kAddUV128
vbroadcastf128 ymm6, xmmword ptr kARGBToV
vbroadcastf128 ymm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
convertloop:
@@ -1578,7 +1586,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
vpsraw ymm0, ymm0, 8
vpacksswb ymm0, ymm1, ymm0 // mutates
vpermq ymm0, ymm0, 0xd8 // For vpacksswb
vpshufb ymm0, ymm0, kShufARGBToUV_AVX // For vshufps + vphaddw
vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw
vpaddb ymm0, ymm0, ymm5 // -> unsigned
// step 3 - store 16 U and 16 V values
@@ -1605,9 +1613,9 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix
movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
movdqa xmm5, xmmword ptr kAddUV128
movdqa xmm6, xmmword ptr kARGBToV
movdqa xmm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
convertloop:
@@ -1662,9 +1670,9 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix
movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
movdqa xmm5, xmmword ptr kAddUV128
movdqa xmm6, xmmword ptr kARGBToV
movdqa xmm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
convertloop:
@@ -1722,9 +1730,9 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
movdqa xmm5, kAddUV128
movdqa xmm6, kBGRAToV
movdqa xmm7, kBGRAToU
movdqa xmm5, xmmword ptr kAddUV128
movdqa xmm6, xmmword ptr kBGRAToV
movdqa xmm7, xmmword ptr kBGRAToU
sub edi, edx // stride from u to v
convertloop:
@@ -1792,9 +1800,9 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
movdqa xmm5, kAddUV128
movdqa xmm6, kABGRToV
movdqa xmm7, kABGRToU
movdqa xmm5, xmmword ptr kAddUV128
movdqa xmm6, xmmword ptr kABGRToV
movdqa xmm7, xmmword ptr kABGRToU
sub edi, edx // stride from u to v
convertloop:
@@ -1862,9 +1870,9 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
movdqa xmm5, kAddUV128
movdqa xmm6, kRGBAToV
movdqa xmm7, kRGBAToU
movdqa xmm5, xmmword ptr kAddUV128
movdqa xmm6, xmmword ptr kRGBAToV
movdqa xmm7, xmmword ptr kRGBAToU
sub edi, edx // stride from u to v
convertloop:
@@ -1964,21 +1972,21 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
// Convert 16 pixels: 16 UV and 16 Y.
#define YUVTORGB_AVX2(YuvConstants) __asm { \
/* Step 1: Find 8 UV contributions to 16 R,G,B values */ \
__asm vpmaddubsw ymm2, ymm0, YuvConstants.kUVToR /* scale R UV */ \
__asm vpmaddubsw ymm1, ymm0, YuvConstants.kUVToG /* scale G UV */ \
__asm vpmaddubsw ymm0, ymm0, YuvConstants.kUVToB /* scale B UV */ \
__asm vmovdqu ymm3, YuvConstants.kUVBiasR \
__asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\
__asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\
__asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\
__asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \
__asm vpsubw ymm2, ymm3, ymm2 \
__asm vmovdqu ymm3, YuvConstants.kUVBiasG \
__asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \
__asm vpsubw ymm1, ymm3, ymm1 \
__asm vmovdqu ymm3, YuvConstants.kUVBiasB \
__asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \
__asm vpsubw ymm0, ymm3, ymm0 \
/* Step 2: Find Y contribution to 16 R,G,B values */ \
__asm vmovdqu xmm3, [eax] /* NOLINT */ \
__asm lea eax, [eax + 16] \
__asm vpermq ymm3, ymm3, 0xd8 \
__asm vpunpcklbw ymm3, ymm3, ymm3 \
__asm vpmulhuw ymm3, ymm3, YuvConstants.kYToRgb \
__asm vpmulhuw ymm3, ymm3, ymmword ptr [YuvConstants + KYTORGB] \
__asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
__asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
__asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
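
After this rewrite the macro argument only needs to denote a base address the assembler understands, rather than a typed struct. As an illustrative expansion of one load, assuming a YuvConstants table named kYuvConstants (hypothetical here; the caller is not shown in this diff):

// Hypothetical expansion for YUVTORGB_AVX2(kYuvConstants):
//   vmovdqu ymm3, ymmword ptr [kYuvConstants + KUVBIASR]  // bias table at byte 160
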
@@ -2393,20 +2401,20 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
__asm movdqa xmm1, xmm0 \
__asm movdqa xmm2, xmm0 \
__asm movdqa xmm3, xmm0 \
__asm movdqa xmm0, YuvConstants.kUVBiasB /* unbias back to signed */ \
__asm pmaddubsw xmm1, YuvConstants.kUVToB /* scale B UV */ \
__asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \
__asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \
__asm psubw xmm0, xmm1 \
__asm movdqa xmm1, YuvConstants.kUVBiasG \
__asm pmaddubsw xmm2, YuvConstants.kUVToG /* scale G UV */ \
__asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \
__asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \
__asm psubw xmm1, xmm2 \
__asm movdqa xmm2, YuvConstants.kUVBiasR \
__asm pmaddubsw xmm3, YuvConstants.kUVToR /* scale R UV */ \
__asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \
__asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \
__asm psubw xmm2, xmm3 \
/* Step 2: Find Y contribution to 8 R,G,B values */ \
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \
__asm lea eax, [eax + 8] \
__asm punpcklbw xmm3, xmm3 \
__asm pmulhuw xmm3, YuvConstants.kYToRgb \
__asm pmulhuw xmm3, xmmword ptr [YuvConstants + KYTORGB] \
__asm paddsw xmm0, xmm3 /* B += Y */ \
__asm paddsw xmm1, xmm3 /* G += Y */ \
__asm paddsw xmm2, xmm3 /* R += Y */ \
@@ -2592,8 +2600,8 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
mov edx, [esp + 8 + 16] // rgb24
mov ecx, [esp + 8 + 20] // width
sub edi, esi
movdqa xmm5, kShuffleMaskARGBToRGB24_0
movdqa xmm6, kShuffleMaskARGBToRGB24
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
convertloop:
READYUV422
@@ -2626,8 +2634,8 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
mov edx, [esp + 8 + 16] // raw
mov ecx, [esp + 8 + 20] // width
sub edi, esi
movdqa xmm5, kShuffleMaskARGBToRAW_0
movdqa xmm6, kShuffleMaskARGBToRAW
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
convertloop:
READYUV422
@@ -3045,7 +3053,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
movdqa xmm5, kShuffleMirror
movdqa xmm5, xmmword ptr kShuffleMirror
convertloop:
movdqu xmm0, [eax - 16 + ecx]
@@ -3066,7 +3074,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
vbroadcastf128 ymm5, kShuffleMirror
vbroadcastf128 ymm5, xmmword ptr kShuffleMirror
convertloop:
vmovdqu ymm0, [eax - 32 + ecx]
@@ -3123,7 +3131,7 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // width
movdqa xmm1, kShuffleMirrorUV
movdqa xmm1, xmmword ptr kShuffleMirrorUV
lea eax, [eax + ecx * 2 - 16]
sub edi, edx
@@ -3177,7 +3185,7 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
vmovdqu ymm5, kARGBShuffleMirror_AVX2
vmovdqu ymm5, ymmword ptr kARGBShuffleMirror_AVX2
convertloop:
vpermd ymm0, ymm5, [eax - 32 + ecx * 4] // permute dword order
@@ -4133,7 +4141,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
movdqa xmm0, xmm3 // src argb
pxor xmm3, xmm4 // ~alpha
movdqu xmm2, [esi] // _r_b
pshufb xmm3, kShuffleAlpha // alpha
pshufb xmm3, xmmword ptr kShuffleAlpha // alpha
pand xmm2, xmm6 // _r_b
paddw xmm3, xmm7 // 256 - alpha
pmullw xmm2, xmm3 // _r_b * alpha
@@ -4162,7 +4170,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
movdqa xmm0, xmm3 // src argb
pxor xmm3, xmm4 // ~alpha
movd xmm2, [esi] // _r_b
pshufb xmm3, kShuffleAlpha // alpha
pshufb xmm3, xmmword ptr kShuffleAlpha // alpha
pand xmm2, xmm6 // _r_b
paddw xmm3, xmm7 // 256 - alpha
pmullw xmm2, xmm3 // _r_b * alpha
@@ -4246,8 +4254,8 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
mov ecx, [esp + 12] // width
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
pslld xmm3, 24
movdqa xmm4, kShuffleAlpha0
movdqa xmm5, kShuffleAlpha1
movdqa xmm4, xmmword ptr kShuffleAlpha0
movdqa xmm5, xmmword ptr kShuffleAlpha1
convertloop:
movdqu xmm0, [eax] // read 4 pixels
@@ -4289,7 +4297,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
vbroadcastf128 ymm4,kShuffleAlpha_AVX2
vbroadcastf128 ymm4, xmmword ptr kShuffleAlpha_AVX2
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
vpslld ymm5, ymm5, 24
@@ -4381,7 +4389,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
vbroadcastf128 ymm4, kUnattenShuffleAlpha_AVX2
vbroadcastf128 ymm4, xmmword ptr kUnattenShuffleAlpha_AVX2
convertloop:
vmovdqu ymm6, [eax] // read 8 pixels.
@@ -4416,7 +4424,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
vbroadcastf128 ymm5, kUnattenShuffleAlpha_AVX2
vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
push esi
push edi
@@ -4480,8 +4488,8 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_argb */
mov ecx, [esp + 12] /* width */
movdqa xmm4, kARGBToYJ
movdqa xmm5, kAddYJ64
movdqa xmm4, xmmword ptr kARGBToYJ
movdqa xmm5, xmmword ptr kAddYJ64
convertloop:
movdqu xmm0, [eax] // G
@@ -4538,9 +4546,9 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] /* dst_argb */
mov ecx, [esp + 8] /* width */
movdqa xmm2, kARGBToSepiaB
movdqa xmm3, kARGBToSepiaG
movdqa xmm4, kARGBToSepiaR
movdqa xmm2, xmmword ptr kARGBToSepiaB
movdqa xmm3, xmmword ptr kARGBToSepiaG
movdqa xmm4, xmmword ptr kARGBToSepiaR
convertloop:
movdqu xmm0, [eax] // B
@@ -6245,7 +6253,7 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
// 4 pixel loop.
convertloop:
movdqu xmm0, qword ptr [eax] // generate luma ptr
movdqu xmm0, xmmword ptr [eax] // generate luma ptr
pmaddubsw xmm0, xmm3
phaddw xmm0, xmm0
pand xmm0, xmm4 // mask out low bits
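
This last hunk is a real fix rather than a rename: movdqu moves 16 bytes, so the operand belongs with `xmmword ptr`, not `qword ptr`. MSVC ignores the mismatched size hint; clang-cl diagnoses it. A stand-alone sketch (hypothetical function, not from the diff):

#include <stdint.h>

void Load16(const uint8_t* src) {
  __asm {
    mov eax, src
    // movdqu xmm0, qword ptr [eax]  // 8-byte hint on a 16-byte move: cl shrugs, clang-cl errors
    movdqu xmm0, xmmword ptr [eax]   // explicit 16-byte operand size
  }
}
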