Commit ba1f5269 authored by fbarchard@google.com's avatar fbarchard@google.com

rewrite of I420ToRGB565 etc using row functions

BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/345004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@131 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 20005e64
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 130
Version: 131
License: BSD
License File: LICENSE
......
......@@ -30,7 +30,6 @@
'include/libyuv/video_common.h',
# headers
'source/conversion_tables.h',
'source/rotate_priv.h',
'source/row.h',
......
This diff is collapsed.
This diff is collapsed.
......@@ -1621,6 +1621,217 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
return 0;
}
// Convert I420 to RGB565.
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToRGB565Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_SSSE3;
} else
#endif
{
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToRGB565Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB1555.
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToARGB1555Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTOARGB1555ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_SSSE3;
} else
#endif
{
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToARGB1555Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB4444.
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToARGB4444Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTOARGB4444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_SSSE3;
} else
#endif
{
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToARGB4444Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RGB24.
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToRGB24Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_SSSE3;
} else
#endif
{
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToRGB24Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RAW.
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToRAWRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_SSSE3;
} else
#endif
{
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToRAWRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......@@ -1875,31 +2086,31 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
return 0;
}
// Convert BG24 to ARGB.
int BG24ToARGB(const uint8* src_bg24, int src_stride_bg24,
// Convert RGB24 to ARGB.
int BG24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (height < 0) {
height = -height;
src_bg24 = src_bg24 + (height - 1) * src_stride_bg24;
src_stride_bg24 = -src_stride_bg24;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
void (*BG24ToARGBRow)(const uint8* src_bg24, uint8* dst_argb, int pix);
#if defined(HAS_BG24TOARGBROW_SSSE3)
void (*RGB24ToARGBRow)(const uint8* src_rgb24, uint8* dst_argb, int pix);
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_bg24, 16) && IS_ALIGNED(src_stride_bg24, 16) &&
IS_ALIGNED(src_rgb24, 16) && IS_ALIGNED(src_stride_rgb24, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
BG24ToARGBRow = BG24ToARGBRow_SSSE3;
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
} else
#endif
{
BG24ToARGBRow = BG24ToARGBRow_C;
RGB24ToARGBRow = RGB24ToARGBRow_C;
}
for (int y = 0; y < height; ++y) {
BG24ToARGBRow(src_bg24, dst_argb, width);
src_bg24 += src_stride_bg24;
RGB24ToARGBRow(src_rgb24, dst_argb, width);
src_rgb24 += src_stride_rgb24;
dst_argb += dst_stride_argb;
}
return 0;
......
......@@ -25,7 +25,7 @@
!defined(YUV_DISABLE_ASM)
#define HAS_ABGRTOARGBROW_SSSE3
#define HAS_BGRATOARGBROW_SSSE3
#define HAS_BG24TOARGBROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
......@@ -48,6 +48,11 @@
#define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
#define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
#define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
#define HAS_FASTCONVERTYUVTORGB565ROW_SSSE3
#define HAS_FASTCONVERTYUVTOARGB1555ROW_SSSE3
#define HAS_FASTCONVERTYUVTOARGB4444ROW_SSSE3
#define HAS_FASTCONVERTYUVTORGB24ROW_SSSE3
#define HAS_FASTCONVERTYUVTORAWROW_SSSE3
#define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
#define HAS_REVERSE_ROW_SSSE3
#define HAS_REVERSE_ROW_SSE2
......@@ -64,6 +69,11 @@
#define HAS_FASTCONVERTYUVTOARGBROW_NEON
#define HAS_FASTCONVERTYUVTOBGRAROW_NEON
#define HAS_FASTCONVERTYUVTOABGRROW_NEON
#define HAS_FASTCONVERTYUVTORGB565ROW_NEON
#define HAS_FASTCONVERTYUVTOARGB1555ROW_NEON
#define HAS_FASTCONVERTYUVTOARGB4444ROW_NEON
#define HAS_FASTCONVERTYUVTORGB24ROW_NEON
#define HAS_FASTCONVERTYUVTORAWROW_NEON
#endif
#ifdef __cplusplus
......@@ -92,6 +102,41 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTORGB565ROW_NEON
void FastConvertYUVToRGB565Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGB1555ROW_NEON
void FastConvertYUVToARGB1555Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGB4444ROW_NEON
void FastConvertYUVToARGB4444Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTORGB24ROW_NEON
void FastConvertYUVToRGB24Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTORAWROW_NEON
void FastConvertYUVToRAWRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
......@@ -104,7 +149,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#endif
#if defined(HAS_BG24TOARGBROW_SSSE3) && defined(HAS_ARGBTOYROW_SSSE3)
#if defined(HAS_RGB24TOARGBROW_SSSE3) && defined(HAS_ARGBTOYROW_SSSE3)
#define HASRGB24TOYROW_SSSE3
#endif
#ifdef HASRGB24TOYROW_SSSE3
......@@ -163,11 +208,11 @@ void ARGB1555ToUVRow_C(const uint8* src_argb0, int src_stride_argb,
void ARGB4444ToUVRow_C(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#ifdef HAS_BG24TOARGBROW_SSSE3
#ifdef HAS_RGB24TOARGBROW_SSSE3
void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix);
void RAWToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix);
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
// TODO(fbarchard): SSE2 565 etc
//void RGB565ToARGBRow_SSE2(const uint8* src_rgb, uint8* dst_argb, int pix);
//void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
......@@ -177,8 +222,8 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
#endif
void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix);
void RAWToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix);
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RAWToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
......@@ -222,6 +267,36 @@ void FastConvertYUVToABGRRow_C(const uint8* y_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB565Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB1555Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB4444Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB24Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRAWRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -293,6 +368,35 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB1555Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB4444Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
......
This diff is collapsed.
......@@ -49,8 +49,8 @@ CONST uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
};
// Shuffle table for converting BG24 to ARGB.
CONST uvec8 kShuffleMaskBG24ToARGB = {
// Shuffle table for converting RGB24 to ARGB.
CONST uvec8 kShuffleMaskRGB24ToARGB = {
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
};
......@@ -143,7 +143,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
);
}
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
"pslld $0x18,%%xmm5 \n"
......@@ -172,10 +172,10 @@ void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
"lea 0x40(%1),%1 \n"
"sub $0x10,%2 \n"
"ja 1b \n"
: "+r"(src_bg24), // %0
: "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
: "m"(kShuffleMaskBG24ToARGB) // %3
: "m"(kShuffleMaskRGB24ToARGB) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
......
......@@ -65,8 +65,8 @@ static const uvec8 kAddUV128 = {
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
};
// Shuffle table for converting BG24 to ARGB.
static const uvec8 kShuffleMaskBG24ToARGB = {
// Shuffle table for converting RGB24 to ARGB.
static const uvec8 kShuffleMaskRGB24ToARGB = {
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
};
......@@ -153,14 +153,14 @@ __asm {
}
__declspec(naked)
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
__asm {
mov eax, [esp + 4] // src_bg24
mov eax, [esp + 4] // src_rgb24
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // pix
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
movdqa xmm4, kShuffleMaskBG24ToARGB
movdqa xmm4, kShuffleMaskRGB24ToARGB
convertloop:
movdqa xmm0, [eax]
......@@ -229,6 +229,7 @@ __asm {
}
}
// TODO(fbarchard): Port ARGB4444ToARGBRow_SSE2 to gcc
__declspec(naked)
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
int pix) {
......@@ -243,8 +244,8 @@ __asm {
mov ecx, [esp + 12] // pix
convertloop:
movq xmm0, qword ptr [eax] // fetch 4 pixels of bgra4444
lea eax, [eax + 8]
movdqa xmm0, qword ptr [eax] // fetch 8 pixels of bgra4444
lea eax, [eax + 16]
movdqa xmm2, xmm0
pand xmm0, xmm4 // mask low nibbles
pand xmm2, xmm5 // mask high nibbles
......@@ -254,10 +255,13 @@ __asm {
psrlw xmm3, 4
por xmm0, xmm1
por xmm2, xmm3
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2
punpckhbw xmm1, xmm2
movdqa [edx], xmm0 // store 4 pixels of ARGB
lea edx, [edx + 16]
sub ecx, 4
movdqa [edx + 16], xmm1 // store next 4 pixels of ARGB
lea edx, [edx + 32]
sub ecx, 8
ja convertloop
ret
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment