Commit ba1f5269 authored by fbarchard@google.com

rewrite of I420ToRGB565 etc using row functions

BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/345004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@131 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 20005e64
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 130 Version: 131
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
'include/libyuv/video_common.h', 'include/libyuv/video_common.h',
# headers # headers
'source/conversion_tables.h',
'source/rotate_priv.h', 'source/rotate_priv.h',
'source/row.h', 'source/row.h',
......
This diff is collapsed.
This diff is collapsed.
...@@ -1621,6 +1621,217 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, ...@@ -1621,6 +1621,217 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
return 0; return 0;
} }
// Convert I420 to RGB565.
// Converts the image one row at a time: each Y row, together with the current
// U and V rows, is handed to a row converter that is chosen once up front
// (NEON, SSSE3 or portable C).  The U and V pointers advance after every
// second Y row.
//   src_y, src_u, src_v: source planes; src_stride_*: amount added to the
//       matching pointer when moving to its next row.
//   dst_argb: destination buffer (despite the name it receives the output of
//       the RGB565 row converter); dst_stride_argb: amount added per row.
//   width, height: image dimensions.  A negative height inverts the image by
//       writing the destination rows bottom-up.
// Returns 0 on success, -1 if a plane pointer is NULL or width is not positive.
int I420ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_u, int src_stride_u,
                 const uint8* src_v, int src_stride_v,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  // Reject arguments the row converters cannot handle.
  // NOTE(review): IS_ALIGNED(0, n) presumably holds, so without this check a
  // width of 0 could still select a SIMD row function - confirm.
  if (!src_y || !src_u || !src_v || !dst_argb || width <= 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Default to the portable C row converter; a SIMD version replaces it below
  // when the CPU supports it and the width/alignment requirements are met.
  void (*FastConvertYUVToRGB565Row)(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
                                    uint8* rgb_buf,
                                    int width) = FastConvertYUVToRGB565Row_C;
#if defined(HAS_FASTCONVERTYUVTORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_NEON;
  }
#elif defined(HAS_FASTCONVERTYUVTORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    FastConvertYUVToRGB565Row(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // One U/V row is shared by two consecutive Y rows.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to ARGB1555.
// Converts the image one row at a time: each Y row, together with the current
// U and V rows, is handed to a row converter that is chosen once up front
// (NEON, SSSE3 or portable C).  The U and V pointers advance after every
// second Y row.
//   src_y, src_u, src_v: source planes; src_stride_*: amount added to the
//       matching pointer when moving to its next row.
//   dst_argb: destination buffer, filled by the ARGB1555 row converter;
//       dst_stride_argb: amount added per row.
//   width, height: image dimensions.  A negative height inverts the image by
//       writing the destination rows bottom-up.
// Returns 0 on success, -1 if a plane pointer is NULL or width is not positive.
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
                   const uint8* src_u, int src_stride_u,
                   const uint8* src_v, int src_stride_v,
                   uint8* dst_argb, int dst_stride_argb,
                   int width, int height) {
  // Reject arguments the row converters cannot handle.
  // NOTE(review): IS_ALIGNED(0, n) presumably holds, so without this check a
  // width of 0 could still select a SIMD row function - confirm.
  if (!src_y || !src_u || !src_v || !dst_argb || width <= 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Default to the portable C row converter; a SIMD version replaces it below
  // when the CPU supports it and the width/alignment requirements are met.
  void (*FastConvertYUVToARGB1555Row)(const uint8* y_buf,
                                      const uint8* u_buf,
                                      const uint8* v_buf,
                                      uint8* rgb_buf,
                                      int width) = FastConvertYUVToARGB1555Row_C;
#if defined(HAS_FASTCONVERTYUVTOARGB1555ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_NEON;
  }
#elif defined(HAS_FASTCONVERTYUVTOARGB1555ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    FastConvertYUVToARGB1555Row(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // One U/V row is shared by two consecutive Y rows.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to ARGB4444.
// Converts the image one row at a time: each Y row, together with the current
// U and V rows, is handed to a row converter that is chosen once up front
// (NEON, SSSE3 or portable C).  The U and V pointers advance after every
// second Y row.
//   src_y, src_u, src_v: source planes; src_stride_*: amount added to the
//       matching pointer when moving to its next row.
//   dst_argb: destination buffer, filled by the ARGB4444 row converter;
//       dst_stride_argb: amount added per row.
//   width, height: image dimensions.  A negative height inverts the image by
//       writing the destination rows bottom-up.
// Returns 0 on success, -1 if a plane pointer is NULL or width is not positive.
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
                   const uint8* src_u, int src_stride_u,
                   const uint8* src_v, int src_stride_v,
                   uint8* dst_argb, int dst_stride_argb,
                   int width, int height) {
  // Reject arguments the row converters cannot handle.
  // NOTE(review): IS_ALIGNED(0, n) presumably holds, so without this check a
  // width of 0 could still select a SIMD row function - confirm.
  if (!src_y || !src_u || !src_v || !dst_argb || width <= 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Default to the portable C row converter; a SIMD version replaces it below
  // when the CPU supports it and the width/alignment requirements are met.
  void (*FastConvertYUVToARGB4444Row)(const uint8* y_buf,
                                      const uint8* u_buf,
                                      const uint8* v_buf,
                                      uint8* rgb_buf,
                                      int width) = FastConvertYUVToARGB4444Row_C;
#if defined(HAS_FASTCONVERTYUVTOARGB4444ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_NEON;
  }
#elif defined(HAS_FASTCONVERTYUVTOARGB4444ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    FastConvertYUVToARGB4444Row(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // One U/V row is shared by two consecutive Y rows.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to RGB24.
// Converts the image one row at a time: each Y row, together with the current
// U and V rows, is handed to a row converter that is chosen once up front
// (NEON, SSSE3 or portable C).  The U and V pointers advance after every
// second Y row.
//   src_y, src_u, src_v: source planes; src_stride_*: amount added to the
//       matching pointer when moving to its next row.
//   dst_argb: destination buffer (despite the name it receives the output of
//       the RGB24 row converter); dst_stride_argb: amount added per row.
//   width, height: image dimensions.  A negative height inverts the image by
//       writing the destination rows bottom-up.
// Returns 0 on success, -1 if a plane pointer is NULL or width is not positive.
int I420ToRGB24(const uint8* src_y, int src_stride_y,
                const uint8* src_u, int src_stride_u,
                const uint8* src_v, int src_stride_v,
                uint8* dst_argb, int dst_stride_argb,
                int width, int height) {
  // Reject arguments the row converters cannot handle.
  // NOTE(review): IS_ALIGNED(0, n) presumably holds, so without this check a
  // width of 0 could still select a SIMD row function - confirm.
  if (!src_y || !src_u || !src_v || !dst_argb || width <= 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Default to the portable C row converter; a SIMD version replaces it below
  // when the CPU supports it and the width/alignment requirements are met.
  void (*FastConvertYUVToRGB24Row)(const uint8* y_buf,
                                   const uint8* u_buf,
                                   const uint8* v_buf,
                                   uint8* rgb_buf,
                                   int width) = FastConvertYUVToRGB24Row_C;
#if defined(HAS_FASTCONVERTYUVTORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_NEON;
  }
#elif defined(HAS_FASTCONVERTYUVTORGB24ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    FastConvertYUVToRGB24Row(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // One U/V row is shared by two consecutive Y rows.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to RAW.
// Converts the image one row at a time: each Y row, together with the current
// U and V rows, is handed to a row converter that is chosen once up front
// (NEON, SSSE3 or portable C).  The U and V pointers advance after every
// second Y row.
//   src_y, src_u, src_v: source planes; src_stride_*: amount added to the
//       matching pointer when moving to its next row.
//   dst_argb: destination buffer (despite the name it receives the output of
//       the RAW row converter); dst_stride_argb: amount added per row.
//   width, height: image dimensions.  A negative height inverts the image by
//       writing the destination rows bottom-up.
// Returns 0 on success, -1 if a plane pointer is NULL or width is not positive.
int I420ToRAW(const uint8* src_y, int src_stride_y,
              const uint8* src_u, int src_stride_u,
              const uint8* src_v, int src_stride_v,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  // Reject arguments the row converters cannot handle.
  // NOTE(review): IS_ALIGNED(0, n) presumably holds, so without this check a
  // width of 0 could still select a SIMD row function - confirm.
  if (!src_y || !src_u || !src_v || !dst_argb || width <= 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Default to the portable C row converter; a SIMD version replaces it below
  // when the CPU supports it and the width/alignment requirements are met.
  void (*FastConvertYUVToRAWRow)(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
                                 uint8* rgb_buf,
                                 int width) = FastConvertYUVToRAWRow_C;
#if defined(HAS_FASTCONVERTYUVTORAWROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_NEON;
  }
#elif defined(HAS_FASTCONVERTYUVTORAWROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    FastConvertYUVToRAWRow(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // One U/V row is shared by two consecutive Y rows.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I422 to ARGB. // Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y, int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
...@@ -1875,31 +2086,31 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, ...@@ -1875,31 +2086,31 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
return 0; return 0;
} }
// Convert BG24 to ARGB. // Convert RGB24 to ARGB.
int BG24ToARGB(const uint8* src_bg24, int src_stride_bg24, int BG24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height) { int width, int height) {
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_bg24 = src_bg24 + (height - 1) * src_stride_bg24; src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_bg24 = -src_stride_bg24; src_stride_rgb24 = -src_stride_rgb24;
} }
void (*BG24ToARGBRow)(const uint8* src_bg24, uint8* dst_argb, int pix); void (*RGB24ToARGBRow)(const uint8* src_rgb24, uint8* dst_argb, int pix);
#if defined(HAS_BG24TOARGBROW_SSSE3) #if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_bg24, 16) && IS_ALIGNED(src_stride_bg24, 16) && IS_ALIGNED(src_rgb24, 16) && IS_ALIGNED(src_stride_rgb24, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
BG24ToARGBRow = BG24ToARGBRow_SSSE3; RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
} else } else
#endif #endif
{ {
BG24ToARGBRow = BG24ToARGBRow_C; RGB24ToARGBRow = RGB24ToARGBRow_C;
} }
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
BG24ToARGBRow(src_bg24, dst_argb, width); RGB24ToARGBRow(src_rgb24, dst_argb, width);
src_bg24 += src_stride_bg24; src_rgb24 += src_stride_rgb24;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
} }
return 0; return 0;
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
!defined(YUV_DISABLE_ASM) !defined(YUV_DISABLE_ASM)
#define HAS_ABGRTOARGBROW_SSSE3 #define HAS_ABGRTOARGBROW_SSSE3
#define HAS_BGRATOARGBROW_SSSE3 #define HAS_BGRATOARGBROW_SSSE3
#define HAS_BG24TOARGBROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3
#define HAS_RAWTOYROW_SSSE3 #define HAS_RAWTOYROW_SSSE3
...@@ -48,6 +48,11 @@ ...@@ -48,6 +48,11 @@
#define HAS_FASTCONVERTYUVTOARGBROW_SSSE3 #define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
#define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3 #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
#define HAS_FASTCONVERTYUVTOABGRROW_SSSE3 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
// Feature macros for the SSSE3 row converters. I420ToRGB565, I420ToARGB1555,
// I420ToARGB4444, I420ToRGB24 and I420ToRAW test these to decide whether the
// matching FastConvertYUVTo*Row_SSSE3 function may be selected.
#define HAS_FASTCONVERTYUVTORGB565ROW_SSSE3
#define HAS_FASTCONVERTYUVTOARGB1555ROW_SSSE3
#define HAS_FASTCONVERTYUVTOARGB4444ROW_SSSE3
#define HAS_FASTCONVERTYUVTORGB24ROW_SSSE3
#define HAS_FASTCONVERTYUVTORAWROW_SSSE3
#define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3 #define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
#define HAS_REVERSE_ROW_SSSE3 #define HAS_REVERSE_ROW_SSSE3
#define HAS_REVERSE_ROW_SSE2 #define HAS_REVERSE_ROW_SSE2
...@@ -64,6 +69,11 @@ ...@@ -64,6 +69,11 @@
#define HAS_FASTCONVERTYUVTOARGBROW_NEON #define HAS_FASTCONVERTYUVTOARGBROW_NEON
#define HAS_FASTCONVERTYUVTOBGRAROW_NEON #define HAS_FASTCONVERTYUVTOBGRAROW_NEON
#define HAS_FASTCONVERTYUVTOABGRROW_NEON #define HAS_FASTCONVERTYUVTOABGRROW_NEON
// Feature macros for the NEON row converters. They guard the matching
// FastConvertYUVTo*Row_NEON prototypes and are tested by I420ToRGB565,
// I420ToARGB1555, I420ToARGB4444, I420ToRGB24 and I420ToRAW when picking a
// row function.
#define HAS_FASTCONVERTYUVTORGB565ROW_NEON
#define HAS_FASTCONVERTYUVTOARGB1555ROW_NEON
#define HAS_FASTCONVERTYUVTOARGB4444ROW_NEON
#define HAS_FASTCONVERTYUVTORGB24ROW_NEON
#define HAS_FASTCONVERTYUVTORAWROW_NEON
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
...@@ -92,6 +102,41 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf, ...@@ -92,6 +102,41 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#endif #endif
// NEON row converters for the RGB565, ARGB1555, ARGB4444, RGB24 and RAW
// outputs. Each prototype is only declared when its HAS_..._NEON macro is
// defined. The I420To* functions call these once per image row with the
// current Y, U and V row pointers (y_buf, u_buf, v_buf), the destination row
// (rgb_buf) and the image width, and only select them when width is a
// multiple of 16.
#ifdef HAS_FASTCONVERTYUVTORGB565ROW_NEON
void FastConvertYUVToRGB565Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGB1555ROW_NEON
void FastConvertYUVToARGB1555Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGB4444ROW_NEON
void FastConvertYUVToARGB4444Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTORGB24ROW_NEON
void FastConvertYUVToRGB24Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTORAWROW_NEON
void FastConvertYUVToRAWRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
...@@ -104,7 +149,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -104,7 +149,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#endif #endif
#if defined(HAS_BG24TOARGBROW_SSSE3) && defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_RGB24TOARGBROW_SSSE3) && defined(HAS_ARGBTOYROW_SSSE3)
#define HASRGB24TOYROW_SSSE3 #define HASRGB24TOYROW_SSSE3
#endif #endif
#ifdef HASRGB24TOYROW_SSSE3 #ifdef HASRGB24TOYROW_SSSE3
...@@ -163,11 +208,11 @@ void ARGB1555ToUVRow_C(const uint8* src_argb0, int src_stride_argb, ...@@ -163,11 +208,11 @@ void ARGB1555ToUVRow_C(const uint8* src_argb0, int src_stride_argb,
void ARGB4444ToUVRow_C(const uint8* src_argb0, int src_stride_argb, void ARGB4444ToUVRow_C(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#ifdef HAS_BG24TOARGBROW_SSSE3 #ifdef HAS_RGB24TOARGBROW_SSSE3
void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix); void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix); void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix); void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RAWToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix); void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
// TODO(fbarchard): SSE2 565 etc // TODO(fbarchard): SSE2 565 etc
//void RGB565ToARGBRow_SSE2(const uint8* src_rgb, uint8* dst_argb, int pix); //void RGB565ToARGBRow_SSE2(const uint8* src_rgb, uint8* dst_argb, int pix);
//void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); //void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
...@@ -177,8 +222,8 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); ...@@ -177,8 +222,8 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
#endif #endif
void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix); void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix); void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix); void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RAWToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix); void RAWToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
...@@ -222,6 +267,36 @@ void FastConvertYUVToABGRRow_C(const uint8* y_buf, ...@@ -222,6 +267,36 @@ void FastConvertYUVToABGRRow_C(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
// Portable C row converters for the RGB565, ARGB1555, ARGB4444, RGB24 and RAW
// outputs. The I420To* functions fall back to these when no SIMD row function
// is selected, calling them once per image row with the current Y, U and V
// row pointers (y_buf, u_buf, v_buf), the destination row (rgb_buf) and the
// image width.
void FastConvertYUVToRGB565Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB1555Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB4444Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB24Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRAWRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToARGBRow_C(const uint8* y_buf, void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -293,6 +368,35 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -293,6 +368,35 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
// SSSE3 row converters for the RGB565, ARGB1555, ARGB4444, RGB24 and RAW
// outputs. The I420To* functions call these once per image row with the
// current Y, U and V row pointers (y_buf, u_buf, v_buf), the destination row
// (rgb_buf) and the image width. They only select them when width is a
// multiple of 8 and both the destination pointer and destination stride are
// 16-aligned.
void FastConvertYUVToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB1555Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB4444Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif #endif
#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2 #ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
......
This diff is collapsed.
...@@ -49,8 +49,8 @@ CONST uvec8 kAddY16 = { ...@@ -49,8 +49,8 @@ CONST uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
}; };
// Shuffle table for converting BG24 to ARGB. // Shuffle table for converting RGB24 to ARGB.
CONST uvec8 kShuffleMaskBG24ToARGB = { CONST uvec8 kShuffleMaskRGB24ToARGB = {
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
}; };
...@@ -143,7 +143,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) { ...@@ -143,7 +143,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
); );
} }
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) { void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
asm volatile ( asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
"pslld $0x18,%%xmm5 \n" "pslld $0x18,%%xmm5 \n"
...@@ -172,10 +172,10 @@ void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) { ...@@ -172,10 +172,10 @@ void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
"lea 0x40(%1),%1 \n" "lea 0x40(%1),%1 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
"ja 1b \n" "ja 1b \n"
: "+r"(src_bg24), // %0 : "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1 "+r"(dst_argb), // %1
"+r"(pix) // %2 "+r"(pix) // %2
: "m"(kShuffleMaskBG24ToARGB) // %3 : "m"(kShuffleMaskRGB24ToARGB) // %3
: "memory", "cc" : "memory", "cc"
#if defined(__SSE2__) #if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
......
...@@ -65,8 +65,8 @@ static const uvec8 kAddUV128 = { ...@@ -65,8 +65,8 @@ static const uvec8 kAddUV128 = {
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
}; };
// Shuffle table for converting BG24 to ARGB. // Shuffle table for converting RGB24 to ARGB.
static const uvec8 kShuffleMaskBG24ToARGB = { static const uvec8 kShuffleMaskRGB24ToARGB = {
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
}; };
...@@ -153,14 +153,14 @@ __asm { ...@@ -153,14 +153,14 @@ __asm {
} }
__declspec(naked) __declspec(naked)
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) { void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
__asm { __asm {
mov eax, [esp + 4] // src_bg24 mov eax, [esp + 4] // src_rgb24
mov edx, [esp + 8] // dst_argb mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // pix mov ecx, [esp + 12] // pix
pcmpeqb xmm5, xmm5 // generate mask 0xff000000 pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24 pslld xmm5, 24
movdqa xmm4, kShuffleMaskBG24ToARGB movdqa xmm4, kShuffleMaskRGB24ToARGB
convertloop: convertloop:
movdqa xmm0, [eax] movdqa xmm0, [eax]
...@@ -229,6 +229,7 @@ __asm { ...@@ -229,6 +229,7 @@ __asm {
} }
} }
// TODO(fbarchard): Port ARGB4444ToARGBRow_SSE2 to gcc
__declspec(naked) __declspec(naked)
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
int pix) { int pix) {
...@@ -243,8 +244,8 @@ __asm { ...@@ -243,8 +244,8 @@ __asm {
mov ecx, [esp + 12] // pix mov ecx, [esp + 12] // pix
convertloop: convertloop:
movq xmm0, qword ptr [eax] // fetch 4 pixels of bgra4444 movdqa xmm0, qword ptr [eax] // fetch 8 pixels of bgra4444
lea eax, [eax + 8] lea eax, [eax + 16]
movdqa xmm2, xmm0 movdqa xmm2, xmm0
pand xmm0, xmm4 // mask low nibbles pand xmm0, xmm4 // mask low nibbles
pand xmm2, xmm5 // mask high nibbles pand xmm2, xmm5 // mask high nibbles
...@@ -254,10 +255,13 @@ __asm { ...@@ -254,10 +255,13 @@ __asm {
psrlw xmm3, 4 psrlw xmm3, 4
por xmm0, xmm1 por xmm0, xmm1
por xmm2, xmm3 por xmm2, xmm3
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2 punpcklbw xmm0, xmm2
punpckhbw xmm1, xmm2
movdqa [edx], xmm0 // store 4 pixels of ARGB movdqa [edx], xmm0 // store 4 pixels of ARGB
lea edx, [edx + 16] movdqa [edx + 16], xmm1 // store next 4 pixels of ARGB
sub ecx, 4 lea edx, [edx + 32]
sub ecx, 8
ja convertloop ja convertloop
ret ret
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment