Commit 28427a53 authored by Frank Barchard's avatar Frank Barchard

I444ToABGR for android

Reimplements I444ToARGB as a matrix function.
new I444ToABGR as matrix functions with wrappers and any functions.
Allows for future J444 and H444 versions.
I444ToABGR user level function added.

BUG=libyuv:490, libyuv:449
R=harryjin@google.com

Review URL: https://codereview.chromium.org/1355733002 .
parent 158d4079
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1485
Version: 1486
License: BSD
License File: LICENSE
......
......@@ -60,6 +60,14 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
......
......@@ -56,6 +56,26 @@ extern "C" {
#endif // clang >= 3.5
#endif // __clang__
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
......@@ -163,6 +183,7 @@ extern "C" {
#endif
// The following are available on x64 Visual C and clangcl.
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
(!defined(__clang__) || defined(__SSSE3__))
#define HAS_I422TOARGBROW_SSSE3
......@@ -171,27 +192,17 @@ extern "C" {
#define HAS_I422TOABGRMATRIXROW_SSSE3
#endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// The following are available for Visual C and clangcl 32 bit:
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_I444TOABGRROW_SSSE3
#define HAS_I444TOARGBMATRIXROW_SSSE3
#define HAS_I444TOABGRMATRIXROW_SSSE3
#endif
// The following are available for AVX2 Visual C and clangcl 32 bit:
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_ARGB1555TOARGBROW_AVX2
......@@ -206,12 +217,15 @@ extern "C" {
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I444TOARGBROW_AVX2
#define HAS_I444TOABGRROW_AVX2
#define HAS_J400TOARGBROW_AVX2
#define HAS_NV12TOARGBROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_NV21TOARGBROW_AVX2
#define HAS_NV21TORGB565ROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
#define HAS_I444TOARGBMATRIXROW_AVX2
#define HAS_I444TOABGRMATRIXROW_AVX2
#endif
// The following are available on all x86 platforms, but
......@@ -1030,6 +1044,11 @@ void I444ToARGBRow_C(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I444ToABGRRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1166,6 +1185,18 @@ void I422ToABGRRow_AVX2(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I444ToARGBMatrixRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I444ToARGBMatrixRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I444ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1176,6 +1207,28 @@ void I444ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I444ToABGRMatrixRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width);
void I444ToABGRMatrixRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width);
void I444ToABGRRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
int width);
void I444ToABGRRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
int width);
void I422ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1382,6 +1435,16 @@ void I444ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I444ToABGRRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
int width);
void I444ToABGRRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
int width);
void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1485
#define LIBYUV_VERSION 1486
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -112,6 +112,74 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
return 0;
}
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*I444ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I444ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
// Coalesce rows.
if (src_stride_y == width &&
src_stride_u == width &&
src_stride_v == width &&
dst_stride_abgr == width * 4) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
}
#if defined(HAS_I444TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I444ToABGRRow = I444ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I444ToABGRRow = I444ToABGRRow_SSSE3;
}
}
#endif
#if defined(HAS_I444TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I444ToABGRRow = I444ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I444ToABGRRow = I444ToABGRRow_AVX2;
}
}
#endif
#if defined(HAS_I444TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I444ToABGRRow = I444ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I444ToABGRRow = I444ToABGRRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I444ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}
// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
......
......@@ -62,6 +62,9 @@ ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif // HAS_I444TOARGBROW_SSSE3
#ifdef HAS_I444TOABGRROW_SSSE3
ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
#endif
......@@ -95,6 +98,9 @@ ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
#ifdef HAS_I444TOARGBROW_AVX2
ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I444TOABGRROW_AVX2
ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I411TOARGBROW_AVX2
ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
#endif
......
......@@ -1149,6 +1149,30 @@ void I444ToARGBRow_C(const uint8* src_y,
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
}
}
void I444ToABGRRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
rgb_buf[3] = 255;
YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
src_v += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
}
}
#else
void I444ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
......@@ -1166,6 +1190,23 @@ void I444ToARGBRow_C(const uint8* src_y,
rgb_buf += 4; // Advance 1 pixel.
}
}
void I444ToABGRRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0],
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
rgb_buf[3] = 255;
src_y += 1;
src_u += 1;
src_v += 1;
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
// Also used for 420
......@@ -2319,6 +2360,19 @@ ANYYUV(I422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvConstants)
ANYYUV(J422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvJConstants)
ANYYUV(H422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvHConstants)
#endif
// TODO(fbarchard): Neon, J444, H444 versions.
#ifdef HAS_I444TOARGBMATRIXROW_SSSE3
ANYYUV(I444ToARGBRow_SSSE3, I444ToARGBMatrixRow_SSSE3, kYuvConstants)
#endif
#ifdef HAS_I444TOARGBMATRIXROW_AVX2
ANYYUV(I444ToARGBRow_AVX2, I444ToARGBMatrixRow_AVX2, kYuvConstants)
#endif
#ifdef HAS_I444TOABGRMATRIXROW_SSSE3
ANYYUV(I444ToABGRRow_SSSE3, I444ToABGRMatrixRow_SSSE3, kYuvConstants)
#endif
#ifdef HAS_I444TOABGRMATRIXROW_AVX2
ANYYUV(I444ToABGRRow_AVX2, I444ToABGRMatrixRow_AVX2, kYuvConstants)
#endif
// Maximum temporary width for wrappers to process at a time, in pixels.
#define MAXTWIDTH 2048
......
......@@ -2172,41 +2172,83 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
}
#endif // HAS_I422TOARGBMATRIXROW_AVX2
#ifdef HAS_I444TOARGBROW_AVX2
#ifdef HAS_I444TOARGBMATRIXROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
void I444ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV444_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
pop ebp
pop edi
pop esi
vzeroupper
ret
}
}
#endif // HAS_I444TOARGBROW_AVX2
#endif // HAS_I444TOARGBMATRIXROW_AVX2
#ifdef HAS_I444TOABGRMATRIXROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width) {
__asm {
push esi
push edi
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV444_AVX2
YUVTORGB_AVX2(ebp)
STOREABGR_AVX2
sub ecx, 16
jg convertloop
pop ebp
pop edi
pop esi
vzeroupper
ret
}
}
#endif // HAS_I444TOABGRMATRIXROW_AVX2
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
......@@ -2608,30 +2650,71 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READYUV444
YUVTORGB(kYuvConstants)
YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
ret
}
}
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
__declspec(naked)
void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width) {
__asm {
push esi
push edi
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READYUV444
YUVTORGB(ebp)
STOREABGR
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
ret
......
......@@ -513,6 +513,7 @@ TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4)
TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1, 2, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1, 1, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1, 1, ARGB, 4)
TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment