Commit 9b63884a authored by Frank Barchard, committed by Commit Bot

Add ABGRToNV21 and ABGRToNV12

Fix ARGBToUVJRow_AVX2 constants for win32

BUG=libyuv:833, libyuv:839

Change-Id: Id4731a573d40d7a9b46fcc31c2fee295483e1ff6
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1739509
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Hirokazu Honda <hiroh@chromium.org>
parent a57b724f
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1734
+Version: 1735
License: BSD
License File: LICENSE
......
......@@ -250,6 +250,28 @@ int ARGBToNV21(const uint8_t* src_argb,
int width,
int height);
// Convert ABGR To NV12.
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert ABGR To NV21.
LIBYUV_API
int ABGRToNV21(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert ARGB To NV21.
LIBYUV_API
int ARGBToNV21(const uint8_t* src_argb,
......
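The new entry points mirror the existing ARGBToNV12/ARGBToNV21 signatures. A minimal usage sketch follows (illustrative only; the buffer sizing and helper name are not part of this change):

#include <cstdint>
#include <vector>
#include "libyuv/convert_from_argb.h"

// Illustrative caller: convert a top-down width x height ABGR image to NV12.
// NV12 needs a full-resolution Y plane plus one interleaved UV plane at half
// resolution in both dimensions.
void AbgrToNv12Sketch(const uint8_t* src_abgr, int width, int height) {
  int halfwidth = (width + 1) / 2;
  int halfheight = (height + 1) / 2;
  std::vector<uint8_t> y(static_cast<size_t>(width) * height);
  std::vector<uint8_t> uv(static_cast<size_t>(halfwidth) * 2 * halfheight);
  libyuv::ABGRToNV12(src_abgr, width * 4,      // 4 bytes per ABGR pixel
                     y.data(), width,          // Y plane and its stride
                     uv.data(), halfwidth * 2, // interleaved UV plane
                     width, height);
  // ABGRToNV21 takes the same arguments but writes the chroma plane in
  // V,U (VU) order instead of U,V.
}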
......@@ -3375,11 +3375,11 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
int width);
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
-void SwapUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void SwapUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width);
-void SwapUVRow_Any_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void SwapUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
-void SwapUVRow_Any_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_C(const uint8_t* src_ayuv,
int stride_ayuv,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1734
+#define LIBYUV_VERSION 1735
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -572,6 +572,326 @@ int ARGBToNV21(const uint8_t* src_argb,
return 0;
}
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
ABGRToYRow_C;
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_abgr || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_SSSE3;
ABGRToYRow = ABGRToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToUVRow = ABGRToUVRow_Any_AVX2;
ABGRToYRow = ABGRToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_AVX2;
ABGRToYRow = ABGRToYRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYRow = ABGRToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToUVRow = ABGRToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYRow = ABGRToYRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToUVRow = ABGRToUVRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToYRow = ABGRToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_MMI;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToUVRow = ABGRToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_MSA;
}
}
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif
{
// Allocate a row of U and a row of V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
src_abgr += src_stride_abgr * 2;
dst_y += dst_stride_y * 2;
dst_uv += dst_stride_uv;
}
if (height & 1) {
ABGRToUVRow(src_abgr, 0, row_u, row_v, width);
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
}
free_aligned_buffer_64(row_u);
}
return 0;
}
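// The loop above consumes two ABGR rows per pass: ABGRToUVRow subsamples one
// half-width row of U and one of V, MergeUVRow_ interleaves them into the
// single NV12 chroma row, and ABGRToYRow writes the two matching luma rows.
// For an odd height, the tail call passes a row stride of 0 so the chroma
// average reads the same row twice. A scalar sketch of the interleave step
// (illustrative only; not the optimized MergeUVRow kernels selected above):
static void MergeUVSketch(const uint8_t* row_u, const uint8_t* row_v,
                          uint8_t* dst_uv, int halfwidth) {
  int i;
  for (i = 0; i < halfwidth; ++i) {
    dst_uv[2 * i + 0] = row_u[i];  // U comes first in NV12
    dst_uv[2 * i + 1] = row_v[i];  // then V
  }
}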
// Same as NV12 but U and V swapped.
LIBYUV_API
int ABGRToNV21(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
ABGRToYRow_C;
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_vu, int width) = MergeUVRow_C;
if (!src_abgr || !dst_y || !dst_vu || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_SSSE3;
ABGRToYRow = ABGRToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToUVRow = ABGRToUVRow_Any_AVX2;
ABGRToYRow = ABGRToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_AVX2;
ABGRToYRow = ABGRToYRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYRow = ABGRToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToUVRow = ABGRToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYRow = ABGRToYRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToUVRow = ABGRToUVRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToYRow = ABGRToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_MMI;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToUVRow = ABGRToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_MSA;
}
}
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif
{
// Allocate a row of U and a row of V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
src_abgr += src_stride_abgr * 2;
dst_y += dst_stride_y * 2;
dst_vu += dst_stride_vu;
}
if (height & 1) {
ABGRToUVRow(src_abgr, 0, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
}
free_aligned_buffer_64(row_u);
}
return 0;
}
// Convert ARGB to YUY2.
LIBYUV_API
int ARGBToYUY2(const uint8_t* src_argb,
......
......@@ -3319,6 +3319,7 @@ void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
}
}
// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
int x;
for (x = 0; x < width; ++x) {
......
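SwapUVRow_C walks the interleaved chroma plane one UV pair at a time and writes each pair back in VU order; width counts UV pairs. A standalone scalar sketch of that operation (illustrative, not the kernel in this change):

#include <stdint.h>

static void SwapUVRowSketch(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[2 * x + 0];
    uint8_t v = src_uv[2 * x + 1];
    dst_vu[2 * x + 0] = v;  // V first in NV21
    dst_vu[2 * x + 1] = u;
  }
}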
......@@ -6793,12 +6793,11 @@ void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
#ifdef HAS_SWAPUVROW_SSSE3
// Shuffle table for reversing the bytes.
static const uvec8 kShuffleUVToVU = {1u, 0u, 3u, 2u, 5u, 4u, 7u, 6u,
9u, 8u, 11u, 10u, 13u, 12u, 15u, 14u};
-void SwapUVRow_SSSE3(const uint8_t* src_uv,
-                     uint8_t* dst_vu,
-                     int width) {
+// Convert UV plane of NV12 to VU of NV21.
+void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"movdqu %3,%%xmm5 \n"
......@@ -6815,18 +6814,16 @@ void SwapUVRow_SSSE3(const uint8_t* src_uv,
"lea 0x20(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
: "m"(kShuffleUVToVU) // %3
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
: "m"(kShuffleUVToVU) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_SWAPUVROW_SSSE3
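// For readers less familiar with the inline assembly above, the same UV -> VU
// swap can be expressed with SSSE3 intrinsics: pshufb applies kShuffleUVToVU so
// each output byte pair is the input pair reversed. This sketch is illustrative
// only (one 16-byte vector per step instead of the 32 bytes the asm kernel
// processes) and assumes width, counted in UV pairs, is a multiple of 8.
#include <tmmintrin.h>  // SSSE3 intrinsics (sketch only)
static void SwapUVRowSsse3Sketch(const uint8_t* src_uv, uint8_t* dst_vu,
                                 int width) {
  const __m128i shuf = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6,
                                     9, 8, 11, 10, 13, 12, 15, 14);
  int x;
  for (x = 0; x < width; x += 8) {  // 8 UV pairs = 16 bytes
    __m128i uv = _mm_loadu_si128((const __m128i*)(src_uv + 2 * x));
    _mm_storeu_si128((__m128i*)(dst_vu + 2 * x), _mm_shuffle_epi8(uv, shuf));
  }
}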
#ifdef HAS_SWAPUVROW_AVX2
-void SwapUVRow_AVX2(const uint8_t* src_uv,
-                    uint8_t* dst_vu,
-                    int width) {
+void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm5 \n"
......@@ -6844,10 +6841,10 @@ void SwapUVRow_AVX2(const uint8_t* src_uv,
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
: "m"(kShuffleUVToVU) // %3
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
: "m"(kShuffleUVToVU) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_SWAPUVROW_AVX2
......
......@@ -2867,7 +2867,7 @@ void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
: "cc", "memory", "q0", "q1", "q2", "q3");
}
-// Convert biplanar UV channel of NV12 to NV21
+// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"1: \n"
......
......@@ -2969,7 +2969,7 @@ void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3");
}
-// Convert biplanar UV channel of NV12 to NV21
+// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"1: \n"
......
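On ARM, the same byte-pair swap maps naturally onto vrev16, which reverses the two bytes inside every 16-bit lane. An intrinsics sketch of the idea (illustrative only; the kernels in this change are hand-written assembly, and width, counted in UV pairs, is assumed to be a multiple of 8):

#include <arm_neon.h>
#include <stdint.h>

static void SwapUVRowNeonSketch(const uint8_t* src_uv, uint8_t* dst_vu,
                                int width) {
  int x;
  for (x = 0; x < width; x += 8) {             // 8 UV pairs = 16 bytes
    uint8x16_t uv = vld1q_u8(src_uv + 2 * x);  // load interleaved UV bytes
    vst1q_u8(dst_vu + 2 * x, vrev16q_u8(uv));  // swap bytes within each pair
  }
}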
......@@ -1594,9 +1594,9 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // width
-vbroadcastf128 ymm5, xmmword ptr kAddUV128
-vbroadcastf128 ymm6, xmmword ptr kARGBToV
-vbroadcastf128 ymm7, xmmword ptr kARGBToU
+vbroadcastf128 ymm5, xmmword ptr kAddUVJ128
+vbroadcastf128 ymm6, xmmword ptr kARGBToVJ
+vbroadcastf128 ymm7, xmmword ptr kARGBToUJ
sub edi, edx // stride from u to v
convertloop:
......
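The win32 hunk is the ARGBToUVJRow_AVX2 fix from the commit message: this kernel produces full-range (JPEG) chroma, so it must broadcast the J coefficient tables, but it had been loading kAddUV128/kARGBToV/kARGBToU instead, so AVX2 output could differ from the C and SSSE3 paths in 32-bit Windows builds. One public converter that reaches this row function is ARGBToJ420; a hedged call sketch, assuming the standard J420 entry-point signature:

#include <cstdint>
#include <vector>
#include "libyuv/convert_from_argb.h"

// Illustrative only: ARGBToJ420 produces full-range (JPEG) I420 and dispatches
// to ARGBToUVJRow_AVX2 when AVX2 is available.
void ArgbToJ420Sketch(const uint8_t* src_argb, int width, int height) {
  int halfwidth = (width + 1) / 2;
  int halfheight = (height + 1) / 2;
  std::vector<uint8_t> y(static_cast<size_t>(width) * height);
  std::vector<uint8_t> u(static_cast<size_t>(halfwidth) * halfheight);
  std::vector<uint8_t> v(static_cast<size_t>(halfwidth) * halfheight);
  libyuv::ARGBToJ420(src_argb, width * 4, y.data(), width,
                     u.data(), halfwidth, v.data(), halfwidth, width, height);
}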