Commit 942db301 authored by Magnus Jedvert

Add ARGBExtractAlpha function

BUG=libyuv:572
R=fbarchard@google.com

Review URL: https://codereview.chromium.org/1995293002 .
parent 6020d2aa
...@@ -288,6 +288,12 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, ...@@ -288,6 +288,12 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Extract the alpha channel from ARGB.
// Writes one alpha byte to dst_a for every pixel of src_argb.
// A negative height reads the source rows bottom-up (inverted image).
// Returns 0 on success, or -1 if a pointer is NULL, width <= 0 or height == 0.
LIBYUV_API
int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_a, int dst_stride_a,
int width, int height);
// Copy Y channel to Alpha of ARGB. // Copy Y channel to Alpha of ARGB.
LIBYUV_API LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
......
...@@ -104,6 +104,7 @@ extern "C" { ...@@ -104,6 +104,7 @@ extern "C" {
#define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3
#define HAS_ARGBEXTRACTALPHAROW_SSE2
#define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3 #define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS #define HAS_COPYROW_ERMS
...@@ -291,6 +292,7 @@ extern "C" { ...@@ -291,6 +292,7 @@ extern "C" {
#define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON #define HAS_ARGBTOYROW_NEON
#define HAS_ARGBEXTRACTALPHAROW_NEON
#define HAS_BGRATOUVROW_NEON #define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON #define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON #define HAS_COPYROW_NEON
...@@ -877,6 +879,14 @@ void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -877,6 +879,14 @@ void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
int width); int width);
// Row functions for ARGBExtractAlpha: read width ARGB pixels from src_argb
// and write their alpha bytes to dst_a. ARGBExtractAlpha picks the plain
// SSE2/NEON versions when width is a multiple of 8 and the _Any_ versions
// otherwise; the _C version is the default.
void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
int width);
void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
int width);
void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
......
...@@ -2374,6 +2374,49 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, ...@@ -2374,6 +2374,49 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
return 0; return 0;
} }
// Pull the alpha channel out of an ARGB image into its own 8-bit plane.
// One byte is written to dst_a per source pixel; the strides are added to
// the byte pointers after every row. A negative height walks the source
// from its last row upwards, so the output is vertically flipped.
// Returns 0 on success, -1 if a pointer is NULL, width is not positive or
// height is zero.
LIBYUV_API
int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
                     uint8* dst_a, int dst_stride,
                     int width, int height) {
  if (!src_argb || !dst_a) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }

  if (height < 0) {
    // Inverted image: start on the last row and step backwards.
    height = -height;
    src_argb += (height - 1) * src_stride;
    src_stride = -src_stride;
  }

  if (src_stride == width * 4 && dst_stride == width) {
    // Neither plane has padding between rows, so process the whole image
    // as one long row.
    width *= height;
    height = 1;
    src_stride = 0;
    dst_stride = 0;
  }

  // Choose the row kernel only now: the choice depends on the (possibly
  // coalesced) width being a multiple of 8.
  void (*ExtractRow)(const uint8 *src, uint8 *dst, int pixels) =
      ARGBExtractAlphaRow_C;
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    if (IS_ALIGNED(width, 8)) {
      ExtractRow = ARGBExtractAlphaRow_SSE2;
    } else {
      ExtractRow = ARGBExtractAlphaRow_Any_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (IS_ALIGNED(width, 8)) {
      ExtractRow = ARGBExtractAlphaRow_NEON;
    } else {
      ExtractRow = ARGBExtractAlphaRow_Any_NEON;
    }
  }
#endif

  for (int rows_left = height; rows_left > 0; --rows_left) {
    ExtractRow(src_argb, dst_a, width);
    src_argb += src_stride;
    dst_a += dst_stride;
  }
  return 0;
}
// Copy a planar Y channel to the alpha channel of a destination ARGB image. // Copy a planar Y channel to the alpha channel of a destination ARGB image.
LIBYUV_API LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
......
...@@ -466,6 +466,12 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) ...@@ -466,6 +466,12 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#ifdef HAS_ARGBATTENUATEROW_NEON #ifdef HAS_ARGBATTENUATEROW_NEON
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif #endif
// Alpha extraction wrappers; ARGBExtractAlpha uses these when width is not
// a multiple of 8.
// NOTE(review): the trailing arguments (0, 4, 1, 7) presumably mean: no UV
// shift, 4 source bytes per pixel, 1 destination byte per pixel, and a
// width mask of 7 (8 pixels per SIMD step) - confirm against the ANY11
// macro definition.
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 7)
#endif
#undef ANY11 #undef ANY11
// Any 1 to 1 blended. Destination is read, modify, write. // Any 1 to 1 blended. Destination is read, modify, write.
......
...@@ -2381,6 +2381,19 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { ...@@ -2381,6 +2381,19 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
} }
} }
// Portable row kernel: copies byte 3 (alpha) of each 4-byte ARGB pixel in
// src_argb to consecutive bytes of dst_a. Pixels are handled two at a time,
// with a final single pixel when width is odd.
void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) {
  int remaining = width;
  while (remaining > 1) {
    // Alpha of the current pixel and of the one after it.
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    src_argb += 8;
    dst_a += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: one pixel is left over.
    dst_a[0] = src_argb[3];
  }
}
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
int i; int i;
for (i = 0; i < width - 1; i += 2) { for (i = 0; i < width - 1; i += 2) {
......
...@@ -2936,6 +2936,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -2936,6 +2936,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
} }
#endif // HAS_ARGBCOPYALPHAROW_AVX2 #endif // HAS_ARGBCOPYALPHAROW_AVX2
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
// Extracts the alpha byte (byte 3) of every 4-byte ARGB pixel into dst_a.
// Each loop iteration reads 32 source bytes (8 pixels) and writes 8 bytes.
// The loop body runs before width is tested, so the caller is expected to
// pass a positive width that is a multiple of 8 (ARGBExtractAlpha routes
// other widths to the _Any_ wrapper).
// width in pixels
void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
asm volatile (
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ", %%xmm0 \n"  // load pixels 0-3 (unaligned)
"movdqu " MEMACCESS2(0x10, 0) ", %%xmm1 \n"  // load pixels 4-7 (unaligned)
"lea " MEMLEA(0x20, 0) ", %0 \n"  // src_argb += 32 bytes
"psrld $0x18, %%xmm0 \n"  // shift each 32-bit lane right by 24: top byte -> low byte
"psrld $0x18, %%xmm1 \n"
"packssdw %%xmm1, %%xmm0 \n"  // 8 x 32-bit -> 8 x 16-bit (values are 0..255)
"packuswb %%xmm0, %%xmm0 \n"  // 16-bit -> 8-bit; low 8 bytes hold the 8 alphas
"movq %%xmm0," MEMACCESS(1) " \n"  // store 8 alpha bytes
"lea " MEMLEA(0x8, 1) ", %1 \n"  // dst_a += 8 bytes
"sub $0x8, %2 \n"  // 8 pixels done
"jg 1b \n"  // loop while width > 0
: "+r"(src_argb), // %0
"+r"(dst_a), // %1
"+rm"(width) // %2
:
: "memory", "cc"
, "xmm0", "xmm1"
);
}
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels // width in pixels
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
......
...@@ -1298,6 +1298,23 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { ...@@ -1298,6 +1298,23 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
); );
} }
// Extracts the alpha byte (byte 3) of every 4-byte ARGB pixel into dst_a.
// Handles 8 pixels per loop iteration; the body runs before width is tested,
// so the caller is expected to pass a positive width that is a multiple of 8
// (ARGBExtractAlpha routes other widths to the _Any_ wrapper).
void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels de-interleaved; d3 = byte 3 of each
"subs %2, %2, #8 \n" // 8 processed per loop
MEMACCESS(1)
"vst1.8 {d3}, [%1]! \n" // store 8 A's.
"bgt 1b \n" // loop while width > 0
: "+r"(src_argb), // %0
"+r"(dst_a), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
);
}
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
......
...@@ -1399,6 +1399,25 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { ...@@ -1399,6 +1399,25 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
} }
#endif // HAS_ARGBTOYROW_NEON #endif // HAS_ARGBTOYROW_NEON
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
// Extracts the alpha byte (byte 3) of every 4-byte ARGB pixel into dst_a.
// Handles 8 pixels per loop iteration; the body runs before width is tested,
// so the caller is expected to pass a positive width that is a multiple of 8
// (ARGBExtractAlpha routes other widths to the _Any_ wrapper).
void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels de-interleaved; v3 = byte 3 of each
"subs %w2, %w2, #8 \n" // 8 processed per loop
MEMACCESS(1)
"st1 {v3.8b}, [%1], #8 \n" // store 8 A's.
"b.gt 1b \n" // loop while width > 0
: "+r"(src_argb), // %0
"+r"(dst_a), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_ARGBEXTRACTALPHAROW_NEON
#ifdef HAS_ARGBTOYJROW_NEON #ifdef HAS_ARGBTOYJROW_NEON
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile ( asm volatile (
......
...@@ -3532,6 +3532,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -3532,6 +3532,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
} }
#endif // HAS_ARGBCOPYALPHAROW_AVX2 #endif // HAS_ARGBCOPYALPHAROW_AVX2
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
// Extracts the alpha byte (byte 3) of every 4-byte ARGB pixel into dst_a.
// Each loop iteration reads 32 source bytes (8 pixels) and writes 8 bytes.
// The loop body runs before width is tested, so the caller is expected to
// pass a positive width that is a multiple of 8 (ARGBExtractAlpha routes
// other widths to the _Any_ wrapper).
// Naked function: arguments are read straight from the stack via esp.
// width in pixels
__declspec(naked)
void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_a
mov ecx, [esp + 12] // width
extractloop:
movdqu xmm0, [eax] // load pixels 0-3 (unaligned)
movdqu xmm1, [eax + 16] // load pixels 4-7 (unaligned)
lea eax, [eax + 32] // src_argb += 32 bytes
psrld xmm0, 24 // shift each 32-bit lane right by 24: top byte -> low byte
psrld xmm1, 24
packssdw xmm0, xmm1 // 8 x 32-bit -> 8 x 16-bit (values are 0..255)
packuswb xmm0, xmm0 // 16-bit -> 8-bit; low 8 bytes hold the 8 alphas
movq qword ptr [edx], xmm0 // store 8 alpha bytes
lea edx, [edx + 8] // dst_a += 8 bytes
sub ecx, 8 // 8 pixels done
jg extractloop // loop while width > 0
ret
}
}
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels // width in pixels
__declspec(naked) __declspec(naked)
......
...@@ -2390,6 +2390,36 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { ...@@ -2390,6 +2390,36 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
free_aligned_buffer_64(orig_pixels); free_aligned_buffer_64(orig_pixels);
} }
// Checks that ARGBExtractAlpha produces byte-identical output under the
// disable_cpu_flags_ mask and under the benchmark_cpu_info_ mask, using a
// random benchmark-sized image. The second configuration is repeated
// benchmark_iterations_ times so the test also serves as a timing run.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 4;  // 4 bytes per ARGB pixel.
  const int kDstStride = benchmark_width_;      // 1 byte per alpha value.
  align_buffer_64(src_pixels, kPixels * 4);
  align_buffer_64(dst_pixels_opt, kPixels);
  align_buffer_64(dst_pixels_c, kPixels);

  // Random source, and identical random contents in both destinations so
  // any byte left unwritten compares equal.
  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(dst_pixels_opt, kPixels);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride,
                   dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  // Pass under test.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    ARGBExtractAlpha(src_pixels, kSrcStride,
                     dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int pixel = 0; pixel < kPixels; ++pixel) {
    EXPECT_EQ(dst_pixels_c[pixel], dst_pixels_opt[pixel]);
  }

  free_aligned_buffer_64(dst_pixels_c);
  free_aligned_buffer_64(dst_pixels_opt);
  free_aligned_buffer_64(src_pixels);
}
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_; const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_64(orig_pixels, kPixels); align_buffer_64(orig_pixels, kPixels);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment