Commit 65460962 authored by Frank Barchard

ARGBExtractAlpha 16 pixels at a time for ARM

arm64   8     TestARGBExtractAlpha (10019 ms) <- original 64 bit code
arm64   8 x2  TestARGBExtractAlpha (7639 ms)
arm64   16    TestARGBExtractAlpha (7369 ms) <- new 64 bit code
thumb32 8     TestARGBExtractAlpha (9505 ms) <- original 32 bit code
thumb32 8 x2  TestARGBExtractAlpha (7400 ms)
thumb32 8 x2i TestARGBExtractAlpha (7266 ms) <- new 32 bit code
arm32   8     TestARGBExtractAlpha (10002 ms)

BUG=libyuv:572
TESTED=local test on nexus 9
R=harryjin@google.com, wangcheng@google.com

Review URL: https://codereview.chromium.org/2035573002 .
parent 462be27e
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1593
+Version: 1594
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1593
+#define LIBYUV_VERSION 1594
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -61,6 +61,7 @@
'-mfpu=vfp',
'-mfpu=vfpv3',
'-mfpu=vfpv3-d16',
+# '-mthumb', # arm32 not thumb
],
'conditions': [
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
......@@ -74,6 +75,7 @@
['target_arch != "arm64"', {
'cflags': [
'-mfpu=neon',
+# '-marm', # arm32 not thumb
],
}],
],
......
......@@ -2404,7 +2404,7 @@ int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
-ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_NEON
+ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
: ARGBExtractAlphaRow_Any_NEON;
}
#endif
......
......@@ -470,7 +470,7 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
-ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 7)
+ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#endif
#undef ANY11
......
......@@ -1302,16 +1302,17 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
asm volatile (
"1: \n"
MEMACCESS(0)
-"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels
-"subs %2, %2, #8 \n" // 8 processed per loop
+"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels
+"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels
+"subs %2, %2, #16 \n" // 16 processed per loop
MEMACCESS(1)
-"vst1.8 {d3}, [%1]! \n" // store 8 A's.
+"vst1.8 {q3}, [%1]! \n" // store 16 A's.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_a), // %1
"+r"(width) // %2
:
-: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
}
......
......@@ -450,7 +450,6 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
void I400ToARGBRow_NEON(const uint8* src_y,
uint8* dst_argb,
int width) {
-int64 width64 = (int64)(width);
asm volatile (
YUVTORGB_SETUP
"movi v23.8b, #255 \n"
......@@ -463,7 +462,7 @@ void I400ToARGBRow_NEON(const uint8* src_y,
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
-"+r"(width64) // %2
+"+r"(width) // %2
: [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
[kUVToG]"r"(&kYuvI601Constants.kUVToG),
[kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
......@@ -1404,10 +1403,10 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
asm volatile (
"1: \n"
MEMACCESS(0)
-"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels
-"subs %w2, %w2, #8 \n" // 8 processed per loop
+"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels
+"subs %w2, %w2, #16 \n" // 16 processed per loop
MEMACCESS(1)
-"st1 {v3.8b}, [%1], #8 \n" // store 8 A's.
+"st1 {v3.16b}, [%1], #16 \n" // store 16 A's.
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_a), // %1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment