Commit 82375d6d authored by fbarchard@google.com's avatar fbarchard@google.com

Neon version of Gray

BUG=176
TEST=./libyuv_unittest --gtest_filter=**Gray*
Review URL: https://webrtc-codereview.appspot.com/929039

git-svn-id: http://libyuv.googlecode.com/svn/trunk@510 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent b94b139e
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 509
Version: 510
License: BSD
License File: LICENSE
......
......@@ -224,6 +224,8 @@ extern "C" {
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#endif
// The following are available on Mips platforms
......@@ -1218,9 +1220,11 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
// Gray row functions: read `width` ARGB pixels from src_argb and write gray
// ARGB pixels to dst_argb. Declared for the C path plus SSSE3 and NEON variants.
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
// Sepia row functions (C, SSSE3, NEON). Only dst_argb is passed, so these
// presumably transform the row in place -- confirm against the implementations.
void ARGBSepiaRow_C(uint8* dst_argb, int width);
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width);
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
......@@ -1236,6 +1240,13 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
// NEON variant of the quantize row function. Only dst_argb is passed, so it
// presumably operates in place; the meaning of scale / interval_size /
// interval_offset is not visible here -- see the implementation.
void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width);
// Shade row functions (C, SSE2, NEON): read src_argb, write dst_argb.
// NOTE(review): `value` is presumably a packed 32-bit ARGB shade -- confirm.
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
// Used for blur.
void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count);
......@@ -1247,12 +1258,6 @@ void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
// C version of the cumulative-sum row function. Going by the parameter names it
// writes sums for `row` into `cumsum` using the previous row's sums
// (previous_cumsum) -- the implementation is not visible here; confirm.
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width);
// Shade row functions (C, SSE2, NEON): read src_argb, write dst_argb.
// NOTE(review): `value` is presumably a packed 32-bit ARGB shade -- confirm.
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_

// Library version number. Defined exactly once: redefining the macro with a
// different value (509 followed by 510) is an incompatible redefinition.
// 510 is the revision that adds the NEON gray row function.
#define LIBYUV_VERSION 510

#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -856,6 +856,10 @@ int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBGrayRow = ARGBGrayRow_SSSE3;
}
#elif defined(HAS_ARGBGRAYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBGrayRow = ARGBGrayRow_NEON;
}
#endif
for (int y = 0; y < height; ++y) {
......@@ -881,6 +885,10 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBGrayRow = ARGBGrayRow_SSSE3;
}
#elif defined(HAS_ARGBGRAYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBGrayRow = ARGBGrayRow_NEON;
}
#endif
uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
for (int y = 0; y < height; ++y) {
......
......@@ -563,7 +563,7 @@ void ARGBToUV411Row_C(const uint8* src_argb,
// Gray = 0.11 * B + 0.59 * G + 0.30 * R, computed in 8-bit fixed point:
// the weights 28 + 152 + 76 sum to 256, so the sum is scaled back with >> 8.
// Coefficients rounded to multiple of 2 for consistency with SSSE3 version.
// Returns the gray level for one pixel; a single return statement, so no
// unreachable duplicate follows it.
static __inline int RGBToGray(uint8 r, uint8 g, uint8 b) {
  return (28 * b + 152 * g + 76 * r) >> 8;
}
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
......
......@@ -2550,6 +2550,34 @@ void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
);
}
// Convert 8 ARGB pixels (32 bytes) per loop iteration to 8 Gray ARGB pixels.
// Similar to ARGBToY but different constants, no round and stores ARGB.
// C code is (28 * b + 152 * g + 76 * r) >> 8;
// The NEON coefficients are the C coefficients halved (14, 76, 38; sum 128),
// so the final shift is 7 instead of 8. The gray value is written to the
// B, G and R channels; d3 (the fourth channel of each loaded pixel) is
// stored back unmodified.
// The loop body runs before width is tested, so at least 8 pixels are always
// processed; the callers in this change only select this function when
// IS_ALIGNED(width, 8) holds.
void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
asm volatile (
"vmov.u8 d24, #14 \n" // B coefficient: 14 / 128 = 0.1094
"vmov.u8 d25, #76 \n" // G coefficient: 76 / 128 = 0.5938
"vmov.u8 d26, #38 \n" // R coefficient: 38 / 128 = 0.2969
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels, one channel per d register.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q2, d0, d24 \n" // B
"vmlal.u8 q2, d1, d25 \n" // G
"vmlal.u8 q2, d2, d26 \n" // R
"vqshrun.s16 d0, q2, #7 \n" // 16 bit sum >> 7 to 8 bit gray, stored as B
"vmov d1, d0 \n" // G = gray
"vmov d2, d0 \n" // R = gray
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels.
"bgt 1b \n" // loop while the remaining width is > 0.
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
// q0-q1 cover d0-d3, q2 is the accumulator, q12-q13 cover d24-d26.
: "cc", "memory", "q0", "q1", "q2", "q12", "q13"
);
}
#endif // __ARM_NEON__
#ifdef __cplusplus
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment