Commit 8e50e619 authored by fbarchard@google.com's avatar fbarchard@google.com

ARGBScale_NEON

BUG=none
TEST=./libyuv_unittest --gtest_filter=*ARGBScale*
Review URL: https://webrtc-codereview.appspot.com/964016

git-svn-id: http://libyuv.googlecode.com/svn/trunk@494 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 4a86a836
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 493
Version: 494
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 493
#define LIBYUV_VERSION 494
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -36,6 +36,14 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride,
void ScaleARGBRowDownEvenInt_NEON(const uint8* src_argb, int src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
#define HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
#endif
/**
......@@ -876,13 +884,18 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
uint8* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C;
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 4) &&
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_SSE2 :
ScaleARGBRowDown2_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_NEON :
ScaleARGBRowDown2_NEON;
}
#endif
// TODO(fbarchard): Loop through source height to allow odd height.
......
......@@ -18,6 +18,61 @@ extern "C" {
// This module is for GCC Neon
#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__)
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst, int dst_width) {
asm volatile (
"1: \n"
// load even pixels into q0, odd into q1
"vld2.u32 {q0, q1}, [%0]! \n"
"vld2.u32 {q2, q3}, [%0]! \n"
"subs %2, %2, #8 \n" // 8 processed per loop
"vst1.u8 {q0}, [%1]! \n" // store even pixels
"vst1.u8 {q2}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
}
void ScaleARGBRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
// change the stride to row 2 pointer
"add %1, %0 \n"
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
"vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
"vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
"vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts.
"vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts.
"vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
"vrshrn.u16 d1, q1, #2 \n"
"vrshrn.u16 d2, q2, #2 \n"
"vrshrn.u16 d3, q3, #2 \n"
"vst4.u8 {d0, d1, d2, d3}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(src_stride), // %1
"+r"(dst), // %2
"+r"(dst_width) // %3
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
);
}
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t,
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment