Commit dd2d512e authored by fbarchard@google.com

420 subsampler

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/935012

git-svn-id: http://libyuv.googlecode.com/svn/trunk@478 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 76e85179
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 476
Version: 478
License: BSD
License File: LICENSE
......
......@@ -196,6 +196,7 @@ extern "C" {
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_RGBATOYROW_NEON
......@@ -351,6 +352,8 @@ void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix);
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix);
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int pix);
void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 476
#define LIBYUV_VERSION 478
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -943,6 +943,9 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
......@@ -1204,6 +1207,9 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
RGB24ToYRow = RGB24ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToYRow = RGB24ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#else
......@@ -1306,6 +1312,9 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
RAWToYRow = RAWToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToYRow = RAWToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#else
......@@ -1407,6 +1416,9 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
RGB565ToYRow = RGB565ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB565ToYRow = RGB565ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#else
......@@ -1508,6 +1520,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToYRow = ARGB1555ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#else
......@@ -1609,6 +1624,9 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToYRow = ARGB4444ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#else
......
......@@ -232,6 +232,9 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
......@@ -335,6 +338,9 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
......@@ -435,6 +441,9 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
......@@ -511,6 +520,9 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
......
......@@ -321,6 +321,9 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
......
......@@ -1350,7 +1350,7 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"adds %1, %0, %1 \n" // stride + src_yuy2
"add %1, %0, %1 \n" // stride + src_yuy2
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
......@@ -1376,7 +1376,7 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"adds %1, %0, %1 \n" // stride + src_uyvy
"add %1, %0, %1 \n" // stride + src_uyvy
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
......@@ -1715,11 +1715,12 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
}
#endif // HAS_ARGBTOUV411ROW_NEON
// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32.
#ifdef HAS_ARGBTOUV411ROW_NEON_ALT
void ARGBToUV411Row_Alt_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) {
// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
#ifdef HAS_ARGBTOUVROW_NEON
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
......@@ -1728,47 +1729,41 @@ void ARGBToUV411Row_Alt_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"vmov.u16 q15, #0x8080 \n" // 128.5
".p2align 2 \n"
"1: \n"
"vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
"vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"vaddl.u8 q0, q0, q1 \n"
"vaddl.u8 q2, q2, q3 \n"
"vadd.u16 q4, q0, q2 \n" // 4 pixels <- 16
"vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
"vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"vaddl.u8 q0, q0, q1 \n"
"vaddl.u8 q2, q2, q3 \n"
"vadd.u16 q3, q0, q2 \n" // 4 more pixels <- 16
"vtbl.u16 q0, {q3, q4}, q5 \n" // REQUIRES SETUP
"vtbl.u16 q1, {q3, q4}, q6 \n"
"vtbl.u16 q2, {q3, q4}, q7 \n" // 8 pixels
"subs %3, %3, #16 \n" // 32 processed per loop.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
"vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
"subs %4, %4, #16 \n" // 32 processed per loop.
"vmul.s16 q8, q0, q10 \n" // B
"vmls.s16 q8, q1, q11 \n" // G
"vmls.s16 q8, q2, q12 \n" // R
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
"vmul.s16 q9, q2, q10 \n" // R
"vmls.s16 q9, q1, q14 \n" // G
"vmls.s16 q9, q0, q13 \n" // B
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
"+r"(src_stride_argb), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(pix) // %4
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif // HAS_ARGBTOUV411ROW_NEON_ALTERNATIVE
#endif // HAS_ARGBTOUVROW_NEON
#ifdef HAS_RGB565TOYROW_NEON
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
......
......@@ -605,15 +605,15 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, DIFF, _Opt, +, 0)
TESTATOPLANAR(ARGB, 4, I420, 2, 2, 2)
TESTATOPLANAR(BGRA, 4, I420, 2, 2, 2)
TESTATOPLANAR(ABGR, 4, I420, 2, 2, 2)
TESTATOPLANAR(RGBA, 4, I420, 2, 2, 2)
TESTATOPLANAR(RAW, 3, I420, 2, 2, 2)
TESTATOPLANAR(RGB24, 3, I420, 2, 2, 2)
TESTATOPLANAR(RGB565, 2, I420, 2, 2, 2)
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2, 2)
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2, 2)
TESTATOPLANAR(ARGB, 4, I420, 2, 2, 4)
TESTATOPLANAR(BGRA, 4, I420, 2, 2, 4)
TESTATOPLANAR(ABGR, 4, I420, 2, 2, 4)
TESTATOPLANAR(RGBA, 4, I420, 2, 2, 4)
TESTATOPLANAR(RAW, 3, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, I420, 2, 2, 4)
TESTATOPLANAR(RGB565, 2, I420, 2, 2, 4)
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2, 4)
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2, 4)
TESTATOPLANAR(ARGB, 4, I411, 4, 1, 4)
TESTATOPLANAR(ARGB, 4, I422, 2, 1, 2)
TESTATOPLANAR(ARGB, 4, I444, 1, 1, 2)
......@@ -623,10 +623,10 @@ TESTATOPLANAR(UYVY, 2, I420, 2, 2, 2)
TESTATOPLANAR(YUY2, 2, I422, 2, 1, 2)
TESTATOPLANAR(UYVY, 2, I422, 2, 1, 2)
TESTATOPLANAR(I400, 1, I420, 2, 2, 2)
TESTATOPLANAR(BayerBGGR, 1, I420, 2, 2, 2)
TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2, 2)
TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2, 2)
TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2, 2)
TESTATOPLANAR(BayerBGGR, 1, I420, 2, 2, 4)
TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2, 4)
TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2, 4)
TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2, 4)
#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG, OFF) \
......@@ -666,7 +666,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
} \
} \
} \
EXPECT_LE(max_diff, 2); \
EXPECT_LE(max_diff, 4); \
for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \
for (int j = 0; j < kWidth / SUBSAMP_X * 2; ++j) { \
int abs_diff = \
......@@ -677,7 +677,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
} \
} \
} \
EXPECT_LE(max_diff, 2); \
EXPECT_LE(max_diff, 4); \
free_aligned_buffer_16(dst_y_c) \
free_aligned_buffer_16(dst_uv_c) \
free_aligned_buffer_16(dst_y_opt) \
......@@ -811,8 +811,8 @@ TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 2)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 2)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment