Commit c74fe987 authored by fbarchard@google.com

YUY2ToYRow_NEON and UYVYToYRow_NEON now process 16 pixels at a time, which is 2x faster

BUG=116
TEST=libyuv_unittest
Review URL: https://webrtc-codereview.appspot.com/870005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@393 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent cc206dee
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 392 Version: 393
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 392 #define LIBYUV_VERSION 393
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -558,11 +558,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, ...@@ -558,11 +558,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
} }
} }
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 16)) {
YUY2ToYRow = YUY2ToYRow_NEON; YUY2ToYRow = YUY2ToYRow_NEON;
if (IS_ALIGNED(width, 16)) { YUY2ToUV422Row = YUY2ToUV422Row_NEON;
YUY2ToUV422Row = YUY2ToUV422Row_NEON;
}
} }
} }
#endif #endif
...@@ -683,11 +681,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, ...@@ -683,11 +681,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
YUY2ToUVRow = YUY2ToUVRow_Any_NEON; YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
} }
} }
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 16)) {
YUY2ToYRow = YUY2ToYRow_NEON; YUY2ToYRow = YUY2ToYRow_NEON;
if (IS_ALIGNED(width, 16)) { YUY2ToUVRow = YUY2ToUVRow_NEON;
YUY2ToUVRow = YUY2ToUVRow_NEON;
}
} }
} }
#endif #endif
...@@ -752,11 +748,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -752,11 +748,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
UYVYToUVRow = UYVYToUVRow_Any_NEON; UYVYToUVRow = UYVYToUVRow_Any_NEON;
} }
} }
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_NEON; UYVYToYRow = UYVYToYRow_NEON;
if (IS_ALIGNED(width, 16)) { UYVYToUVRow = UYVYToUVRow_NEON;
UYVYToUVRow = UYVYToUVRow_NEON;
}
} }
} }
#endif #endif
...@@ -872,11 +866,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, ...@@ -872,11 +866,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
UYVYToUVRow = UYVYToUVRow_Any_NEON; UYVYToUVRow = UYVYToUVRow_Any_NEON;
} }
} }
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_NEON; UYVYToYRow = UYVYToYRow_NEON;
if (IS_ALIGNED(width, 16)) { UYVYToUVRow = UYVYToUVRow_NEON;
UYVYToUVRow = UYVYToUVRow_NEON;
}
} }
} }
#endif #endif
...@@ -903,11 +895,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, ...@@ -903,11 +895,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
UYVYToUVRow = UYVYToUVRow_Any_NEON; UYVYToUVRow = UYVYToUVRow_Any_NEON;
} }
} }
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_NEON; UYVYToYRow = UYVYToYRow_NEON;
if (IS_ALIGNED(width, 16)) { UYVYToUVRow = UYVYToUVRow_NEON;
UYVYToUVRow = UYVYToUVRow_NEON;
}
} }
} }
#endif #endif
......
...@@ -509,15 +509,15 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) { ...@@ -509,15 +509,15 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
asm volatile ( asm volatile (
"1: \n" "1: \n"
"vld2.u8 {d0, d1}, [%0]! \n" // load 8 pixels of YUY2. "vld2.u8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
"subs %2, %2, #8 \n" // 8 processed per loop. "subs %2, %2, #16 \n" // 16 processed per loop.
"vst1.u8 {d0}, [%1]! \n" // store 8 pixels of Y. "vst1.u8 {q0}, [%1]! \n" // store 16 pixels of Y.
"bgt 1b \n" "bgt 1b \n"
: "+r"(src_yuy2), // %0 : "+r"(src_yuy2), // %0
"+r"(dst_y), // %1 "+r"(dst_y), // %1
"+r"(pix) // %2 "+r"(pix) // %2
: :
: "memory", "cc", "d0", "d1" // Clobber List : "memory", "cc", "q0", "q1" // Clobber List
); );
} }
#endif // HAS_YUY2TOYROW_NEON #endif // HAS_YUY2TOYROW_NEON
...@@ -526,19 +526,22 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { ...@@ -526,19 +526,22 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) { void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
asm volatile ( asm volatile (
"1: \n" "1: \n"
"vld2.u8 {d0, d1}, [%0]! \n" // load 8 pixels of UYVY. "vld2.u8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
"subs %2, %2, #8 \n" // 8 processed per loop. "subs %2, %2, #16 \n" // 16 processed per loop.
"vst1.u8 {d1}, [%1]! \n" // store 8 pixels of Y. "vst1.u8 {q1}, [%1]! \n" // store 16 pixels of Y.
"bgt 1b \n" "bgt 1b \n"
: "+r"(src_uyvy), // %0 : "+r"(src_uyvy), // %0
"+r"(dst_y), // %1 "+r"(dst_y), // %1
"+r"(pix) // %2 "+r"(pix) // %2
: :
: "memory", "cc", "d0", "d1" // Clobber List : "memory", "cc", "q0", "q1" // Clobber List
); );
} }
#endif // HAS_UYVYTOYROW_NEON #endif // HAS_UYVYTOYROW_NEON
#endif // HAS_UYVYTOYROW_NEON
#ifdef HAS_YUY2TOYROW_NEON #ifdef HAS_YUY2TOYROW_NEON
void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
int pix) { int pix) {
...@@ -627,8 +630,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, ...@@ -627,8 +630,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
); );
} }
#endif // HAS_UYVYTOYROW_NEON
#endif // __ARM_NEON__ #endif // __ARM_NEON__
#ifdef __cplusplus #ifdef __cplusplus
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment