Commit dddf94c3 authored by fbarchard@google.com

YUY2 and UYVY conversions to I420 ported to NEON

BUG=64
TEST=untested
Review URL: https://webrtc-codereview.appspot.com/823007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@371 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f0ada0e9
......@@ -16,12 +16,11 @@ common_SRC_FILES := \
source/row_posix.cc \
source/scale.cc \
source/scale_argb.cc \
source/video_common.cc
# For Neon support, add .neon to all filenames and the following
# source/rotate_neon.cc
# source/row_neon.cc
source/video_common.cc \
source/rotate_neon.cc \
source/row_neon.cc
common_CFLAGS := -Wall -fexceptions
common_CFLAGS := -Wall -fexceptions -DHAVE_ARMEABI_V7A=1 -mfloat-abi=softfp -mfpu=neon
common_C_INCLUDES = $(LOCAL_PATH)/include
......
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 370
Version: 371
License: BSD
License File: LICENSE
......
......@@ -67,6 +67,7 @@ extern "C" {
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_SPLITUV_SSE2
#define HAS_UYVYTOUV422ROW_SSE2
#define HAS_UYVYTOUVROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YTOARGBROW_SSE2
......@@ -119,11 +120,22 @@ extern "C" {
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TOABGRROW_NEON
#define HAS_I422TORGBAROW_NEON
// TODO(fbarchard): Hook these up to calling functions.
#define HAS_ARGBTORGBAROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ABGRTOARGBROW_NEON
#define HAS_BGRATOARGBROW_NEON
#define HAS_RGBATOARGBROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER)
......@@ -542,6 +554,11 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
......@@ -552,6 +569,11 @@ void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
......@@ -564,6 +586,12 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToUV422Row_NEON(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
......@@ -574,7 +602,11 @@ void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 370
#define LIBYUV_VERSION 371
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -744,6 +744,21 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
}
}
}
#elif defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width > 16) {
YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
}
}
if (IS_ALIGNED(width, 8)) {
YUY2ToYRow = YUY2ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
YUY2ToUVRow = YUY2ToUVRow_NEON;
}
}
}
#endif
for (int y = 0; y < height - 1; y += 2) {
......@@ -768,11 +783,6 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (!src_uyvy ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
......@@ -802,7 +812,23 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
}
}
}
#elif defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
}
if (IS_ALIGNED(width, 8)) {
UYVYToYRow = UYVYToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
UYVYToUVRow = UYVYToUVRow_NEON;
}
}
}
#endif
for (int y = 0; y < height - 1; y += 2) {
UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
UYVYToYRow(src_uyvy, dst_y, width);
......@@ -905,7 +931,55 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
UYVYToYRow = UYVYToYRow_SSE2;
}
}
#elif defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
}
if (IS_ALIGNED(width, 8)) {
UYVYToYRow = UYVYToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
UYVYToUVRow = UYVYToUVRow_NEON;
}
}
}
#endif
#if defined(HAS_UYVYTOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
}
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
UYVYToUVRow = UYVYToUVRow_SSE2;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
UYVYToYRow = UYVYToYRow_SSE2;
}
}
}
#endif
#if defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
}
if (IS_ALIGNED(width, 8)) {
UYVYToYRow = UYVYToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
UYVYToUVRow = UYVYToUVRow_NEON;
}
}
}
#endif
for (int y = 0; y < height - 1; y += 2) {
V210ToUYVYRow(src_v210, row, width);
V210ToUYVYRow(src_v210 + src_stride_v210, row + kMaxStride, width);
......
......@@ -671,7 +671,23 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
}
}
}
#elif defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width > 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
}
}
if (IS_ALIGNED(width, 8)) {
YUY2ToYRow = YUY2ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
YUY2ToUV422Row = YUY2ToUV422Row_NEON;
}
}
}
#endif
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......
......@@ -933,7 +933,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
if (width * 3 <= kMaxStride) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
if (IS_ALIGNED(width, 8)) {
ARGBToRGB24Row = ARGBToRGB24Row_NEON;
}
}
......@@ -1004,7 +1004,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
if (width * 3 <= kMaxStride) {
ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
if (IS_ALIGNED(width, 8)) {
ARGBToRAWRow = ARGBToRAWRow_NEON;
}
}
......
......@@ -321,8 +321,7 @@ int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTORGBAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(width, 16)) {
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBToRGBARow = ARGBToRGBARow_NEON;
}
#endif
......@@ -366,7 +365,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
if (width * 3 <= kMaxStride) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
if (IS_ALIGNED(width, 8)) {
ARGBToRGB24Row = ARGBToRGB24Row_NEON;
}
}
......@@ -411,7 +410,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
if (width * 3 <= kMaxStride) {
ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
if (IS_ALIGNED(width, 8)) {
ARGBToRAWRow = ARGBToRAWRow_NEON;
}
}
......
......@@ -957,8 +957,6 @@ YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1)
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1)
#endif
#ifdef HAS_I422TORGBAROW_NEON
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1)
#endif
#undef YANY
......@@ -1000,6 +998,10 @@ YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4)
#endif
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
#ifdef HAS_YUY2TOYROW_NEON
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2)
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2)
#endif
#undef YANY
#define UVANY(NAMEANY, ANYTOUV_SSE, ANYTOUV_C, BPP) \
......@@ -1021,6 +1023,10 @@ UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4)
#endif
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
#ifdef HAS_YUY2TOUVROW_NEON
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2)
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
#endif
#undef UVANY
#define UV422ANY(NAMEANY, ANYTOUV_SSE, ANYTOUV_C, BPP) \
......@@ -1038,6 +1044,12 @@ UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2, \
YUY2ToUV422Row_C, 2)
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2, \
UYVYToUV422Row_C, 2)
#ifdef HAS_YUY2TOUV422ROW_NEON
UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, \
YUY2ToUV422Row_C, 2)
UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, \
UYVYToUV422Row_C, 2)
#endif
#undef UV422ANY
#endif
......
This diff is collapsed.
......@@ -2755,13 +2755,11 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
* its original size.
*
*/
static void ScalePlaneDown2(int src_width, int src_height,
static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr,
FilterMode filtering) {
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) =
filtering ? ScaleRowDown2Int_C : ScaleRowDown2_C;
......@@ -2795,13 +2793,11 @@ static void ScalePlaneDown2(int src_width, int src_height,
* This is an optimized version for scaling down a plane to 1/4 of
* its original size.
*/
static void ScalePlaneDown4(int src_width, int src_height,
static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr,
FilterMode filtering) {
assert(IS_ALIGNED(src_width, 4));
assert(IS_ALIGNED(src_height, 4));
void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Int_C : ScaleRowDown4_C;
......@@ -2832,13 +2828,11 @@ static void ScalePlaneDown4(int src_width, int src_height,
* of its original size.
*
*/
static void ScalePlaneDown8(int src_width, int src_height,
static void ScalePlaneDown8(int /* src_width */, int /* src_height */,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr,
FilterMode filtering) {
assert(IS_ALIGNED(src_width, 8));
assert(IS_ALIGNED(src_height, 8));
void (*ScaleRowDown8)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) =
filtering && (dst_width <= kMaxOutputWidth) ?
......@@ -2864,7 +2858,7 @@ static void ScalePlaneDown8(int src_width, int src_height,
* Provided by Frank Barchard (fbarchard@google.com)
*
*/
static void ScalePlaneDown34(int src_width, int src_height,
static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr,
......@@ -2953,7 +2947,7 @@ static void ScalePlaneDown34(int src_width, int src_height,
* ggghhhii
* Boxes are 3x3, 2x3, 3x2 and 2x2
*/
static void ScalePlaneDown38(int src_width, int src_height,
static void ScalePlaneDown38(int /* src_width */, int /* src_height */,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr,
......
......@@ -791,13 +791,11 @@ void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr,
* its original size.
*
*/
static void ScaleARGBDown2(int src_width, int src_height,
static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr,
FilterMode filtering) {
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
void (*ScaleARGBRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) =
filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment