Commit 518833b9 authored by fbarchard@google.com's avatar fbarchard@google.com

Fix RGB565ToARGB_Any which uses SSE2 that requires ARGB alignment. Add row…

Fix RGB565ToARGB_Any, which uses SSE2 that requires ARGB alignment. Add row coalescing to convert_argb.cc. Improve coalescing in planar_functions.cc and convert_from_argb.cc. Use stride * 2 == width to test for even width. Apply coalescing to all functions that have the same vertical subsampling.
BUG=197
TESTED=libyuv unittest passes where _Opt uses row coalescing.
Review URL: https://webrtc-codereview.appspot.com/1186004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@601 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 4db10514
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 600 Version: 601
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 600 #define LIBYUV_VERSION 601
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#ifdef HAVE_JPEG #ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h" #include "libyuv/mjpeg_decoder.h"
#endif #endif
#include "libyuv/planar_functions.h"
#include "libyuv/rotate_argb.h" #include "libyuv/rotate_argb.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#include "libyuv/video_common.h" #include "libyuv/video_common.h"
...@@ -64,6 +63,17 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, ...@@ -64,6 +63,17 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb; dst_stride_argb = -dst_stride_argb;
} }
// Coalesce contiguous rows.
if (src_stride_y == width &&
src_stride_u == width &&
src_stride_v == width &&
dst_stride_argb == width * 4) {
return I444ToARGB(src_y, 0,
src_u, 0,
src_v, 0,
dst_argb, 0,
width * height, 1);
}
void (*I444ToARGBRow)(const uint8* y_buf, void (*I444ToARGBRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -116,6 +126,17 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, ...@@ -116,6 +126,17 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb; dst_stride_argb = -dst_stride_argb;
} }
// Coalesce contiguous rows.
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_argb == width * 4) {
return I422ToARGB(src_y, 0,
src_u, 0,
src_v, 0,
dst_argb, 0,
width * height, 1);
}
void (*I422ToARGBRow)(const uint8* y_buf, void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -176,6 +197,17 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, ...@@ -176,6 +197,17 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb; dst_stride_argb = -dst_stride_argb;
} }
// Coalesce contiguous rows.
if (src_stride_y == width &&
src_stride_u * 4 == width &&
src_stride_v * 4 == width &&
dst_stride_argb == width * 4) {
return I411ToARGB(src_y, 0,
src_u, 0,
src_v, 0,
dst_argb, 0,
width * height, 1);
}
void (*I411ToARGBRow)(const uint8* y_buf, void (*I411ToARGBRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -225,6 +257,13 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, ...@@ -225,6 +257,13 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb; dst_stride_argb = -dst_stride_argb;
} }
// Coalesce contiguous rows.
if (src_stride_y == width &&
dst_stride_argb == width * 4) {
return I400ToARGB_Reference(src_y, 0,
dst_argb, 0,
width * height, 1);
}
void (*YToARGBRow)(const uint8* y_buf, void (*YToARGBRow)(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width) = YToARGBRow_C; int width) = YToARGBRow_C;
...@@ -268,6 +307,13 @@ int I400ToARGB(const uint8* src_y, int src_stride_y, ...@@ -268,6 +307,13 @@ int I400ToARGB(const uint8* src_y, int src_stride_y,
src_y = src_y + (height - 1) * src_stride_y; src_y = src_y + (height - 1) * src_stride_y;
src_stride_y = -src_stride_y; src_stride_y = -src_stride_y;
} }
// Coalesce contiguous rows.
if (src_stride_y == width &&
dst_stride_argb == width * 4) {
return I400ToARGB(src_y, 0,
dst_argb, 0,
width * height, 1);
}
void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) = void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
I400ToARGBRow_C; I400ToARGBRow_C;
#if defined(HAS_I400TOARGBROW_SSE2) #if defined(HAS_I400TOARGBROW_SSE2)
...@@ -359,6 +405,13 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, ...@@ -359,6 +405,13 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24;
} }
// Coalesce contiguous rows.
if (src_stride_rgb24 == width * 3 &&
dst_stride_argb == width * 4) {
return RGB24ToARGB(src_rgb24, 0,
dst_argb, 0,
width * height, 1);
}
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RGB24ToARGBRow_C; RGB24ToARGBRow_C;
#if defined(HAS_RGB24TOARGBROW_SSSE3) #if defined(HAS_RGB24TOARGBROW_SSSE3)
...@@ -401,6 +454,13 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, ...@@ -401,6 +454,13 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
src_raw = src_raw + (height - 1) * src_stride_raw; src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw; src_stride_raw = -src_stride_raw;
} }
// Coalesce contiguous rows.
if (src_stride_raw == width * 3 &&
dst_stride_argb == width * 4) {
return RAWToARGB(src_raw, 0,
dst_argb, 0,
width * height, 1);
}
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RAWToARGBRow_C; RAWToARGBRow_C;
#if defined(HAS_RAWTOARGBROW_SSSE3) #if defined(HAS_RAWTOARGBROW_SSSE3)
...@@ -443,6 +503,13 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, ...@@ -443,6 +503,13 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
src_stride_rgb565 = -src_stride_rgb565; src_stride_rgb565 = -src_stride_rgb565;
} }
// Coalesce contiguous rows.
if (src_stride_rgb565 == width * 2 &&
dst_stride_argb == width * 4) {
return RGB565ToARGB(src_rgb565, 0,
dst_argb, 0,
width * height, 1);
}
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) = void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
RGB565ToARGBRow_C; RGB565ToARGBRow_C;
#if defined(HAS_RGB565TOARGBROW_SSE2) #if defined(HAS_RGB565TOARGBROW_SSE2)
...@@ -485,6 +552,13 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, ...@@ -485,6 +552,13 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
src_stride_argb1555 = -src_stride_argb1555; src_stride_argb1555 = -src_stride_argb1555;
} }
// Coalesce contiguous rows.
if (src_stride_argb1555 == width * 2 &&
dst_stride_argb == width * 4) {
return ARGB1555ToARGB(src_argb1555, 0,
dst_argb, 0,
width * height, 1);
}
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
int pix) = ARGB1555ToARGBRow_C; int pix) = ARGB1555ToARGBRow_C;
#if defined(HAS_ARGB1555TOARGBROW_SSE2) #if defined(HAS_ARGB1555TOARGBROW_SSE2)
...@@ -527,6 +601,13 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, ...@@ -527,6 +601,13 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
src_stride_argb4444 = -src_stride_argb4444; src_stride_argb4444 = -src_stride_argb4444;
} }
// Coalesce contiguous rows.
if (src_stride_argb4444 == width * 2 &&
dst_stride_argb == width * 4) {
return ARGB4444ToARGB(src_argb4444, 0,
dst_argb, 0,
width * height, 1);
}
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
int pix) = ARGB4444ToARGBRow_C; int pix) = ARGB4444ToARGBRow_C;
#if defined(HAS_ARGB4444TOARGBROW_SSE2) #if defined(HAS_ARGB4444TOARGBROW_SSE2)
...@@ -721,10 +802,19 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, ...@@ -721,10 +802,19 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2;
} }
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_yuy2 == width * 2 &&
dst_stride_argb == width * 4) {
return YUY2ToARGB(src_yuy2, 0,
dst_argb, 0,
width * height, 1);
}
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) = void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
YUY2ToARGBRow_C; YUY2ToARGBRow_C;
#if defined(HAS_YUY2TOARGBROW_SSSE3) #if defined(HAS_YUY2TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { // posix it 16, win is 8. // Posix it 16, Windows is 8.
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && width <= kMaxStride) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3; YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3; YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
...@@ -765,10 +855,19 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -765,10 +855,19 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
src_stride_uyvy = -src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy;
} }
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_uyvy == width * 2 &&
dst_stride_argb == width * 4) {
return UYVYToARGB(src_uyvy, 0,
dst_argb, 0,
width * height, 1);
}
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) = void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
UYVYToARGBRow_C; UYVYToARGBRow_C;
#if defined(HAS_UYVYTOARGBROW_SSSE3) #if defined(HAS_UYVYTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { // posix it 16, win is 8. // Posix it 16, Windows is 8.
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && width <= kMaxStride) {
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3; UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3; UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
......
...@@ -36,6 +36,17 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, ...@@ -36,6 +36,17 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_u == width &&
dst_stride_v == width) {
return ARGBToI444(src_argb, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C; ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
...@@ -100,6 +111,17 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, ...@@ -100,6 +111,17 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_u * 2 == width &&
dst_stride_v * 2 == width) {
return ARGBToI422(src_argb, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C; int pix) = ARGBToUV422Row_C;
#if defined(HAS_ARGBTOUV422ROW_SSSE3) #if defined(HAS_ARGBTOUV422ROW_SSSE3)
...@@ -168,6 +190,17 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb, ...@@ -168,6 +190,17 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_u * 4 == width &&
dst_stride_v * 4 == width) {
return ARGBToI411(src_argb, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV411Row_C; int pix) = ARGBToUV411Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
...@@ -446,6 +479,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, ...@@ -446,6 +479,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
dst_stride_yuy2 = -dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2;
} }
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_argb == width * 4 &&
dst_stride_yuy2 == width * 2) {
return ARGBToYUY2(src_argb, 0,
dst_yuy2, 0,
width * height, 1);
}
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C; int pix) = ARGBToUV422Row_C;
#if defined(HAS_ARGBTOUV422ROW_SSSE3) #if defined(HAS_ARGBTOUV422ROW_SSSE3)
...@@ -535,6 +576,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, ...@@ -535,6 +576,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
dst_stride_uyvy = -dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy;
} }
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_argb == width * 4 &&
dst_stride_uyvy == width * 2) {
return ARGBToUYVY(src_argb, 0,
dst_uyvy, 0,
width * height, 1);
}
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C; int pix) = ARGBToUV422Row_C;
#if defined(HAS_ARGBTOUV422ROW_SSSE3) #if defined(HAS_ARGBTOUV422ROW_SSSE3)
...@@ -624,7 +673,9 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, ...@@ -624,7 +673,9 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (src_stride_argb == width * 4 && if (src_stride_argb == width * 4 &&
dst_stride_y == width) { dst_stride_y == width) {
return ARGBToI400(src_argb, 0, dst_y, 0, width * height, 1); return ARGBToI400(src_argb, 0,
dst_y, 0,
width * height, 1);
} }
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C; ARGBToYRow_C;
...@@ -704,7 +755,9 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, ...@@ -704,7 +755,9 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (src_stride_argb == width * 4 && if (src_stride_argb == width * 4 &&
dst_stride_rgb24 == width * 3) { dst_stride_rgb24 == width * 3) {
return ARGBToRGB24(src_argb, 0, dst_rgb24, 0, width * height, 1); return ARGBToRGB24(src_argb, 0,
dst_rgb24, 0,
width * height, 1);
} }
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB24Row_C; ARGBToRGB24Row_C;
...@@ -750,7 +803,9 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, ...@@ -750,7 +803,9 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (src_stride_argb == width * 4 && if (src_stride_argb == width * 4 &&
dst_stride_raw == width * 3) { dst_stride_raw == width * 3) {
return ARGBToRAW(src_argb, 0, dst_raw, 0, width * height, 1); return ARGBToRAW(src_argb, 0,
dst_raw, 0,
width * height, 1);
} }
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRAWRow_C; ARGBToRAWRow_C;
...@@ -796,7 +851,9 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, ...@@ -796,7 +851,9 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (src_stride_argb == width * 4 && if (src_stride_argb == width * 4 &&
dst_stride_rgb565 == width * 2) { dst_stride_rgb565 == width * 2) {
return ARGBToRGB565(src_argb, 0, dst_rgb565, 0, width * height, 1); return ARGBToRGB565(src_argb, 0,
dst_rgb565, 0,
width * height, 1);
} }
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB565Row_C; ARGBToRGB565Row_C;
...@@ -841,7 +898,9 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, ...@@ -841,7 +898,9 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (src_stride_argb == width * 4 && if (src_stride_argb == width * 4 &&
dst_stride_argb1555 == width * 2) { dst_stride_argb1555 == width * 2) {
return ARGBToARGB1555(src_argb, 0, dst_argb1555, 0, width * height, 1); return ARGBToARGB1555(src_argb, 0,
dst_argb1555, 0,
width * height, 1);
} }
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB1555Row_C; ARGBToARGB1555Row_C;
...@@ -886,7 +945,9 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, ...@@ -886,7 +945,9 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (src_stride_argb == width * 4 && if (src_stride_argb == width * 4 &&
dst_stride_argb4444 == width * 2) { dst_stride_argb4444 == width * 2) {
return ARGBToARGB4444(src_argb, 0, dst_argb4444, 0, width * height, 1); return ARGBToARGB4444(src_argb, 0,
dst_argb4444, 0,
width * height, 1);
} }
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB4444Row_C; ARGBToARGB4444Row_C;
......
...@@ -29,8 +29,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y, ...@@ -29,8 +29,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height) { int width, int height) {
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (src_stride_y == width && dst_stride_y == width) { if (src_stride_y == width &&
CopyPlane(src_y, 0, dst_y, 0, width * height, 1); dst_stride_y == width) {
CopyPlane(src_y, 0,
dst_y, 0,
width * height, 1);
return; return;
} }
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
...@@ -228,12 +231,14 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, ...@@ -228,12 +231,14 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
src_stride_yuy2 = -src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2;
} }
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) && if (src_stride_yuy2 == width * 2 &&
src_stride_yuy2 == width * 2 &&
dst_stride_y == width && dst_stride_y == width &&
dst_stride_u == (width + 1) / 2 && dst_stride_u * 2 == width &&
dst_stride_v == (width + 1) / 2) { dst_stride_v * 2 == width) {
return YUY2ToI422(src_yuy2, 0, dst_y, 0, dst_u, 0, dst_v, 0, return YUY2ToI422(src_yuy2, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1); width * height, 1);
} }
void (*YUY2ToUV422Row)(const uint8* src_yuy2, void (*YUY2ToUV422Row)(const uint8* src_yuy2,
...@@ -314,12 +319,14 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -314,12 +319,14 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
src_stride_uyvy = -src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy;
} }
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) && if (src_stride_uyvy == width * 2 &&
src_stride_uyvy == width * 2 &&
dst_stride_y == width && dst_stride_y == width &&
dst_stride_u == (width + 1) / 2 && dst_stride_u * 2 == width &&
dst_stride_v == (width + 1) / 2) { dst_stride_v * 2 == width) {
return UYVYToI422(src_uyvy, 0, dst_y, 0, dst_u, 0, dst_v, 0, return UYVYToI422(src_uyvy, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1); width * height, 1);
} }
void (*UYVYToUV422Row)(const uint8* src_uyvy, void (*UYVYToUV422Row)(const uint8* src_uyvy,
...@@ -793,10 +800,9 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y, ...@@ -793,10 +800,9 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
dst_stride_bgra = -dst_stride_bgra; dst_stride_bgra = -dst_stride_bgra;
} }
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) && if (src_stride_y == width &&
src_stride_y == width && src_stride_u * 2 == width &&
src_stride_u == (width + 1) / 2 && src_stride_v * 2 == width &&
src_stride_v == (width + 1) / 2 &&
dst_stride_bgra == width * 4) { dst_stride_bgra == width * 4) {
return I422ToBGRA(src_y, 0, return I422ToBGRA(src_y, 0,
src_u, 0, src_u, 0,
...@@ -865,10 +871,9 @@ int I422ToABGR(const uint8* src_y, int src_stride_y, ...@@ -865,10 +871,9 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
dst_stride_abgr = -dst_stride_abgr; dst_stride_abgr = -dst_stride_abgr;
} }
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) && if (src_stride_y == width &&
src_stride_y == width && src_stride_u * 2 == width &&
src_stride_u == (width + 1) / 2 && src_stride_v * 2 == width &&
src_stride_v == (width + 1) / 2 &&
dst_stride_abgr == width * 4) { dst_stride_abgr == width * 4) {
return I422ToABGR(src_y, 0, return I422ToABGR(src_y, 0,
src_u, 0, src_u, 0,
...@@ -929,10 +934,9 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, ...@@ -929,10 +934,9 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
dst_stride_rgba = -dst_stride_rgba; dst_stride_rgba = -dst_stride_rgba;
} }
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) && if (src_stride_y == width &&
src_stride_y == width && src_stride_u * 2 == width &&
src_stride_u == (width + 1) / 2 && src_stride_v * 2 == width &&
src_stride_v == (width + 1) / 2 &&
dst_stride_rgba == width * 4) { dst_stride_rgba == width * 4) {
return I422ToRGBA(src_y, 0, return I422ToRGBA(src_y, 0,
src_u, 0, src_u, 0,
...@@ -1074,7 +1078,9 @@ void SetPlane(uint8* dst_y, int dst_stride_y, ...@@ -1074,7 +1078,9 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
uint32 value) { uint32 value) {
// Coalesce contiguous rows. // Coalesce contiguous rows.
if (dst_stride_y == width) { if (dst_stride_y == width) {
SetPlane(dst_y, 0, width * height, 1, value); SetPlane(dst_y, 0,
width * height, 1,
value);
return; return;
} }
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C; void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
......
...@@ -115,8 +115,6 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2) ...@@ -115,8 +115,6 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
#endif // HAS_NV12TORGB565ROW_NEON #endif // HAS_NV12TORGB565ROW_NEON
#undef NVANY #undef NVANY
// TODO(fbarchard): RGBANY use last 16 method.
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \ #define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
void NAMEANY(const uint8* src, \ void NAMEANY(const uint8* src, \
uint8* dst, \ uint8* dst, \
...@@ -145,6 +143,17 @@ RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C, ...@@ -145,6 +143,17 @@ RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
15, 2, 4) 15, 2, 4)
RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C, RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
15, 2, 4) 15, 2, 4)
// These require alignment on ARGB, so C is used for remainder.
RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
15, 3, 4)
RGBANY(RAWToARGBRow_Any_SSSE3,RAWToARGBRow_SSSE3, RAWToARGBRow_C,
15, 3, 4)
RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C,
7, 2, 4)
RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C,
7, 2, 4)
RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C,
7, 2, 4)
#endif #endif
#if defined(HAS_ARGBTORGB24ROW_NEON) #if defined(HAS_ARGBTORGB24ROW_NEON)
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3) RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
...@@ -188,30 +197,31 @@ BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C, ...@@ -188,30 +197,31 @@ BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD. // RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
// TODO(fbarchard): Use last 16 method for all unsubsampled conversions. // TODO(fbarchard): Use last 16 method for all unsubsampled conversions.
#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \ #define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_SIMDU, SBPP, BPP, NUM) \
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \ ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \ ARGBTOY_SIMDU(src_argb + (width - NUM) * SBPP, \
dst_y + (width - NUM) * BPP, NUM); \ dst_y + (width - NUM) * BPP, NUM); \
} }
#ifdef HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYROW_AVX2
YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32) YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32) YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32) YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
#endif #endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(ARGBToYRow_Any_SSSE3,
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16) ARGBToYRow_Unaligned_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(BGRAToYRow_Any_SSSE3,
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16) BGRAToYRow_Unaligned_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16) YANY(ABGRToYRow_Any_SSSE3,
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16) ABGRToYRow_Unaligned_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 3, 4, 16) YANY(RGBAToYRow_Any_SSSE3,
YANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 3, 4, 16) RGBAToYRow_Unaligned_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 2, 4, 8) YANY(YUY2ToYRow_Any_SSE2,
YANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 2, 4, 8) YUY2ToYRow_Unaligned_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 2, 4, 8) YANY(UYVYToYRow_Any_SSE2,
UYVYToYRow_Unaligned_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
#endif #endif
#ifdef HAS_ARGBTOYROW_NEON #ifdef HAS_ARGBTOYROW_NEON
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8) YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
...@@ -233,7 +243,6 @@ YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8) ...@@ -233,7 +243,6 @@ YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
#endif #endif
#undef YANY #undef YANY
// Attenuate is destructive so last16 method can not be used due to overlap.
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \ #define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
int n = width & ~MASK; \ int n = width & ~MASK; \
...@@ -242,6 +251,7 @@ YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8) ...@@ -242,6 +251,7 @@ YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
dst_y + n * BPP, width & MASK); \ dst_y + n * BPP, width & MASK); \
} }
// Attenuate is destructive so last16 method can not be used due to overlap.
#ifdef HAS_ARGBATTENUATEROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_SSSE3
YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C, YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
4, 4, 3) 4, 4, 3)
...@@ -268,6 +278,8 @@ YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C, ...@@ -268,6 +278,8 @@ YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
#endif #endif
#undef YANY #undef YANY
// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C. // RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \ #define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \
void NAMEANY(const uint8* src_argb, int src_stride_argb, \ void NAMEANY(const uint8* src_argb, int src_stride_argb, \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment