Commit 518833b9 authored by fbarchard@google.com's avatar fbarchard@google.com

Fix RGB565ToARGB_Any which uses SSE2 that requires ARGB alignment. Add row…

Fix RGB565ToARGB_Any which uses SSE2 that requires ARGB alignment.  Add row coalescing to convert_argb.cc.  Improve coalescing on planar_functions.cc and convert_from_argb.cc.  Use stride * 2 == width to test for even width.  Apply coalescing to all functions that have same vertical subsampling.
BUG=197
TESTED=libyuv unittest passes where _Opt uses row coalescing.
Review URL: https://webrtc-codereview.appspot.com/1186004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@601 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 4db10514
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 600
Version: 601
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 600
#define LIBYUV_VERSION 601
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -15,7 +15,6 @@
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/planar_functions.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
......@@ -64,6 +63,17 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce contiguous rows.
if (src_stride_y == width &&
src_stride_u == width &&
src_stride_v == width &&
dst_stride_argb == width * 4) {
return I444ToARGB(src_y, 0,
src_u, 0,
src_v, 0,
dst_argb, 0,
width * height, 1);
}
void (*I444ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -116,6 +126,17 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce contiguous rows.
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_argb == width * 4) {
return I422ToARGB(src_y, 0,
src_u, 0,
src_v, 0,
dst_argb, 0,
width * height, 1);
}
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -176,6 +197,17 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce contiguous rows.
if (src_stride_y == width &&
src_stride_u * 4 == width &&
src_stride_v * 4 == width &&
dst_stride_argb == width * 4) {
return I411ToARGB(src_y, 0,
src_u, 0,
src_v, 0,
dst_argb, 0,
width * height, 1);
}
void (*I411ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -225,6 +257,13 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce contiguous rows.
if (src_stride_y == width &&
dst_stride_argb == width * 4) {
return I400ToARGB_Reference(src_y, 0,
dst_argb, 0,
width * height, 1);
}
void (*YToARGBRow)(const uint8* y_buf,
uint8* rgb_buf,
int width) = YToARGBRow_C;
......@@ -268,6 +307,13 @@ int I400ToARGB(const uint8* src_y, int src_stride_y,
src_y = src_y + (height - 1) * src_stride_y;
src_stride_y = -src_stride_y;
}
// Coalesce contiguous rows.
if (src_stride_y == width &&
dst_stride_argb == width * 4) {
return I400ToARGB(src_y, 0,
dst_argb, 0,
width * height, 1);
}
void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
I400ToARGBRow_C;
#if defined(HAS_I400TOARGBROW_SSE2)
......@@ -359,6 +405,13 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
// Coalesce contiguous rows.
if (src_stride_rgb24 == width * 3 &&
dst_stride_argb == width * 4) {
return RGB24ToARGB(src_rgb24, 0,
dst_argb, 0,
width * height, 1);
}
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RGB24ToARGBRow_C;
#if defined(HAS_RGB24TOARGBROW_SSSE3)
......@@ -401,6 +454,13 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw;
}
// Coalesce contiguous rows.
if (src_stride_raw == width * 3 &&
dst_stride_argb == width * 4) {
return RAWToARGB(src_raw, 0,
dst_argb, 0,
width * height, 1);
}
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RAWToARGBRow_C;
#if defined(HAS_RAWTOARGBROW_SSSE3)
......@@ -443,6 +503,13 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
src_stride_rgb565 = -src_stride_rgb565;
}
// Coalesce contiguous rows.
if (src_stride_rgb565 == width * 2 &&
dst_stride_argb == width * 4) {
return RGB565ToARGB(src_rgb565, 0,
dst_argb, 0,
width * height, 1);
}
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
RGB565ToARGBRow_C;
#if defined(HAS_RGB565TOARGBROW_SSE2)
......@@ -485,6 +552,13 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
src_stride_argb1555 = -src_stride_argb1555;
}
// Coalesce contiguous rows.
if (src_stride_argb1555 == width * 2 &&
dst_stride_argb == width * 4) {
return ARGB1555ToARGB(src_argb1555, 0,
dst_argb, 0,
width * height, 1);
}
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
int pix) = ARGB1555ToARGBRow_C;
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
......@@ -527,6 +601,13 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
src_stride_argb4444 = -src_stride_argb4444;
}
// Coalesce contiguous rows.
if (src_stride_argb4444 == width * 2 &&
dst_stride_argb == width * 4) {
return ARGB4444ToARGB(src_argb4444, 0,
dst_argb, 0,
width * height, 1);
}
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
int pix) = ARGB4444ToARGBRow_C;
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
......@@ -721,10 +802,19 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2;
}
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_yuy2 == width * 2 &&
dst_stride_argb == width * 4) {
return YUY2ToARGB(src_yuy2, 0,
dst_argb, 0,
width * height, 1);
}
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
YUY2ToARGBRow_C;
#if defined(HAS_YUY2TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { // posix it 16, win is 8.
// Posix is 16, Windows is 8.
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && width <= kMaxStride) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
......@@ -765,10 +855,19 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
src_stride_uyvy = -src_stride_uyvy;
}
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_uyvy == width * 2 &&
dst_stride_argb == width * 4) {
return UYVYToARGB(src_uyvy, 0,
dst_argb, 0,
width * height, 1);
}
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
UYVYToARGBRow_C;
#if defined(HAS_UYVYTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { // posix it 16, win is 8.
// Posix is 16, Windows is 8.
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && width <= kMaxStride) {
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
......
......@@ -36,6 +36,17 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_u == width &&
dst_stride_v == width) {
return ARGBToI444(src_argb, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
......@@ -100,6 +111,17 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_u * 2 == width &&
dst_stride_v * 2 == width) {
return ARGBToI422(src_argb, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
......@@ -168,6 +190,17 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_u * 4 == width &&
dst_stride_v * 4 == width) {
return ARGBToI411(src_argb, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV411Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
......@@ -446,6 +479,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
dst_stride_yuy2 = -dst_stride_yuy2;
}
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_argb == width * 4 &&
dst_stride_yuy2 == width * 2) {
return ARGBToYUY2(src_argb, 0,
dst_yuy2, 0,
width * height, 1);
}
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
......@@ -535,6 +576,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
dst_stride_uyvy = -dst_stride_uyvy;
}
// Coalesce contiguous rows.
if (width * height <= kMaxStride &&
src_stride_argb == width * 4 &&
dst_stride_uyvy == width * 2) {
return ARGBToUYVY(src_argb, 0,
dst_uyvy, 0,
width * height, 1);
}
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
......@@ -624,7 +673,9 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width) {
return ARGBToI400(src_argb, 0, dst_y, 0, width * height, 1);
return ARGBToI400(src_argb, 0,
dst_y, 0,
width * height, 1);
}
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
......@@ -704,7 +755,9 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_rgb24 == width * 3) {
return ARGBToRGB24(src_argb, 0, dst_rgb24, 0, width * height, 1);
return ARGBToRGB24(src_argb, 0,
dst_rgb24, 0,
width * height, 1);
}
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB24Row_C;
......@@ -750,7 +803,9 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_raw == width * 3) {
return ARGBToRAW(src_argb, 0, dst_raw, 0, width * height, 1);
return ARGBToRAW(src_argb, 0,
dst_raw, 0,
width * height, 1);
}
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRAWRow_C;
......@@ -796,7 +851,9 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_rgb565 == width * 2) {
return ARGBToRGB565(src_argb, 0, dst_rgb565, 0, width * height, 1);
return ARGBToRGB565(src_argb, 0,
dst_rgb565, 0,
width * height, 1);
}
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB565Row_C;
......@@ -841,7 +898,9 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_argb1555 == width * 2) {
return ARGBToARGB1555(src_argb, 0, dst_argb1555, 0, width * height, 1);
return ARGBToARGB1555(src_argb, 0,
dst_argb1555, 0,
width * height, 1);
}
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB1555Row_C;
......@@ -886,7 +945,9 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
// Coalesce contiguous rows.
if (src_stride_argb == width * 4 &&
dst_stride_argb4444 == width * 2) {
return ARGBToARGB4444(src_argb, 0, dst_argb4444, 0, width * height, 1);
return ARGBToARGB4444(src_argb, 0,
dst_argb4444, 0,
width * height, 1);
}
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB4444Row_C;
......
......@@ -29,8 +29,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
// Coalesce contiguous rows.
if (src_stride_y == width && dst_stride_y == width) {
CopyPlane(src_y, 0, dst_y, 0, width * height, 1);
if (src_stride_y == width &&
dst_stride_y == width) {
CopyPlane(src_y, 0,
dst_y, 0,
width * height, 1);
return;
}
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
......@@ -228,12 +231,14 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
src_stride_yuy2 = -src_stride_yuy2;
}
// Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) &&
src_stride_yuy2 == width * 2 &&
if (src_stride_yuy2 == width * 2 &&
dst_stride_y == width &&
dst_stride_u == (width + 1) / 2 &&
dst_stride_v == (width + 1) / 2) {
return YUY2ToI422(src_yuy2, 0, dst_y, 0, dst_u, 0, dst_v, 0,
dst_stride_u * 2 == width &&
dst_stride_v * 2 == width) {
return YUY2ToI422(src_yuy2, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*YUY2ToUV422Row)(const uint8* src_yuy2,
......@@ -314,12 +319,14 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
src_stride_uyvy = -src_stride_uyvy;
}
// Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) &&
src_stride_uyvy == width * 2 &&
if (src_stride_uyvy == width * 2 &&
dst_stride_y == width &&
dst_stride_u == (width + 1) / 2 &&
dst_stride_v == (width + 1) / 2) {
return UYVYToI422(src_uyvy, 0, dst_y, 0, dst_u, 0, dst_v, 0,
dst_stride_u * 2 == width &&
dst_stride_v * 2 == width) {
return UYVYToI422(src_uyvy, 0,
dst_y, 0,
dst_u, 0,
dst_v, 0,
width * height, 1);
}
void (*UYVYToUV422Row)(const uint8* src_uyvy,
......@@ -793,10 +800,9 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
dst_stride_bgra = -dst_stride_bgra;
}
// Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) &&
src_stride_y == width &&
src_stride_u == (width + 1) / 2 &&
src_stride_v == (width + 1) / 2 &&
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_bgra == width * 4) {
return I422ToBGRA(src_y, 0,
src_u, 0,
......@@ -865,10 +871,9 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
dst_stride_abgr = -dst_stride_abgr;
}
// Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) &&
src_stride_y == width &&
src_stride_u == (width + 1) / 2 &&
src_stride_v == (width + 1) / 2 &&
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_abgr == width * 4) {
return I422ToABGR(src_y, 0,
src_u, 0,
......@@ -929,10 +934,9 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
dst_stride_rgba = -dst_stride_rgba;
}
// Coalesce contiguous rows.
if (IS_ALIGNED(width, 2) &&
src_stride_y == width &&
src_stride_u == (width + 1) / 2 &&
src_stride_v == (width + 1) / 2 &&
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_rgba == width * 4) {
return I422ToRGBA(src_y, 0,
src_u, 0,
......@@ -1074,7 +1078,9 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
uint32 value) {
// Coalesce contiguous rows.
if (dst_stride_y == width) {
SetPlane(dst_y, 0, width * height, 1, value);
SetPlane(dst_y, 0,
width * height, 1,
value);
return;
}
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
......
......@@ -115,8 +115,6 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
#endif // HAS_NV12TORGB565ROW_NEON
#undef NVANY
// TODO(fbarchard): RGBANY use last 16 method.
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
void NAMEANY(const uint8* src, \
uint8* dst, \
......@@ -145,6 +143,17 @@ RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
15, 2, 4)
RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
15, 2, 4)
// These require alignment on ARGB, so C is used for remainder.
RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
15, 3, 4)
RGBANY(RAWToARGBRow_Any_SSSE3,RAWToARGBRow_SSSE3, RAWToARGBRow_C,
15, 3, 4)
RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C,
7, 2, 4)
RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C,
7, 2, 4)
RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C,
7, 2, 4)
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
......@@ -188,30 +197,31 @@ BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
// TODO(fbarchard): Use last 16 method for all unsubsampled conversions.
#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_SIMDU, SBPP, BPP, NUM) \
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \
ARGBTOY_SIMDU(src_argb + (width - NUM) * SBPP, \
dst_y + (width - NUM) * BPP, NUM); \
}
#ifdef HAS_ARGBTOYROW_AVX2
YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 3, 4, 16)
YANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 3, 4, 16)
YANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 2, 4, 8)
YANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 2, 4, 8)
YANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 2, 4, 8)
YANY(ARGBToYRow_Any_SSSE3,
ARGBToYRow_Unaligned_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(BGRAToYRow_Any_SSSE3,
BGRAToYRow_Unaligned_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3,
ABGRToYRow_Unaligned_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGBAToYRow_Any_SSSE3,
RGBAToYRow_Unaligned_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2,
YUY2ToYRow_Unaligned_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(UYVYToYRow_Any_SSE2,
UYVYToYRow_Unaligned_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
#endif
#ifdef HAS_ARGBTOYROW_NEON
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
......@@ -233,7 +243,6 @@ YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
#endif
#undef YANY
// Attenuate is destructive so last16 method can not be used due to overlap.
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
int n = width & ~MASK; \
......@@ -242,6 +251,7 @@ YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
dst_y + n * BPP, width & MASK); \
}
// Attenuate is destructive so last16 method can not be used due to overlap.
#ifdef HAS_ARGBATTENUATEROW_SSSE3
YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
4, 4, 3)
......@@ -268,6 +278,8 @@ YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
#endif
#undef YANY
// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \
void NAMEANY(const uint8* src_argb, int src_stride_argb, \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment