Commit f3fb7b69 authored by fbarchard@google.com

Lint cleanup for Neon and Visual C inline.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/464002

git-svn-id: http://libyuv.googlecode.com/svn/trunk@231 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 5b22506b
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 230 Version: 231
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION 230 #define INCLUDE_LIBYUV_VERSION 231
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -78,7 +78,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, ...@@ -78,7 +78,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
movdqa xmm0, [eax] movdqa xmm0, [eax]
pavgb xmm0, [eax + edx] pavgb xmm0, [eax + edx]
sub ecx, 16 sub ecx, 16
movdqa [eax + edi], xmm0 // NOLINT movdqa [eax + edi], xmm0
lea eax, [eax + 16] lea eax, [eax + 16]
jg convertloop jg convertloop
pop edi pop edi
...@@ -475,7 +475,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, ...@@ -475,7 +475,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
pand xmm2, xmm5 // even bytes are Y pand xmm2, xmm5 // even bytes are Y
pand xmm3, xmm5 pand xmm3, xmm5
packuswb xmm2, xmm3 packuswb xmm2, xmm3
movdqa [edx], xmm2 // NOLINT movdqa [edx], xmm2
lea edx, [edx + 16] lea edx, [edx + 16]
psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm0, 8 // YUYV -> UVUV
psrlw xmm1, 8 psrlw xmm1, 8
...@@ -483,12 +483,12 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, ...@@ -483,12 +483,12 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
movdqa xmm1, xmm0 movdqa xmm1, xmm0
pand xmm0, xmm5 // U pand xmm0, xmm5 // U
packuswb xmm0, xmm0 packuswb xmm0, xmm0
movq qword ptr [esi], xmm0 // NOLINT movq qword ptr [esi], xmm0
lea esi, [esi + 8] lea esi, [esi + 8]
psrlw xmm1, 8 // V psrlw xmm1, 8 // V
packuswb xmm1, xmm1 packuswb xmm1, xmm1
sub ecx, 16 sub ecx, 16
movq qword ptr [edi], xmm1 // NOLINT movq qword ptr [edi], xmm1
lea edi, [edi + 8] lea edi, [edi + 8]
jg convertloop jg convertloop
......
...@@ -283,7 +283,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y, ...@@ -283,7 +283,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
ret ret
} }
} }
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) #elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2 #define HAS_I42XTOYUY2ROW_SSE2
static void I42xToYUY2Row_SSE2(const uint8* src_y, static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
...@@ -355,7 +355,9 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y, ...@@ -355,7 +355,9 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
} }
#endif #endif
static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, static void I42xToYUY2Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) { uint8* dst_frame, int width) {
for (int x = 0; x < width - 1; x += 2) { for (int x = 0; x < width - 1; x += 2) {
dst_frame[0] = src_y[0]; dst_frame[0] = src_y[0];
...@@ -375,7 +377,9 @@ static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* ...@@ -375,7 +377,9 @@ static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8*
} }
} }
static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, static void I42xToUYVYRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) { uint8* dst_frame, int width) {
for (int x = 0; x < width - 1; x += 2) { for (int x = 0; x < width - 1; x += 2) {
dst_frame[0] = src_u[0]; dst_frame[0] = src_u[0];
...@@ -406,7 +410,7 @@ static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* ...@@ -406,7 +410,7 @@ static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8*
#endif #endif
#ifdef LIBYUV_LITTLE_ENDIAN #ifdef LIBYUV_LITTLE_ENDIAN
#define WRITEWORD(p, v) (*((uint32*) (p))) = v #define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v
#else #else
static inline void WRITEWORD(uint8* p, uint32 v) { static inline void WRITEWORD(uint8* p, uint32 v) {
...@@ -453,8 +457,8 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, ...@@ -453,8 +457,8 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame; dst_stride_frame = -dst_stride_frame;
} }
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width); const uint8* src_v, uint8* dst_frame, int width) =
I42xToYUY2Row = I42xToYUY2Row_C; I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2) #if defined(HAS_I42XTOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
...@@ -489,8 +493,8 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, ...@@ -489,8 +493,8 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame; dst_stride_frame = -dst_stride_frame;
} }
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width); const uint8* src_v, uint8* dst_frame, int width) =
I42xToYUY2Row = I42xToYUY2Row_C; I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2) #if defined(HAS_I42XTOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
...@@ -531,8 +535,8 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, ...@@ -531,8 +535,8 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame; dst_stride_frame = -dst_stride_frame;
} }
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width); const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow = I42xToUYVYRow_C; I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2) #if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
...@@ -567,8 +571,8 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, ...@@ -567,8 +571,8 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame; dst_stride_frame = -dst_stride_frame;
} }
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width); const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow = I42xToUYVYRow_C; I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2) #if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
...@@ -598,7 +602,7 @@ int I420ToV210(const uint8* src_y, int src_stride_y, ...@@ -598,7 +602,7 @@ int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame, uint8* dst_frame, int dst_stride_frame,
int width, int height) { int width, int height) {
if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required if (width * 16 / 6 > kMaxStride || // Row buffer of V210 is required.
src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1; return -1;
} }
...@@ -614,8 +618,8 @@ int I420ToV210(const uint8* src_y, int src_stride_y, ...@@ -614,8 +618,8 @@ int I420ToV210(const uint8* src_y, int src_stride_y,
UYVYToV210Row = UYVYToV210Row_C; UYVYToV210Row = UYVYToV210Row_C;
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width); const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow = I42xToUYVYRow_C; I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2) #if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
...@@ -658,14 +662,14 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, ...@@ -658,14 +662,14 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_Any_NEON; I420ToARGBRow = I420ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I420ToARGBRow = I420ToARGBRow_NEON; I420ToARGBRow = I420ToARGBRow_NEON;
} }
} else }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_Any_SSSE3; I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
...@@ -673,11 +677,9 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, ...@@ -673,11 +677,9 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I420ToARGBRow = I420ToARGBRow_SSSE3;
} }
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, dst_argb, width); I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
...@@ -706,14 +708,14 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, ...@@ -706,14 +708,14 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToBGRARow_C;
#if defined(HAS_I420TOBGRAROW_NEON) #if defined(HAS_I420TOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToBGRARow = I420ToBGRARow_Any_NEON; I420ToBGRARow = I420ToBGRARow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I420ToBGRARow = I420ToBGRARow_NEON; I420ToBGRARow = I420ToBGRARow_NEON;
} }
} else }
#elif defined(HAS_I420TOBGRAROW_SSSE3) #elif defined(HAS_I420TOBGRAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToBGRARow = I420ToBGRARow_Any_SSSE3; I420ToBGRARow = I420ToBGRARow_Any_SSSE3;
...@@ -721,11 +723,9 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, ...@@ -721,11 +723,9 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
I420ToBGRARow = I420ToBGRARow_SSSE3; I420ToBGRARow = I420ToBGRARow_SSSE3;
} }
} else
#endif
{
I420ToBGRARow = I420ToBGRARow_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width); I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
dst_bgra += dst_stride_bgra; dst_bgra += dst_stride_bgra;
...@@ -754,14 +754,14 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, ...@@ -754,14 +754,14 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToABGRRow_C;
#if defined(HAS_I420TOABGRROW_NEON) #if defined(HAS_I420TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToABGRRow = I420ToABGRRow_Any_NEON; I420ToABGRRow = I420ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I420ToABGRRow = I420ToABGRRow_NEON; I420ToABGRRow = I420ToABGRRow_NEON;
} }
} else }
#elif defined(HAS_I420TOABGRROW_SSSE3) #elif defined(HAS_I420TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToABGRRow = I420ToABGRRow_Any_SSSE3; I420ToABGRRow = I420ToABGRRow_Any_SSSE3;
...@@ -769,11 +769,9 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, ...@@ -769,11 +769,9 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
I420ToABGRRow = I420ToABGRRow_SSSE3; I420ToABGRRow = I420ToABGRRow_SSSE3;
} }
} else
#endif
{
I420ToABGRRow = I420ToABGRRow_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width); I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_abgr += dst_stride_abgr; dst_abgr += dst_stride_abgr;
...@@ -802,22 +800,20 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, ...@@ -802,22 +800,20 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON; I420ToARGBRow = I420ToARGBRow_NEON;
} else }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
} }
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB24Row_C;
#if defined(HAS_ARGBTORGB24ROW_SSSE3) #if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
...@@ -825,11 +821,8 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, ...@@ -825,11 +821,8 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
} }
} else
#endif
{
ARGBToRGB24Row = ARGBToRGB24Row_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); I420ToARGBRow(src_y, src_u, src_v, row, width);
...@@ -860,22 +853,20 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, ...@@ -860,22 +853,20 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON; I420ToARGBRow = I420ToARGBRow_NEON;
} else }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
} }
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRAWRow_C;
#if defined(HAS_ARGBTORAWROW_SSSE3) #if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3; ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
...@@ -883,11 +874,8 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, ...@@ -883,11 +874,8 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3; ARGBToRAWRow = ARGBToRAWRow_SSSE3;
} }
} else
#endif
{
ARGBToRAWRow = ARGBToRAWRow_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); I420ToARGBRow(src_y, src_u, src_v, row, width);
...@@ -918,33 +906,28 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -918,33 +906,28 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON; I420ToARGBRow = I420ToARGBRow_NEON;
} else }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
} }
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix); void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix) =
ARGBToRGB565Row_C;
#if defined(HAS_ARGBTORGB565ROW_SSE2) #if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
} }
} else
#endif
{
ARGBToRGB565Row = ARGBToRGB565Row_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); I420ToARGBRow(src_y, src_u, src_v, row, width);
...@@ -975,33 +958,28 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, ...@@ -975,33 +958,28 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON; I420ToARGBRow = I420ToARGBRow_NEON;
} else }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
} }
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB1555Row_C;
#if defined(HAS_ARGBTOARGB1555ROW_SSE2) #if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
} }
} else
#endif
{
ARGBToARGB1555Row = ARGBToARGB1555Row_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); I420ToARGBRow(src_y, src_u, src_v, row, width);
...@@ -1032,33 +1010,28 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, ...@@ -1032,33 +1010,28 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON; I420ToARGBRow = I420ToARGBRow_NEON;
} else }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
} }
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB4444Row_C;
#if defined(HAS_ARGBTOARGB4444ROW_SSE2) #if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
} }
} else
#endif
{
ARGBToARGB4444Row = ARGBToARGB4444Row_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); I420ToARGBRow(src_y, src_u, src_v, row, width);
...@@ -1080,7 +1053,6 @@ int ConvertFromI420(const uint8* y, int y_stride, ...@@ -1080,7 +1053,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
uint8* dst_sample, int dst_sample_stride, uint8* dst_sample, int dst_sample_stride,
int width, int height, int width, int height,
uint32 format) { uint32 format) {
if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) { if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) {
return -1; return -1;
} }
......
...@@ -48,7 +48,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb, ...@@ -48,7 +48,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
} }
} }
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) #elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
#define HAS_ARGBTOBAYERROW_SSSE3 #define HAS_ARGBTOBAYERROW_SSSE3
static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
...@@ -141,18 +141,15 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb, ...@@ -141,18 +141,15 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
void (*ARGBToBayerRow)(const uint8* src_argb, void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint8* dst_bayer, uint32 selector, int pix); uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3) #if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 4) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3; ARGBToBayerRow = ARGBToBayerRow_SSSE3;
} else
#endif
{
ARGBToBayerRow = ARGBToBayerRow_C;
} }
#endif
const int blue_index = 0; // Offsets for ARGB format const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1; const int green_index = 1;
const int red_index = 2; const int red_index = 2;
...@@ -170,7 +167,7 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb, ...@@ -170,7 +167,7 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
return 0; return 0;
} }
#define AVG(a,b) (((a) + (b)) >> 1) #define AVG(a, b) (((a) + (b)) >> 1)
static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer, static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) { uint8* dst_argb, int pix) {
...@@ -369,9 +366,10 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -369,9 +366,10 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix); uint8* dst_argb, int pix);
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix); uint8* dst_argb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
SIMD_ALIGNED(uint8 row[kMaxStride * 2]); SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
...@@ -379,19 +377,13 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -379,19 +377,13 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} else
#endif
{
ARGBToYRow = ARGBToYRow_C;
} }
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else
#endif
{
ARGBToUVRow = ARGBToUVRow_C;
} }
#endif
switch (src_fourcc_bayer) { switch (src_fourcc_bayer) {
case FOURCC_BGGR: case FOURCC_BGGR:
...@@ -456,30 +448,24 @@ int I420ToBayer(const uint8* src_y, int src_stride_y, ...@@ -456,30 +448,24 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON; I420ToARGBRow = I420ToARGBRow_NEON;
} else }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
} }
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToBayerRow)(const uint8* src_argb, void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint8* dst_bayer, uint32 selector, int pix); uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3) #if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3; ARGBToBayerRow = ARGBToBayerRow_SSSE3;
} else
#endif
{
ARGBToBayerRow = ARGBToBayerRow_C;
} }
#endif
const int blue_index = 0; // Offsets for ARGB format const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1; const int green_index = 1;
const int red_index = 2; const int red_index = 2;
......
...@@ -29,7 +29,7 @@ extern "C" { ...@@ -29,7 +29,7 @@ extern "C" {
".private_extern _" #name " \n" \ ".private_extern _" #name " \n" \
".align 4,0x90 \n" \ ".align 4,0x90 \n" \
"_" #name ": \n" "_" #name ": \n"
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__) #elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \ #define DECLARE_FUNCTION(name) \
".text \n" \ ".text \n" \
".align 4,0x90 \n" \ ".align 4,0x90 \n" \
...@@ -42,16 +42,6 @@ extern "C" { ...@@ -42,16 +42,6 @@ extern "C" {
#endif #endif
#endif #endif
typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int);
typedef void (*rotate_uv_wx8_func)(const uint8*, int,
uint8*, int,
uint8*, int, int);
typedef void (*rotate_uv_wxh_func)(const uint8*, int,
uint8*, int,
uint8*, int, int, int);
typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
#ifdef __ARM_NEON__ #ifdef __ARM_NEON__
#define HAS_MIRRORROW_NEON #define HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
...@@ -291,7 +281,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, ...@@ -291,7 +281,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
ret ret
} }
} }
#elif (defined(__i386__) || defined(__x86_64__)) && !defined(YUV_DISABLE_ASM) #elif defined(__i386__) || defined(__x86_64__) && !defined(YUV_DISABLE_ASM)
#define HAS_TRANSPOSE_WX8_SSSE3 #define HAS_TRANSPOSE_WX8_SSSE3
static void TransposeWx8_SSSE3(const uint8* src, int src_stride, static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) { uint8* dst, int dst_stride, int width) {
...@@ -501,7 +491,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride, ...@@ -501,7 +491,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
"pop %ebx \n" "pop %ebx \n"
"ret \n" "ret \n"
); );
#elif defined (__x86_64__) #elif defined(__x86_64__)
// 64 bit version has enough registers to do 16x8 to 8x16 at a time. // 64 bit version has enough registers to do 16x8 to 8x16 at a time.
#define HAS_TRANSPOSE_WX8_FAST_SSSE3 #define HAS_TRANSPOSE_WX8_FAST_SSSE3
static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride, static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
...@@ -781,45 +771,37 @@ static void TransposeWxH_C(const uint8* src, int src_stride, ...@@ -781,45 +771,37 @@ static void TransposeWxH_C(const uint8* src, int src_stride,
void TransposePlane(const uint8* src, int src_stride, void TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height) { int width, int height) {
int i = height; void (*TransposeWx8)(const uint8* src, int src_stride,
rotate_wx8_func TransposeWx8; uint8* dst, int dst_stride,
rotate_wxh_func TransposeWxH; int width) = TransposeWx8_C;
#if defined(HAS_TRANSPOSE_WX8_NEON) #if defined(HAS_TRANSPOSE_WX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeWx8_NEON; TransposeWx8 = TransposeWx8_NEON;
TransposeWxH = TransposeWxH_C; }
} else #endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_SSSE3;
}
#endif #endif
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3) #if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
TransposeWx8 = TransposeWx8_FAST_SSSE3; TransposeWx8 = TransposeWx8_FAST_SSSE3;
TransposeWxH = TransposeWxH_C;
} else
#endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_SSSE3;
TransposeWxH = TransposeWxH_C;
} else
#endif
{
TransposeWx8 = TransposeWx8_C;
TransposeWxH = TransposeWxH_C;
} }
#endif
// work across the source in 8x8 tiles // Work across the source in 8x8 tiles
int i = height;
while (i >= 8) { while (i >= 8) {
TransposeWx8(src, src_stride, dst, dst_stride, width); TransposeWx8(src, src_stride, dst, dst_stride, width);
src += 8 * src_stride; // Go down 8 rows.
src += 8 * src_stride; // go down 8 rows dst += 8; // Move over 8 columns.
dst += 8; // move over 8 columns
i -= 8; i -= 8;
} }
TransposeWxH(src, src_stride, dst, dst_stride, width, i); TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
} }
void RotatePlane90(const uint8* src, int src_stride, void RotatePlane90(const uint8* src, int src_stride,
...@@ -830,7 +812,6 @@ void RotatePlane90(const uint8* src, int src_stride, ...@@ -830,7 +812,6 @@ void RotatePlane90(const uint8* src, int src_stride,
// of the buffer and flip the sign of the source stride. // of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1); src += src_stride * (height - 1);
src_stride = -src_stride; src_stride = -src_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height); TransposePlane(src, src_stride, dst, dst_stride, width, height);
} }
...@@ -842,26 +823,17 @@ void RotatePlane270(const uint8* src, int src_stride, ...@@ -842,26 +823,17 @@ void RotatePlane270(const uint8* src, int src_stride,
// of the buffer and flip the sign of the destination stride. // of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1); dst += dst_stride * (width - 1);
dst_stride = -dst_stride; dst_stride = -dst_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height); TransposePlane(src, src_stride, dst, dst_stride, width, height);
} }
void RotatePlane180(const uint8* src, int src_stride, void RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height) { int width, int height) {
void (*MirrorRow)(const uint8* src, uint8* dst, int width); void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
#if defined(HAS_MIRRORROW_NEON) #if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
MirrorRow = MirrorRow_NEON; MirrorRow = MirrorRow_NEON;
} else }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSSE3;
} else
#endif #endif
#if defined(HAS_MIRRORROW_SSE2) #if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
...@@ -869,11 +841,16 @@ void RotatePlane180(const uint8* src, int src_stride, ...@@ -869,11 +841,16 @@ void RotatePlane180(const uint8* src, int src_stride,
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSE2; MirrorRow = MirrorRow_SSE2;
} else }
#endif #endif
{ #if defined(HAS_MIRRORROW_SSSE3)
MirrorRow = MirrorRow_C; if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSSE3;
} }
#endif
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_COPYROW_NEON) #if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) { if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) {
...@@ -915,8 +892,7 @@ static void TransposeUVWx8_C(const uint8* src, int src_stride, ...@@ -915,8 +892,7 @@ static void TransposeUVWx8_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a, uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, uint8* dst_b, int dst_stride_b,
int width) { int width) {
int i; for (int i = 0; i < width; ++i) {
for (i = 0; i < width; ++i) {
dst_a[0] = src[0 * src_stride + 0]; dst_a[0] = src[0 * src_stride + 0];
dst_b[0] = src[0 * src_stride + 1]; dst_b[0] = src[0 * src_stride + 1];
dst_a[1] = src[1 * src_stride + 0]; dst_a[1] = src[1 * src_stride + 0];
...@@ -943,9 +919,8 @@ static void TransposeUVWxH_C(const uint8* src, int src_stride, ...@@ -943,9 +919,8 @@ static void TransposeUVWxH_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a, uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, uint8* dst_b, int dst_stride_b,
int width, int height) { int width, int height) {
int i, j; for (int i = 0; i < width * 2; i += 2)
for (i = 0; i < width * 2; i += 2) for (int j = 0; j < height; ++j) {
for (j = 0; j < height; ++j) {
dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)]; dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
} }
...@@ -955,47 +930,39 @@ void TransposeUV(const uint8* src, int src_stride, ...@@ -955,47 +930,39 @@ void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a, uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, uint8* dst_b, int dst_stride_b,
int width, int height) { int width, int height) {
int i = height; void (*TransposeUVWx8)(const uint8* src, int src_stride,
rotate_uv_wx8_func TransposeWx8; uint8* dst_a, int dst_stride_a,
rotate_uv_wxh_func TransposeWxH; uint8* dst_b, int dst_stride_b,
int width) = TransposeUVWx8_C;
#if defined(HAS_TRANSPOSE_UVWX8_NEON) #if defined(HAS_TRANSPOSE_UVWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeUVWx8_NEON; TransposeUVWx8 = TransposeUVWx8_NEON;
TransposeWxH = TransposeUVWxH_C; }
} else #elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
#endif
#if defined(HAS_TRANSPOSE_UVWX8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 8) && IS_ALIGNED(width, 8) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
TransposeWx8 = TransposeUVWx8_SSE2; TransposeUVWx8 = TransposeUVWx8_SSE2;
TransposeWxH = TransposeUVWxH_C;
} else
#endif
{
TransposeWx8 = TransposeUVWx8_C;
TransposeWxH = TransposeUVWxH_C;
} }
#endif
// work through the source in 8x8 tiles // Work through the source in 8x8 tiles.
int i = height;
while (i >= 8) { while (i >= 8) {
TransposeWx8(src, src_stride, TransposeUVWx8(src, src_stride,
dst_a, dst_stride_a, dst_a, dst_stride_a,
dst_b, dst_stride_b, dst_b, dst_stride_b,
width); width);
src += 8 * src_stride; // Go down 8 rows.
src += 8 * src_stride; // go down 8 rows dst_a += 8; // Move over 8 columns.
dst_a += 8; // move over 8 columns dst_b += 8; // Move over 8 columns.
dst_b += 8; // move over 8 columns
i -= 8; i -= 8;
} }
TransposeWxH(src, src_stride, TransposeUVWxH_C(src, src_stride,
dst_a, dst_stride_a, dst_a, dst_stride_a,
dst_b, dst_stride_b, dst_b, dst_stride_b,
width, i); width, i);
} }
void RotateUV90(const uint8* src, int src_stride, void RotateUV90(const uint8* src, int src_stride,
...@@ -1031,29 +998,25 @@ void RotateUV180(const uint8* src, int src_stride, ...@@ -1031,29 +998,25 @@ void RotateUV180(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a, uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, uint8* dst_b, int dst_stride_b,
int width, int height) { int width, int height) {
mirror_uv_func MirrorRow; void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
MirrorRowUV_C;
#if defined(HAS_MIRRORROW_UV_NEON) #if defined(HAS_MIRRORROW_UV_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
MirrorRow = MirrorRowUV_NEON; MirrorRowUV = MirrorRowUV_NEON;
} else }
#endif #elif defined(HAS_MIRRORROW_UV_SSSE3)
#if defined(HAS_MIRRORROW_UV_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
MirrorRow = MirrorRowUV_SSSE3; MirrorRowUV = MirrorRowUV_SSSE3;
} else
#endif
{
MirrorRow = MirrorRowUV_C;
} }
#endif
dst_a += dst_stride_a * (height - 1); dst_a += dst_stride_a * (height - 1);
dst_b += dst_stride_b * (height - 1); dst_b += dst_stride_b * (height - 1);
for (int i = 0; i < height; ++i) { for (int i = 0; i < height; ++i) {
MirrorRow(src, dst_a, dst_b, width); MirrorRowUV(src, dst_a, dst_b, width);
src += src_stride; src += src_stride;
dst_a -= dst_stride_a; dst_a -= dst_stride_a;
dst_b -= dst_stride_b; dst_b -= dst_stride_b;
......
...@@ -471,7 +471,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -471,7 +471,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
dst_argb[3] = 255u; dst_argb[3] = 255u;
} else { } else {
*reinterpret_cast<uint32*>(dst_argb) = *reinterpret_cast<uint32*>(dst_argb) =
*reinterpret_cast<uint32*>(src_argb); *reinterpret_cast<const uint32*>(src_argb);
} }
} }
a = src_argb[4 + 3]; a = src_argb[4 + 3];
...@@ -489,7 +489,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -489,7 +489,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
dst_argb[4 + 3] = 255u; dst_argb[4 + 3] = 255u;
} else { } else {
*reinterpret_cast<uint32*>(dst_argb + 4) = *reinterpret_cast<uint32*>(dst_argb + 4) =
*reinterpret_cast<uint32*>(src_argb + 4); *reinterpret_cast<const uint32*>(src_argb + 4);
} }
} }
src_argb += 8; src_argb += 8;
...@@ -512,7 +512,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -512,7 +512,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
dst_argb[3] = 255u; dst_argb[3] = 255u;
} else { } else {
*reinterpret_cast<uint32*>(dst_argb) = *reinterpret_cast<uint32*>(dst_argb) =
*reinterpret_cast<uint32*>(src_argb); *reinterpret_cast<const uint32*>(src_argb);
} }
} }
} }
......
...@@ -105,7 +105,7 @@ CONST uvec8 kShuffleMaskARGBToRGB24 = { ...@@ -105,7 +105,7 @@ CONST uvec8 kShuffleMaskARGBToRGB24 = {
// Shuffle table for converting ARGB to RAW. // Shuffle table for converting ARGB to RAW.
CONST uvec8 kShuffleMaskARGBToRAW = { CONST uvec8 kShuffleMaskARGBToRAW = {
2u, 1u,0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
}; };
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
...@@ -1728,6 +1728,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, ...@@ -1728,6 +1728,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
); );
} }
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
uint8* dst_y, int pix) { uint8* dst_y, int pix) {
asm volatile ( asm volatile (
......
...@@ -96,7 +96,7 @@ static const uvec8 kShuffleMaskARGBToRGB24 = { ...@@ -96,7 +96,7 @@ static const uvec8 kShuffleMaskARGBToRGB24 = {
// Shuffle table for converting ARGB to RAW. // Shuffle table for converting ARGB to RAW.
static const uvec8 kShuffleMaskARGBToRAW = { static const uvec8 kShuffleMaskARGBToRAW = {
2u, 1u,0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
}; };
__declspec(naked) __declspec(naked)
...@@ -1252,7 +1252,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; ...@@ -1252,7 +1252,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm psubw xmm1, kUVBiasG \ __asm psubw xmm1, kUVBiasG \
__asm psubw xmm2, kUVBiasR \ __asm psubw xmm2, kUVBiasR \
/* Step 2: Find Y contribution to 8 R,G,B values */ \ /* Step 2: Find Y contribution to 8 R,G,B values */ \
__asm movq xmm3, qword ptr [eax] \ __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
__asm lea eax, [eax + 8] \ __asm lea eax, [eax + 8] \
__asm punpcklbw xmm3, xmm4 \ __asm punpcklbw xmm3, xmm4 \
__asm psubsw xmm3, kYSub16 \ __asm psubsw xmm3, kYSub16 \
......
...@@ -269,14 +269,14 @@ static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride, ...@@ -269,14 +269,14 @@ static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride,
} }
#define HAS_SCALEROWDOWN38_NEON #define HAS_SCALEROWDOWN38_NEON
const uint8 shuf38[16] __attribute__ ((aligned(16))) = const uvec8 shuf38 =
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
const uint8 shuf38_2[16] __attribute__ ((aligned(16))) = const uvec8 shuf38_2 =
{ 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 }; { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
const unsigned short mult38_div6[8] __attribute__ ((aligned(16))) = const vec16 mult38_div6 =
{ 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 }; 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = const vec16 mult38_div9 =
{ 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
...@@ -566,11 +566,11 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr, ...@@ -566,11 +566,11 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr,
*/ */
// Constants for SSE2 code // Constants for SSE2 code
#elif (defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) && \ #elif defined(_M_IX86) || defined(__i386__) || defined(__x86_64__) && \
!defined(YUV_DISABLE_ASM) !defined(YUV_DISABLE_ASM)
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define TALIGN16(t, var) __declspec(align(16)) t _ ## var #define TALIGN16(t, var) __declspec(align(16)) t _ ## var
#elif (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && \ #elif defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__) && \
defined(__i386__) defined(__i386__)
#define TALIGN16(t, var) t var __attribute__((aligned(16))) #define TALIGN16(t, var) t var __attribute__((aligned(16)))
#else #else
...@@ -583,7 +583,7 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr, ...@@ -583,7 +583,7 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr,
".private_extern _" #name " \n" \ ".private_extern _" #name " \n" \
".align 4,0x90 \n" \ ".align 4,0x90 \n" \
"_" #name ": \n" "_" #name ": \n"
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__) #elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \ #define DECLARE_FUNCTION(name) \
".text \n" \ ".text \n" \
".align 4,0x90 \n" \ ".align 4,0x90 \n" \
...@@ -1547,7 +1547,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -1547,7 +1547,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
} }
} }
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) #elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
// GCC versions of row functions are verbatim conversions from Visual C. // GCC versions of row functions are verbatim conversions from Visual C.
// Generated using gcc disassembly on Visual C object file: // Generated using gcc disassembly on Visual C object file:
...@@ -2910,7 +2910,8 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr, ...@@ -2910,7 +2910,8 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr,
#endif #endif
// (1-f)a + fb can be replaced with a + f(b-a) // (1-f)a + fb can be replaced with a + f(b-a)
#define BLENDER(a, b, f) ((int)(a) + ((f) * ((int)(b) - (int)(a)) >> 16)) #define BLENDER(a, b, f) (static_cast<int>(a) + \
((f) * (static_cast<int>(b) - static_cast<int>(a)) >> 16))
static void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, static void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) { int dst_width, int x, int dx) {
...@@ -3067,24 +3068,22 @@ static void ScalePlaneDown2(int src_width, int src_height, ...@@ -3067,24 +3068,22 @@ static void ScalePlaneDown2(int src_width, int src_height,
assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2)); assert(IS_ALIGNED(src_height, 2));
void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width) =
filtering ? ScaleRowDown2Int_C : ScaleRowDown2_C;
#if defined(HAS_SCALEROWDOWN2_NEON) #if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(dst_width, 16)) { IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON; ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON;
} else }
#endif #elif defined(HAS_SCALEROWDOWN2_SSE2)
#if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 16) && IS_ALIGNED(dst_width, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Int_SSE2 : ScaleRowDown2_SSE2; ScaleRowDown2 = filtering ? ScaleRowDown2Int_SSE2 : ScaleRowDown2_SSE2;
} else
#endif
{
ScaleRowDown2 = filtering ? ScaleRowDown2Int_C : ScaleRowDown2_C;
} }
#endif
// TODO(fbarchard): Loop through source height to allow odd height. // TODO(fbarchard): Loop through source height to allow odd height.
for (int y = 0; y < dst_height; ++y) { for (int y = 0; y < dst_height; ++y) {
ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
...@@ -3107,23 +3106,21 @@ static void ScalePlaneDown4(int src_width, int src_height, ...@@ -3107,23 +3106,21 @@ static void ScalePlaneDown4(int src_width, int src_height,
assert(IS_ALIGNED(src_width, 4)); assert(IS_ALIGNED(src_width, 4));
assert(IS_ALIGNED(src_height, 4)); assert(IS_ALIGNED(src_height, 4));
void (*ScaleRowDown4)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown4)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Int_C : ScaleRowDown4_C;
#if defined(HAS_SCALEROWDOWN4_NEON) #if defined(HAS_SCALEROWDOWN4_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(dst_width, 4)) { IS_ALIGNED(dst_width, 4)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_NEON : ScaleRowDown4_NEON; ScaleRowDown4 = filtering ? ScaleRowDown4Int_NEON : ScaleRowDown4_NEON;
} else }
#endif #elif defined(HAS_SCALEROWDOWN4_SSE2)
#if defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2; ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2;
} else
#endif
{
ScaleRowDown4 = filtering ? ScaleRowDown4Int_C : ScaleRowDown4_C;
} }
#endif
for (int y = 0; y < dst_height; ++y) { for (int y = 0; y < dst_height; ++y) {
ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += (src_stride << 2); src_ptr += (src_stride << 2);
...@@ -3146,18 +3143,17 @@ static void ScalePlaneDown8(int src_width, int src_height, ...@@ -3146,18 +3143,17 @@ static void ScalePlaneDown8(int src_width, int src_height,
assert(IS_ALIGNED(src_width, 8)); assert(IS_ALIGNED(src_width, 8));
assert(IS_ALIGNED(src_height, 8)); assert(IS_ALIGNED(src_height, 8));
void (*ScaleRowDown8)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown8)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width) =
filtering && (dst_width <= kMaxOutputWidth) ?
ScaleRowDown8Int_C : ScaleRowDown8_C;
#if defined(HAS_SCALEROWDOWN8_SSE2) #if defined(HAS_SCALEROWDOWN8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 4) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2; ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2;
} else
#endif
{
ScaleRowDown8 = filtering && (dst_width <= kMaxOutputWidth) ?
ScaleRowDown8Int_C : ScaleRowDown8_C;
} }
#endif
for (int y = 0; y < dst_height; ++y) { for (int y = 0; y < dst_height; ++y) {
ScaleRowDown8(src_ptr, src_stride, dst_ptr, dst_width); ScaleRowDown8(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += (src_stride << 3); src_ptr += (src_stride << 3);
...@@ -3181,6 +3177,13 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3181,6 +3177,13 @@ static void ScalePlaneDown34(int src_width, int src_height,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void (*ScaleRowDown34_1)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown34_1)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_C;
ScaleRowDown34_1 = ScaleRowDown34_C;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_C;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_C;
}
#if defined(HAS_SCALEROWDOWN34_NEON) #if defined(HAS_SCALEROWDOWN34_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
if (!filtering) { if (!filtering) {
...@@ -3190,36 +3193,27 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3190,36 +3193,27 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_0 = ScaleRowDown34_0_Int_NEON; ScaleRowDown34_0 = ScaleRowDown34_0_Int_NEON;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_NEON; ScaleRowDown34_1 = ScaleRowDown34_1_Int_NEON;
} }
} else
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSSE3;
} }
} else
#endif #endif
#if defined(HAS_SCALEROWDOWN34_SSE2) #if defined(HAS_SCALEROWDOWN34_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSE2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && filtering) { IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && filtering) {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2; ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2; ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2;
} else }
#endif #endif
{ #if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_C; ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_C; ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
} else { } else {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_C; ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_C; ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSSE3;
} }
} }
#endif
for (int y = 0; y < dst_height - 2; y += 3) { for (int y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width); ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
...@@ -3272,6 +3266,13 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -3272,6 +3266,13 @@ static void ScalePlaneDown38(int src_width, int src_height,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void (*ScaleRowDown38_2)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown38_2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_C;
ScaleRowDown38_2 = ScaleRowDown38_C;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Int_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_C;
}
#if defined(HAS_SCALEROWDOWN38_NEON) #if defined(HAS_SCALEROWDOWN38_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
if (!filtering) { if (!filtering) {
...@@ -3281,9 +3282,8 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -3281,9 +3282,8 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Int_NEON; ScaleRowDown38_3 = ScaleRowDown38_3_Int_NEON;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_NEON; ScaleRowDown38_2 = ScaleRowDown38_2_Int_NEON;
} }
} else }
#endif #elif defined(HAS_SCALEROWDOWN38_SSSE3)
#if defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) { if (!filtering) {
...@@ -3293,17 +3293,9 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -3293,17 +3293,9 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Int_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_3_Int_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_2_Int_SSSE3;
} }
} else
#endif
{
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_C;
ScaleRowDown38_2 = ScaleRowDown38_C;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Int_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_C;
}
} }
#endif
for (int y = 0; y < dst_height - 2; y += 3) { for (int y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width); ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3; src_ptr += src_stride * 3;
...@@ -3426,23 +3418,21 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -3426,23 +3418,21 @@ static void ScalePlaneBox(int src_width, int src_height,
} else { } else {
ALIGN16(uint16 row[kMaxInputWidth]); ALIGN16(uint16 row[kMaxInputWidth]);
void (*ScaleAddRows)(const uint8* src_ptr, int src_stride, void (*ScaleAddRows)(const uint8* src_ptr, int src_stride,
uint16* dst_ptr, int src_width, int src_height); uint16* dst_ptr, int src_width, int src_height)=
ScaleAddRows_C;
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr); const uint16* src_ptr, uint8* dst_ptr);
#if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
} else
#endif
{
ScaleAddRows = ScaleAddRows_C;
}
if (dx & 0xffff) { if (dx & 0xffff) {
ScaleAddCols = ScaleAddCols2_C; ScaleAddCols = ScaleAddCols2_C;
} else { } else {
ScaleAddCols = ScaleAddCols1_C; ScaleAddCols = ScaleAddCols1_C;
} }
#if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
}
#endif
for (int j = 0; j < dst_height; ++j) { for (int j = 0; j < dst_height; ++j) {
int iy = y >> 16; int iy = y >> 16;
...@@ -3518,27 +3508,25 @@ void ScalePlaneBilinear(int src_width, int src_height, ...@@ -3518,27 +3508,25 @@ void ScalePlaneBilinear(int src_width, int src_height,
ALIGN16(uint8 row[kMaxInputWidth + 1]); ALIGN16(uint8 row[kMaxInputWidth + 1]);
void (*ScaleFilterRows)(uint8* dst_ptr, const uint8* src_ptr, void (*ScaleFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int src_stride,
int dst_width, int source_y_fraction); int dst_width, int source_y_fraction) =
ScaleFilterRows_C;
#if defined(HAS_SCALEFILTERROWS_NEON) #if defined(HAS_SCALEFILTERROWS_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleFilterRows = ScaleFilterRows_NEON; ScaleFilterRows = ScaleFilterRows_NEON;
} else }
#endif
#if defined(HAS_SCALEFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleFilterRows = ScaleFilterRows_SSSE3;
} else
#endif #endif
#if defined(HAS_SCALEFILTERROWS_SSE2) #if defined(HAS_SCALEFILTERROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleFilterRows = ScaleFilterRows_SSE2; ScaleFilterRows = ScaleFilterRows_SSE2;
} else }
#endif #endif
{ #if defined(HAS_SCALEFILTERROWS_SSSE3)
ScaleFilterRows = ScaleFilterRows_C; if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleFilterRows = ScaleFilterRows_SSSE3;
} }
#endif
int dx = (src_width << 16) / dst_width; int dx = (src_width << 16) / dst_width;
int dy = (src_height << 16) / dst_height; int dy = (src_height << 16) / dst_height;
...@@ -3645,7 +3633,7 @@ void ScalePlane(const uint8* src, int src_stride, ...@@ -3645,7 +3633,7 @@ void ScalePlane(const uint8* src, int src_stride,
// environment variable overrides for testing. // environment variable overrides for testing.
char *filter_override = getenv("LIBYUV_FILTER"); char *filter_override = getenv("LIBYUV_FILTER");
if (filter_override) { if (filter_override) {
filtering = (FilterMode)atoi(filter_override); filtering = (FilterMode)atoi(filter_override); // NOLINT
} }
#endif #endif
// Use specialized scales to improve performance for common resolutions. // Use specialized scales to improve performance for common resolutions.
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment