Commit 1c5136d0 authored by fbarchard@google.com

use IS_ALIGNED for width and stride to avoid mod that generates 6 instructions

BUG=none
TEST=disassemble to confirm smaller/simpler alignment checks
Review URL: http://webrtc-codereview.appspot.com/287001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@91 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 3c00cf04
...@@ -204,7 +204,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, ...@@ -204,7 +204,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int count); const uint8* src_b, int count);
#if defined(HAS_SUMSQUAREERROR_NEON) #if defined(HAS_SUMSQUAREERROR_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
(width % 16 == 0)) { IS_ALIGNED(width, 16)) {
SumSquareError = SumSquareError_NEON; SumSquareError = SumSquareError_NEON;
} else } else
#endif #endif
......
...@@ -435,9 +435,9 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, ...@@ -435,9 +435,9 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width); const uint8* src_v, uint8* dst_frame, int width);
#if defined(HAS_I42XTOYUY2ROW_SSE2) #if defined(HAS_I42XTOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && (src_stride_y % 16 == 0) && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && (dst_stride_frame % 16 == 0)) { IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToYUY2Row = I42xToYUY2Row_SSE2; I42xToYUY2Row = I42xToYUY2Row_SSE2;
} else } else
#endif #endif
...@@ -472,9 +472,9 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, ...@@ -472,9 +472,9 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width); const uint8* src_v, uint8* dst_frame, int width);
#if defined(HAS_I42XTOYUY2ROW_SSE2) #if defined(HAS_I42XTOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && (src_stride_y % 16 == 0) && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && (dst_stride_frame % 16 == 0)) { IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToYUY2Row = I42xToYUY2Row_SSE2; I42xToYUY2Row = I42xToYUY2Row_SSE2;
} else } else
#endif #endif
...@@ -721,9 +721,9 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame, ...@@ -721,9 +721,9 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} else } else
#endif #endif
...@@ -732,10 +732,10 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame, ...@@ -732,10 +732,10 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -774,9 +774,9 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame, ...@@ -774,9 +774,9 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_BGRATOYROW_SSSE3) #if defined(HAS_BGRATOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = BGRAToYRow_SSSE3; ARGBToYRow = BGRAToYRow_SSSE3;
} else } else
#endif #endif
...@@ -785,10 +785,10 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame, ...@@ -785,10 +785,10 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame,
} }
#if defined(HAS_BGRATOUVROW_SSSE3) #if defined(HAS_BGRATOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = BGRAToUVRow_SSSE3; ARGBToUVRow = BGRAToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -827,9 +827,9 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame, ...@@ -827,9 +827,9 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ABGRTOYROW_SSSE3) #if defined(HAS_ABGRTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ABGRToYRow_SSSE3; ARGBToYRow = ABGRToYRow_SSSE3;
} else } else
#endif #endif
...@@ -838,10 +838,10 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame, ...@@ -838,10 +838,10 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame,
} }
#if defined(HAS_ABGRTOUVROW_SSSE3) #if defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ABGRToUVRow_SSSE3; ARGBToUVRow = ABGRToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -880,9 +880,9 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -880,9 +880,9 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RGB24TOYROW_SSSE3) #if defined(HAS_RGB24TOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = RGB24ToYRow_SSSE3; ARGBToYRow = RGB24ToYRow_SSSE3;
} else } else
#endif #endif
...@@ -891,10 +891,10 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -891,10 +891,10 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
} }
#if defined(HAS_RGB24TOUVROW_SSSE3) #if defined(HAS_RGB24TOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = RGB24ToUVRow_SSSE3; ARGBToUVRow = RGB24ToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -919,10 +919,10 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -919,10 +919,10 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
} }
int RAWToI420(const uint8* src_frame, int src_stride_frame, int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_frame = src_frame + (height - 1) * src_stride_frame; src_frame = src_frame + (height - 1) * src_stride_frame;
...@@ -933,9 +933,9 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame, ...@@ -933,9 +933,9 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RAWTOYROW_SSSE3) #if defined(HAS_RAWTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = RAWToYRow_SSSE3; ARGBToYRow = RAWToYRow_SSSE3;
} else } else
#endif #endif
...@@ -944,10 +944,10 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame, ...@@ -944,10 +944,10 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
} }
#if defined(HAS_RAWTOUVROW_SSSE3) #if defined(HAS_RAWTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = RAWToUVRow_SSSE3; ARGBToUVRow = RAWToUVRow_SSSE3;
} else } else
#endif #endif
......
...@@ -112,9 +112,9 @@ int ARGBToBayerRGB(const uint8* src_rgb, int src_stride_rgb, ...@@ -112,9 +112,9 @@ int ARGBToBayerRGB(const uint8* src_rgb, int src_stride_rgb,
uint8* dst_bayer, uint32 selector, int pix); uint8* dst_bayer, uint32 selector, int pix);
#if defined(HAS_ARGBTOBAYERROW_SSSE3) #if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 4 == 0) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_rgb, 16) && (src_stride_rgb % 16 == 0) && IS_ALIGNED(src_rgb, 16) && IS_ALIGNED(src_stride_rgb, 16) &&
IS_ALIGNED(dst_bayer, 4) && (dst_stride_bayer % 4 == 0)) { IS_ALIGNED(dst_bayer, 4) && IS_ALIGNED(dst_stride_bayer, 4)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3; ARGBToBayerRow = ARGBToBayerRow_SSSE3;
} else } else
#endif #endif
...@@ -366,9 +366,9 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -366,9 +366,9 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) && IS_ALIGNED(row, 16) && IS_ALIGNED(kMaxStride, 16) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} else } else
#endif #endif
...@@ -377,10 +377,10 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -377,10 +377,10 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) && IS_ALIGNED(row, 16) && IS_ALIGNED(kMaxStride, 16) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
......
This diff is collapsed.
...@@ -782,18 +782,18 @@ void TransposePlane(const uint8* src, int src_stride, ...@@ -782,18 +782,18 @@ void TransposePlane(const uint8* src, int src_stride,
#endif #endif
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3) #if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && (src_stride % 16 == 0) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 8) && (dst_stride % 8 == 0)) { IS_ALIGNED(dst, 8) && IS_ALIGNED(dst_stride, 8)) {
TransposeWx8 = TransposeWx8_FAST_SSSE3; TransposeWx8 = TransposeWx8_FAST_SSSE3;
TransposeWxH = TransposeWxH_C; TransposeWxH = TransposeWxH_C;
} else } else
#endif #endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3) #if defined(HAS_TRANSPOSE_WX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 8 == 0) && IS_ALIGNED(width, 8) &&
IS_ALIGNED(src, 8) && (src_stride % 8 == 0) && IS_ALIGNED(src, 8) && IS_ALIGNED(src_stride, 8) &&
IS_ALIGNED(dst, 8) && (dst_stride % 8 == 0)) { IS_ALIGNED(dst, 8) && IS_ALIGNED(dst_stride, 8)) {
TransposeWx8 = TransposeWx8_SSSE3; TransposeWx8 = TransposeWx8_SSSE3;
TransposeWxH = TransposeWxH_C; TransposeWxH = TransposeWxH_C;
} else } else
...@@ -850,9 +850,9 @@ void RotatePlane180(const uint8* src, int src_stride, ...@@ -850,9 +850,9 @@ void RotatePlane180(const uint8* src, int src_stride,
#endif #endif
#if defined(HAS_REVERSE_ROW_SSSE3) #if defined(HAS_REVERSE_ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && (src_stride % 16 == 0) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && (dst_stride % 16 == 0)) { IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ReverseRow = ReverseRow_SSSE3; ReverseRow = ReverseRow_SSSE3;
} else } else
#endif #endif
...@@ -926,10 +926,10 @@ void TransposeUV(const uint8* src, int src_stride, ...@@ -926,10 +926,10 @@ void TransposeUV(const uint8* src, int src_stride,
#endif #endif
#if defined(HAS_TRANSPOSE_UVWX8_SSE2) #if defined(HAS_TRANSPOSE_UVWX8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(width % 8 == 0) && IS_ALIGNED(width, 8) &&
IS_ALIGNED(src, 16) && (src_stride % 16 == 0) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_a, 8) && (dst_stride_a % 8 == 0) && IS_ALIGNED(dst_a, 8) && IS_ALIGNED(dst_stride_a, 8) &&
IS_ALIGNED(dst_b, 8) && (dst_stride_b % 8 == 0)) { IS_ALIGNED(dst_b, 8) && IS_ALIGNED(dst_stride_b, 8)) {
TransposeWx8 = TransposeUVWx8_SSE2; TransposeWx8 = TransposeUVWx8_SSE2;
TransposeWxH = TransposeUVWxH_C; TransposeWxH = TransposeUVWxH_C;
} else } else
...@@ -1076,10 +1076,10 @@ void RotateUV180(const uint8* src, int src_stride, ...@@ -1076,10 +1076,10 @@ void RotateUV180(const uint8* src, int src_stride,
#endif #endif
#if defined(HAS_REVERSE_ROW_UV_SSSE3) #if defined(HAS_REVERSE_ROW_UV_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && (src_stride % 16 == 0) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_a, 8) && (dst_stride_a % 8 == 0) && IS_ALIGNED(dst_a, 8) && IS_ALIGNED(dst_stride_a, 8) &&
IS_ALIGNED(dst_b, 8) && (dst_stride_b % 8 == 0) ) { IS_ALIGNED(dst_b, 8) && IS_ALIGNED(dst_stride_b, 8) ) {
ReverseRow = ReverseRowUV_SSSE3; ReverseRow = ReverseRowUV_SSSE3;
} else } else
#endif #endif
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#define kMaxStride (2048 * 4) #define kMaxStride (2048 * 4)
/* Evaluates to nonzero when p (a pointer or an integer such as a width or
 * stride) is a multiple of a. The test masks p with (a - 1) instead of using
 * the % operator, so a must be a power of two; for any other divisor
 * (e.g. 24) keep an explicit `% a == 0` check. */
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR) #if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR)
#define YUV_DISABLE_ASM #define YUV_DISABLE_ASM
......
...@@ -2935,8 +2935,7 @@ static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride, ...@@ -2935,8 +2935,7 @@ static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
assert((dst_width % 3 == 0) && (dst_width > 0)); assert((dst_width % 3 == 0) && (dst_width > 0));
ALIGN16(uint8 row[kMaxInputWidth]); ALIGN16(uint8 row[kMaxInputWidth]);
ScaleFilterRows_SSE2(row, src_ptr, src_stride, dst_width * 4 / 3, ScaleFilterRows_SSE2(row, src_ptr, src_stride, dst_width * 4 / 3, 256 / 4);
256 / 4);
ScaleFilterCols34_C(dst_ptr, row, dst_width); ScaleFilterCols34_C(dst_ptr, row, dst_width);
} }
...@@ -3057,22 +3056,22 @@ static void ScalePlaneDown2(int src_width, int src_height, ...@@ -3057,22 +3056,22 @@ static void ScalePlaneDown2(int src_width, int src_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr, const uint8* src_ptr, uint8* dst_ptr,
FilterMode filtering) { FilterMode filtering) {
assert(src_width % 2 == 0); assert(IS_ALIGNED(src_width, 2));
assert(src_height % 2 == 0); assert(IS_ALIGNED(src_height, 2));
void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN2_NEON) #if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
(dst_width % 16 == 0)) { IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON; ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON;
} else } else
#endif #endif
#if defined(HAS_SCALEROWDOWN2_SSE2) #if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(dst_width % 16 == 0) && IS_ALIGNED(dst_width, 16) &&
IS_ALIGNED(src_ptr, 16) && (src_stride % 16 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && (dst_stride % 16 == 0)) { IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Int_SSE2 : ScaleRowDown2_SSE2; ScaleRowDown2 = filtering ? ScaleRowDown2Int_SSE2 : ScaleRowDown2_SSE2;
} else } else
#endif #endif
...@@ -3098,22 +3097,22 @@ static void ScalePlaneDown4(int src_width, int src_height, ...@@ -3098,22 +3097,22 @@ static void ScalePlaneDown4(int src_width, int src_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr, const uint8* src_ptr, uint8* dst_ptr,
FilterMode filtering) { FilterMode filtering) {
assert(src_width % 4 == 0); assert(IS_ALIGNED(src_width, 4));
assert(src_height % 4 == 0); assert(IS_ALIGNED(src_height, 4));
void (*ScaleRowDown4)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown4)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN4_NEON) #if defined(HAS_SCALEROWDOWN4_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
(dst_width % 4 == 0)) { IS_ALIGNED(dst_width, 4)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_NEON : ScaleRowDown4_NEON; ScaleRowDown4 = filtering ? ScaleRowDown4Int_NEON : ScaleRowDown4_NEON;
} else } else
#endif #endif
#if defined(HAS_SCALEROWDOWN4_SSE2) #if defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(dst_width % 8 == 0) && (src_stride % 16 == 0) && IS_ALIGNED(dst_width, 8) &&
(dst_stride % 8 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8)) { IS_ALIGNED(dst_ptr, 8) && IS_ALIGNED(dst_stride, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2; ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2;
} else } else
#endif #endif
...@@ -3140,15 +3139,15 @@ static void ScalePlaneDown8(int src_width, int src_height, ...@@ -3140,15 +3139,15 @@ static void ScalePlaneDown8(int src_width, int src_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr, const uint8* src_ptr, uint8* dst_ptr,
FilterMode filtering) { FilterMode filtering) {
assert(src_width % 8 == 0); assert(IS_ALIGNED(src_width, 8));
assert(src_height % 8 == 0); assert(IS_ALIGNED(src_height, 8));
void (*ScaleRowDown8)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown8)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN8_SSE2) #if defined(HAS_SCALEROWDOWN8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(dst_width % 16 == 0) && dst_width <= kMaxOutputWidth && IS_ALIGNED(dst_width, 4) &&
(src_stride % 16 == 0) && (dst_stride % 16 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 16)) { IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2; ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2;
} else } else
#endif #endif
...@@ -3193,9 +3192,9 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3193,9 +3192,9 @@ static void ScalePlaneDown34(int src_width, int src_height,
#endif #endif
#if defined(HAS_SCALEROWDOWN34_SSSE3) #if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(dst_width % 24 == 0) && (src_stride % 16 == 0) && (dst_width % 24 == 0) &&
(dst_stride % 8 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8)) { IS_ALIGNED(dst_ptr, 8) && IS_ALIGNED(dst_stride, 8)) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3; ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
...@@ -3207,8 +3206,8 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3207,8 +3206,8 @@ static void ScalePlaneDown34(int src_width, int src_height,
#endif #endif
#if defined(HAS_SCALEROWDOWN34_SSE2) #if defined(HAS_SCALEROWDOWN34_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(dst_width % 24 == 0) && (src_stride % 16 == 0) && (dst_width % 24 == 0) && IS_ALIGNED(src_stride, 16) &&
(dst_stride % 8 == 0) && IS_ALIGNED(dst_stride, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8) &&
filtering) { filtering) {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2; ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2;
...@@ -3282,8 +3281,8 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -3282,8 +3281,8 @@ static void ScalePlaneDown38(int src_width, int src_height,
#endif #endif
#if defined(HAS_SCALEROWDOWN38_SSSE3) #if defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(dst_width % 24 == 0) && (src_stride % 16 == 0) && (dst_width % 24 == 0) && IS_ALIGNED(src_stride, 16) &&
(dst_stride % 8 == 0) && IS_ALIGNED(dst_stride, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8)) { IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8)) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
...@@ -3403,7 +3402,7 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -3403,7 +3402,7 @@ static void ScalePlaneBox(int src_width, int src_height,
assert(dst_height > 0); assert(dst_height > 0);
int dy = (src_height << 16) / dst_height; int dy = (src_height << 16) / dst_height;
int dx = (src_width << 16) / dst_width; int dx = (src_width << 16) / dst_width;
if ((src_width % 16 != 0) || (src_width > kMaxInputWidth) || if (!IS_ALIGNED(src_width, 16) || (src_width > kMaxInputWidth) ||
dst_height * 2 > src_height) { dst_height * 2 > src_height) {
uint8* dst = dst_ptr; uint8* dst = dst_ptr;
int dy = (src_height << 16) / dst_height; int dy = (src_height << 16) / dst_height;
...@@ -3431,8 +3430,8 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -3431,8 +3430,8 @@ static void ScalePlaneBox(int src_width, int src_height,
const uint16* src_ptr, uint8* dst_ptr); const uint16* src_ptr, uint8* dst_ptr);
#if defined(HAS_SCALEADDROWS_SSE2) #if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(src_stride % 16 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) &&
(src_width % 16) == 0) { IS_ALIGNED(src_width, 16)) {
ScaleAddRows = ScaleAddRows_SSE2; ScaleAddRows = ScaleAddRows_SSE2;
} else } else
#endif #endif
...@@ -3513,7 +3512,7 @@ static void ScalePlaneBilinear(int src_width, int src_height, ...@@ -3513,7 +3512,7 @@ static void ScalePlaneBilinear(int src_width, int src_height,
assert(dst_height > 0); assert(dst_height > 0);
int dy = (src_height << 16) / dst_height; int dy = (src_height << 16) / dst_height;
int dx = (src_width << 16) / dst_width; int dx = (src_width << 16) / dst_width;
if ((src_width % 8 != 0) || (src_width > kMaxInputWidth)) { if (!IS_ALIGNED(src_width, 8) || (src_width > kMaxInputWidth)) {
ScalePlaneBilinearSimple(src_width, src_height, dst_width, dst_height, ScalePlaneBilinearSimple(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src_ptr, dst_ptr); src_stride, dst_stride, src_ptr, dst_ptr);
...@@ -3526,15 +3525,15 @@ static void ScalePlaneBilinear(int src_width, int src_height, ...@@ -3526,15 +3525,15 @@ static void ScalePlaneBilinear(int src_width, int src_height,
int dst_width, int dx); int dst_width, int dx);
#if defined(HAS_SCALEFILTERROWS_SSSE3) #if defined(HAS_SCALEFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
(src_stride % 16 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) &&
(src_width % 16) == 0) { IS_ALIGNED(src_width, 16)) {
ScaleFilterRows = ScaleFilterRows_SSSE3; ScaleFilterRows = ScaleFilterRows_SSSE3;
} else } else
#endif #endif
#if defined(HAS_SCALEFILTERROWS_SSE2) #if defined(HAS_SCALEFILTERROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
(src_stride % 16 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) &&
(src_width % 16) == 0) { IS_ALIGNED(src_width, 16)) {
ScaleFilterRows = ScaleFilterRows_SSE2; ScaleFilterRows = ScaleFilterRows_SSE2;
} else } else
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment