Commit 94602791 authored by fbarchard@google.com

Scale functions: relaxed pointer alignment checks and unrolled the scale34 and scale38 row loops

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/369004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@149 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 42831e0a
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 147 Version: 149
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -3115,8 +3115,7 @@ static void ScalePlaneDown4(int src_width, int src_height, ...@@ -3115,8 +3115,7 @@ static void ScalePlaneDown4(int src_width, int src_height,
#if defined(HAS_SCALEROWDOWN4_SSE2) #if defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
IS_ALIGNED(dst_ptr, 8) && IS_ALIGNED(dst_stride, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2; ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2;
} else } else
#endif #endif
...@@ -3149,8 +3148,7 @@ static void ScalePlaneDown8(int src_width, int src_height, ...@@ -3149,8 +3148,7 @@ static void ScalePlaneDown8(int src_width, int src_height,
#if defined(HAS_SCALEROWDOWN8_SSE2) #if defined(HAS_SCALEROWDOWN8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 4) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2; ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2;
} else } else
#endif #endif
...@@ -3182,8 +3180,7 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3182,8 +3180,7 @@ static void ScalePlaneDown34(int src_width, int src_height,
void (*ScaleRowDown34_1)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown34_1)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN34_NEON) #if defined(HAS_SCALEROWDOWN34_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
(dst_width % 24 == 0)) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_NEON; ScaleRowDown34_0 = ScaleRowDown34_NEON;
ScaleRowDown34_1 = ScaleRowDown34_NEON; ScaleRowDown34_1 = ScaleRowDown34_NEON;
...@@ -3194,10 +3191,8 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3194,10 +3191,8 @@ static void ScalePlaneDown34(int src_width, int src_height,
} else } else
#endif #endif
#if defined(HAS_SCALEROWDOWN34_SSSE3) #if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
(dst_width % 24 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 8) && IS_ALIGNED(dst_stride, 8)) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3; ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
...@@ -3208,11 +3203,8 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3208,11 +3203,8 @@ static void ScalePlaneDown34(int src_width, int src_height,
} else } else
#endif #endif
#if defined(HAS_SCALEROWDOWN34_SSE2) #if defined(HAS_SCALEROWDOWN34_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && (dst_width % 24 == 0) &&
(dst_width % 24 == 0) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && filtering) {
IS_ALIGNED(dst_stride, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8) &&
filtering) {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2; ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2; ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2;
} else } else
...@@ -3226,29 +3218,27 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -3226,29 +3218,27 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_1 = ScaleRowDown34_1_Int_C; ScaleRowDown34_1 = ScaleRowDown34_1_Int_C;
} }
} }
int src_row = 0;
for (int y = 0; y < dst_height; ++y) { for (int y = 0; y < dst_height - 2; y += 3) {
switch (src_row) { ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
case 0:
ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
break;
case 1:
ScaleRowDown34_1(src_ptr, src_stride, dst_ptr, dst_width);
break;
case 2:
ScaleRowDown34_0(src_ptr + src_stride, -src_stride,
dst_ptr, dst_width);
break;
}
++src_row;
src_ptr += src_stride; src_ptr += src_stride;
dst_ptr += dst_stride; dst_ptr += dst_stride;
if (src_row >= 3) { ScaleRowDown34_1(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += src_stride; src_ptr += src_stride;
src_row = 0; dst_ptr += dst_stride;
} ScaleRowDown34_0(src_ptr + src_stride, -src_stride,
dst_ptr, dst_width);
src_ptr += src_stride * 2;
dst_ptr += dst_stride;
}
if ((dst_height % 3) >= 1) {
ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += src_stride;
dst_ptr += dst_stride;
}
if ((dst_height % 3) >= 2) {
ScaleRowDown34_1(src_ptr, src_stride, dst_ptr, dst_width);
} }
} }
...@@ -3271,8 +3261,7 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -3271,8 +3261,7 @@ static void ScalePlaneDown38(int src_width, int src_height,
void (*ScaleRowDown38_2)(const uint8* src_ptr, int src_stride, void (*ScaleRowDown38_2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN38_NEON) #if defined(HAS_SCALEROWDOWN38_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
(dst_width % 12 == 0)) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_NEON; ScaleRowDown38_3 = ScaleRowDown38_NEON;
ScaleRowDown38_2 = ScaleRowDown38_NEON; ScaleRowDown38_2 = ScaleRowDown38_NEON;
...@@ -3283,10 +3272,8 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -3283,10 +3272,8 @@ static void ScalePlaneDown38(int src_width, int src_height,
} else } else
#endif #endif
#if defined(HAS_SCALEROWDOWN38_SSSE3) #if defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
(dst_width % 24 == 0) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
IS_ALIGNED(dst_stride, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(dst_ptr, 8)) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
...@@ -3305,24 +3292,25 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -3305,24 +3292,25 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Int_C; ScaleRowDown38_2 = ScaleRowDown38_2_Int_C;
} }
} }
int src_row = 0; for (int y = 0; y < dst_height - 2; y += 3) {
for (int y = 0; y < dst_height; ++y) { ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
switch (src_row) { src_ptr += src_stride * 3;
case 0: dst_ptr += dst_stride;
case 1: ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += src_stride * 3;
src_ptr += src_stride * 3; dst_ptr += dst_stride;
++src_row; ScaleRowDown38_2(src_ptr, src_stride, dst_ptr, dst_width);
break; src_ptr += src_stride * 2;
case 2:
ScaleRowDown38_2(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += src_stride * 2;
src_row = 0;
break;
}
dst_ptr += dst_stride; dst_ptr += dst_stride;
} }
if ((dst_height % 3) >= 1) {
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
dst_ptr += dst_stride;
}
if ((dst_height % 3) >= 2) {
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
}
} }
static __inline uint32 SumBox(int iboxwidth, int iboxheight, static __inline uint32 SumBox(int iboxwidth, int iboxheight,
...@@ -3433,8 +3421,7 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -3433,8 +3421,7 @@ static void ScalePlaneBox(int src_width, int src_height,
const uint16* src_ptr, uint8* dst_ptr); const uint16* src_ptr, uint8* dst_ptr);
#if defined(HAS_SCALEADDROWS_SSE2) #if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
IS_ALIGNED(src_width, 16)) {
ScaleAddRows = ScaleAddRows_SSE2; ScaleAddRows = ScaleAddRows_SSE2;
} else } else
#endif #endif
...@@ -3527,22 +3514,19 @@ void ScalePlaneBilinear(int src_width, int src_height, ...@@ -3527,22 +3514,19 @@ void ScalePlaneBilinear(int src_width, int src_height,
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int dx); int dst_width, int dx);
#if defined(HAS_SCALEFILTERROWS_NEON) #if defined(HAS_SCALEFILTERROWS_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON)) {
IS_ALIGNED(src_width, 16)) {
ScaleFilterRows = ScaleFilterRows_NEON; ScaleFilterRows = ScaleFilterRows_NEON;
} else } else
#endif #endif
#if defined(HAS_SCALEFILTERROWS_SSSE3) #if defined(HAS_SCALEFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
IS_ALIGNED(src_width, 16)) {
ScaleFilterRows = ScaleFilterRows_SSSE3; ScaleFilterRows = ScaleFilterRows_SSSE3;
} else } else
#endif #endif
#if defined(HAS_SCALEFILTERROWS_SSE2) #if defined(HAS_SCALEFILTERROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
IS_ALIGNED(src_width, 16)) {
ScaleFilterRows = ScaleFilterRows_SSE2; ScaleFilterRows = ScaleFilterRows_SSE2;
} else } else
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment