Commit 044f914c authored by fbarchard@google.com's avatar fbarchard@google.com

Change scale to unaligned movdqu.

BUG=365
TESTED=scale unittests
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/22879004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1101 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 9c4c8218
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1099
Version: 1101
License: BSD
License File: LICENSE
......
......@@ -1748,12 +1748,6 @@ void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1099
#define LIBYUV_VERSION 1101
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -1798,12 +1798,7 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
......@@ -1811,12 +1806,7 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
......
......@@ -579,11 +579,11 @@ NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2,
InterpolateRow_C, 1, 1, 32)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
NANY(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3,
InterpolateRow_C, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_SSE2
NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
NANY(InterpolateRow_Any_SSE2, InterpolateRow_SSE2,
InterpolateRow_C, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
......
......@@ -6322,7 +6322,6 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
}
#endif // HAS_INTERPOLATEROW_AVX2
#ifdef HAS_INTERPOLATEROW_SSSE3
// Bilinear filter 16x2 -> 16x1
__declspec(naked) __declspec(align(16))
void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
......@@ -6356,225 +6355,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
punpcklwd xmm5, xmm5
pshufd xmm5, xmm5, 0
align 4
xloop:
movdqa xmm0, [esi]
movdqa xmm2, [esi + edx]
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2
punpckhbw xmm1, xmm2
pmaddubsw xmm0, xmm5
pmaddubsw xmm1, xmm5
psrlw xmm0, 7
psrlw xmm1, 7
packuswb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop
jmp xloop99
// Blend 25 / 75.
align 4
xloop25:
movdqa xmm0, [esi]
movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1
pavgb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop25
jmp xloop99
// Blend 50 / 50.
align 4
xloop50:
movdqa xmm0, [esi]
movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop50
jmp xloop99
// Blend 75 / 25.
align 4
xloop75:
movdqa xmm1, [esi]
movdqa xmm0, [esi + edx]
pavgb xmm0, xmm1
pavgb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop75
jmp xloop99
// Blend 100 / 0 - Copy row unchanged.
align 4
xloop100:
movdqa xmm0, [esi]
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop100
xloop99:
pop edi
pop esi
ret
}
}
#endif // HAS_INTERPOLATEROW_SSSE3
#ifdef HAS_INTERPOLATEROW_SSE2
// Bilinear filter 16x2 -> 16x1
__declspec(naked) __declspec(align(16))
void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
__asm {
push esi
push edi
mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
sub edi, esi
// Dispatch to specialized filters if applicable.
cmp eax, 0
je xloop100 // 0 / 256. Blend 100 / 0.
cmp eax, 64
je xloop75 // 64 / 256 is 0.25. Blend 75 / 25.
cmp eax, 128
je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
cmp eax, 192
je xloop25 // 192 / 256 is 0.75. Blend 25 / 75.
movd xmm5, eax // xmm5 = y fraction
punpcklbw xmm5, xmm5
psrlw xmm5, 1
punpcklwd xmm5, xmm5
punpckldq xmm5, xmm5
punpcklqdq xmm5, xmm5
pxor xmm4, xmm4
align 4
xloop:
movdqa xmm0, [esi] // row0
movdqa xmm2, [esi + edx] // row1
movdqa xmm1, xmm0
movdqa xmm3, xmm2
punpcklbw xmm2, xmm4
punpckhbw xmm3, xmm4
punpcklbw xmm0, xmm4
punpckhbw xmm1, xmm4
psubw xmm2, xmm0 // row1 - row0
psubw xmm3, xmm1
paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16
paddw xmm3, xmm3
pmulhw xmm2, xmm5 // scale diff
pmulhw xmm3, xmm5
paddw xmm0, xmm2 // sum rows
paddw xmm1, xmm3
packuswb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop
jmp xloop99
// Blend 25 / 75.
align 4
xloop25:
movdqa xmm0, [esi]
movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1
pavgb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop25
jmp xloop99
// Blend 50 / 50.
align 4
xloop50:
movdqa xmm0, [esi]
movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop50
jmp xloop99
// Blend 75 / 25.
align 4
xloop75:
movdqa xmm1, [esi]
movdqa xmm0, [esi + edx]
pavgb xmm0, xmm1
pavgb xmm0, xmm1
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop75
jmp xloop99
// Blend 100 / 0 - Copy row unchanged.
align 4
xloop100:
movdqa xmm0, [esi]
sub ecx, 16
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop100
xloop99:
pop edi
pop esi
ret
}
}
#endif // HAS_INTERPOLATEROW_SSE2
// Bilinear filter 16x2 -> 16x1
__declspec(naked) __declspec(align(16))
void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
__asm {
push esi
push edi
mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
sub edi, esi
shr eax, 1
// Dispatch to specialized filters if applicable.
cmp eax, 0
je xloop100 // 0 / 128. Blend 100 / 0.
cmp eax, 32
je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
cmp eax, 64
je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
cmp eax, 96
je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
movd xmm0, eax // high fraction 0..127
neg eax
add eax, 128
movd xmm5, eax // low fraction 128..1
punpcklbw xmm5, xmm0
punpcklwd xmm5, xmm5
pshufd xmm5, xmm5, 0
align 4
xloop:
movdqu xmm0, [esi]
......@@ -6650,9 +6430,9 @@ void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
#ifdef HAS_INTERPOLATEROW_SSE2
// Bilinear filter 16x2 -> 16x1
__declspec(naked) __declspec(align(16))
void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
__asm {
push esi
push edi
......
......@@ -59,16 +59,9 @@ static void ScalePlaneDown2(int src_width, int src_height,
}
#elif defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
ScaleRowDown2Box_Unaligned_SSE2);
if (IS_ALIGNED(src_ptr, 16) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
ScaleRowDown2Box_SSE2);
}
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
ScaleRowDown2Box_SSE2);
}
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
......@@ -114,17 +107,9 @@ static void ScalePlaneDown2_16(int src_width, int src_height,
}
#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ?
ScaleRowDown2_Unaligned_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
ScaleRowDown2Box_Unaligned_16_SSE2);
if (IS_ALIGNED(src_ptr, 16) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
ScaleRowDown2Box_16_SSE2);
}
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
ScaleRowDown2Box_16_SSE2);
}
#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
......@@ -889,10 +874,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
......@@ -900,10 +882,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
......@@ -991,10 +970,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
......@@ -1002,10 +978,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
......@@ -1090,10 +1063,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
......@@ -1101,10 +1071,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
......@@ -1229,10 +1196,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
......@@ -1240,10 +1204,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
......
......@@ -193,10 +193,7 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
......@@ -204,10 +201,7 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
......@@ -289,10 +283,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
......@@ -300,10 +291,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
......@@ -430,10 +418,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
......@@ -470,10 +455,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
......@@ -481,10 +463,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
......
......@@ -888,11 +888,7 @@ void ScalePlaneVertical(int src_height,
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
......@@ -900,11 +896,7 @@ void ScalePlaneVertical(int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
......@@ -970,11 +962,7 @@ void ScalePlaneVertical_16(int src_height,
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
......@@ -982,11 +970,7 @@ void ScalePlaneVertical_16(int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment