Commit 942ad1f5 authored by fbarchard@google.com's avatar fbarchard@google.com

SSE2 version of scale is suspected of having a bug, as it didn't work in…

The SSE2 version of scale is suspected of having a bug, as it didn't work in ARGBInterpolate, while the SSSE3 and C versions do.  Until the result can be verified, SSE2 is disabled.  This will hurt performance on Pentium 4 and below.
BUG=49
TEST=none
Review URL: https://webrtc-codereview.appspot.com/672009

git-svn-id: http://libyuv.googlecode.com/svn/trunk@301 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0d95d477
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 300 Version: 301
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -1114,14 +1114,10 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb, ...@@ -1114,14 +1114,10 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \ #if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \
(defined(__x86_64__) || defined(__i386__))) (defined(__x86_64__) || defined(__i386__)))
#define HAS_SCALEARGBFILTERROWS_SSE2
#define HAS_SCALEARGBFILTERROWS_SSSE3 #define HAS_SCALEARGBFILTERROWS_SSSE3
#endif #endif
void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride, void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction); int dst_width, int source_y_fraction);
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
int source_y_fraction);
void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
int source_y_fraction); int source_y_fraction);
...@@ -1143,14 +1139,6 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, ...@@ -1143,14 +1139,6 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr, void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
int source_y_fraction) = ScaleARGBFilterRows_C; int source_y_fraction) = ScaleARGBFilterRows_C;
#if defined(HAS_SCALEARGBFILTERROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2;
}
#endif
#if defined(HAS_SCALEARGBFILTERROWS_SSSE3) #if defined(HAS_SCALEARGBFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
......
...@@ -31,7 +31,7 @@ extern "C" { ...@@ -31,7 +31,7 @@ extern "C" {
// NOT the optimized versions. Useful for debugging and // NOT the optimized versions. Useful for debugging and
// when comparing the quality of the resulting YUV planes // when comparing the quality of the resulting YUV planes
// as produced by the optimized and non-optimized versions. // as produced by the optimized and non-optimized versions.
#define SSE2_DISABLED 1
static bool use_reference_impl_ = false; static bool use_reference_impl_ = false;
void SetUseReferenceImpl(bool use) { void SetUseReferenceImpl(bool use) {
...@@ -1377,12 +1377,13 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride, ...@@ -1377,12 +1377,13 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
} }
} }
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version. // Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
// Normal formula for bilinear interpolation is: // Normal formula for bilinear interpolation is:
// source_y_fraction * row1 + (1 - source_y_fraction) row0 // source_y_fraction * row1 + (1 - source_y_fraction) row0
// SSE2 version using a single multiply of difference: // SSE2 version using a single multiply of difference:
// source_y_fraction * (row1 - row0) + row0 // source_y_fraction * (row1 - row0) + row0
#define HAS_SCALEFILTERROWS_SSE2 #define HAS_SCALEFILTERROWS_SSE2_DISABLED
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
...@@ -1471,7 +1472,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -1471,7 +1472,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ret ret
} }
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version. // Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
#define HAS_SCALEFILTERROWS_SSSE3 #define HAS_SCALEFILTERROWS_SSSE3
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
...@@ -2247,8 +2248,9 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride, ...@@ -2247,8 +2248,9 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
); );
} }
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version // Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
#define HAS_SCALEFILTERROWS_SSE2 #define HAS_SCALEFILTERROWS_SSE2_DISABLED
static void ScaleFilterRows_SSE2(uint8* dst_ptr, static void ScaleFilterRows_SSE2(uint8* dst_ptr,
const uint8* src_ptr, int src_stride, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction) { int dst_width, int source_y_fraction) {
...@@ -2318,6 +2320,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, ...@@ -2318,6 +2320,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
#endif #endif
); );
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version // Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
#define HAS_SCALEFILTERROWS_SSSE3 #define HAS_SCALEFILTERROWS_SSSE3
...@@ -2388,7 +2391,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, ...@@ -2388,7 +2391,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
#endif #endif
); );
} }
#endif // defined(__x86_64__) || defined(__i386__) #endif // defined(__x86_64__) || defined(__i386__)
// CPU agnostic row functions // CPU agnostic row functions
...@@ -2609,7 +2611,7 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr, ...@@ -2609,7 +2611,7 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr,
} while (dst_ptr < dend); } while (dst_ptr < dend);
} }
#define HAS_SCALEROWDOWN34_SSE2 #define HAS_SCALEROWDOWN34_SSE2_DISABLED
// Filter rows 0 and 1 together, 3 : 1 // Filter rows 0 and 1 together, 3 : 1
static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
......
...@@ -28,6 +28,7 @@ extern "C" { ...@@ -28,6 +28,7 @@ extern "C" {
/** /**
* SSE2 downscalers with bilinear interpolation. * SSE2 downscalers with bilinear interpolation.
*/ */
#define SSE2_DISABLED 1
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
...@@ -182,7 +183,8 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride, ...@@ -182,7 +183,8 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
} }
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version. // Bilinear row filtering combines 4x2 -> 4x1. SSE2 version.
#define HAS_SCALEARGBFILTERROWS_SSE2 #ifndef SSE2_DISABLED
#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
...@@ -267,6 +269,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -267,6 +269,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ret ret
} }
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version. // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
#define HAS_SCALEARGBFILTERROWS_SSSE3 #define HAS_SCALEARGBFILTERROWS_SSSE3
...@@ -497,8 +500,9 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride, ...@@ -497,8 +500,9 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
); );
} }
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version // Bilinear row filtering combines 4x2 -> 4x1. SSE2 version
#define HAS_SCALEARGBFILTERROWS_SSE2 #define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
int source_y_fraction) { int source_y_fraction) {
...@@ -567,6 +571,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -567,6 +571,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
#endif #endif
); );
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
#define HAS_SCALEARGBFILTERROWS_SSSE3 #define HAS_SCALEARGBFILTERROWS_SSSE3
...@@ -635,7 +640,7 @@ void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -635,7 +640,7 @@ void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
#endif #endif
); );
} }
#endif #endif // defined(__x86_64__) || defined(__i386__)
static void ScaleARGBRowDown2_C(const uint8* src_ptr, int, static void ScaleARGBRowDown2_C(const uint8* src_ptr, int,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment