Commit 942ad1f5 authored by fbarchard@google.com

SSE2 version of scale is suspected of having a bug, as it didn't work in…

The SSE2 version of scale is suspected of having a bug, as it didn't work in ARGBInterpolate, while the SSSE3 and C versions do.  Until the result can be verified, SSE2 is disabled.  This will hurt performance on Pentium 4 and below.
BUG=49
TEST=none
Review URL: https://webrtc-codereview.appspot.com/672009

git-svn-id: http://libyuv.googlecode.com/svn/trunk@301 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0d95d477
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 300
Version: 301
License: BSD
License File: LICENSE
......
......@@ -1114,14 +1114,10 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \
(defined(__x86_64__) || defined(__i386__)))
#define HAS_SCALEARGBFILTERROWS_SSE2
#define HAS_SCALEARGBFILTERROWS_SSSE3
#endif
void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction);
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
int source_y_fraction);
void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
int source_y_fraction);
......@@ -1143,14 +1139,6 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
int source_y_fraction) = ScaleARGBFilterRows_C;
#if defined(HAS_SCALEARGBFILTERROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2;
}
#endif
#if defined(HAS_SCALEARGBFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
......
......@@ -31,7 +31,7 @@ extern "C" {
// NOT the optimized versions. Useful for debugging and
// when comparing the quality of the resulting YUV planes
// as produced by the optimized and non-optimized versions.
#define SSE2_DISABLED 1
static bool use_reference_impl_ = false;
void SetUseReferenceImpl(bool use) {
......@@ -1377,12 +1377,13 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
}
}
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
// Normal formula for bilinear interpolation is:
// source_y_fraction * row1 + (1 - source_y_fraction) * row0
// SSE2 version using a single multiply of the difference:
// source_y_fraction * (row1 - row0) + row0
#define HAS_SCALEFILTERROWS_SSE2
#define HAS_SCALEFILTERROWS_SSE2_DISABLED
__declspec(naked) __declspec(align(16))
static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
......@@ -1471,7 +1472,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ret
}
}
#endif // SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
#define HAS_SCALEFILTERROWS_SSSE3
__declspec(naked) __declspec(align(16))
......@@ -2247,8 +2248,9 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
);
}
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
#define HAS_SCALEFILTERROWS_SSE2
#define HAS_SCALEFILTERROWS_SSE2_DISABLED
static void ScaleFilterRows_SSE2(uint8* dst_ptr,
const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction) {
......@@ -2318,6 +2320,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
#endif
);
}
#endif // SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
#define HAS_SCALEFILTERROWS_SSSE3
......@@ -2388,7 +2391,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
#endif
);
}
#endif // defined(__x86_64__) || defined(__i386__)
// CPU agnostic row functions
......@@ -2609,7 +2611,7 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr,
} while (dst_ptr < dend);
}
#define HAS_SCALEROWDOWN34_SSE2
#define HAS_SCALEROWDOWN34_SSE2_DISABLED
// Filter rows 0 and 1 together, 3 : 1
static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) {
......
......@@ -28,6 +28,7 @@ extern "C" {
/**
* SSE2 downscalers with bilinear interpolation.
*/
#define SSE2_DISABLED 1
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
......@@ -182,7 +183,8 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
}
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version.
#define HAS_SCALEARGBFILTERROWS_SSE2
#ifndef SSE2_DISABLED
#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
__declspec(naked) __declspec(align(16))
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
......@@ -267,6 +269,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ret
}
}
#endif // SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
#define HAS_SCALEARGBFILTERROWS_SSSE3
......@@ -497,8 +500,9 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
);
}
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version
#define HAS_SCALEARGBFILTERROWS_SSE2
#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
int source_y_fraction) {
......@@ -567,6 +571,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
#endif
);
}
#endif // SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
#define HAS_SCALEARGBFILTERROWS_SSSE3
......@@ -635,7 +640,7 @@ void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
#endif
);
}
#endif
#endif // defined(__x86_64__) || defined(__i386__)
static void ScaleARGBRowDown2_C(const uint8* src_ptr, int,
uint8* dst_ptr, int dst_width) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment