Commit 735ace2e authored by Frank Barchard, committed by Commit Bot

Re-enable x86 assembly without requiring -msse2

clang does not require -msse2 or -msse for inline assembly, except
for operands that use the "x" (SSE register) constraint.  So change
the constraint to "m" (memory operand) for 32 bit.  64 bit requires
sse2, so keep "x" for 64 bit.

gcc requires -msse to accept xmm registers in the clobber list, so
reduce the compiler requirement for enabling assembly from -msse2
to -msse.

Bug: libyuv:754, libyuv:757
Test: CC=clang CXX=clang++ CFLAGS="-m32" CXXFLAGS="-m32 -mno-sse -O2" make -f linux.mk
Change-Id: I86df72cfee80b7d349561c1fd7c97ad360767255
Reviewed-on: https://chromium-review.googlesource.com/759303
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Frank Barchard <fbarchard@google.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
parent 68f852d8
...@@ -19,7 +19,7 @@ extern "C" { ...@@ -19,7 +19,7 @@ extern "C" {
#endif #endif
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
......
...@@ -746,7 +746,7 @@ int I420Interpolate(const uint8* src0_y, ...@@ -746,7 +746,7 @@ int I420Interpolate(const uint8* src0_y,
int interpolation); int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
......
...@@ -19,7 +19,7 @@ extern "C" { ...@@ -19,7 +19,7 @@ extern "C" {
#endif #endif
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
......
...@@ -31,7 +31,7 @@ extern "C" { ...@@ -31,7 +31,7 @@ extern "C" {
var = 0 var = 0
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
...@@ -279,7 +279,7 @@ extern "C" { ...@@ -279,7 +279,7 @@ extern "C" {
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_MERGEUV10ROW_AVX2 #define HAS_MERGEUV10ROW_AVX2
#endif #endif
// The following are available on Neon platforms: // The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && \ #if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
......
...@@ -20,7 +20,7 @@ extern "C" { ...@@ -20,7 +20,7 @@ extern "C" {
#endif #endif
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
......
...@@ -80,4 +80,4 @@ cpuid: util/cpuid.c libyuv.a ...@@ -80,4 +80,4 @@ cpuid: util/cpuid.c libyuv.a
$(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a $(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a
clean: clean:
/bin/rm -f source/*.o *.ii *.s libyuv.a convert cpuid psnr /bin/rm -f source/*.o *.ii *.s libyuv.a yuvconvert cpuid psnr
...@@ -5687,6 +5687,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb, ...@@ -5687,6 +5687,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
#ifdef HAS_HALFFLOATROW_SSE2 #ifdef HAS_HALFFLOATROW_SSE2
static float kScaleBias = 1.9259299444e-34f; static float kScaleBias = 1.9259299444e-34f;
void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
scale *= kScaleBias;
asm volatile ( asm volatile (
"pshufd $0x0,%3,%%xmm4 \n" "pshufd $0x0,%3,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n" "pxor %%xmm5,%%xmm5 \n"
...@@ -5713,7 +5714,11 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { ...@@ -5713,7 +5714,11 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width) // %2 "+r"(width) // %2
: "x"(scale * kScaleBias) // %3 #if defined(__x86_64__)
: "x"(scale) // %3
#else
: "m"(scale) // %3
#endif
: "memory", "cc", : "memory", "cc",
"xmm2", "xmm3", "xmm4", "xmm5" "xmm2", "xmm3", "xmm4", "xmm5"
); );
...@@ -5722,6 +5727,7 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { ...@@ -5722,6 +5727,7 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
#ifdef HAS_HALFFLOATROW_AVX2 #ifdef HAS_HALFFLOATROW_AVX2
void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
scale *= kScaleBias;
asm volatile ( asm volatile (
"vbroadcastss %3, %%ymm4 \n" "vbroadcastss %3, %%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
...@@ -5749,7 +5755,11 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { ...@@ -5749,7 +5755,11 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width) // %2 "+r"(width) // %2
: "x"(scale * kScaleBias) // %3 #if defined(__x86_64__)
: "x"(scale) // %3
#else
: "m"(scale) // %3
#endif
: "memory", "cc", : "memory", "cc",
"xmm2", "xmm3", "xmm4", "xmm5" "xmm2", "xmm3", "xmm4", "xmm5"
); );
...@@ -5782,7 +5792,11 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { ...@@ -5782,7 +5792,11 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width) // %2 "+r"(width) // %2
#if defined(__x86_64__)
: "x"(scale) // %3 : "x"(scale) // %3
#else
: "m"(scale) // %3
#endif
: "memory", "cc", : "memory", "cc",
"xmm2", "xmm3", "xmm4" "xmm2", "xmm3", "xmm4"
); );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment