Commit fae1a105 authored by Frank Barchard

Work around bug in xgetbv for Visual Studio.

The compiler is generating bad code for xgetbv, falsely disabling AVX2 and AVX512.
Disable optimization for the affected function on older 32-bit versions of Visual C.

R=brucedawson@chromium.org, dhrosa@google.com, harryjin@google.com
BUG=libyuv:529

Review URL: https://codereview.chromium.org/1503393004 .
parent 2657688e
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1549 Version: 1550
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1549 #define LIBYUV_VERSION 1550
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -48,7 +48,7 @@ extern "C" { ...@@ -48,7 +48,7 @@ extern "C" {
!defined(__pnacl__) && !defined(__CLR_VER) !defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) #if defined(_MSC_VER) && !defined(__clang__)
// Visual C version uses intrinsic or inline x86 assembly. // Visual C version uses intrinsic or inline x86 assembly.
#if (_MSC_FULL_VER >= 160040219) #if (_MSC_FULL_VER >= 160040219)
__cpuidex((int*)(cpu_info), info_eax, info_ecx); __cpuidex((int*)(cpu_info), info_eax, info_ecx);
...@@ -63,7 +63,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { ...@@ -63,7 +63,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
mov [edi + 8], ecx mov [edi + 8], ecx
mov [edi + 12], edx mov [edi + 12], edx
} }
#else #else // Visual C but not x86
if (info_ecx == 0) { if (info_ecx == 0) {
__cpuid((int*)(cpu_info), info_eax); __cpuid((int*)(cpu_info), info_eax);
} else { } else {
...@@ -71,7 +71,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { ...@@ -71,7 +71,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
} }
#endif #endif
// GCC version uses inline x86 assembly. // GCC version uses inline x86 assembly.
#else // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) #else // defined(_MSC_VER) && !defined(__clang__)
uint32 info_ebx, info_edx; uint32 info_ebx, info_edx;
asm volatile ( asm volatile (
#if defined( __i386__) && defined(__PIC__) #if defined( __i386__) && defined(__PIC__)
...@@ -89,7 +89,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { ...@@ -89,7 +89,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
cpu_info[1] = info_ebx; cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx; cpu_info[2] = info_ecx;
cpu_info[3] = info_edx; cpu_info[3] = info_edx;
#endif // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) #endif // defined(_MSC_VER) && !defined(__clang__)
} }
#else // (defined(_M_IX86) || defined(_M_X64) ... #else // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API LIBYUV_API
...@@ -98,7 +98,18 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { ...@@ -98,7 +98,18 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
} }
#endif #endif
// TODO(fbarchard): Enable xgetbv when validator supports it. // For VS2010 and earlier emit can be used:
// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
// __asm {
// xor ecx, ecx // xcr 0
// xgetbv
// mov xcr0, eax
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || \ #if (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) && \ defined(__i386__) || defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
...@@ -106,20 +117,19 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { ...@@ -106,20 +117,19 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int GetXCR0() { int GetXCR0() {
uint32 xcr0 = 0u; uint32 xcr0 = 0u;
#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) #if (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
#elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__)
__asm {
xor ecx, ecx // xcr 0
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
mov xcr0, eax
}
#elif defined(__i386__) || defined(__x86_64__) #elif defined(__i386__) || defined(__x86_64__)
uint32 xcr0 = 0u;
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(__i386__) || defined(__x86_64__) #endif // defined(__i386__) || defined(__x86_64__)
return xcr0; return xcr0;
} }
#endif // defined(_M_IX86) || defined(_M_X64) .. #endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
// based on libvpx arm_cpudetect.c // based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU // For Arm, but public to allow testing on any CPU
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment