Commit e35422d9 authored by fbarchard@google.com

Fix AVX2 detection and a performance stall for gcc/clang.

BUG=276
TEST=Cpu unittest
R=nfullagar@google.com, ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/2401004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@817 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 78ad8d1f
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 814 Version: 817
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -68,8 +68,10 @@ LIBYUV_API ...@@ -68,8 +68,10 @@ LIBYUV_API
void MaskCpuFlags(int enable_flags); void MaskCpuFlags(int enable_flags);
// Low level cpuid for X86. Returns zeros on other CPUs. // Low level cpuid for X86. Returns zeros on other CPUs.
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
LIBYUV_API LIBYUV_API
void CpuId(int cpu_info[4], int info_type); void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 814 #define LIBYUV_VERSION 817
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#ifdef _MSC_VER #ifdef _MSC_VER
#include <intrin.h> // For __cpuid() #include <intrin.h> // For __cpuidex()
#endif #endif
#if !defined(__CLR_VER) && !defined(__native_client__) && defined(_M_X64) && \ #if !defined(__CLR_VER) && !defined(__native_client__) && defined(_M_X64) && \
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
...@@ -28,28 +28,6 @@ ...@@ -28,28 +28,6 @@
#include "libyuv/basic_types.h" // For CPU_X86 #include "libyuv/basic_types.h" // For CPU_X86
// TODO(fbarchard): Consider cpu functionality for breakpoints, timer and cache.
// arm - bkpt vs intel int 3
// TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
static __inline void __cpuid(int cpu_info[4], int info_type) {
asm volatile ( // NOLINT
"mov %%ebx, %%edi \n"
"cpuid \n"
"xchg %%edi, %%ebx \n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type));
}
#elif defined(__i386__) || defined(__x86_64__)
static __inline void __cpuid(int cpu_info[4], int info_type) {
asm volatile ( // NOLINT
"cpuid \n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type));
}
#endif
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
...@@ -59,51 +37,43 @@ extern "C" { ...@@ -59,51 +37,43 @@ extern "C" {
#if !defined(__CLR_VER) && (defined(_M_IX86) || defined(_M_X64) || \ #if !defined(__CLR_VER) && (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) defined(__i386__) || defined(__x86_64__))
LIBYUV_API LIBYUV_API
void CpuId(int cpu_info[4], int info_type) { void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
__cpuid(cpu_info, info_type); #if defined(_MSC_VER)
} __cpuidex(reinterpret_cast<int*>(cpu_info), eax, ecx);
#else #else
LIBYUV_API uint32 ebx, edx;
void CpuId(int cpu_info[4], int) { asm volatile ( // NOLINT
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; #if defined( __i386__) && defined(__PIC__)
} // Preserve ebx for fpic 32 bit.
#endif "mov %%ebx, %%edi \n"
"cpuid \n"
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. "xchg %%edi, %%ebx \n"
#if !defined(__CLR_VER) && !defined(__native_client__) : "=D" (ebx),
#if defined(_M_X64) && defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) #else
#define HAS_XGETBV "cpuid \n"
static uint32 XGetBV(unsigned int xcr) { : "+b" (ebx),
return static_cast<uint32>(_xgetbv(xcr)); #endif // defined( __i386__) && defined(__PIC__)
"+a" (eax), "+c" (ecx), "=d" (edx));
cpu_info[0] = eax; cpu_info[1] = ebx; cpu_info[2] = ecx; cpu_info[3] = edx;
#endif // defined(_MSC_VER)
} }
#elif !defined(__CLR_VER) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_XGETBV #define HAS_XGETBV
__declspec(naked) __declspec(align(16)) // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
static uint32 XGetBV(unsigned int xcr) { int TestOsSaveYmm() {
__asm { uint32 xcr0;
mov ecx, [esp + 4] // xcr #if defined(_MSC_VER)
push edx xcr0 = (uint32)_xgetbv(0); /* min VS2010 SP1 compiler is required */
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // xgetbv for vs2005. #else
pop edx __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
ret #endif
} return((xcr0 & 6) == 6); // Is ymm saved?
} }
#elif defined(__i386__) || defined(__x86_64__) #else
#define HAS_XGETBV LIBYUV_API
static uint32 XGetBV(unsigned int xcr) { void CpuId(uint32, uint32, uint32* abcd) {
uint32 xcr_feature_mask; cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
asm volatile ( // NOLINT
".byte 0x0f, 0x01, 0xd0\n"
: "=a"(xcr_feature_mask)
: "c"(xcr)
: "memory", "cc", "edx"); // edx unused.
return xcr_feature_mask;
} }
#endif #endif
#endif // !defined(__CLR_VER) && !defined(__native_client__)
#ifdef HAS_XGETBV
static const int kXCR_XFEATURE_ENABLED_MASK = 0;
#endif
// based on libvpx arm_cpudetect.c // based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU // For Arm, but public to allow testing on any CPU
...@@ -170,10 +140,10 @@ static bool TestEnv(const char*) { ...@@ -170,10 +140,10 @@ static bool TestEnv(const char*) {
LIBYUV_API LIBYUV_API
int InitCpuFlags(void) { int InitCpuFlags(void) {
#if !defined(__CLR_VER) && defined(CPU_X86) #if !defined(__CLR_VER) && defined(CPU_X86)
int cpu_info1[4] = { 0, 0, 0, 0 }; uint32 cpu_info1[4] = { 0, 0, 0, 0 };
int cpu_info7[4] = { 0, 0, 0, 0 }; uint32 cpu_info7[4] = { 0, 0, 0, 0 };
__cpuid(cpu_info1, 1); CpuId(1, 0, cpu_info1);
__cpuid(cpu_info7, 7); CpuId(7, 0, cpu_info7);
cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
...@@ -183,7 +153,7 @@ int InitCpuFlags(void) { ...@@ -183,7 +153,7 @@ int InitCpuFlags(void) {
kCpuHasX86; kCpuHasX86;
#ifdef HAS_XGETBV #ifdef HAS_XGETBV
if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
(XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06) { // Saves YMM. TestOsSaveYmm()) { // Saves YMM.
cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
kCpuHasAVX; kCpuHasAVX;
} }
......
...@@ -56,7 +56,7 @@ TEST_F(libyuvTest, TestCpuHas) { ...@@ -56,7 +56,7 @@ TEST_F(libyuvTest, TestCpuHas) {
TEST_F(libyuvTest, TestCpuId) { TEST_F(libyuvTest, TestCpuId) {
int has_x86 = TestCpuFlag(kCpuHasX86); int has_x86 = TestCpuFlag(kCpuHasX86);
if (has_x86) { if (has_x86) {
int cpu_info[4]; uint32 cpu_info[4];
// Vendor ID: // Vendor ID:
// AuthenticAMD AMD processor // AuthenticAMD AMD processor
// CentaurHauls Centaur processor // CentaurHauls Centaur processor
...@@ -68,7 +68,7 @@ TEST_F(libyuvTest, TestCpuId) { ...@@ -68,7 +68,7 @@ TEST_F(libyuvTest, TestCpuId) {
// RiseRiseRise Rise Technology processor // RiseRiseRise Rise Technology processor
// SiS SiS SiS SiS processor // SiS SiS SiS SiS processor
// UMC UMC UMC UMC processor // UMC UMC UMC UMC processor
CpuId(cpu_info, 0); CpuId(0, 0, cpu_info);
cpu_info[0] = cpu_info[1]; // Reorder output cpu_info[0] = cpu_info[1]; // Reorder output
cpu_info[1] = cpu_info[3]; cpu_info[1] = cpu_info[3];
cpu_info[3] = 0; cpu_info[3] = 0;
...@@ -83,7 +83,7 @@ TEST_F(libyuvTest, TestCpuId) { ...@@ -83,7 +83,7 @@ TEST_F(libyuvTest, TestCpuId) {
// 13:12 - Processor Type // 13:12 - Processor Type
// 19:16 - Extended Model // 19:16 - Extended Model
// 27:20 - Extended Family // 27:20 - Extended Family
CpuId(cpu_info, 1); CpuId(1, 0, cpu_info);
int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0); int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0); int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
......
...@@ -25,7 +25,7 @@ int main(int argc, const char* argv[]) { ...@@ -25,7 +25,7 @@ int main(int argc, const char* argv[]) {
#if defined(__i386__) || defined(__x86_64__) || \ #if defined(__i386__) || defined(__x86_64__) || \
defined(_M_IX86) || defined(_M_X64) defined(_M_IX86) || defined(_M_X64)
if (has_x86) { if (has_x86) {
int family, model, cpu_info[4]; uint32 family, model, cpu_info[4];
// Vendor ID: // Vendor ID:
// AuthenticAMD AMD processor // AuthenticAMD AMD processor
// CentaurHauls Centaur processor // CentaurHauls Centaur processor
...@@ -37,7 +37,7 @@ int main(int argc, const char* argv[]) { ...@@ -37,7 +37,7 @@ int main(int argc, const char* argv[]) {
// RiseRiseRise Rise Technology processor // RiseRiseRise Rise Technology processor
// SiS SiS SiS SiS processor // SiS SiS SiS SiS processor
// UMC UMC UMC UMC processor // UMC UMC UMC UMC processor
CpuId(cpu_info, 0); CpuId(0, 0, &cpu_info[0]);
cpu_info[0] = cpu_info[1]; // Reorder output cpu_info[0] = cpu_info[1]; // Reorder output
cpu_info[1] = cpu_info[3]; cpu_info[1] = cpu_info[3];
cpu_info[3] = 0; cpu_info[3] = 0;
...@@ -50,7 +50,7 @@ int main(int argc, const char* argv[]) { ...@@ -50,7 +50,7 @@ int main(int argc, const char* argv[]) {
// 13:12 - Processor Type // 13:12 - Processor Type
// 19:16 - Extended Model // 19:16 - Extended Model
// 27:20 - Extended Family // 27:20 - Extended Family
CpuId(cpu_info, 1); CpuId(1, 0, &cpu_info[0]);
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0); family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0); model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment