Commit e35422d9 authored by fbarchard@google.com

Fix AVX2 detect and a performance stall for gcc/clang.

BUG=276
TEST=Cpu unittest
R=nfullagar@google.com, ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/2401004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@817 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 78ad8d1f
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 814
Version: 817
License: BSD
License File: LICENSE
......
......@@ -68,8 +68,10 @@ LIBYUV_API
void MaskCpuFlags(int enable_flags);
// Low level cpuid for X86. Returns zeros on other CPUs.
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
LIBYUV_API
void CpuId(int cpu_info[4], int info_type);
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
#ifdef __cplusplus
} // extern "C"
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 814
#define LIBYUV_VERSION 817
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -11,7 +11,7 @@
#include "libyuv/cpu_id.h"
#ifdef _MSC_VER
#include <intrin.h> // For __cpuid()
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__CLR_VER) && !defined(__native_client__) && defined(_M_X64) && \
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
......@@ -28,28 +28,6 @@
#include "libyuv/basic_types.h" // For CPU_X86
// TODO(fbarchard): Consider cpu functionality for breakpoints, timer and cache.
// arm - bkpt vs intel int 3
// TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
// Executes the x86 cpuid instruction with info_type loaded into eax and
// stores the resulting eax, ebx, ecx, edx values into cpu_info[0..3].
// This variant keeps ebx intact across the instruction (it is the one chosen
// by the surrounding #if for 32-bit PIC / Apple builds).
// Note: only eax is supplied as an input; ecx is not set before cpuid runs.
static __inline void __cpuid(int cpu_info[4], int info_type) {
asm volatile ( // NOLINT
// Save the caller's ebx in edi before cpuid overwrites ebx.
"mov %%ebx, %%edi \n"
"cpuid \n"
// Swap back: ebx gets its saved value, edi now holds cpuid's ebx result.
"xchg %%edi, %%ebx \n"
// cpuid's ebx result is therefore read from edi ("=D").
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type));
}
#elif defined(__i386__) || defined(__x86_64__)
// Executes the x86 cpuid instruction with info_type loaded into eax and
// stores the resulting eax, ebx, ecx, edx values into cpu_info[0..3].
// Note: only eax is supplied as an input; ecx is not set before cpuid runs.
static __inline void __cpuid(int cpu_info[4], int info_type) {
asm volatile ( // NOLINT
"cpuid \n"
// Outputs are taken directly from eax, ebx, ecx, edx.
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type));
}
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
......@@ -59,51 +37,43 @@ extern "C" {
#if !defined(__CLR_VER) && (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__))
LIBYUV_API
void CpuId(int cpu_info[4], int info_type) {
__cpuid(cpu_info, info_type);
}
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
#if defined(_MSC_VER)
__cpuidex(reinterpret_cast<int*>(cpu_info), eax, ecx);
#else
LIBYUV_API
void CpuId(int cpu_info[4], int) {
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
#if !defined(__CLR_VER) && !defined(__native_client__)
#if defined(_M_X64) && defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#define HAS_XGETBV
static uint32 XGetBV(unsigned int xcr) {
return static_cast<uint32>(_xgetbv(xcr));
uint32 ebx, edx;
asm volatile ( // NOLINT
#if defined( __i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n"
"cpuid \n"
"xchg %%edi, %%ebx \n"
: "=D" (ebx),
#else
"cpuid \n"
: "+b" (ebx),
#endif // defined( __i386__) && defined(__PIC__)
"+a" (eax), "+c" (ecx), "=d" (edx));
cpu_info[0] = eax; cpu_info[1] = ebx; cpu_info[2] = ecx; cpu_info[3] = edx;
#endif // defined(_MSC_VER)
}
#elif !defined(__CLR_VER) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_XGETBV
__declspec(naked) __declspec(align(16))
static uint32 XGetBV(unsigned int xcr) {
__asm {
mov ecx, [esp + 4] // xcr
push edx
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // xgetbv for vs2005.
pop edx
ret
}
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int TestOsSaveYmm() {
uint32 xcr0;
#if defined(_MSC_VER)
xcr0 = (uint32)_xgetbv(0); /* min VS2010 SP1 compiler is required */
#else
__asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
#endif
return((xcr0 & 6) == 6); // Is ymm saved?
}
#elif defined(__i386__) || defined(__x86_64__)
#define HAS_XGETBV
static uint32 XGetBV(unsigned int xcr) {
uint32 xcr_feature_mask;
asm volatile ( // NOLINT
".byte 0x0f, 0x01, 0xd0\n"
: "=a"(xcr_feature_mask)
: "c"(xcr)
: "memory", "cc", "edx"); // edx unused.
return xcr_feature_mask;
#else
// Fallback used when the x86 cpuid path is not compiled in: ignores the
// requested info type (eax) and sub-leaf (ecx) and reports zeros.
// cpu_info must point to at least 4 writable uint32 values; all four are
// set to 0.
LIBYUV_API
void CpuId(uint32, uint32, uint32* cpu_info) {
  // The parameter was previously named abcd while the body wrote through
  // cpu_info, which is undeclared here; the name now matches the prototype.
  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif
#endif // !defined(__CLR_VER) && !defined(__native_client__)
#ifdef HAS_XGETBV
static const int kXCR_XFEATURE_ENABLED_MASK = 0;
#endif
// based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
......@@ -170,10 +140,10 @@ static bool TestEnv(const char*) {
LIBYUV_API
int InitCpuFlags(void) {
#if !defined(__CLR_VER) && defined(CPU_X86)
int cpu_info1[4] = { 0, 0, 0, 0 };
int cpu_info7[4] = { 0, 0, 0, 0 };
__cpuid(cpu_info1, 1);
__cpuid(cpu_info7, 7);
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
CpuId(1, 0, cpu_info1);
CpuId(7, 0, cpu_info7);
cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
......@@ -183,7 +153,7 @@ int InitCpuFlags(void) {
kCpuHasX86;
#ifdef HAS_XGETBV
if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
(XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06) { // Saves YMM.
TestOsSaveYmm()) { // Saves YMM.
cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
kCpuHasAVX;
}
......
......@@ -56,7 +56,7 @@ TEST_F(libyuvTest, TestCpuHas) {
TEST_F(libyuvTest, TestCpuId) {
int has_x86 = TestCpuFlag(kCpuHasX86);
if (has_x86) {
int cpu_info[4];
uint32 cpu_info[4];
// Vendor ID:
// AuthenticAMD AMD processor
// CentaurHauls Centaur processor
......@@ -68,7 +68,7 @@ TEST_F(libyuvTest, TestCpuId) {
// RiseRiseRise Rise Technology processor
// SiS SiS SiS SiS processor
// UMC UMC UMC UMC processor
CpuId(cpu_info, 0);
CpuId(0, 0, cpu_info);
cpu_info[0] = cpu_info[1]; // Reorder output
cpu_info[1] = cpu_info[3];
cpu_info[3] = 0;
......@@ -83,7 +83,7 @@ TEST_F(libyuvTest, TestCpuId) {
// 13:12 - Processor Type
// 19:16 - Extended Model
// 27:20 - Extended Family
CpuId(cpu_info, 1);
CpuId(1, 0, cpu_info);
int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
......
......@@ -25,7 +25,7 @@ int main(int argc, const char* argv[]) {
#if defined(__i386__) || defined(__x86_64__) || \
defined(_M_IX86) || defined(_M_X64)
if (has_x86) {
int family, model, cpu_info[4];
uint32 family, model, cpu_info[4];
// Vendor ID:
// AuthenticAMD AMD processor
// CentaurHauls Centaur processor
......@@ -37,7 +37,7 @@ int main(int argc, const char* argv[]) {
// RiseRiseRise Rise Technology processor
// SiS SiS SiS SiS processor
// UMC UMC UMC UMC processor
CpuId(cpu_info, 0);
CpuId(0, 0, &cpu_info[0]);
cpu_info[0] = cpu_info[1]; // Reorder output
cpu_info[1] = cpu_info[3];
cpu_info[3] = 0;
......@@ -50,7 +50,7 @@ int main(int argc, const char* argv[]) {
// 13:12 - Processor Type
// 19:16 - Extended Model
// 27:20 - Extended Family
CpuId(cpu_info, 1);
CpuId(1, 0, &cpu_info[0]);
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment