Commit 4c32b3a0 authored by fbarchard@google.com

Port xgetbv to posix

BUG=98
TEST=none
Review URL: https://webrtc-codereview.appspot.com/848004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@374 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 78070bc1
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 373 Version: 374
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 373 #define LIBYUV_VERSION 374
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -962,8 +962,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, ...@@ -962,8 +962,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
} }
} }
} }
#endif #elif defined(HAS_UYVYTOYROW_NEON)
#if defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) { if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON; UYVYToYRow = UYVYToYRow_Any_NEON;
......
...@@ -998,8 +998,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, ...@@ -998,8 +998,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
ARGBToRAWRow = ARGBToRAWRow_SSSE3; ARGBToRAWRow = ARGBToRAWRow_SSSE3;
} }
} }
#endif #elif defined(HAS_ARGBTORAWROW_NEON)
#if defined(HAS_ARGBTORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
if (width * 3 <= kMaxStride) { if (width * 3 <= kMaxStride) {
ARGBToRAWRow = ARGBToRAWRow_Any_NEON; ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
......
...@@ -58,19 +58,37 @@ void CpuId(int cpu_info[4], int) { ...@@ -58,19 +58,37 @@ void CpuId(int cpu_info[4], int) {
} }
#endif #endif
// Low level cpuid for X86. Returns zeros on other CPUs. // X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers.
#if !defined(__CLR_VER) && defined(_M_IX86) #if !defined(__CLR_VER) && defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
// TODO(fbarchard): Port to GCC and 64 bit Visual C. #define HAS_XGETBV
// Reads the extended control register selected by xcr through the compiler's
// _xgetbv intrinsic and returns the result narrowed to uint32.
static uint32 XGetBV(unsigned int xcr) {
return static_cast<uint32>(_xgetbv(xcr));
}
#elif !defined(__CLR_VER) && defined(_M_IX86)
#define HAS_XGETBV
// Inline-assembly version of XGetBV, selected when _M_IX86 is defined and the
// intrinsic-based branch above is not taken.
// Declared naked, so the body is responsible for its own return and reads the
// xcr argument directly from the stack.
__declspec(naked) __declspec(align(16))
static uint32 XGetBV(unsigned int xcr) {
__asm {
mov ecx, [esp + 4] // xcr
xgetbv // updates eax and edx. edx unused.
ret
}
}
#elif defined(__i386__) || defined(__x86_64__)
#define HAS_XGETBV #define HAS_XGETBV
// Return low 32 bits of BV - OS support for register saving. static uint32 XGetBV(unsigned int xcr) {
__declspec(naked) uint32 xcr_feature_mask;
static uint32 XGetBV32(void) { asm volatile (
_asm _emit 0x0f ".byte 0x0f, 0x01, 0xd0\n"
_asm _emit 0x01 : "=a"(xcr_feature_mask)
_asm _emit 0xd0 // xgetbv : "c"(xcr)
_asm ret : "memory", "cc", "edx"); // edx unused.
return xcr_feature_mask;
} }
#endif #endif
#ifdef HAS_XGETBV
static const int kXCR_XFEATURE_ENABLED_MASK = 0;
#endif
// based on libvpx arm_cpudetect.c // based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU // For Arm, but public to allow testing on any CPU
...@@ -105,13 +123,13 @@ int InitCpuFlags() { ...@@ -105,13 +123,13 @@ int InitCpuFlags() {
((cpu_info[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info[2] & 0x00100000) ? kCpuHasSSE42 : 0) | ((cpu_info[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
// TODO(fbarchard): AVX test BV same as AVX2.
(((cpu_info[2] & 0x18000000) == 0x18000000) ? kCpuHasAVX : 0) | (((cpu_info[2] & 0x18000000) == 0x18000000) ? kCpuHasAVX : 0) |
kCpuInitialized | kCpuHasX86; kCpuInitialized | kCpuHasX86;
#ifdef HAS_XGETBV #ifdef HAS_XGETBV
if (cpu_info_ & kCpuHasAVX) { if (cpu_info_ & kCpuHasAVX) {
__cpuid(cpu_info, 7); __cpuid(cpu_info, 7);
if ((cpu_info[1] & 0x00000020) && ((XGetBV32() & 0x06) == 0x06)) { if ((cpu_info[1] & 0x00000020) &&
((XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06)) {
cpu_info_ |= kCpuHasAVX2; cpu_info_ |= kCpuHasAVX2;
} }
} }
......
...@@ -459,7 +459,7 @@ void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) { ...@@ -459,7 +459,7 @@ void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) {
"1: \n" "1: \n"
"vld4.8 {d5, d6, d7, d8}, [%0]! \n" // load 8 pixels of ARGB. "vld4.8 {d5, d6, d7, d8}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop. "subs %2, %2, #8 \n" // 8 processed per loop.
"vmov.u8 d4, d8 \n" "vmov.u8 d4, d8 \n" // move A before RGB.
"vst4.8 {d4, d5, d6, d7}, [%1]! \n" // store 8 pixels of RGBA. "vst4.8 {d4, d5, d6, d7}, [%1]! \n" // store 8 pixels of RGBA.
"bgt 1b \n" "bgt 1b \n"
: "+r"(src_argb), // %0 : "+r"(src_argb), // %0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment