Commit 4c32b3a0 authored by fbarchard@google.com

Port xgetbv to posix

BUG=98
TEST=none
Review URL: https://webrtc-codereview.appspot.com/848004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@374 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 78070bc1
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 373
Version: 374
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 373
#define LIBYUV_VERSION 374
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -962,8 +962,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
}
}
}
#endif
#if defined(HAS_UYVYTOYROW_NEON)
#elif defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
......
......@@ -998,8 +998,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTORAWROW_NEON)
#elif defined(HAS_ARGBTORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width * 3 <= kMaxStride) {
ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
......
......@@ -58,19 +58,37 @@ void CpuId(int cpu_info[4], int) {
}
#endif
// Low level cpuid for X86. Returns zeros on other CPUs.
#if !defined(__CLR_VER) && defined(_M_IX86)
// TODO(fbarchard): Port to GCC and 64 bit Visual C.
// X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers.
#if !defined(__CLR_VER) && defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#define HAS_XGETBV
// Intrinsic-based variant: passes the extended control register index |xcr|
// to _xgetbv() and returns the result narrowed to uint32.
static uint32 XGetBV(unsigned int xcr) {
return static_cast<uint32>(_xgetbv(xcr));
}
#elif !defined(__CLR_VER) && defined(_M_IX86)
#define HAS_XGETBV
// Inline-assembly variant for 32 bit x86 builds. The function is declared
// naked, so the body loads |xcr| from the stack itself and issues its own ret.
// The value left in eax by xgetbv is what the caller receives.
__declspec(naked) __declspec(align(16))
static uint32 XGetBV(unsigned int xcr) {
__asm {
mov ecx, [esp + 4] // xcr
xgetbv // updates eax and edx. edx unused.
ret
}
}
#elif defined(__i386__) || defined(__x86_64__)
#define HAS_XGETBV
// Return low 32 bits of BV - OS support for register saving.
__declspec(naked)
static uint32 XGetBV32(void) {
_asm _emit 0x0f
_asm _emit 0x01
_asm _emit 0xd0 // xgetbv
_asm ret
// GCC-style inline-assembly variant. Emits the xgetbv opcode as raw bytes
// (0x0f, 0x01, 0xd0), with |xcr| supplied in ecx and the result read from eax.
// Returns the 32 bit value read from eax.
static uint32 XGetBV(unsigned int xcr) {
uint32 xcr_feature_mask;
asm volatile (
// Raw opcode bytes for xgetbv.
".byte 0x0f, 0x01, 0xd0\n"
: "=a"(xcr_feature_mask)
: "c"(xcr)
: "memory", "cc", "edx"); // edx unused.
return xcr_feature_mask;
}
#endif
#ifdef HAS_XGETBV
static const int kXCR_XFEATURE_ENABLED_MASK = 0;
#endif
// based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
......@@ -105,13 +123,13 @@ int InitCpuFlags() {
((cpu_info[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
// TODO(fbarchard): AVX test BV same as AVX2.
(((cpu_info[2] & 0x18000000) == 0x18000000) ? kCpuHasAVX : 0) |
kCpuInitialized | kCpuHasX86;
#ifdef HAS_XGETBV
if (cpu_info_ & kCpuHasAVX) {
__cpuid(cpu_info, 7);
if ((cpu_info[1] & 0x00000020) && ((XGetBV32() & 0x06) == 0x06)) {
if ((cpu_info[1] & 0x00000020) &&
((XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06)) {
cpu_info_ |= kCpuHasAVX2;
}
}
......
......@@ -459,7 +459,7 @@ void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) {
"1: \n"
"vld4.8 {d5, d6, d7, d8}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmov.u8 d4, d8 \n"
"vmov.u8 d4, d8 \n" // move A before RGB.
"vst4.8 {d4, d5, d6, d7}, [%1]! \n" // store 8 pixels of RGBA.
"bgt 1b \n"
: "+r"(src_argb), // %0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment