Commit fdcf524a authored by Frank Barchard

Add f16c (halffloat) cpuid

R=wangcheng@google.com, hubbe@chromium.org
BUG=libyuv:560

Review URL: https://codereview.chromium.org/2418763006 .
parent 5333e94e
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1626 Version: 1627
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -31,13 +31,15 @@ static const int kCpuHasX86 = 0x10; ...@@ -31,13 +31,15 @@ static const int kCpuHasX86 = 0x10;
static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSE2 = 0x20;
static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSSE3 = 0x40;
static const int kCpuHasSSE41 = 0x80; static const int kCpuHasSSE41 = 0x80;
static const int kCpuHasSSE42 = 0x100; static const int kCpuHasSSE42 = 0x100; // unused at this time.
static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400; static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800; static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasAVX3 = 0x2000; static const int kCpuHasAVX3 = 0x2000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags. static const int kCpuHasF16C = 0x4000;
// 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors. // These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000; static const int kCpuHasMIPS = 0x10000;
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1626 #define LIBYUV_VERSION 1627
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -229,19 +229,21 @@ int InitCpuFlags(void) { ...@@ -229,19 +229,21 @@ int InitCpuFlags(void) {
if (cpu_info0[0] >= 7) { if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7); CpuId(7, 0, cpu_info7);
} }
cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | cpu_info = kCpuHasX86 |
((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
kCpuHasX86;
#ifdef HAS_XGETBV #ifdef HAS_XGETBV
// AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; cpu_info |= kCpuHasAVX |
((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
// Detect AVX512bw // Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) { if ((GetXCR0() & 0xe0) == 0xe0) {
...@@ -281,6 +283,10 @@ int InitCpuFlags(void) { ...@@ -281,6 +283,10 @@ int InitCpuFlags(void) {
if (TestEnv("LIBYUV_DISABLE_AVX3")) { if (TestEnv("LIBYUV_DISABLE_AVX3")) {
cpu_info &= ~kCpuHasAVX3; cpu_info &= ~kCpuHasAVX3;
} }
if (TestEnv("LIBYUV_DISABLE_F16C")) {
cpu_info &= ~kCpuHasF16C;
}
#endif #endif
#if defined(__mips__) && defined(__linux__) #if defined(__mips__) && defined(__linux__)
#if defined(__mips_dspr2) #if defined(__mips_dspr2)
......
...@@ -2570,7 +2570,7 @@ int HalfFloatPlane(const uint16* src_y, int src_stride_y, ...@@ -2570,7 +2570,7 @@ int HalfFloatPlane(const uint16* src_y, int src_stride_y,
} }
#endif #endif
#if defined(HAS_HALFFLOATROW_AVX2) #if defined(HAS_HALFFLOATROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
HalfFloatRow = HalfFloatRow_Any_AVX2; HalfFloatRow = HalfFloatRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
HalfFloatRow = HalfFloatRow_AVX2; HalfFloatRow = HalfFloatRow_AVX2;
......
...@@ -45,6 +45,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { ...@@ -45,6 +45,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has FMA3 %x\n", has_fma3); printf("Has FMA3 %x\n", has_fma3);
int has_avx3 = TestCpuFlag(kCpuHasAVX3); int has_avx3 = TestCpuFlag(kCpuHasAVX3);
printf("Has AVX3 %x\n", has_avx3); printf("Has AVX3 %x\n", has_avx3);
int has_f16c = TestCpuFlag(kCpuHasF16C);
printf("Has F16C %x\n", has_f16c);
int has_mips = TestCpuFlag(kCpuHasMIPS); int has_mips = TestCpuFlag(kCpuHasMIPS);
printf("Has MIPS %x\n", has_mips); printf("Has MIPS %x\n", has_mips);
int has_dspr2 = TestCpuFlag(kCpuHasDSPR2); int has_dspr2 = TestCpuFlag(kCpuHasDSPR2);
......
...@@ -79,6 +79,7 @@ int main(int argc, const char* argv[]) { ...@@ -79,6 +79,7 @@ int main(int argc, const char* argv[]) {
int has_avx3 = TestCpuFlag(kCpuHasAVX3); int has_avx3 = TestCpuFlag(kCpuHasAVX3);
int has_erms = TestCpuFlag(kCpuHasERMS); int has_erms = TestCpuFlag(kCpuHasERMS);
int has_fma3 = TestCpuFlag(kCpuHasFMA3); int has_fma3 = TestCpuFlag(kCpuHasFMA3);
int has_f16c = TestCpuFlag(kCpuHasF16C);
printf("Has SSE2 %x\n", has_sse2); printf("Has SSE2 %x\n", has_sse2);
printf("Has SSSE3 %x\n", has_ssse3); printf("Has SSSE3 %x\n", has_ssse3);
printf("Has SSE4.1 %x\n", has_sse41); printf("Has SSE4.1 %x\n", has_sse41);
...@@ -88,6 +89,7 @@ int main(int argc, const char* argv[]) { ...@@ -88,6 +89,7 @@ int main(int argc, const char* argv[]) {
printf("Has AVX3 %x\n", has_avx3); printf("Has AVX3 %x\n", has_avx3);
printf("Has ERMS %x\n", has_erms); printf("Has ERMS %x\n", has_erms);
printf("Has FMA3 %x\n", has_fma3); printf("Has FMA3 %x\n", has_fma3);
printf("Has F16C %x\n", has_f16c);
} }
return 0; return 0;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment