Commit c5d44a0c authored by fbarchard@google.com

cpuid add sse41 and x86

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/485001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@236 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 1702ec78
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 235 Version: 236
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -17,14 +17,16 @@ extern "C" { ...@@ -17,14 +17,16 @@ extern "C" {
#endif #endif
// These flags are only valid on x86 processors // These flags are only valid on x86 processors
static const int kCpuHasSSE2 = 1; static const int kCpuHasX86 = 1;
static const int kCpuHasSSSE3 = 2; static const int kCpuHasSSE2 = 2;
static const int kCpuHasSSSE3 = 4;
static const int kCpuHasSSE41 = 8;
// These flags are only valid on ARM processors // These flags are only valid on ARM processors
static const int kCpuHasNEON = 4; static const int kCpuHasNEON = 16;
// Internal flag to indicate cpuid is initialized. // Internal flag to indicate cpuid is initialized.
static const int kCpuInitialized = 8; static const int kCpuInitialized = 32;
// Detect CPU has SSE2 etc. // Detect CPU has SSE2 etc.
// test_flag parameter should be one of kCpuHas constants above // test_flag parameter should be one of kCpuHas constants above
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 235 #define LIBYUV_VERSION 236
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -75,16 +75,22 @@ int InitCpuFlags() { ...@@ -75,16 +75,22 @@ int InitCpuFlags() {
__cpuid(cpu_info, 1); __cpuid(cpu_info, 1);
cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) | cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
(cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) | (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
kCpuInitialized; (cpu_info[2] & 0x00080000 ? kCpuHasSSE41 : 0) |
kCpuInitialized | kCpuHasX86;
// environment variable overrides for testing. // environment variable overrides for testing.
if (getenv("LIBYUV_DISABLE_X86")) {
cpu_info_ &= ~kCpuHasX86;
}
if (getenv("LIBYUV_DISABLE_SSE2")) { if (getenv("LIBYUV_DISABLE_SSE2")) {
cpu_info_ &= ~kCpuHasSSE2; cpu_info_ &= ~kCpuHasSSE2;
} }
// environment variable overrides for testing.
if (getenv("LIBYUV_DISABLE_SSSE3")) { if (getenv("LIBYUV_DISABLE_SSSE3")) {
cpu_info_ &= ~kCpuHasSSSE3; cpu_info_ &= ~kCpuHasSSSE3;
} }
if (getenv("LIBYUV_DISABLE_SSE41")) {
cpu_info_ &= ~kCpuHasSSE41;
}
#elif defined(__linux__) && defined(__ARM_NEON__) #elif defined(__linux__) && defined(__ARM_NEON__)
cpu_info_ = ArmCpuCaps("/proc/cpuinfo") | kCpuInitialized; cpu_info_ = ArmCpuCaps("/proc/cpuinfo") | kCpuInitialized;
#elif defined(__ARM_NEON__) #elif defined(__ARM_NEON__)
......
...@@ -29,17 +29,17 @@ void CopyPlane(const uint8* src_y, int src_stride_y, ...@@ -29,17 +29,17 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) { if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) {
CopyRow = CopyRow_NEON; CopyRow = CopyRow_NEON;
} }
#elif defined(HAS_COPYROW_X86) #endif
if (IS_ALIGNED(width, 4)) { #if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
CopyRow = CopyRow_X86; CopyRow = CopyRow_X86;
#if defined(HAS_COPYROW_SSE2) }
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_SSE2;
}
#endif #endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_SSE2;
} }
#endif #endif
...@@ -755,7 +755,6 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, ...@@ -755,7 +755,6 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
} }
#endif #endif
#if !defined(HAS_SETROW_X86)
static void SetRow8_C(uint8* dst, uint32 v8, int count) { static void SetRow8_C(uint8* dst, uint32 v8, int count) {
#ifdef _MSC_VER #ifdef _MSC_VER
for (int x = 0; x < count; ++x) { for (int x = 0; x < count; ++x) {
...@@ -776,24 +775,24 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width, ...@@ -776,24 +775,24 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width,
dst += dst_stride; dst += dst_stride;
} }
} }
#endif
void SetPlane(uint8* dst_y, int dst_stride_y, void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height, int width, int height,
uint32 value) { uint32 value) {
#if defined(HAS_SETROW_X86)
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_X86;
#else
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C; void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C;
#endif
#if defined(HAS_SETROW_NEON) #if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
SetRow = SetRow8_NEON; SetRow = SetRow8_NEON;
} }
#elif defined(HAS_SETROW_SSE2) #endif
#if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
SetRow = SetRow8_X86;
}
#endif
#if defined(HAS_SETROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
...@@ -836,7 +835,6 @@ int I420Rect(uint8* dst_y, int dst_stride_y, ...@@ -836,7 +835,6 @@ int I420Rect(uint8* dst_y, int dst_stride_y,
return 0; return 0;
} }
// TODO(fbarchard): Add TestCpuFlag(kCpuHasX86) to allow C code to be tested.
// Draw a rectangle into ARGB // Draw a rectangle into ARGB
int ARGBRect(uint8* dst_argb, int dst_stride_argb, int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int dst_x, int dst_y, int dst_x, int dst_y,
...@@ -848,9 +846,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, ...@@ -848,9 +846,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
return -1; return -1;
} }
uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
#if defined(HAS_SETROW_X86)
SetRows32_X86(dst, value, width, dst_stride_argb, height);
#else
#if defined(HAS_SETROW_NEON) #if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
...@@ -858,8 +853,13 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, ...@@ -858,8 +853,13 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
return 0; return 0;
} }
#endif #endif
SetRows32_C(dst, value, width, dst_stride_argb, height); #if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
SetRows32_X86(dst, value, width, dst_stride_argb, height);
return 0;
}
#endif #endif
SetRows32_C(dst, value, width, dst_stride_argb, height);
return 0; return 0;
} }
......
...@@ -89,11 +89,13 @@ extern "C" { ...@@ -89,11 +89,13 @@ extern "C" {
typedef __declspec(align(16)) int8 vec8[16]; typedef __declspec(align(16)) int8 vec8[16];
typedef __declspec(align(16)) uint8 uvec8[16]; typedef __declspec(align(16)) uint8 uvec8[16];
typedef __declspec(align(16)) int16 vec16[8]; typedef __declspec(align(16)) int16 vec16[8];
typedef __declspec(align(16)) int32 vec32[4];
#else // __GNUC__ #else // __GNUC__
#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
typedef int8 __attribute__((vector_size(16))) vec8; typedef int8 __attribute__((vector_size(16))) vec8;
typedef uint8 __attribute__((vector_size(16))) uvec8; typedef uint8 __attribute__((vector_size(16))) uvec8;
typedef int16 __attribute__((vector_size(16))) vec16; typedef int16 __attribute__((vector_size(16))) vec16;
typedef int32 __attribute__((vector_size(16))) vec32;
#endif #endif
void I420ToARGBRow_NEON(const uint8* y_buf, void I420ToARGBRow_NEON(const uint8* y_buf,
......
...@@ -19,6 +19,27 @@ ...@@ -19,6 +19,27 @@
namespace libyuv { namespace libyuv {
TEST_F(libyuvTest, TestVersion) {
EXPECT_GE(LIBYUV_VERSION, 169);
}
TEST_F(libyuvTest, TestCpuHas) {
#if LIBYUV_VERSION >= 236
int has_x86 = TestCpuFlag(kCpuHasX86);
printf("Has X86 %d\n", has_x86);
#endif
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
printf("Has SSE2 %d\n", has_sse2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
printf("Has SSSE3 %d\n", has_ssse3);
#if LIBYUV_VERSION >= 236
int has_sse41 = TestCpuFlag(kCpuHasSSE41);
printf("Has SSE4.1 %d\n", has_sse41);
#endif
int has_neon = TestCpuFlag(kCpuHasNEON);
printf("Has NEON %d\n", has_neon);
}
// For testing purposes call the proc/cpuinfo parser directly // For testing purposes call the proc/cpuinfo parser directly
extern "C" int ArmCpuCaps(const char* cpuinfoname); extern "C" int ArmCpuCaps(const char* cpuinfoname);
...@@ -27,8 +48,4 @@ TEST_F(libyuvTest, TestLinuxNeon) { ...@@ -27,8 +48,4 @@ TEST_F(libyuvTest, TestLinuxNeon) {
EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("unit_test/testdata/tegra3.txt")); EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("unit_test/testdata/tegra3.txt"));
} }
TEST_F(libyuvTest, TestVersion) {
EXPECT_GE(LIBYUV_VERSION, 169);
}
} // namespace libyuv } // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment