Commit c5d44a0c authored by fbarchard@google.com

cpuid add sse41 and x86

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/485001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@236 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 1702ec78
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 235
Version: 236
License: BSD
License File: LICENSE
......
......@@ -17,14 +17,16 @@ extern "C" {
#endif
// These flags are only valid on x86 processors
static const int kCpuHasSSE2 = 1;
static const int kCpuHasSSSE3 = 2;
static const int kCpuHasX86 = 1;
static const int kCpuHasSSE2 = 2;
static const int kCpuHasSSSE3 = 4;
static const int kCpuHasSSE41 = 8;
// These flags are only valid on ARM processors
static const int kCpuHasNEON = 4;
static const int kCpuHasNEON = 16;
// Internal flag to indicate cpuid is initialized.
static const int kCpuInitialized = 8;
static const int kCpuInitialized = 32;
// Detect CPU has SSE2 etc.
// test_flag parameter should be one of kCpuHas constants above
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 235
#define LIBYUV_VERSION 236
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -75,16 +75,22 @@ int InitCpuFlags() {
__cpuid(cpu_info, 1);
cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
(cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
kCpuInitialized;
(cpu_info[2] & 0x00080000 ? kCpuHasSSE41 : 0) |
kCpuInitialized | kCpuHasX86;
// environment variable overrides for testing.
if (getenv("LIBYUV_DISABLE_X86")) {
cpu_info_ &= ~kCpuHasX86;
}
if (getenv("LIBYUV_DISABLE_SSE2")) {
cpu_info_ &= ~kCpuHasSSE2;
}
// environment variable overrides for testing.
if (getenv("LIBYUV_DISABLE_SSSE3")) {
cpu_info_ &= ~kCpuHasSSSE3;
}
if (getenv("LIBYUV_DISABLE_SSE41")) {
cpu_info_ &= ~kCpuHasSSE41;
}
#elif defined(__linux__) && defined(__ARM_NEON__)
cpu_info_ = ArmCpuCaps("/proc/cpuinfo") | kCpuInitialized;
#elif defined(__ARM_NEON__)
......
......@@ -29,19 +29,19 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) {
CopyRow = CopyRow_NEON;
}
#elif defined(HAS_COPYROW_X86)
if (IS_ALIGNED(width, 4)) {
#endif
#if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
CopyRow = CopyRow_X86;
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 32) &&
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_SSE2;
}
#endif
}
#endif
// Copy plane
for (int y = 0; y < height; ++y) {
......@@ -755,7 +755,6 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
}
#endif
#if !defined(HAS_SETROW_X86)
static void SetRow8_C(uint8* dst, uint32 v8, int count) {
#ifdef _MSC_VER
for (int x = 0; x < count; ++x) {
......@@ -776,24 +775,24 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width,
dst += dst_stride;
}
}
#endif
void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value) {
#if defined(HAS_SETROW_X86)
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_X86;
#else
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C;
#endif
#if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
SetRow = SetRow8_NEON;
}
#elif defined(HAS_SETROW_SSE2)
#endif
#if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
SetRow = SetRow8_X86;
}
#endif
#if defined(HAS_SETROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
......@@ -836,7 +835,6 @@ int I420Rect(uint8* dst_y, int dst_stride_y,
return 0;
}
// TODO(fbarchard): Add TestCpuFlag(kCpuHasX86) to allow C code to be tested.
// Draw a rectangle into ARGB
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int dst_x, int dst_y,
......@@ -848,9 +846,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
return -1;
}
uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
#if defined(HAS_SETROW_X86)
SetRows32_X86(dst, value, width, dst_stride_argb, height);
#else
#if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
......@@ -858,8 +853,13 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
return 0;
}
#endif
SetRows32_C(dst, value, width, dst_stride_argb, height);
#if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
SetRows32_X86(dst, value, width, dst_stride_argb, height);
return 0;
}
#endif
SetRows32_C(dst, value, width, dst_stride_argb, height);
return 0;
}
......
......@@ -89,11 +89,13 @@ extern "C" {
typedef __declspec(align(16)) int8 vec8[16];
typedef __declspec(align(16)) uint8 uvec8[16];
typedef __declspec(align(16)) int16 vec16[8];
typedef __declspec(align(16)) int32 vec32[4];
#else // __GNUC__
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
typedef int8 __attribute__((vector_size(16))) vec8;
typedef uint8 __attribute__((vector_size(16))) uvec8;
typedef int16 __attribute__((vector_size(16))) vec16;
typedef int32 __attribute__((vector_size(16))) vec32;
#endif
void I420ToARGBRow_NEON(const uint8* y_buf,
......
......@@ -19,6 +19,27 @@
namespace libyuv {
// Checks that the LIBYUV_VERSION macro is at least 169.
TEST_F(libyuvTest, TestVersion) {
EXPECT_GE(LIBYUV_VERSION, 169);
}
// Queries TestCpuFlag() for each kCpuHas* flag and prints the result.
// Informational only: the body contains no EXPECT/ASSERT checks, so the
// printed values are never verified.
TEST_F(libyuvTest, TestCpuHas) {
// kCpuHasX86 is only queried when LIBYUV_VERSION is 236 or newer.
#if LIBYUV_VERSION >= 236
int has_x86 = TestCpuFlag(kCpuHasX86);
printf("Has X86 %d\n", has_x86);
#endif
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
printf("Has SSE2 %d\n", has_sse2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
printf("Has SSSE3 %d\n", has_ssse3);
// kCpuHasSSE41 is likewise only queried when LIBYUV_VERSION is 236 or newer.
#if LIBYUV_VERSION >= 236
int has_sse41 = TestCpuFlag(kCpuHasSSE41);
printf("Has SSE4.1 %d\n", has_sse41);
#endif
// The NEON flag is queried unconditionally, like SSE2 and SSSE3 above.
int has_neon = TestCpuFlag(kCpuHasNEON);
printf("Has NEON %d\n", has_neon);
}
// For testing purposes call the proc/cpuinfo parser directly
extern "C" int ArmCpuCaps(const char* cpuinfoname);
......@@ -27,8 +48,4 @@ TEST_F(libyuvTest, TestLinuxNeon) {
EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("unit_test/testdata/tegra3.txt"));
}
TEST_F(libyuvTest, TestVersion) {
EXPECT_GE(LIBYUV_VERSION, 169);
}
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment