Commit aa7988ff authored by fbarchard@google.com

Add an Enhanced REP MOVSB (ERMS) version of CopyRow for posix, and use CPU detection to select ERMS

BUG=213
TEST=none
Review URL: https://webrtc-codereview.appspot.com/1306008

git-svn-id: http://libyuv.googlecode.com/svn/trunk@658 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 15c7b2ff
...@@ -61,6 +61,7 @@ extern "C" { ...@@ -61,6 +61,7 @@ extern "C" {
#define HAS_BGRATOYROW_SSSE3 #define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_SSE2 #define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86 #define HAS_COPYROW_X86
#define HAS_COPYROW_ERMS
#define HAS_HALFROW_SSE2 #define HAS_HALFROW_SSE2
#define HAS_I400TOARGBROW_SSE2 #define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3 #define HAS_I411TOARGBROW_SSSE3
...@@ -130,7 +131,6 @@ extern "C" { ...@@ -130,7 +131,6 @@ extern "C" {
// TODO(fbarchard): Port to gcc. // TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86 #define HAS_ARGBCOLORTABLEROW_X86
#define HAS_COPYROW_AVX2
// Visual C 2012 required for AVX2. // Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700 #if _MSC_VER >= 1700
// TODO(fbarchard): Hook these up to all functions. e.g. format conversion. // TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
...@@ -649,7 +649,7 @@ void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, ...@@ -649,7 +649,7 @@ void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width); int width);
void CopyRow_SSE2(const uint8* src, uint8* dst, int count); void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
void CopyRow_AVX2(const uint8* src, uint8* dst, int count); void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
void CopyRow_X86(const uint8* src, uint8* dst, int count); void CopyRow_X86(const uint8* src, uint8* dst, int count);
void CopyRow_NEON(const uint8* src, uint8* dst, int count); void CopyRow_NEON(const uint8* src, uint8* dst, int count);
void CopyRow_MIPS(const uint8* src, uint8* dst, int count); void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
......
...@@ -308,10 +308,10 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, ...@@ -308,10 +308,10 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2) #if defined(HAS_COPYROW_ERMS)
// TODO(fbarchard): Detect Fast String support. // TODO(fbarchard): Detect Fast String support.
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_AVX2; CopyRow = CopyRow_ERMS;
} }
#endif #endif
#if defined(HAS_COPYROW_NEON) #if defined(HAS_COPYROW_NEON)
...@@ -539,9 +539,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, ...@@ -539,9 +539,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2) #if defined(HAS_COPYROW_ERMS)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_AVX2; CopyRow = CopyRow_ERMS;
} }
#endif #endif
#if defined(HAS_COPYROW_MIPS) #if defined(HAS_COPYROW_MIPS)
......
...@@ -64,9 +64,9 @@ int I420ToI422(const uint8* src_y, int src_stride_y, ...@@ -64,9 +64,9 @@ int I420ToI422(const uint8* src_y, int src_stride_y,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2) #if defined(HAS_COPYROW_ERMS)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_AVX2; CopyRow = CopyRow_ERMS;
} }
#endif #endif
#if defined(HAS_COPYROW_NEON) #if defined(HAS_COPYROW_NEON)
......
...@@ -49,10 +49,9 @@ void CopyPlane(const uint8* src_y, int src_stride_y, ...@@ -49,10 +49,9 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2) #if defined(HAS_COPYROW_ERMS)
// TODO(fbarchard): Detect Fast String support. if (TestCpuFlag(kCpuHasERMS)) {
if (TestCpuFlag(kCpuHasAVX2)) { CopyRow = CopyRow_ERMS;
CopyRow = CopyRow_AVX2;
} }
#endif #endif
#if defined(HAS_COPYROW_NEON) #if defined(HAS_COPYROW_NEON)
......
...@@ -911,10 +911,9 @@ void RotatePlane180(const uint8* src, int src_stride, ...@@ -911,10 +911,9 @@ void RotatePlane180(const uint8* src, int src_stride,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2) #if defined(HAS_COPYROW_ERMS)
// TODO(fbarchard): Detect Fast String support. if (TestCpuFlag(kCpuHasERMS)) {
if (TestCpuFlag(kCpuHasAVX2)) { CopyRow = CopyRow_ERMS;
CopyRow = CopyRow_AVX2;
} }
#endif #endif
#if defined(HAS_COPYROW_MIPS) #if defined(HAS_COPYROW_MIPS)
......
...@@ -128,10 +128,9 @@ void ARGBRotate180(const uint8* src, int src_stride, ...@@ -128,10 +128,9 @@ void ARGBRotate180(const uint8* src, int src_stride,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2) #if defined(HAS_COPYROW_ERMS)
// TODO(fbarchard): Detect Fast String support. if (TestCpuFlag(kCpuHasERMS)) {
if (TestCpuFlag(kCpuHasAVX2)) { CopyRow = CopyRow_ERMS;
CopyRow = CopyRow_AVX2;
} }
#endif #endif
#if defined(HAS_COPYROW_MIPS) #if defined(HAS_COPYROW_MIPS)
......
...@@ -3027,6 +3027,19 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) { ...@@ -3027,6 +3027,19 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {
} }
#endif // HAS_COPYROW_X86 #endif // HAS_COPYROW_X86
// Row copy implemented with a single "rep movsb" string instruction.
// Works for unaligned pointers and for any byte count (multiple of 1).
//   src:   first byte to read.
//   dst:   first byte to write.
//   width: number of bytes to copy.
void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
  // Widen the count to size_t before binding it to the count register
  // ("c" constraint); the instruction decrements it as it copies.
  size_t bytes_left = static_cast<size_t>(width);
  asm volatile (
    "rep movsb \n"
    // All three operands are read-write: the instruction advances the source
    // ("S") and destination ("D") pointers and consumes the counter.
    : "+S"(src),         // %0
      "+D"(dst),         // %1
      "+c"(bytes_left)   // %2
    :
    // "memory": dst is written behind the compiler's back.
    : "memory", "cc"
  );
}
#ifdef HAS_SETROW_X86 #ifdef HAS_SETROW_X86
void SetRow_X86(uint8* dst, uint32 v32, int width) { void SetRow_X86(uint8* dst, uint32 v32, int width) {
size_t width_tmp = static_cast<size_t>(width); size_t width_tmp = static_cast<size_t>(width);
......
...@@ -3497,10 +3497,9 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { ...@@ -3497,10 +3497,9 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
} }
#endif // HAS_COPYROW_SSE2 #endif // HAS_COPYROW_SSE2
#ifdef HAS_COPYROW_AVX2
// Unaligned Multiple of 1. // Unaligned Multiple of 1.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void CopyRow_AVX2(const uint8* src, uint8* dst, int count) { void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
__asm { __asm {
mov eax, esi mov eax, esi
mov edx, edi mov edx, edi
...@@ -3513,7 +3512,6 @@ void CopyRow_AVX2(const uint8* src, uint8* dst, int count) { ...@@ -3513,7 +3512,6 @@ void CopyRow_AVX2(const uint8* src, uint8* dst, int count) {
ret ret
} }
} }
#endif // HAS_COPYROW_AVX2
#ifdef HAS_COPYROW_X86 #ifdef HAS_COPYROW_X86
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment