Commit 41e972ec authored by fbarchard@google.com

ARGBToI444_SSSE3 UV function ported. Thanks to changjun.yang@intel.com

BUG=148
TESTED=out\release\libyuv_unittest --gtest_filter=*ARGBToI* | grep ms
Review URL: https://webrtc-codereview.appspot.com/1019011

git-svn-id: http://libyuv.googlecode.com/svn/trunk@539 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ba45bbff
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 538
Version: 539
License: BSD
License File: LICENSE
......
......@@ -119,6 +119,7 @@ extern "C" {
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBTOUV444ROW_SSSE3
#endif
// The following are Yasm x86 only.
......@@ -492,6 +493,13 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int width);
// SSSE3 ARGB to U/V row functions for 4:4:4 output: one U byte and one V byte
// are written per source pixel. The plain variant uses aligned 16-byte loads
// and stores, the Unaligned variant uses unaligned ones, and the Any variant
// accepts widths that are not a multiple of 16 by finishing the tail in C.
void ARGBToUV444Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 538
#define LIBYUV_VERSION 539
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
This diff is collapsed.
......@@ -287,6 +287,23 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
#endif
#undef UVANY
// Generates NAMEANY(src_uv, dst_u, dst_v, width): a row wrapper that accepts
// any width. The leading pixels, rounded down to a multiple of (MASK + 1), go
// through ANYTOUV_SIMD; the remaining (width & MASK) pixels go through
// ANYTOUV_C.
//   BPP  - source bytes per pixel, used to step src_uv past the SIMD part.
//   MASK - SIMD pixels per loop pass minus one; must be of the form 2^k - 1.
// Output advances one byte per pixel in both dst_u and dst_v.
// The SIMD call is skipped when there is no full group of pixels: the SSSE3
// row loops execute their body before testing the count, so calling them with
// a count of 0 would still read and write one whole group.
#define UV444ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \
    void NAMEANY(const uint8* src_uv, \
                 uint8* dst_u, uint8* dst_v, int width) { \
      int n = width & ~(MASK); \
      if (n > 0) { \
        ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
      } \
      ANYTOUV_C(src_uv + n * (BPP), \
                dst_u + n, \
                dst_v + n, \
                width & (MASK)); \
    }
// Any-width ARGB to U/V 444 wrapper for SSSE3. BPP = 4 source bytes per pixel
// and MASK = 15, i.e. the SIMD part covers pixels in groups of 16 and the C
// row function handles the last (width & 15) pixels. The Unaligned SIMD
// variant is the one wrapped here, presumably so callers need not align their
// buffers.
#ifdef HAS_ARGBTOUV444ROW_SSSE3
UV444ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
ARGBToUV444Row_C, 4, 15)
#endif
// UV444ANY is only a local generator macro; remove it once it has been used.
#undef UV444ANY
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
void NAMEANY(const uint8* src_uv, \
uint8* dst_u, uint8* dst_v, int width) { \
......
......@@ -1101,6 +1101,122 @@ __asm {
}
}
// Converts one row of 4-byte ARGB pixels into separate U and V rows, writing
// one U byte and one V byte per source pixel (no subsampling).
//
// Naked function: there is no compiler-generated prologue, so the arguments
// are read directly from the stack and edi is saved/restored by hand.
//
// Requirements visible from the code:
// - src_argb0, dst_u and dst_v must be 16-byte aligned (movdqa is used for
//   every load and store).
// - The loop body runs before the count is tested and handles 16 pixels per
//   pass, so width is expected to be a positive multiple of 16; any other
//   value makes the last pass read and write beyond width pixels.
// Uses eax, ecx, edx, xmm0-xmm3, xmm5-xmm7 and the flags.
__declspec(naked) __declspec(align(16))
void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi // edi will hold the dst_v offset; restored before ret
mov eax, [esp + 4 + 4] // src_argb0 (the first 4 skips the edi pushed above)
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // width, in pixels
movdqa xmm7, kARGBToU // per-byte U coefficients (constant not shown here)
movdqa xmm6, kARGBToV // per-byte V coefficients (constant not shown here)
movdqa xmm5, kAddUV128 // byte bias added to every result; presumably 128 per lane - confirm
sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
align 16
convertloop:
// Each pass turns 16 pixels (64 source bytes) into 16 U bytes and 16 V bytes.
movdqa xmm0, [eax] // U pass: pixels 0-3
movdqa xmm1, [eax + 16] // pixels 4-7
movdqa xmm2, [eax + 32] // pixels 8-11
movdqa xmm3, [eax + 48] // pixels 12-15
pmaddubsw xmm0, xmm7 // unsigned pixel byte * signed coefficient, byte pairs summed into words
pmaddubsw xmm1, xmm7
pmaddubsw xmm2, xmm7
pmaddubsw xmm3, xmm7
phaddw xmm0, xmm1 // add adjacent words: one 16-bit sum per pixel, pixels 0-7
phaddw xmm2, xmm3 // same for pixels 8-15
psrlw xmm0, 8 // keep only the high byte of each sum
psrlw xmm2, 8
packuswb xmm0, xmm2 // 16 words (all 0..255 after the shift) -> 16 bytes
paddb xmm0, xmm5 // add the bias; byte add wraps around
sub ecx, 16 // 16 pixels consumed; these flags are tested by jg at the bottom
movdqa [edx], xmm0 // store 16 U bytes
movdqa xmm0, [eax] // V pass: reload the same 16 pixels
movdqa xmm1, [eax + 16]
movdqa xmm2, [eax + 32]
movdqa xmm3, [eax + 48]
pmaddubsw xmm0, xmm6 // same arithmetic as the U pass, with the V coefficients
pmaddubsw xmm1, xmm6
pmaddubsw xmm2, xmm6
pmaddubsw xmm3, xmm6
phaddw xmm0, xmm1
phaddw xmm2, xmm3
psrlw xmm0, 8
psrlw xmm2, 8
packuswb xmm0, xmm2
paddb xmm0, xmm5
lea eax, [eax + 64] // next 16 source pixels; lea leaves the flags untouched
movdqa [edx + edi], xmm0 // store 16 V bytes at the matching dst_v position
lea edx, [edx + 16] // advance dst_u (and, through edi, dst_v)
jg convertloop // loop while the count left by sub ecx, 16 is still > 0
pop edi
ret
}
}
// Converts one row of 4-byte ARGB pixels into separate U and V rows, writing
// one U byte and one V byte per source pixel (no subsampling). Pixel loads and
// result stores use movdqu, so src_argb0, dst_u and dst_v need not be 16-byte
// aligned.
//
// Naked function: there is no compiler-generated prologue, so the arguments
// are read directly from the stack and edi is saved/restored by hand.
//
// The loop body runs before the count is tested and handles 16 pixels per
// pass, so width is expected to be a positive multiple of 16; any other value
// makes the last pass read and write beyond width pixels.
// Uses eax, ecx, edx, xmm0-xmm3, xmm5-xmm7 and the flags.
__declspec(naked) __declspec(align(16))
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi // edi will hold the dst_v offset; restored before ret
mov eax, [esp + 4 + 4] // src_argb0 (the first 4 skips the edi pushed above)
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // width, in pixels
movdqa xmm7, kARGBToU // per-byte U coefficients (constant not shown here); aligned load
movdqa xmm6, kARGBToV // per-byte V coefficients (constant not shown here); aligned load
movdqa xmm5, kAddUV128 // byte bias added to every result; presumably 128 per lane - confirm
sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
align 16
convertloop:
// Each pass turns 16 pixels (64 source bytes) into 16 U bytes and 16 V bytes.
movdqu xmm0, [eax] // U pass: pixels 0-3
movdqu xmm1, [eax + 16] // pixels 4-7
movdqu xmm2, [eax + 32] // pixels 8-11
movdqu xmm3, [eax + 48] // pixels 12-15
pmaddubsw xmm0, xmm7 // unsigned pixel byte * signed coefficient, byte pairs summed into words
pmaddubsw xmm1, xmm7
pmaddubsw xmm2, xmm7
pmaddubsw xmm3, xmm7
phaddw xmm0, xmm1 // add adjacent words: one 16-bit sum per pixel, pixels 0-7
phaddw xmm2, xmm3 // same for pixels 8-15
psrlw xmm0, 8 // keep only the high byte of each sum
psrlw xmm2, 8
packuswb xmm0, xmm2 // 16 words (all 0..255 after the shift) -> 16 bytes
paddb xmm0, xmm5 // add the bias; byte add wraps around
sub ecx, 16 // 16 pixels consumed; these flags are tested by jg at the bottom
movdqu [edx], xmm0 // store 16 U bytes
movdqu xmm0, [eax] // V pass: reload the same 16 pixels
movdqu xmm1, [eax + 16]
movdqu xmm2, [eax + 32]
movdqu xmm3, [eax + 48]
pmaddubsw xmm0, xmm6 // same arithmetic as the U pass, with the V coefficients
pmaddubsw xmm1, xmm6
pmaddubsw xmm2, xmm6
pmaddubsw xmm3, xmm6
phaddw xmm0, xmm1
phaddw xmm2, xmm3
psrlw xmm0, 8
psrlw xmm2, 8
packuswb xmm0, xmm2
paddb xmm0, xmm5
lea eax, [eax + 64] // next 16 source pixels; lea leaves the flags untouched
movdqu [edx + edi], xmm0 // store 16 V bytes at the matching dst_v position
lea edx, [edx + 16] // advance dst_u (and, through edi, dst_v)
jg convertloop // loop while the count left by sub ecx, 16 is still > 0
pop edi
ret
}
}
__declspec(naked) __declspec(align(16))
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment