Commit 5333e94e authored by Frank Barchard

Port ARGBExtractAlphaRow_AVX2 function to Windows.

BUG=libyuv:572
TEST=try bots
R=wangcheng@google.com, magjed@chromium.org

Review URL: https://codereview.chromium.org/2416783004 .
parent a5e93766
...@@ -83,6 +83,7 @@ extern "C" { ...@@ -83,6 +83,7 @@ extern "C" {
#define HAS_ABGRTOYROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBEXTRACTALPHAROW_SSE2
#define HAS_ARGBSETROW_X86 #define HAS_ARGBSETROW_X86
#define HAS_ARGBSHUFFLEROW_SSE2 #define HAS_ARGBSHUFFLEROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBSHUFFLEROW_SSSE3
...@@ -97,12 +98,12 @@ extern "C" { ...@@ -97,12 +98,12 @@ extern "C" {
#define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3
#define HAS_ARGBEXTRACTALPHAROW_SSE2
#define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3 #define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS #define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2 #define HAS_COPYROW_SSE2
#define HAS_H422TOARGBROW_SSSE3 #define HAS_H422TOARGBROW_SSSE3
#define HAS_HALFFLOATROW_SSE2
#define HAS_I400TOARGBROW_SSE2 #define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3
...@@ -140,7 +141,6 @@ extern "C" { ...@@ -140,7 +141,6 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_SSE2 #define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2 #define HAS_YUY2TOYROW_SSE2
#define HAS_HALFFLOATROW_SSE2
// Effects: // Effects:
#define HAS_ARGBADDROW_SSE2 #define HAS_ARGBADDROW_SSE2
...@@ -189,6 +189,7 @@ extern "C" { ...@@ -189,6 +189,7 @@ extern "C" {
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ARGBCOPYALPHAROW_AVX2 #define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2 #define HAS_ARGBCOPYYTOALPHAROW_AVX2
#define HAS_ARGBEXTRACTALPHAROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2 #define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2
...@@ -199,12 +200,8 @@ extern "C" { ...@@ -199,12 +200,8 @@ extern "C" {
#define HAS_ARGBTOYROW_AVX2 #define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX #define HAS_COPYROW_AVX
#define HAS_H422TOARGBROW_AVX2 #define HAS_H422TOARGBROW_AVX2
#define HAS_HALFFLOATROW_AVX2
#define HAS_I400TOARGBROW_AVX2 #define HAS_I400TOARGBROW_AVX2
#if !(defined(_DEBUG) && defined(__i386__))
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_AVX2
#endif
#define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2 #define HAS_I422TOARGBROW_AVX2
...@@ -228,7 +225,6 @@ extern "C" { ...@@ -228,7 +225,6 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_AVX2 #define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2 #define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2 #define HAS_YUY2TOYROW_AVX2
#define HAS_HALFFLOATROW_AVX2
// Effects: // Effects:
#define HAS_ARGBADDROW_AVX2 #define HAS_ARGBADDROW_AVX2
...@@ -237,15 +233,12 @@ extern "C" { ...@@ -237,15 +233,12 @@ extern "C" {
#define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_BLENDPLANEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2
#endif
// The following are available clang 3.4 or gcc 4.7. #if !(defined(_DEBUG) && defined(__i386__))
// TODO(fbarchard): Port to Visual C // TODO(fbarchard): fix build error on android_full_debug=1
#if !defined(LIBYUV_DISABLE_X86) && \ // https://code.google.com/p/libyuv/issues/detail?id=517
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \ #define HAS_I422ALPHATOARGBROW_AVX2
!(defined(__clang__) && defined(_M_IX86) ) #endif
#define HAS_ARGBEXTRACTALPHAROW_AVX2
#endif #endif
// The following are available for AVX2 Visual C and clangcl 32 bit: // The following are available for AVX2 Visual C and clangcl 32 bit:
...@@ -279,6 +272,7 @@ extern "C" { ...@@ -279,6 +272,7 @@ extern "C" {
#define HAS_ARGB4444TOARGBROW_NEON #define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGB4444TOUVROW_NEON #define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON #define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBEXTRACTALPHAROW_NEON
#define HAS_ARGBSETROW_NEON #define HAS_ARGBSETROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON
...@@ -291,7 +285,6 @@ extern "C" { ...@@ -291,7 +285,6 @@ extern "C" {
#define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON #define HAS_ARGBTOYROW_NEON
#define HAS_ARGBEXTRACTALPHAROW_NEON
#define HAS_BGRATOUVROW_NEON #define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON #define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON #define HAS_COPYROW_NEON
...@@ -371,15 +364,15 @@ extern "C" { ...@@ -371,15 +364,15 @@ extern "C" {
#endif #endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_MIRRORROW_MSA
#define HAS_ARGBMIRRORROW_MSA #define HAS_ARGBMIRRORROW_MSA
#define HAS_I422TOYUY2ROW_MSA
#define HAS_I422TOUYVYROW_MSA #define HAS_I422TOUYVYROW_MSA
#define HAS_YUY2TOYROW_MSA #define HAS_I422TOYUY2ROW_MSA
#define HAS_YUY2TOUVROW_MSA #define HAS_MIRRORROW_MSA
#define HAS_YUY2TOUV422ROW_MSA
#define HAS_UYVYTOYROW_MSA
#define HAS_UYVYTOUVROW_MSA #define HAS_UYVYTOUVROW_MSA
#define HAS_UYVYTOYROW_MSA
#define HAS_YUY2TOUV422ROW_MSA
#define HAS_YUY2TOUVROW_MSA
#define HAS_YUY2TOYROW_MSA
#endif #endif
......
...@@ -3445,6 +3445,41 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { ...@@ -3445,6 +3445,41 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
} }
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2 #endif // HAS_ARGBEXTRACTALPHAROW_SSE2
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
// Extracts the top byte of every 32-bit pixel in src_argb (the byte this
// function treats as alpha) and writes those bytes contiguously to dst_a.
//
//   src_argb  source pixels, 4 bytes per pixel; read with unaligned loads.
//   dst_a     destination, 1 byte per pixel; written with unaligned stores.
//   width     width in pixels.
//
// Each loop iteration consumes 128 source bytes (32 pixels) and produces 32
// destination bytes. There is no check before the loop and no tail handling,
// so at least one full 32-pixel group is always processed and the final group
// is processed in full even when fewer than 32 pixels remain.
// NOTE(review): callers presumably pass a width that is a multiple of 32 or
// route remainders through a separate wrapper - confirm against the callers.
//
// The function is naked (no compiler-generated prologue/epilogue), so the
// arguments are read directly from the stack relative to esp and the body
// must end with its own ret. Only eax, ecx, edx and ymm0-ymm4 are written.
__declspec(naked)
void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {
__asm {
// No frame is set up: [esp] is the return address, arguments follow it.
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_a
mov ecx, [esp + 12] // width
// Dword permutation indices used by vpermd at the end of each iteration.
// vmovdqa is an aligned load, so the constant must be 32-byte aligned.
// NOTE(review): kPermdARGBToY_AVX is defined outside this block; presumably
// it holds the indices that undo the per-lane pack ordering - confirm.
vmovdqa ymm4, ymmword ptr kPermdARGBToY_AVX
extractloop:
// Pixels 0-15: load, then shift each dword right by 24 so only its top byte
// remains, zero-extended to 32 bits (value range 0..255).
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
vpsrld ymm0, ymm0, 24
vpsrld ymm1, ymm1, 24
// Pixels 16-31: loads are issued before the first pack; shifts follow below.
vmovdqu ymm2, [eax + 64]
vmovdqu ymm3, [eax + 96]
// Advance the source pointer by 128 bytes (32 pixels).
lea eax, [eax + 128]
// Narrow dwords to words. Values are 0..255, so the signed saturation never
// clamps. The 256-bit pack works on each 128-bit lane separately, which
// leaves the elements out of linear order ("mutates").
vpackssdw ymm0, ymm0, ymm1 // mutates
vpsrld ymm2, ymm2, 24
vpsrld ymm3, ymm3, 24
vpackssdw ymm2, ymm2, ymm3 // mutates
// Narrow words to bytes: 32 bytes in ymm0, still in per-lane pack order.
vpackuswb ymm0, ymm0, ymm2 // mutates
// Reorder the eight dwords of ymm0 using the indices in ymm4.
vpermd ymm0, ymm4, ymm0 // unmutate
// Store 32 result bytes and advance the destination pointer.
vmovdqu [edx], ymm0
lea edx, [edx + 32]
// 32 pixels done; loop while the remaining width is still positive (signed).
sub ecx, 32
jg extractloop
// Clear the upper halves of the ymm registers before returning to the caller.
vzeroupper
ret
}
}
#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels // width in pixels
__declspec(naked) __declspec(naked)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment