Commit 067892c5 authored by fbarchard@google.com's avatar fbarchard@google.com

Port YUY2ToYRow_AVX2 and UYVYToYRow_AVX2 to gcc/NaCL from Windows AVX code.

BUG=269
TESTED=ncval
R=brucedawson@google.com, harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/25039004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1151 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 260e3b22
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1148 Version: 1149
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1148 #define LIBYUV_VERSION 1149
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -243,9 +243,13 @@ BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C, ...@@ -243,9 +243,13 @@ BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
#ifdef HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYROW_AVX2
YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, ARGBToYRow_C, 4, 1, 31) YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, ARGBToYRow_C, 4, 1, 31)
YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, ARGBToYJRow_C, 4, 1, 31) YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, ARGBToYJRow_C, 4, 1, 31)
YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, YUY2ToYRow_C, 2, 1, 31) #endif
#ifdef HAS_UYVYTOYROW_AVX2
YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, UYVYToYRow_C, 2, 1, 31) YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, UYVYToYRow_C, 2, 1, 31)
#endif #endif
#ifdef HAS_YUY2TOYROW_AVX2
YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, YUY2ToYRow_C, 2, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, ARGBToYRow_C, 4, 1, 15) YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, ARGBToYRow_C, 4, 1, 15)
#endif #endif
......
...@@ -2853,6 +2853,64 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy, ...@@ -2853,6 +2853,64 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
} }
#endif // HAS_YUY2TOYROW_SSE2 #endif // HAS_YUY2TOYROW_SSE2
#ifdef HAS_YUY2TOYROW_AVX2
void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix) {
asm volatile (
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpsrlw $0x8,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpand %%ymm5,%%ymm0,%%ymm0 \n"
"vpand %%ymm5,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"sub $0x20,%2 \n"
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"jg 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm5"
#endif
);
}
void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix) {
asm volatile (
LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"sub $0x20,%2 \n"
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"jg 1b \n"
"vzeroupper \n"
"ret \n"
: "+r"(src_uyvy), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm5"
#endif
);
}
#endif // HAS_YUY2TOYROW_AVX2
#ifdef HAS_ARGBBLENDROW_SSE2 #ifdef HAS_ARGBBLENDROW_SSE2
// Blend 8 pixels at a time. // Blend 8 pixels at a time.
void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment