Commit 02e48bf7 authored by fbarchard@google.com

YUY2 for AVX2

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/887006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@424 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0908a701
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 423
+Version: 424
 License: BSD
 License File: LICENSE
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 423
+#define LIBYUV_VERSION 424
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
;*
;* Copyright 2012 The LibYuv Project Authors. All rights reserved.
;*
;* Use of this source code is governed by a BSD-style license
;* that can be found in the LICENSE file in the root of the source
;* tree. An additional intellectual property rights grant can be found
;* in the file PATENTS. All contributing project authors may
;* be found in the AUTHORS file in the root of the source tree.
;*
%include "x86inc.asm"
SECTION .text
; void YUY2ToYRow_SSE2(const uint8* src_yuy2,
;                      uint8* dst_y, int pix);

%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
    pcmpeqb    m2, m2               ; generate mask 0x00ff00ff
    psrlw      m2, 8
%endif

    ALIGN      16
.convertloop:
    mov%2      m0, [src_yuy2q]
    mov%2      m1, [src_yuy2q + mmsize]
    lea        src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
    pand       m0, m2               ; YUY2 even bytes are Y
    pand       m1, m2
%else
    psrlw      m0, 8                ; UYVY odd bytes are Y
    psrlw      m1, 8
%endif
    packuswb   m0, m1
    sub        pixd, mmsize
    mov%2      [dst_yq], m0
    lea        dst_yq, [dst_yq + mmsize]
    jg         .convertloop
    RET
%endmacro
; TODO(fbarchard): Remove MMX when SSE2 is required.
INIT_MMX MMX
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned

INIT_XMM SSE2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned

INIT_YMM AVX2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
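
Each instantiation above generates an aligned (,a,) and an unaligned (,u,_Unaligned) row function per instruction set; with the cpu-name suffix appended by cglobal this yields names such as YUY2ToYRow_AVX2 and YUY2ToYRow_Unaligned_AVX2. For reference, here is a scalar C sketch of what these rows compute (illustrative only, not part of this commit; the _Ref names are hypothetical). YUY2 interleaves pixels as Y0 U Y1 V, so luma sits in the even bytes; UYVY interleaves as U Y0 V Y1, so luma sits in the odd bytes. The SIMD loop masks or shifts two registers of data and packs them, producing mmsize Y pixels per iteration.

#include <stdint.h>

/* Scalar sketch of the YUY2 row: even bytes are Y. */
static void YUY2ToYRow_Ref(const uint8_t* src_yuy2, uint8_t* dst_y, int pix) {
  for (int x = 0; x < pix; ++x)
    dst_y[x] = src_yuy2[x * 2];
}

/* Scalar sketch of the UYVY row: odd bytes are Y. */
static void UYVYToYRow_Ref(const uint8_t* src_uyvy, uint8_t* dst_y, int pix) {
  for (int x = 0; x < pix; ++x)
    dst_y[x] = src_uyvy[x * 2 + 1];
}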
@@ -34,7 +34,9 @@
 ; as this feature might be useful for others as well. Send patches or ideas
 ; to x264-devel@videolan.org .
-%define program_name x264
+; Local changes for libyuv:
+; remove %define program_name and references in labels
+; rename cpus to uppercase
 %define WIN64 0
 %define UNIX64 0
@@ -505,7 +507,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endmacro

 %macro cglobal_internal 1-2+
     %ifndef cglobaled_%1
-        %xdefine %1 mangle(program_name %+ _ %+ %1)
+        %xdefine %1 mangle(%1)
         %xdefine %1.skip_prologue %1 %+ .skip_prologue
         CAT_XDEFINE cglobaled_, %1, 1
     %endif
@@ -525,7 +527,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endmacro

 %macro cextern 1
-    %xdefine %1 mangle(program_name %+ _ %+ %1)
+    %xdefine %1 mangle(%1)
     CAT_XDEFINE cglobaled_, %1, 1
     extern %1
 %endmacro
@@ -538,7 +540,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endmacro

 %macro const 2+
-    %xdefine %1 mangle(program_name %+ _ %+ %1)
+    %xdefine %1 mangle(%1)
     global %1
     %1: %2
 %endmacro
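
The three mangle() edits above share one purpose: cglobal, cextern, and const no longer prepend the x264 program name to symbols, so the labels emitted by the .asm files line up with libyuv's plain C prototypes. A minimal sketch of the C side, reusing the prototype from the row_x86.asm comment above (shown for illustration only):

typedef unsigned char uint8;

/* The assembly label now mangles to YUY2ToYRow_SSE2 directly; before this
 * change it would have been x264_YUY2ToYRow_SSE2 and failed to link. */
extern void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);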
@@ -551,22 +553,22 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 ; cpuflags

-%assign cpuflags_mmx      (1<<0)
-%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
-%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
+%assign cpuflags_MMX      (1<<0)
+%assign cpuflags_MMX2     (1<<1) | cpuflags_MMX
+%assign cpuflags_3dnow    (1<<2) | cpuflags_MMX
 %assign cpuflags_3dnow2   (1<<3) | cpuflags_3dnow
-%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
-%assign cpuflags_sse2     (1<<5) | cpuflags_sse
-%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
-%assign cpuflags_sse3     (1<<7) | cpuflags_sse2
-%assign cpuflags_ssse3    (1<<8) | cpuflags_sse3
-%assign cpuflags_sse4     (1<<9) | cpuflags_ssse3
-%assign cpuflags_sse42    (1<<10)| cpuflags_sse4
-%assign cpuflags_avx      (1<<11)| cpuflags_sse42
-%assign cpuflags_xop      (1<<12)| cpuflags_avx
-%assign cpuflags_fma4     (1<<13)| cpuflags_avx
-%assign cpuflags_avx2     (1<<14)| cpuflags_avx
-%assign cpuflags_fma3     (1<<15)| cpuflags_avx
+%assign cpuflags_SSE      (1<<4) | cpuflags_MMX2
+%assign cpuflags_SSE2     (1<<5) | cpuflags_SSE
+%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
+%assign cpuflags_SSE3     (1<<7) | cpuflags_SSE2
+%assign cpuflags_SSSE3    (1<<8) | cpuflags_SSE3
+%assign cpuflags_SSE4     (1<<9) | cpuflags_SSSE3
+%assign cpuflags_SSE42    (1<<10)| cpuflags_SSE4
+%assign cpuflags_AVX      (1<<11)| cpuflags_SSE42
+%assign cpuflags_xop      (1<<12)| cpuflags_AVX
+%assign cpuflags_fma4     (1<<13)| cpuflags_AVX
+%assign cpuflags_AVX2     (1<<14)| cpuflags_AVX
+%assign cpuflags_fma3     (1<<15)| cpuflags_AVX
 %assign cpuflags_cache32  (1<<16)
 %assign cpuflags_cache64  (1<<17)
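
Each cpuflags_* constant ORs in every flag it implies, so the accumulated mask encodes a full feature hierarchy and cpuflag(X) reduces to a subset test against that mask. A minimal C sketch of the same pattern (names are illustrative; values mirror the defines above):

enum {
  kMMX  = 1 << 0,
  kMMX2 = (1 << 1) | kMMX,   /* MMX2 implies MMX */
  kSSE  = (1 << 4) | kMMX2,  /* SSE implies MMX2, MMX */
  kSSE2 = (1 << 5) | kSSE    /* SSE2 implies the whole chain */
};

static int HasFeature(int cpuflags, int feature) {
  /* cpuflag(X) in x86inc.asm performs this subset test. */
  return (cpuflags & feature) == feature;
}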
@@ -594,17 +596,17 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
         %assign cpuflags cpuflags | cpuflags_%2
     %endif
     %xdefine SUFFIX _ %+ cpuname
-    %if cpuflag(avx)
-        %assign avx_enabled 1
+    %if cpuflag(AVX)
+        %assign AVX_enabled 1
     %endif
-    %if mmsize == 16 && notcpuflag(sse2)
+    %if mmsize == 16 && notcpuflag(SSE2)
         %define mova movaps
         %define movu movups
         %define movnta movntps
     %endif
     %if cpuflag(aligned)
         %define movu mova
-    %elifidn %1, sse3
+    %elifidn %1, SSE3
         %define movu lddqu
     %endif
 %else
@@ -614,7 +616,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
     %endif
 %endmacro

-; merge mmx and sse*
+; merge MMX and SSE*

 %macro CAT_XDEFINE 3
     %xdefine %1%2 %3
@@ -625,7 +627,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %endmacro

 %macro INIT_MMX 0-1+
-    %assign avx_enabled 0
+    %assign AVX_enabled 0
     %define RESET_MM_PERMUTATION INIT_MMX %1
     %define mmsize 8
     %define num_mmregs 8
@@ -648,7 +650,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %endmacro

 %macro INIT_XMM 0-1+
-    %assign avx_enabled 0
+    %assign AVX_enabled 0
     %define RESET_MM_PERMUTATION INIT_XMM %1
     %define mmsize 16
     %define num_mmregs 8
@@ -669,7 +671,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %endmacro

 %macro INIT_YMM 0-1+
-    %assign avx_enabled 1
+    %assign AVX_enabled 1
     %define RESET_MM_PERMUTATION INIT_YMM %1
     %define mmsize 32
     %define num_mmregs 8
@@ -832,7 +834,7 @@ INIT_XMM
     %xdefine %%dst %2
     %rep %0-2
         %ifidn %%dst, %3
-            %error non-avx emulation of ``%%opcode'' is not supported
+            %error non-AVX emulation of ``%%opcode'' is not supported
         %endif
         %rotate 1
     %endrep
@@ -868,7 +870,7 @@ INIT_XMM
     %if %4>=3+%3
         %ifnidn %5, %6
-            %if avx_enabled && %%sizeofreg==16
+            %if AVX_enabled && %%sizeofreg==16
                 v%1 %5, %6, %7
             %else
                 CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
@@ -891,7 +893,7 @@ INIT_XMM
 ; So, if the op is symmetric and the wrong one is memory, swap them.
 %macro RUN_AVX_INSTR1 8
     %assign %%swap 0
-    %if avx_enabled
+    %if AVX_enabled
         %ifnid %6
             %assign %%swap 1
         %endif