Commit 02e48bf7 authored by fbarchard@google.com

YUY2 for AVX2

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/887006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@424 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0908a701
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 423
+Version: 424
License: BSD
License File: LICENSE
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 423
+#define LIBYUV_VERSION 424
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
;*
;* Copyright 2012 The LibYuv Project Authors. All rights reserved.
;*
;* Use of this source code is governed by a BSD-style license
;* that can be found in the LICENSE file in the root of the source
;* tree. An additional intellectual property rights grant can be found
;* in the file PATENTS. All contributing project authors may
;* be found in the AUTHORS file in the root of the source tree.
;*
%include "x86inc.asm"
SECTION .text
; void YUY2ToYRow_SSE2(const uint8* src_yuy2,
;                      uint8* dst_y, int pix);
%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
    pcmpeqb    m2, m2                ; generate mask 0x00ff00ff
    psrlw      m2, 8
%endif

    ALIGN      16
.convertloop:
    mov%2      m0, [src_yuy2q]
    mov%2      m1, [src_yuy2q + mmsize]
    lea        src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
    pand       m0, m2                ; YUY2 even bytes are Y
    pand       m1, m2
%else
    psrlw      m0, 8                 ; UYVY odd bytes are Y
    psrlw      m1, 8
%endif
    packuswb   m0, m1
    sub        pixd, mmsize
    mov%2      [dst_yq], m0
    lea        dst_yq, [dst_yq + mmsize]
    jg         .convertloop
    RET
%endmacro
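A note for readers unfamiliar with the x86inc register abstraction: the sketch below shows roughly what YUY2TOYROW YUY2,a, expands to after INIT_XMM SSE2. It is illustrative only, not part of the diff; x86inc maps m0..m2 to xmm0..xmm2, mova to movdqa, and mmsize to 16, and cglobal appends the cpu suffix to the exported symbol.

; illustrative SSE2 expansion, 16 Y pixels per iteration
cglobal YUY2ToYRow, 3, 3, 3, src_yuy2, dst_y, pix  ; exported as YUY2ToYRow_SSE2
    pcmpeqb    xmm2, xmm2            ; all ones
    psrlw      xmm2, 8               ; 0x00ff per word: selects even (Y) bytes
.convertloop:
    movdqa     xmm0, [src_yuy2q]     ; 16 bytes = 8 YUY2 pixels
    movdqa     xmm1, [src_yuy2q + 16]
    lea        src_yuy2q, [src_yuy2q + 32]
    pand       xmm0, xmm2            ; keep Y bytes, zero U/V bytes
    pand       xmm1, xmm2
    packuswb   xmm0, xmm1            ; pack 16 words down to 16 Y bytes
    sub        pixd, 16
    movdqa     [dst_yq], xmm0
    lea        dst_yq, [dst_yq + 16]
    jg         .convertloop
    RET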
; TODO(fbarchard): Remove MMX when SSE2 is required.
INIT_MMX MMX
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_XMM SSE2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_YMM AVX2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
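Each INIT_* block re-runs the macro with a different register file, so the same source stamps out twelve entry points; one iteration reads 2 * mmsize bytes of packed YUY2 or UYVY and writes mmsize Y bytes, i.e. 8, 16 and 32 pixels for MMX, SSE2 and AVX2 respectively. Assuming x86inc's usual name + optional _Unaligned + cpu-suffix scheme, the generated symbols are:

; YUY2ToYRow_MMX    YUY2ToYRow_Unaligned_MMX    UYVYToYRow_MMX    UYVYToYRow_Unaligned_MMX
; YUY2ToYRow_SSE2   YUY2ToYRow_Unaligned_SSE2   UYVYToYRow_SSE2   UYVYToYRow_Unaligned_SSE2
; YUY2ToYRow_AVX2   YUY2ToYRow_Unaligned_AVX2   UYVYToYRow_AVX2   UYVYToYRow_Unaligned_AVX2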
@@ -34,7 +34,9 @@
; as this feature might be useful for others as well. Send patches or ideas
; to x264-devel@videolan.org .
-%define program_name x264
+; Local changes for libyuv:
+; remove %define program_name and references in labels
+; rename cpus to uppercase
%define WIN64 0
%define UNIX64 0
@@ -505,7 +507,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%endmacro
%macro cglobal_internal 1-2+
%ifndef cglobaled_%1
-%xdefine %1 mangle(program_name %+ _ %+ %1)
+%xdefine %1 mangle(%1)
%xdefine %1.skip_prologue %1 %+ .skip_prologue
CAT_XDEFINE cglobaled_, %1, 1
%endif
@@ -525,7 +527,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%endmacro
%macro cextern 1
-%xdefine %1 mangle(program_name %+ _ %+ %1)
+%xdefine %1 mangle(%1)
CAT_XDEFINE cglobaled_, %1, 1
extern %1
%endmacro
@@ -538,7 +540,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%endmacro
%macro const 2+
-%xdefine %1 mangle(program_name %+ _ %+ %1)
+%xdefine %1 mangle(%1)
global %1
%1: %2
%endmacro
@@ -551,22 +553,22 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
; cpuflags
-%assign cpuflags_mmx      (1<<0)
-%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
-%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
+%assign cpuflags_MMX      (1<<0)
+%assign cpuflags_MMX2     (1<<1) | cpuflags_MMX
+%assign cpuflags_3dnow    (1<<2) | cpuflags_MMX
%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow
-%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
-%assign cpuflags_sse2     (1<<5) | cpuflags_sse
-%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
-%assign cpuflags_sse3     (1<<7) | cpuflags_sse2
-%assign cpuflags_ssse3    (1<<8) | cpuflags_sse3
-%assign cpuflags_sse4     (1<<9) | cpuflags_ssse3
-%assign cpuflags_sse42    (1<<10)| cpuflags_sse4
-%assign cpuflags_avx      (1<<11)| cpuflags_sse42
-%assign cpuflags_xop      (1<<12)| cpuflags_avx
-%assign cpuflags_fma4     (1<<13)| cpuflags_avx
-%assign cpuflags_avx2     (1<<14)| cpuflags_avx
-%assign cpuflags_fma3     (1<<15)| cpuflags_avx
+%assign cpuflags_SSE      (1<<4) | cpuflags_MMX2
+%assign cpuflags_SSE2     (1<<5) | cpuflags_SSE
+%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
+%assign cpuflags_SSE3     (1<<7) | cpuflags_SSE2
+%assign cpuflags_SSSE3    (1<<8) | cpuflags_SSE3
+%assign cpuflags_SSE4     (1<<9) | cpuflags_SSSE3
+%assign cpuflags_SSE42    (1<<10)| cpuflags_SSE4
+%assign cpuflags_AVX      (1<<11)| cpuflags_SSE42
+%assign cpuflags_xop      (1<<12)| cpuflags_AVX
+%assign cpuflags_fma4     (1<<13)| cpuflags_AVX
+%assign cpuflags_AVX2     (1<<14)| cpuflags_AVX
+%assign cpuflags_fma3     (1<<15)| cpuflags_AVX
%assign cpuflags_cache32 (1<<16)
%assign cpuflags_cache64 (1<<17)
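Because each flag ORs in every flag it implies, a single mask comparison answers whether the current target has at least a given feature. Elsewhere in x86inc.asm (unchanged by this diff, paraphrased here, so treat as a sketch) the test is roughly:

; %define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
; e.g. after INIT_XMM SSE2, cpuflag(MMX2) is true because cpuflags_SSE2
; transitively includes cpuflags_MMX2.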
@@ -594,17 +596,17 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%assign cpuflags cpuflags | cpuflags_%2
%endif
%xdefine SUFFIX _ %+ cpuname
-%if cpuflag(avx)
-%assign avx_enabled 1
+%if cpuflag(AVX)
+%assign AVX_enabled 1
%endif
-%if mmsize == 16 && notcpuflag(sse2)
+%if mmsize == 16 && notcpuflag(SSE2)
%define mova movaps
%define movu movups
%define movnta movntps
%endif
%if cpuflag(aligned)
%define movu mova
-%elifidn %1, sse3
+%elifidn %1, SSE3
%define movu lddqu
%endif
%else
@@ -614,7 +616,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endif
%endmacro
-; merge mmx and sse*
+; merge MMX and SSE*
%macro CAT_XDEFINE 3
%xdefine %1%2 %3
@@ -625,7 +627,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endmacro
%macro INIT_MMX 0-1+
-%assign avx_enabled 0
+%assign AVX_enabled 0
%define RESET_MM_PERMUTATION INIT_MMX %1
%define mmsize 8
%define num_mmregs 8
@@ -648,7 +650,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endmacro
%macro INIT_XMM 0-1+
-%assign avx_enabled 0
+%assign AVX_enabled 0
%define RESET_MM_PERMUTATION INIT_XMM %1
%define mmsize 16
%define num_mmregs 8
@@ -669,7 +671,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endmacro
%macro INIT_YMM 0-1+
-%assign avx_enabled 1
+%assign AVX_enabled 1
%define RESET_MM_PERMUTATION INIT_YMM %1
%define mmsize 32
%define num_mmregs 8
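Because the row macro above names only m0..m2 and mmsize, re-instantiating it after INIT_YMM retargets every instruction to 256-bit ymm registers with no source changes. A sketch of one AVX2 iteration as the v-prefixed forms would come out (illustrative; note that vpackuswb packs within each 128-bit lane, so the AVX2 output byte order differs from the SSE2 path unless a cross-lane shuffle such as vpermq is added):

    vmovdqa    ymm0, [src_yuy2q]      ; 32 bytes = 16 YUY2 pixels
    vmovdqa    ymm1, [src_yuy2q + 32]
    vpand      ymm0, ymm0, ymm2       ; keep even (Y) bytes
    vpand      ymm1, ymm1, ymm2
    vpackuswb  ymm0, ymm0, ymm1       ; packs per 128-bit lane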
@@ -832,7 +834,7 @@ INIT_XMM
%xdefine %%dst %2
%rep %0-2
%ifidn %%dst, %3
-%error non-avx emulation of ``%%opcode'' is not supported
+%error non-AVX emulation of ``%%opcode'' is not supported
%endif
%rotate 1
%endrep
@@ -868,7 +870,7 @@ INIT_XMM
%if %4>=3+%3
%ifnidn %5, %6
-%if avx_enabled && %%sizeofreg==16
+%if AVX_enabled && %%sizeofreg==16
v%1 %5, %6, %7
%else
CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
@@ -891,7 +893,7 @@ INIT_XMM
; So, if the op is symmetric and the wrong one is memory, swap them.
%macro RUN_AVX_INSTR1 8
%assign %%swap 0
-%if avx_enabled
+%if AVX_enabled
%ifnid %6
%assign %%swap 1
%endif
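The net effect of AVX_enabled on the row code: the same two-operand SSE mnemonics are rewritten into VEX three-operand form, with the swap above fixing up memory operands that land on the wrong side of a symmetric op. Roughly:

; AVX_enabled = 0:  pand m0, m2  ->  pand   xmm0, xmm2
; AVX_enabled = 1:  pand m0, m2  ->  vpand  ymm0, ymm0, ymm2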