Commit e812e86e authored by fbarchard@google.com's avatar fbarchard@google.com

Simplify constraints on asm yuv scale columns for benefit of android intel build.

BUG=none
TEST=try bots
R=ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/3989005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@857 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ff74e023
# This is a generic makefile for libyuv for gcc.
# Caveat: This file will get overwritten by GYP if projects are generated
# with GYP_GENERATORS=make
CC=g++
......@@ -34,15 +34,15 @@ LOCAL_OBJ_FILES := \
# Suffix rule: compile each .cc source into a .o alongside it.
.cc.o:
	$(CC) -c $(CCFLAGS) $*.cc -o $*.o

# 'all' and 'clean' are commands, not files they produce.
.PHONY: all clean

all: libyuv.a convert

# Depend on Makefile so the library is rebuilt when build flags change.
libyuv.a: $(LOCAL_OBJ_FILES) Makefile
	$(AR) $(ARFLAGS) -o $@ $(LOCAL_OBJ_FILES)

# A test utility that uses libyuv conversion.
convert: util/convert.cc Makefile
	$(CC) $(CCFLAGS) -Iutil/ -o $@ util/convert.cc libyuv.a

clean:
	$(RM) source/*.o *.o libyuv.a convert
......@@ -145,22 +145,26 @@ extern "C" {
#define HAS_YUY2TOYROW_SSE2
#endif
// AVX2 functions available on all x86 platforms, but not NaCL, and
// require VS2012, clang 3.4 or gcc 4.7.
// Caveat: llvm 3.1 required, but does not provide a version.
// Detect a GCC new enough (>= 4.7) to emit AVX2 on x86/x86_64.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
/* Test for GCC >= 4.7.0 */
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// TODO(fbarchard): Test with new NaCL tool chain. Change __native_client__AVX2
// to __native_client__ to test.
// Detect a clang new enough (>= 3.4) to emit AVX2 on x86/x86_64.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
/* Test for clang >= 3.4.0 */
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && \
((defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700) || \
((defined(__x86_64__) || defined(__i386__)) && \
(defined(__native_client__AVX2) || defined(__clang__) || \
defined(GCC_HAS_AVX2))))
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
// Effects:
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
......
......@@ -1689,12 +1689,12 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
asm volatile (
"movd %6,%%xmm2 \n"
"movd %7,%%xmm3 \n"
"movl $0x04040000,%k5 \n"
"movd %k5,%%xmm5 \n"
"movl $0x04040000,%k2 \n"
"movd %k2,%%xmm5 \n"
"pcmpeqb %%xmm6,%%xmm6 \n"
"psrlw $0x9,%%xmm6 \n"
"pextrw $0x1,%%xmm2,%k3 \n"
"subl $0x2,%2 \n"
"subl $0x2,%5 \n"
"jl 29f \n"
"movdqa %%xmm2,%%xmm0 \n"
"paddd %%xmm3,%%xmm0 \n"
......@@ -1706,11 +1706,11 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"2: \n"
"movdqa %%xmm2,%%xmm1 \n"
"paddd %%xmm3,%%xmm2 \n"
"movzwl (%1,%3,1),%k5 \n"
"movd %k5,%%xmm0 \n"
"movzwl (%1,%3,1),%k2 \n"
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n"
"movzwl (%1,%4,1),%k5 \n"
"movd %k5,%%xmm4 \n"
"movzwl (%1,%4,1),%k2 \n"
"movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n"
"punpcklwd %%xmm4,%%xmm0 \n"
"pxor %%xmm6,%%xmm1 \n"
......@@ -1719,32 +1719,32 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"pextrw $0x3,%%xmm2,%k4 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n"
"mov %w5,(%0) \n"
"movd %%xmm0,%k2 \n"
"mov %w2,(%0) \n"
"lea 0x2(%0),%0 \n"
"sub $0x2,%2 \n"
"sub $0x2,%5 \n"
"jge 2b \n"
".p2align 2 \n"
"29: \n"
"addl $0x1,%2 \n"
"addl $0x1,%5 \n"
"jl 99f \n"
"movzwl (%1,%3,1),%k5 \n"
"movd %k5,%%xmm0 \n"
"movzwl (%1,%3,1),%k2 \n"
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n"
"pshufb %%xmm5,%%xmm1 \n"
"pxor %%xmm6,%%xmm1 \n"
"pmaddubsw %%xmm1,%%xmm0 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n"
"mov %b5,(%0) \n"
"movd %%xmm0,%k2 \n"
"mov %b2,(%0) \n"
"99: \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+rm"(dst_width), // %2
"+a"(x0), // %3
"+d"(x1), // %4
"+b"(temp_pixel) // %5
"+a"(temp_pixel), // %2
"+r"(x0), // %3
"+r"(x1), // %4
"+rm"(dst_width) // %5
: "rm"(x), // %6
"rm"(dx) // %7
: "memory", "cc"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment