Commit 6f61eb67 authored by fbarchard@google.com

NaCl port of Affine (ARGBAffineRow_SSE2)

BUG=253
TESTED=ncval passed
R=dingkai@google.com

Review URL: https://webrtc-codereview.appspot.com/2035004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@761 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d593876b
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 760 Version: 761
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -40,6 +40,7 @@ extern "C" { ...@@ -40,6 +40,7 @@ extern "C" {
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Effects: // Effects:
#define HAS_ARGBADDROW_SSE2 #define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3 #define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3
...@@ -128,7 +129,6 @@ extern "C" { ...@@ -128,7 +129,6 @@ extern "C" {
#define HAS_YUY2TOYROW_SSE2 #define HAS_YUY2TOYROW_SSE2
// Effects: // Effects:
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_INTERPOLATEROW_SSE2 #define HAS_INTERPOLATEROW_SSE2
#define HAS_SOBELROW_SSE2 #define HAS_SOBELROW_SSE2
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 760 #define LIBYUV_VERSION 761
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -4748,8 +4748,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4748,8 +4748,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
intptr_t src_argb_stride_temp = src_argb_stride; intptr_t src_argb_stride_temp = src_argb_stride;
intptr_t temp = 0; intptr_t temp = 0;
asm volatile ( asm volatile (
"movq (%3),%%xmm2 \n" "movq "MEMACCESS(3)",%%xmm2 \n"
"movq 0x8(%3),%%xmm7 \n" "movq "MEMACCESS2(0x08,3)",%%xmm7 \n"
"shl $0x10,%1 \n" "shl $0x10,%1 \n"
"add $0x4,%1 \n" "add $0x4,%1 \n"
"movd %1,%%xmm5 \n" "movd %1,%%xmm5 \n"
...@@ -4775,6 +4775,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4775,6 +4775,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"packssdw %%xmm1,%%xmm0 \n" "packssdw %%xmm1,%%xmm0 \n"
"pmaddwd %%xmm5,%%xmm0 \n" "pmaddwd %%xmm5,%%xmm0 \n"
#if defined(__x86_64__) #if defined(__x86_64__)
// TODO(fbarchard): use a real movd to zero upper with %w1 for x64 and nacl.
"movd %%xmm0,%1 \n" "movd %%xmm0,%1 \n"
"mov %1,%5 \n" "mov %1,%5 \n"
"and $0x0fffffff,%1 \n" "and $0x0fffffff,%1 \n"
...@@ -4786,11 +4787,19 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4786,11 +4787,19 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"movd %%xmm0,%5 \n" "movd %%xmm0,%5 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
#endif #endif
#if defined(__x86_64__) && defined(__native_client__)
BUNDLEALIGN
"lea (%q0,%q1,1),%%r14d \n"
"movd (%%r15,%%r14,1),%%xmm1 \n"
"lea (%q0,%q5,1),%%r14d \n"
"movd (%%r15,%%r14,1),%%xmm6 \n"
#else
"movd (%0,%1,1),%%xmm1 \n" "movd (%0,%1,1),%%xmm1 \n"
"movd (%0,%5,1),%%xmm6 \n" "movd (%0,%5,1),%%xmm6 \n"
#endif
"punpckldq %%xmm6,%%xmm1 \n" "punpckldq %%xmm6,%%xmm1 \n"
"addps %%xmm4,%%xmm2 \n" "addps %%xmm4,%%xmm2 \n"
"movq %%xmm1,(%2) \n" "movq %%xmm1,"MEMACCESS(2)" \n"
#if defined(__x86_64__) #if defined(__x86_64__)
"movd %%xmm0,%1 \n" "movd %%xmm0,%1 \n"
"mov %1,%5 \n" "mov %1,%5 \n"
...@@ -4801,13 +4810,21 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4801,13 +4810,21 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"pshufd $0x39,%%xmm0,%%xmm0 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n"
"movd %%xmm0,%5 \n" "movd %%xmm0,%5 \n"
#endif #endif
#if defined(__x86_64__) && defined(__native_client__)
BUNDLEALIGN
"lea (%q0,%q1,1),%%r14d \n"
"movd (%%r15,%%r14,1),%%xmm0 \n"
"lea (%q0,%q5,1),%%r14d \n"
"movd (%%r15,%%r14,1),%%xmm6 \n"
#else
"movd (%0,%1,1),%%xmm0 \n" "movd (%0,%1,1),%%xmm0 \n"
"movd (%0,%5,1),%%xmm6 \n" "movd (%0,%5,1),%%xmm6 \n"
#endif
"punpckldq %%xmm6,%%xmm0 \n" "punpckldq %%xmm6,%%xmm0 \n"
"addps %%xmm4,%%xmm3 \n" "addps %%xmm4,%%xmm3 \n"
"sub $0x4,%4 \n" "sub $0x4,%4 \n"
"movq %%xmm0,0x08(%2) \n" "movq %%xmm0,"MEMACCESS2(0x08,2)" \n"
"lea 0x10(%2),%2 \n" "lea "MEMLEA(0x10,2)",%2 \n"
"jge 40b \n" "jge 40b \n"
"49: \n" "49: \n"
...@@ -4825,10 +4842,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4825,10 +4842,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
#if defined(__x86_64__) #if defined(__x86_64__)
"and $0x0fffffff,%1 \n" "and $0x0fffffff,%1 \n"
#endif #endif
#if defined(__x86_64__) && defined(__native_client__)
BUNDLEALIGN
"lea (%q0,%q1,1),%%r14d \n"
"movd (%%r15,%%r14,1),%%xmm0 \n"
#else
"movd (%0,%1,1),%%xmm0 \n" "movd (%0,%1,1),%%xmm0 \n"
#endif
"sub $0x1,%4 \n" "sub $0x1,%4 \n"
"movd %%xmm0,(%2) \n" "movd %%xmm0,"MEMACCESS(2)" \n"
"lea 0x4(%2),%2 \n" "lea "MEMLEA(0x04,2)",%2 \n"
"jge 10b \n" "jge 10b \n"
"19: \n" "19: \n"
: "+r"(src_argb), // %0 : "+r"(src_argb), // %0
...@@ -4839,6 +4862,9 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -4839,6 +4862,9 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"+r"(temp) // %5 "+r"(temp) // %5
: :
: "memory", "cc" : "memory", "cc"
#if defined(__native_client__) && defined(__x86_64__)
, "r14"
#endif
#if defined(__SSE2__) #if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment