Commit 7472021e authored by arphaxad.cy@gmail.com

git-svn-id: http://libyuv.googlecode.com/svn/trunk@50 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 7198d6da
......@@ -367,7 +367,135 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
);
}
// TODO(fbarchard): Port to 32 bit
#if defined (__i386__)
// 32-bit x86 SSE2 routine, written as file-scope assembly, that transposes
// 8 source rows of interleaved byte pairs and de-interleaves them into two
// separate destinations.
//
// Each loop iteration reads 16 bytes from each of 8 consecutive source rows
// (rows are src_stride bytes apart) and writes 8 rows of 8 bytes to dst_a and
// 8 rows of 8 bytes to dst_b: output row k of dst_a holds source byte column
// 2k taken from the 8 rows, and output row k of dst_b holds byte column 2k+1.
// Presumably the even bytes are U and the odd bytes are V (going by the
// function name) -- TODO confirm against the caller.
//
//   src, src_stride      source pointer and byte distance between source rows.
//   dst_a, dst_stride_a  destination for even byte columns and its row pitch.
//   dst_b, dst_stride_b  destination for odd byte columns and its row pitch.
//   w                    loop counter; reduced by 8 per iteration while src
//                        advances by 16 bytes (looks like a count of two-byte
//                        pairs -- TODO confirm).
//
// Things a caller must respect, as visible from the instructions used:
//   - Every source load is a movdqa, which requires a 16-byte aligned address,
//     so src has to be 16-byte aligned and src_stride a multiple of 16.
//   - The loop test sits at the bottom, so one 8-row tile is always processed
//     before w is examined.
extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int w);
asm(
".text\n"
// Export the symbol; the OSX build uses a leading underscore on the name.
#if defined(OSX)
".globl _TransposeUVWx8_SSE2\n"
"_TransposeUVWx8_SSE2:\n"
#else
".global TransposeUVWx8_SSE2\n"
"TransposeUVWx8_SSE2:\n"
#endif
// Prologue: save the four registers that the body overwrites.
"push %ebx\n"
"push %esi\n"
"push %edi\n"
"push %ebp\n"
// Load the stack-passed arguments. With 16 bytes of saved registers plus the
// 4-byte return address, the first argument sits at 0x14(%esp):
//   eax = src, edi = src_stride, edx = dst_a, esi = dst_stride_a,
//   ebx = dst_b, ebp = dst_stride_b.
"mov 0x14(%esp),%eax\n"
"mov 0x18(%esp),%edi\n"
"mov 0x1c(%esp),%edx\n"
"mov 0x20(%esp),%esi\n"
"mov 0x24(%esp),%ebx\n"
"mov 0x28(%esp),%ebp\n"
// Carve out a 16-byte aligned scratch slot at (%esp) for spilling one xmm
// register (movdqa needs the alignment), and keep the original esp at
// 0x10(%esp) so the epilogue can restore it.
"mov %esp,%ecx\n"
"sub $0x14,%esp\n"
"and $0xfffffff0,%esp\n"
"mov %ecx,0x10(%esp)\n"
// ecx = w, fetched through the pre-alignment stack pointer.
"mov 0x2c(%ecx),%ecx\n"
// Loop head (local label 1).
"1:"
// Stage 1: interleave bytes of row pairs.
// Rows 0/1 -> xmm0 (low 8 bytes of each row), xmm1 (high 8 bytes).
"movdqa (%eax),%xmm0\n"
"movdqa (%eax,%edi,1),%xmm1\n"
"lea (%eax,%edi,2),%eax\n"
"movdqa %xmm0,%xmm7\n"
"punpcklbw %xmm1,%xmm0\n"
"punpckhbw %xmm1,%xmm7\n"
"movdqa %xmm7,%xmm1\n"
// Rows 2/3 -> xmm2 (low), xmm3 (high).
"movdqa (%eax),%xmm2\n"
"movdqa (%eax,%edi,1),%xmm3\n"
"lea (%eax,%edi,2),%eax\n"
"movdqa %xmm2,%xmm7\n"
"punpcklbw %xmm3,%xmm2\n"
"punpckhbw %xmm3,%xmm7\n"
"movdqa %xmm7,%xmm3\n"
// Rows 4/5 -> xmm4 (low), xmm5 (high).
"movdqa (%eax),%xmm4\n"
"movdqa (%eax,%edi,1),%xmm5\n"
"lea (%eax,%edi,2),%eax\n"
"movdqa %xmm4,%xmm7\n"
"punpcklbw %xmm5,%xmm4\n"
"punpckhbw %xmm5,%xmm7\n"
"movdqa %xmm7,%xmm5\n"
// Rows 6/7 -> xmm6 (low), xmm7 (high). All eight xmm registers are live here,
// so xmm5 is spilled to the scratch slot to free a temporary.
"movdqa (%eax),%xmm6\n"
"movdqa (%eax,%edi,1),%xmm7\n"
"lea (%eax,%edi,2),%eax\n"
"movdqa %xmm5,(%esp)\n"
// Stride is negated temporarily so the lea below can subtract 8 * src_stride.
"neg %edi\n"
"movdqa %xmm6,%xmm5\n"
"punpcklbw %xmm7,%xmm6\n"
"punpckhbw %xmm7,%xmm5\n"
"movdqa %xmm5,%xmm7\n"
// eax = eax - 8 * src_stride + 16: back to row 0, 16 bytes further along.
"lea 0x10(%eax,%edi,8),%eax\n"
"neg %edi\n"
// Stage 2: interleave 16-bit words, combining rows 0-3 and rows 4-7.
// (xmm0, xmm2) <- low/high words of rows 0/1 low with rows 2/3 low.
"movdqa %xmm0,%xmm5\n"
"punpcklwd %xmm2,%xmm0\n"
"punpckhwd %xmm2,%xmm5\n"
"movdqa %xmm5,%xmm2\n"
// (xmm1, xmm3) <- low/high words of rows 0/1 high with rows 2/3 high.
"movdqa %xmm1,%xmm5\n"
"punpcklwd %xmm3,%xmm1\n"
"punpckhwd %xmm3,%xmm5\n"
"movdqa %xmm5,%xmm3\n"
// (xmm4, xmm6) <- low/high words of rows 4/5 low with rows 6/7 low.
"movdqa %xmm4,%xmm5\n"
"punpcklwd %xmm6,%xmm4\n"
"punpckhwd %xmm6,%xmm5\n"
"movdqa %xmm5,%xmm6\n"
// Swap through the scratch slot: reload the spilled rows 4/5 high half into
// xmm5 and park xmm6 there instead.
"movdqa (%esp),%xmm5\n"
"movdqa %xmm6,(%esp)\n"
// (xmm5, xmm7) <- low/high words of rows 4/5 high with rows 6/7 high.
"movdqa %xmm5,%xmm6\n"
"punpcklwd %xmm7,%xmm5\n"
"punpckhwd %xmm7,%xmm6\n"
"movdqa %xmm6,%xmm7\n"
// Stage 3: interleave 32-bit dwords and store. After each punpck{l,h}dq the
// low 8 bytes of a register are one even source column across the 8 rows
// (stored to dst_a with movlpd) and the high 8 bytes are the following odd
// column (stored to dst_b with movhpd). Both destinations advance two rows
// per pair of stores.
// Source byte columns 0-3.
"movdqa %xmm0,%xmm6\n"
"punpckldq %xmm4,%xmm0\n"
"punpckhdq %xmm4,%xmm6\n"
"movdqa %xmm6,%xmm4\n"
// Bring back the value parked in the scratch slot during stage 2.
"movdqa (%esp),%xmm6\n"
"movlpd %xmm0,(%edx)\n"
"movhpd %xmm0,(%ebx)\n"
"movlpd %xmm4,(%edx,%esi,1)\n"
"lea (%edx,%esi,2),%edx\n"
"movhpd %xmm4,(%ebx,%ebp,1)\n"
"lea (%ebx,%ebp,2),%ebx\n"
// Source byte columns 4-7.
"movdqa %xmm2,%xmm0\n"
"punpckldq %xmm6,%xmm2\n"
"movlpd %xmm2,(%edx)\n"
"movhpd %xmm2,(%ebx)\n"
"punpckhdq %xmm6,%xmm0\n"
"movlpd %xmm0,(%edx,%esi,1)\n"
"lea (%edx,%esi,2),%edx\n"
"movhpd %xmm0,(%ebx,%ebp,1)\n"
"lea (%ebx,%ebp,2),%ebx\n"
// Source byte columns 8-11.
"movdqa %xmm1,%xmm0\n"
"punpckldq %xmm5,%xmm1\n"
"movlpd %xmm1,(%edx)\n"
"movhpd %xmm1,(%ebx)\n"
"punpckhdq %xmm5,%xmm0\n"
"movlpd %xmm0,(%edx,%esi,1)\n"
"lea (%edx,%esi,2),%edx\n"
"movhpd %xmm0,(%ebx,%ebp,1)\n"
"lea (%ebx,%ebp,2),%ebx\n"
// Source byte columns 12-15.
"movdqa %xmm3,%xmm0\n"
"punpckldq %xmm7,%xmm3\n"
"movlpd %xmm3,(%edx)\n"
"movhpd %xmm3,(%ebx)\n"
"punpckhdq %xmm7,%xmm0\n"
"movlpd %xmm0,(%edx,%esi,1)\n"
"lea (%edx,%esi,2),%edx\n"
"movhpd %xmm0,(%ebx,%ebp,1)\n"
"lea (%ebx,%ebp,2),%ebx\n"
// Consume 8 units of w; ja is an unsigned test, so the loop continues only
// while the subtraction neither borrows nor reaches zero.
"sub $0x8,%ecx\n"
"ja 1b\n"
// Epilogue: restore the pre-alignment stack pointer, then the saved registers
// in reverse push order.
"mov 0x10(%esp),%esp\n"
"pop %ebp\n"
"pop %edi\n"
"pop %esi\n"
"pop %ebx\n"
"ret\n"
);
#if defined (__x86_64__)
#define HAS_TRANSPOSE_UVWX8_SSE2
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment