Commit 3fe36966 authored by fbarchard@google.com

FastConvertYUVToARGBRow_SSSE3: do both unpacks first and then both stores, which works better on Core 2

BUG=none
TEST=none
Review URL: http://webrtc-codereview.appspot.com/323007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@103 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 19a248ab
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 102 Version: 103
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -409,8 +409,8 @@ void OMITFP FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf, // rdi ...@@ -409,8 +409,8 @@ void OMITFP FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf, // rdi
"punpcklbw %%xmm5,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm2,%%xmm0 \n" "punpcklwd %%xmm2,%%xmm0 \n"
"movdqa %%xmm0,(%3) \n"
"punpckhwd %%xmm2,%%xmm1 \n" "punpckhwd %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n" "movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n" "lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n" "sub $0x8,%4 \n"
......
...@@ -614,8 +614,8 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf, ...@@ -614,8 +614,8 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
punpcklbw xmm2, xmm5 // RA punpcklbw xmm2, xmm5 // RA
movdqa xmm1, xmm0 movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 // BGRA first 4 pixels punpcklwd xmm0, xmm2 // BGRA first 4 pixels
movdqa [edx], xmm0
punpckhwd xmm1, xmm2 // BGRA next 4 pixels punpckhwd xmm1, xmm2 // BGRA next 4 pixels
movdqa [edx], xmm0
movdqa [edx + 16], xmm1 movdqa [edx + 16], xmm1
lea edx, [edx + 32] lea edx, [edx + 32]
...@@ -803,10 +803,10 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, ...@@ -803,10 +803,10 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
punpcklbw xmm0, xmm0 // GG punpcklbw xmm0, xmm0 // GG
movdqa xmm1, xmm0 movdqa xmm1, xmm0
punpcklwd xmm0, xmm0 // BGRA first 4 pixels punpcklwd xmm0, xmm0 // BGRA first 4 pixels
por xmm0, xmm5
movdqa [edx], xmm0
punpckhwd xmm1, xmm1 // BGRA next 4 pixels punpckhwd xmm1, xmm1 // BGRA next 4 pixels
por xmm0, xmm5
por xmm1, xmm5 por xmm1, xmm5
movdqa [edx], xmm0
movdqa [edx + 16], xmm1 movdqa [edx + 16], xmm1
lea edx, [edx + 32] lea edx, [edx + 32]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment