Commit 3faa0f15 authored by fbarchard@google.com

x64 versions of assembly which also work on PIC 32 bit. biplanar take 2 strides…

x64 versions of assembly which also work on PIC 32 bit. Biplanar takes 2 strides for odd width. Inversion for src height.

TEST=media and planar unittests in Talk

BUG=none
Review URL: http://webrtc-codereview.appspot.com/244004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@33 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 3de12ae1
This diff is collapsed.
...@@ -21,7 +21,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi ...@@ -21,7 +21,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
const uint8* v_buf, // rdx const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx uint8* rgb_buf, // rcx
int width) { // r8 int width) { // r8
asm( asm volatile(
"1:" "1:"
"movzb (%1),%%r10\n" "movzb (%1),%%r10\n"
"lea 1(%1),%1\n" "lea 1(%1),%1\n"
...@@ -44,13 +44,12 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi ...@@ -44,13 +44,12 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"lea 8(%3),%3\n" "lea 8(%3),%3\n"
"sub $0x2,%4\n" "sub $0x2,%4\n"
"ja 1b\n" "ja 1b\n"
: : "+r"(y_buf), // %0
: "r"(y_buf), // %0 "+r"(u_buf), // %1
"r"(u_buf), // %1 "+r"(v_buf), // %2
"r"(v_buf), // %2 "+r"(rgb_buf), // %3
"r"(rgb_buf), // %3 "+r"(width) // %4
"r"(width), // %4 : "r" (_kCoefficientsRgbY) // %5
"r" (_kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
); );
} }
...@@ -60,7 +59,7 @@ void FastConvertYUVToBGRARow(const uint8* y_buf, // rdi ...@@ -60,7 +59,7 @@ void FastConvertYUVToBGRARow(const uint8* y_buf, // rdi
const uint8* v_buf, // rdx const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx uint8* rgb_buf, // rcx
int width) { // r8 int width) { // r8
asm( asm volatile(
"1:" "1:"
"movzb (%1),%%r10\n" "movzb (%1),%%r10\n"
"lea 1(%1),%1\n" "lea 1(%1),%1\n"
...@@ -83,13 +82,12 @@ void FastConvertYUVToBGRARow(const uint8* y_buf, // rdi ...@@ -83,13 +82,12 @@ void FastConvertYUVToBGRARow(const uint8* y_buf, // rdi
"lea 8(%3),%3\n" "lea 8(%3),%3\n"
"sub $0x2,%4\n" "sub $0x2,%4\n"
"ja 1b\n" "ja 1b\n"
: : "+r"(y_buf), // %0
: "r"(y_buf), // %0 "+r"(u_buf), // %1
"r"(u_buf), // %1 "+r"(v_buf), // %2
"r"(v_buf), // %2 "+r"(rgb_buf), // %3
"r"(rgb_buf), // %3 "+r"(width) // %4
"r"(width), // %4 : "r" (_kCoefficientsBgraY) // %5
"r" (_kCoefficientsBgraY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
); );
} }
...@@ -99,7 +97,7 @@ void FastConvertYUVToABGRRow(const uint8* y_buf, // rdi ...@@ -99,7 +97,7 @@ void FastConvertYUVToABGRRow(const uint8* y_buf, // rdi
const uint8* v_buf, // rdx const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx uint8* rgb_buf, // rcx
int width) { // r8 int width) { // r8
asm( asm volatile(
"1:" "1:"
"movzb (%1),%%r10\n" "movzb (%1),%%r10\n"
"lea 1(%1),%1\n" "lea 1(%1),%1\n"
...@@ -122,13 +120,12 @@ void FastConvertYUVToABGRRow(const uint8* y_buf, // rdi ...@@ -122,13 +120,12 @@ void FastConvertYUVToABGRRow(const uint8* y_buf, // rdi
"lea 8(%3),%3\n" "lea 8(%3),%3\n"
"sub $0x2,%4\n" "sub $0x2,%4\n"
"ja 1b\n" "ja 1b\n"
: : "+r"(y_buf), // %0
: "r"(y_buf), // %0 "+r"(u_buf), // %1
"r"(u_buf), // %1 "+r"(v_buf), // %2
"r"(v_buf), // %2 "+r"(rgb_buf), // %3
"r"(rgb_buf), // %3 "+r"(width) // %4
"r"(width), // %4 : "r" (_kCoefficientsAbgrY) // %5
"r" (_kCoefficientsAbgrY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
); );
} }
...@@ -138,7 +135,7 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi ...@@ -138,7 +135,7 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi
const uint8* v_buf, // rdx const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx uint8* rgb_buf, // rcx
int width) { // r8 int width) { // r8
asm( asm volatile(
"1:" "1:"
"movzb (%1),%%r10\n" "movzb (%1),%%r10\n"
"lea 1(%1),%1\n" "lea 1(%1),%1\n"
...@@ -158,13 +155,12 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi ...@@ -158,13 +155,12 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi
"lea 4(%3),%3\n" "lea 4(%3),%3\n"
"sub $0x1,%4\n" "sub $0x1,%4\n"
"ja 1b\n" "ja 1b\n"
: : "+r"(y_buf), // %0
: "r"(y_buf), // %0 "+r"(u_buf), // %1
"r"(u_buf), // %1 "+r"(v_buf), // %2
"r"(v_buf), // %2 "+r"(rgb_buf), // %3
"r"(rgb_buf), // %3 "+r"(width) // %4
"r"(width), // %4 : "r" (_kCoefficientsRgbY) // %5
"r" (_kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2" : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2"
); );
} }
...@@ -172,7 +168,7 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi ...@@ -172,7 +168,7 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi
void FastConvertYToRGB32Row(const uint8* y_buf, // rdi void FastConvertYToRGB32Row(const uint8* y_buf, // rdi
uint8* rgb_buf, // rcx uint8* rgb_buf, // rcx
int width) { // r8 int width) { // r8
asm( asm volatile(
"1:" "1:"
"movzb (%0),%%r10\n" "movzb (%0),%%r10\n"
"movzb 0x1(%0),%%r11\n" "movzb 0x1(%0),%%r11\n"
...@@ -186,11 +182,10 @@ void FastConvertYToRGB32Row(const uint8* y_buf, // rdi ...@@ -186,11 +182,10 @@ void FastConvertYToRGB32Row(const uint8* y_buf, // rdi
"lea 8(%1),%1\n" "lea 8(%1),%1\n"
"sub $0x2,%2\n" "sub $0x2,%2\n"
"ja 1b\n" "ja 1b\n"
: : "+r"(y_buf), // %0
: "r"(y_buf), // %0 "+r"(rgb_buf), // %1
"r"(rgb_buf), // %1 "+r"(width) // %2
"r"(width), // %2 : "r" (_kCoefficientsRgbY) // %3
"r" (_kCoefficientsRgbY) // %3
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
); );
} }
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment