Commit bc8f28eb authored by fbarchard@google.com's avatar fbarchard@google.com

remove row table, make C use math that mimics SIMD for exactness. Also 2x…

remove row table, make C use math that mimics SIMD for exactness.  Also 2x faster than old code which mimicked old SIMD via tables.  9000 ms instead of 20000 ms
BUG=none
TEST=none
Review URL: http://webrtc-codereview.appspot.com/267020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@85 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 15c3d45c
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 84
Version: 85
License: BSD
License File: LICENSE
......
......@@ -44,7 +44,6 @@
'source/planar_functions.cc',
'source/rotate.cc',
'source/row_common.cc',
'source/row_table.cc',
'source/scale.cc',
'source/video_common.cc',
],
......
......@@ -220,43 +220,41 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix) {
}
// C reference code that mimics the YUV assembly.
#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
(((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
static inline void YuvPixel(uint8 y,
uint8 u,
uint8 v,
uint8* rgb_buf,
int ashift,
int rshift,
int gshift,
int bshift) {
int b = kCoefficientsRgbY[256+u][0];
int g = kCoefficientsRgbY[256+u][1];
int r = kCoefficientsRgbY[256+u][2];
int a = kCoefficientsRgbY[256+u][3];
b = paddsw(b, kCoefficientsRgbY[512+v][0]);
g = paddsw(g, kCoefficientsRgbY[512+v][1]);
r = paddsw(r, kCoefficientsRgbY[512+v][2]);
a = paddsw(a, kCoefficientsRgbY[512+v][3]);
b = paddsw(b, kCoefficientsRgbY[y][0]);
g = paddsw(g, kCoefficientsRgbY[y][1]);
r = paddsw(r, kCoefficientsRgbY[y][2]);
a = paddsw(a, kCoefficientsRgbY[y][3]);
b >>= 6;
g >>= 6;
r >>= 6;
a >>= 6;
*reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b) << bshift) |
(packuswb(g) << gshift) |
(packuswb(r) << rshift) |
(packuswb(a) << ashift);
// Fixed-point YUV to RGB coefficients, scaled by 64 (6 fractional bits).
// Negative values and compound expressions are parenthesized so that each
// macro expands correctly inside a larger expression (e.g. 2 * BG, x - BG).
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
#define UB 127 /* 2.018 * 64 = 129.15, clamped to 127 (the int8 maximum) */
#define UG (-25) /* static_cast<int8>(-0.391 * 64 - 0.5) */
#define UR 0
#define VB 0
#define VG (-52) /* static_cast<int8>(-0.813 * 64 - 0.5) */
#define VR 102 /* static_cast<int8>(1.596 * 64 + 0.5) */
// Bias: the combined U and V contribution of each channel at U = V = 128.
#define BB (UB * 128 + VB * 128)
#define BG (UG * 128 + VG * 128)
#define BR (UR * 128 + VR * 128)
// Clamps a signed value to the range [0, 255] and returns it as unsigned.
// Values below 0 become 0; values above 255 become 255.
static inline uint32 Clip(int32 val) {
  const int32 clamped = (val < 0) ? 0 : ((val > 255) ? 255 : val);
  return static_cast<uint32>(clamped);
}
// Converts one Y, U, V sample triple to a packed 32-bit pixel using
// fixed-point integer math and stores it at rgb_buf.
//
// y, u, v          - input samples.
// rgb_buf          - destination; one 32-bit word is written through it.
// ashift, rshift,
// gshift, bshift   - bit positions of the alpha, red, green and blue bytes
//                    inside the 32-bit word.
//
// Each channel is (chroma term - bias + luma term) >> 6, clipped to
// [0, 255]. Alpha is always written as 255.
// NOTE(review): the store goes through a uint32 pointer, so rgb_buf is
// presumably expected to be suitably aligned - confirm against callers.
static inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
                            int ashift, int rshift, int gshift, int bshift) {
  // Luma contribution shared by all three color channels.
  const int32 luma = (static_cast<int32>(y) - 16) * YG;

  // Unclipped channel values, still carrying 6 fractional bits.
  const int32 blue_fp = static_cast<int32>((u * UB + v * VB) - (BB) + luma);
  const int32 green_fp = static_cast<int32>((u * UG + v * VG) - (BG) + luma);
  const int32 red_fp = static_cast<int32>((u * UR + v * VR) - (BR) + luma);

  // Drop the fractional bits, then clamp each channel to a byte.
  const uint32 b = Clip(blue_fp >> 6);
  const uint32 g = Clip(green_fp >> 6);
  const uint32 r = Clip(red_fp >> 6);

  uint32* const dst = reinterpret_cast<uint32*>(rgb_buf);
  *dst = (b << bshift) |
         (g << gshift) |
         (r << rshift) |
         (255u << ashift);
}
void FastConvertYUVToARGBRow_C(const uint8* y_buf,
......
......@@ -381,9 +381,9 @@ struct {
"punpcklbw %%xmm4,%%xmm3 \n" \
"psubsw 96(%5),%%xmm3 \n" \
"pmullw 112(%5),%%xmm3 \n" \
"paddw %%xmm3,%%xmm0 \n" \
"paddw %%xmm3,%%xmm1 \n" \
"paddw %%xmm3,%%xmm2 \n" \
"paddsw %%xmm3,%%xmm0 \n" \
"paddsw %%xmm3,%%xmm1 \n" \
"paddsw %%xmm3,%%xmm2 \n" \
"psraw $0x6,%%xmm0 \n" \
"psraw $0x6,%%xmm1 \n" \
"psraw $0x6,%%xmm2 \n" \
......@@ -528,9 +528,9 @@ void OMITFP FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf, // rdi
"punpcklbw %%xmm4,%%xmm3 \n"
"psubsw 96(%5),%%xmm3 \n"
"pmullw 112(%5),%%xmm3 \n"
"paddw %%xmm3,%%xmm0 \n"
"paddw %%xmm3,%%xmm1 \n"
"paddw %%xmm3,%%xmm2 \n"
"paddsw %%xmm3,%%xmm0 \n"
"paddsw %%xmm3,%%xmm1 \n"
"paddsw %%xmm3,%%xmm2 \n"
"psraw $0x6,%%xmm0 \n"
"psraw $0x6,%%xmm1 \n"
"psraw $0x6,%%xmm2 \n"
......
This diff is collapsed.
......@@ -574,9 +574,9 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm punpcklbw xmm3, xmm4 \
__asm psubsw xmm3, kYSub16 \
__asm pmullw xmm3, kYToRgb \
__asm paddw xmm0, xmm3 /* B += Y */ \
__asm paddw xmm1, xmm3 /* G += Y */ \
__asm paddw xmm2, xmm3 /* R += Y */ \
__asm paddsw xmm0, xmm3 /* B += Y */ \
__asm paddsw xmm1, xmm3 /* G += Y */ \
__asm paddsw xmm2, xmm3 /* R += Y */ \
__asm psraw xmm0, 6 \
__asm psraw xmm1, 6 \
__asm psraw xmm2, 6 \
......@@ -744,9 +744,9 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
punpcklbw xmm3, xmm4
psubsw xmm3, kYSub16
pmullw xmm3, kYToRgb
paddw xmm0, xmm3 // B += Y
paddw xmm1, xmm3 // G += Y
paddw xmm2, xmm3 // R += Y
paddsw xmm0, xmm3 // B += Y
paddsw xmm1, xmm3 // G += Y
paddsw xmm2, xmm3 // R += Y
psraw xmm0, 6
psraw xmm1, 6
psraw xmm2, 6
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment