Commit 120d5e73 authored by mikhal@webrtc.org

libyuv: Updating planar functions

Review URL: http://webrtc-codereview.appspot.com/209002

git-svn-id: http://libyuv.googlecode.com/svn/trunk@12 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 3660f2e5
......@@ -38,23 +38,90 @@ class PlanarFunctions {
int width, int height);
// Convert M420 to I420.
static void M420ToI420(uint8* dst_y, int dst_pitch_y,
static void M420ToI420(const uint8* src_m420, int src_pitch_m420,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert Q420 to I420.
static void Q420ToI420(const uint8* src_y, int src_pitch_y,
const uint8* src_yuy2, int src_pitch_yuy2,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
const uint8* m420, int pitch_m420,
int width, int height);
// Convert NV12 to I420. Also used for NV21.
static void NV12ToI420(uint8* dst_y, int dst_pitch_y,
static void NV12ToI420(const uint8* src_y,
const uint8* src_uv, int src_pitch,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert YUY2 to I420.
static void YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert UYVY to I420.
static void UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
const uint8* src_y,
const uint8* src_uv,
int src_pitch,
int width, int height);
// Convert I420 to ARGB.
static void I420ToARGB(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I422 to ARGB.
static void I422ToARGB(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I444 to ARGB.
static void I444ToARGB(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I400 to ARGB.
static void I400ToARGB(const uint8* src_y, int src_pitch_y,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I400 to ARGB.
static void I400ToARGB_Reference(const uint8* src_y, int src_pitch_y,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert RAW to ARGB.
static void RAWToARGB(const uint8* src_raw, int src_pitch_raw,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert BG24 to ARGB.
static void BG24ToARGB(const uint8* src_bg24, int src_pitch_bg24,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert ABGR to ARGB.
static void ABGRToARGB(const uint8* src_abgr, int src_pitch_abgr,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
DISALLOW_IMPLICIT_CONSTRUCTORS(PlanarFunctions);
};
};
} // namespace libyuv
......
This diff is collapsed.
......@@ -22,6 +22,16 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width);
// Convert one row of YUV to 32-bit RGB where every pixel has its own U and V
// sample (one byte is read from each of y_buf, u_buf and v_buf per pixel).
// Four bytes are written to rgb_buf per pixel; width is the pixel count.
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
// Convert one row of Y-only samples to 32-bit RGB. One byte is read from
// y_buf and four bytes are written to rgb_buf per pixel; width is the pixel
// count. The C reference implementation uses U = V = 128 for every pixel.
// NOTE(review): the assembly implementations convert pixels two at a time, so
// they presumably expect an even width - confirm against callers.
void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width);
#if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#else
......@@ -68,6 +78,7 @@ extern SIMD_ALIGNED(const int16 _kCoefficientsRgbY[768][4]);
#define EMMS()
#endif
} // extern "C"
#endif // LIBYUV_SOURCE_ROW_H_
......@@ -55,6 +55,68 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
);
}
// Convert one row of YUV to 32-bit RGB where every pixel has its own U and V
// sample (x86-64 GCC inline assembly).
//
//   y_buf, u_buf, v_buf  source rows; one byte is read from each per pixel.
//   rgb_buf              destination row; 4 bytes are written per pixel.
//   width                number of pixels to convert; <= 0 converts nothing.
//
// _kCoefficientsRgbY holds 8-byte entries (four int16 each): the Y entries
// start at byte offset 0, the U entries at 2048 and the V entries at 4096.
// The three entries selected for a pixel are added with signed saturation,
// shifted right by 6 to drop the fixed-point fraction, and packed to bytes.
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,  // rdi
                                 const uint8* u_buf,  // rsi
                                 const uint8* v_buf,  // rdx
                                 uint8* rgb_buf,      // rcx
                                 int width) {         // r8
  // The loop is bottom-tested: without this guard a zero width would still
  // convert one pixel and a negative width would not terminate sensibly.
  // The C reference version converts nothing in both cases.
  if (width <= 0) {
    return;
  }
  // The pointers and the counter are advanced inside the asm, so they must be
  // read-write ("+r") operands; GCC does not allow an asm statement to modify
  // an input-only operand. "cc" is listed because sub changes the flags.
  asm volatile(
"1:"
"movzb (%1),%%r10\n"                 // r10 = U sample
"lea 1(%1),%1\n"                     // ++u_buf
"movzb (%2),%%r11\n"                 // r11 = V sample
"lea 1(%2),%2\n"                     // ++v_buf
"movq 2048(%5,%%r10,8),%%xmm0\n"     // xmm0 = U table entry
"movzb (%0),%%r10\n"                 // r10 = Y sample
"movq 4096(%5,%%r11,8),%%xmm1\n"     // xmm1 = V table entry
"paddsw %%xmm1,%%xmm0\n"             // xmm0 = U + V contribution
"movq (%5,%%r10,8),%%xmm2\n"         // xmm2 = Y table entry
"lea 1(%0),%0\n"                     // ++y_buf
"paddsw %%xmm0,%%xmm2\n"             // xmm2 = Y + U + V (saturating)
"shufps $0x44,%%xmm2,%%xmm2\n"       // copy low 64 bits into the high half
"psraw $0x6,%%xmm2\n"                // drop the 6 fractional bits
"packuswb %%xmm2,%%xmm2\n"           // saturate words to unsigned bytes
"movd %%xmm2,0x0(%3)\n"              // store one 4-byte pixel
"lea 4(%3),%3\n"                     // rgb_buf += 4
"sub $0x1,%4\n"                      // --width
"ja 1b\n"                            // loop while pixels remain
  : "+r"(y_buf),    // %0
    "+r"(u_buf),    // %1
    "+r"(v_buf),    // %2
    "+r"(rgb_buf),  // %3
    "+r"(width)     // %4
  : "r" (_kCoefficientsRgbY)  // %5
  : "memory", "cc", "r10", "r11", "xmm0", "xmm1", "xmm2"
);
}
// Convert one row of Y-only samples to 32-bit RGB (x86-64 GCC inline
// assembly), using only the Y entries of _kCoefficientsRgbY.
//
//   y_buf    source row; one byte is read per pixel.
//   rgb_buf  destination row; 4 bytes are written per pixel.
//   width    number of pixels to convert; <= 0 converts nothing.
//
// Pixels are converted two at a time: every iteration reads 2 source bytes
// and stores 8 destination bytes, so an odd width reads one extra Y byte and
// writes one extra pixel past the requested count.
void FastConvertYToRGB32Row(const uint8* y_buf,  // rdi
                            uint8* rgb_buf,      // rsi
                            int width) {         // rdx
  // The loop is bottom-tested: without this guard a zero width would still
  // convert a pixel pair and a negative width would not terminate sensibly.
  // The C reference version converts nothing in both cases.
  if (width <= 0) {
    return;
  }
  // The pointers and the counter are advanced inside the asm, so they must be
  // read-write ("+r") operands; GCC does not allow an asm statement to modify
  // an input-only operand. "cc" is listed because sub changes the flags.
  asm volatile(
"1:"
"movzb (%0),%%r10\n"               // r10 = first Y sample
"movzb 0x1(%0),%%r11\n"            // r11 = second Y sample
"movq (%3,%%r10,8),%%xmm2\n"       // xmm2 = table entry for first pixel
"lea 2(%0),%0\n"                   // y_buf += 2
"movq (%3,%%r11,8),%%xmm3\n"       // xmm3 = table entry for second pixel
"shufps $0x44,%%xmm3,%%xmm2\n"     // xmm2 = first entry | second entry
"psraw $0x6,%%xmm2\n"              // drop the 6 fractional bits
"packuswb %%xmm2,%%xmm2\n"         // saturate words to unsigned bytes
"movq %%xmm2,0x0(%1)\n"            // store two 4-byte pixels
"lea 8(%1),%1\n"                   // rgb_buf += 8
"sub $0x2,%2\n"                    // width -= 2
"ja 1b\n"                          // loop while pixels remain
  : "+r"(y_buf),    // %0
    "+r"(rgb_buf),  // %1
    "+r"(width)     // %2
  : "r" (_kCoefficientsRgbY)  // %3
  : "memory", "cc", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
);
}
#elif defined(__i386__)
// 32 bit gcc version
......@@ -104,6 +166,81 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"ret\n"
);
// 32-bit x86 MMX version. The declaration only provides the prototype; the
// function body is the file-scope asm block that follows, which defines the
// symbol itself and reads its arguments from the stack.
// Converts one row in which every pixel has its own U and V sample, writing
// 4 bytes per pixel to rgb_buf. The loop is bottom-tested, so the body runs
// at least once regardless of width.
// NOTE(review): no emms is executed before returning; presumably callers are
// expected to invoke EMMS() themselves - confirm.
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
asm(
".text\n"
// OSX/IOS builds export the symbol with a leading underscore.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYUV444ToRGB32Row\n"
"_FastConvertYUV444ToRGB32Row:\n"
#else
".global FastConvertYUV444ToRGB32Row\n"
"FastConvertYUV444ToRGB32Row:\n"
#endif
// pusha stores 8 registers (32 bytes); with the 4-byte return address the
// first argument is therefore at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"  // edx = y_buf
"mov 0x28(%esp),%edi\n"  // edi = u_buf
"mov 0x2c(%esp),%esi\n"  // esi = v_buf
"mov 0x30(%esp),%ebp\n"  // ebp = rgb_buf
"mov 0x34(%esp),%ecx\n"  // ecx = width
"1:"
"movzbl (%edi),%eax\n"  // eax = U sample
"lea 1(%edi),%edi\n"  // ++u_buf
"movzbl (%esi),%ebx\n"  // ebx = V sample
"lea 1(%esi),%esi\n"  // ++v_buf
"movq _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"  // mm0 = U table entry
"movzbl (%edx),%eax\n"  // eax = Y sample
"paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"  // mm0 += V table entry
"lea 1(%edx),%edx\n"  // ++y_buf
"paddsw _kCoefficientsRgbY(,%eax,8),%mm0\n"  // mm0 += Y table entry
"psraw $0x6,%mm0\n"  // drop the 6 fractional bits
"packuswb %mm0,%mm0\n"  // saturate words to unsigned bytes
"movd %mm0,0x0(%ebp)\n"  // store one 4-byte pixel
"lea 4(%ebp),%ebp\n"  // rgb_buf += 4
"sub $0x1,%ecx\n"  // --width
"ja 1b\n"  // loop while pixels remain
"popa\n"
"ret\n"
);
// 32-bit x86 MMX version. The declaration only provides the prototype; the
// function body is the file-scope asm block that follows, which defines the
// symbol itself and reads its arguments from the stack.
// Converts Y-only samples two pixels per iteration: 2 bytes are read from
// y_buf and 8 bytes are stored to rgb_buf each time round, and the loop is
// bottom-tested, so an odd or zero width still processes a full pixel pair.
// NOTE(review): no emms is executed before returning; presumably callers are
// expected to invoke EMMS() themselves - confirm.
void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width);
asm(
".text\n"
// OSX/IOS builds export the symbol with a leading underscore.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYToRGB32Row\n"
"_FastConvertYToRGB32Row:\n"
#else
".global FastConvertYToRGB32Row\n"
"FastConvertYToRGB32Row:\n"
#endif
// Only ebx is saved (4 bytes); with the 4-byte return address the first
// argument is therefore at 0x8(%esp).
"push %ebx\n"
"mov 0x8(%esp),%eax\n"  // eax = y_buf
"mov 0xc(%esp),%edx\n"  // edx = rgb_buf
"mov 0x10(%esp),%ecx\n"  // ecx = width
"1:"
"movzbl (%eax),%ebx\n"  // ebx = first Y sample
"movq _kCoefficientsRgbY(,%ebx,8),%mm0\n"  // mm0 = table entry, first pixel
"psraw $0x6,%mm0\n"  // drop the 6 fractional bits
"movzbl 0x1(%eax),%ebx\n"  // ebx = second Y sample
"movq _kCoefficientsRgbY(,%ebx,8),%mm1\n"  // mm1 = table entry, second pixel
"psraw $0x6,%mm1\n"  // drop the 6 fractional bits
"packuswb %mm1,%mm0\n"  // mm0 = first pixel bytes | second pixel bytes
"lea 0x2(%eax),%eax\n"  // y_buf += 2
"movq %mm0,(%edx)\n"  // store two 4-byte pixels
"lea 0x8(%edx),%edx\n"  // rgb_buf += 8
"sub $0x2,%ecx\n"  // width -= 2
"ja 1b\n"  // loop while pixels remain
"pop %ebx\n"
"ret\n"
);
#else
// C reference code that mimics the YUV assembly.
// Clamp x to the range [0, 255]; named after the x86 packuswb instruction
// used by the assembly versions. The argument is evaluated more than once,
// so it must not have side effects.
#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
......@@ -158,6 +295,30 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
rgb_buf += 8; // Advance 2 pixels.
}
}
#endif
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width; ++x) {
uint8 u = u_buf[x];
uint8 v = v_buf[x];
uint8 y = y_buf[x];
YuvPixel(y, u, v, rgb_buf);
rgb_buf += 4; // Advance 1 pixel.
}
}
void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width; ++x) {
uint8 y = y_buf[x];
YuvPixel(y, 128, 128, rgb_buf);
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
} // extern "C"
......@@ -16,14 +16,14 @@ extern "C" {
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
static_cast<int16>(256 * 64 - 1) \
}
#define RGBU(i) { \
static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
0, \
static_cast<int16>(256 * 64 - 1) \
0 \
}
#define RGBV(i) { \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment