Commit 120d5e73 authored by mikhal@webrtc.org

libyuv: Updating planar functions

Review URL: http://webrtc-codereview.appspot.com/209002

git-svn-id: http://libyuv.googlecode.com/svn/trunk@12 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 3660f2e5
...@@ -19,17 +19,8 @@ namespace libyuv { ...@@ -19,17 +19,8 @@ namespace libyuv {
class PlanarFunctions { class PlanarFunctions {
public: public:
// Copy I420 to I420. // Copy I420 to I420.
static void I420Copy(const uint8* src_y, int src_pitch_y, static void I420Copy(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert I422 to I420. Used by MJPG.
static void I422ToI420(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u, const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v, const uint8* src_v, int src_pitch_v,
uint8* dst_y, int dst_pitch_y, uint8* dst_y, int dst_pitch_y,
...@@ -37,24 +28,100 @@ class PlanarFunctions { ...@@ -37,24 +28,100 @@ class PlanarFunctions {
uint8* dst_v, int dst_pitch_v, uint8* dst_v, int dst_pitch_v,
int width, int height); int width, int height);
// Convert M420 to I420. // Convert I422 to I420. Used by MJPG.
static void M420ToI420(uint8* dst_y, int dst_pitch_y, static void I422ToI420(const uint8* src_y, int src_pitch_y,
uint8* dst_u, int dst_pitch_u, const uint8* src_u, int src_pitch_u,
uint8* dst_v, int dst_pitch_v, const uint8* src_v, int src_pitch_v,
const uint8* m420, int pitch_m420, uint8* dst_y, int dst_pitch_y,
int width, int height); uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert NV12 to I420. Also used for NV21. // Convert M420 to I420.
static void NV12ToI420(uint8* dst_y, int dst_pitch_y, static void M420ToI420(const uint8* src_m420, int src_pitch_m420,
uint8* dst_u, int dst_pitch_u, uint8* dst_y, int dst_pitch_y,
uint8* dst_v, int dst_pitch_v, uint8* dst_u, int dst_pitch_u,
const uint8* src_y, uint8* dst_v, int dst_pitch_v,
const uint8* src_uv, int width, int height);
int src_pitch,
int width, int height); // Convert Q420 to I420.
// NOTE(review): throughout these declarations the *_pitch_* arguments are
// presumably per-row strides in bytes for the matching buffer, and
// width/height the image size in pixels - confirm against the implementation.
//
// Q420 source is passed as two buffers, src_y and src_yuy2, each with its own
// pitch; the destination is the three I420 planes.
static void Q420ToI420(const uint8* src_y, int src_pitch_y,
const uint8* src_yuy2, int src_pitch_yuy2,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert NV12 to I420. Also used for NV21.
// Unlike the other converters, src_y and src_uv share the single src_pitch
// argument.
static void NV12ToI420(const uint8* src_y,
const uint8* src_uv, int src_pitch,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert YUY2 to I420.
// Single source buffer (src_yuy2) to three destination planes.
static void YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert UYVY to I420.
// Single source buffer (src_uyvy) to three destination planes.
static void UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy,
uint8* dst_y, int dst_pitch_y,
uint8* dst_u, int dst_pitch_u,
uint8* dst_v, int dst_pitch_v,
int width, int height);
// Convert I420 to ARGB.
// Three source planes (Y, U, V) to a single dst_argb buffer.
static void I420ToARGB(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I422 to ARGB.
// Same parameter list as I420ToARGB.
static void I422ToARGB(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I444 to ARGB.
// Same parameter list as I420ToARGB.
static void I444ToARGB(const uint8* src_y, int src_pitch_y,
const uint8* src_u, int src_pitch_u,
const uint8* src_v, int src_pitch_v,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I400 to ARGB.
// The only source is a Y buffer; no U or V inputs are taken.
static void I400ToARGB(const uint8* src_y, int src_pitch_y,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert I400 to ARGB.
// NOTE(review): identical signature to I400ToARGB; presumably a reference
// implementation kept for comparison - confirm against the implementation.
static void I400ToARGB_Reference(const uint8* src_y, int src_pitch_y,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert RAW to ARGB.
// Single source buffer (src_raw) to a single dst_argb buffer.
static void RAWToARGB(const uint8* src_raw, int src_pitch_raw,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert BG24 to ARGB.
// Single source buffer (src_bg24) to a single dst_argb buffer.
static void BG24ToARGB(const uint8* src_bg24, int src_pitch_bg24,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
// Convert ABGR to ARGB.
// Single source buffer (src_abgr) to a single dst_argb buffer.
static void ABGRToARGB(const uint8* src_abgr, int src_pitch_abgr,
uint8* dst_argb, int dst_pitch_argb,
int width, int height);
DISALLOW_IMPLICIT_CONSTRUCTORS(PlanarFunctions); DISALLOW_IMPLICIT_CONSTRUCTORS(PlanarFunctions);
}; };
} // namespace libyuv } // namespace libyuv
......
This diff is collapsed.
...@@ -22,6 +22,16 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, ...@@ -22,6 +22,16 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
// Converts one row of YUV to 32-bit RGB where every pixel has its own Y, U
// and V sample (one byte is consumed from each of y_buf, u_buf and v_buf per
// pixel). Four bytes are written to rgb_buf per pixel; width is the number of
// pixels in the row.
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
// Converts one row of Y-only samples to 32-bit RGB, writing four bytes to
// rgb_buf per pixel. width is the number of pixels in the row.
// NOTE(review): the assembly implementations consume two pixels per loop
// iteration, so callers presumably need an even width or one pixel of slack
// in both buffers - confirm.
void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width);
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var #define SIMD_ALIGNED(var) __declspec(align(16)) var
#else #else
...@@ -68,6 +78,7 @@ extern SIMD_ALIGNED(const int16 _kCoefficientsRgbY[768][4]); ...@@ -68,6 +78,7 @@ extern SIMD_ALIGNED(const int16 _kCoefficientsRgbY[768][4]);
#define EMMS() #define EMMS()
#endif #endif
} // extern "C" } // extern "C"
#endif // LIBYUV_SOURCE_ROW_H_ #endif // LIBYUV_SOURCE_ROW_H_
...@@ -55,6 +55,68 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi ...@@ -55,6 +55,68 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
); );
} }
// Converts one row of 4:4:4 YUV to 32-bit RGB (x86-64, SSE2), one pixel per
// loop iteration.
//
// For each pixel the U, V and Y bytes index three consecutive 256-entry
// sections of _kCoefficientsRgbY (8 bytes per entry: Y at offset 0, U at
// offset 2048, V at offset 4096). The three entries are added with signed
// saturation, shifted right by 6, packed to unsigned bytes and stored as
// 4 bytes at rgb_buf.
//
// y_buf/u_buf/v_buf: source samples, one byte each per pixel.
// rgb_buf:           destination, 4 bytes per pixel.
// width:             number of pixels; nothing is written when width <= 0.
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,  // rdi
                                 const uint8* u_buf,  // rsi
                                 const uint8* v_buf,  // rdx
                                 uint8* rgb_buf,      // rcx
                                 int width) {         // r8
  // The loop below is a do-while: without this guard a width of 0 would still
  // read and write one pixel, and a negative width would keep looping.
  if (width <= 0) {
    return;
  }
  // The pointers and the counter are advanced inside the asm, so they must be
  // read-write ("+r") operands; modifying input-only operands is undefined.
  // "volatile" keeps the statement from being discarded now that it has
  // outputs which are not used afterwards.
  asm volatile(
  "1:"
  "movzb (%1),%%r10\n"
  "lea 1(%1),%1\n"
  "movzb (%2),%%r11\n"
  "lea 1(%2),%2\n"
  "movq 2048(%5,%%r10,8),%%xmm0\n"
  "movzb (%0),%%r10\n"
  "movq 4096(%5,%%r11,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq (%5,%%r10,8),%%xmm2\n"
  "lea 1(%0),%0\n"
  "paddsw %%xmm0,%%xmm2\n"
  "shufps $0x44,%%xmm2,%%xmm2\n"
  "psraw $0x6,%%xmm2\n"
  "packuswb %%xmm2,%%xmm2\n"
  "movd %%xmm2,0x0(%3)\n"
  "lea 4(%3),%3\n"
  "sub $0x1,%4\n"
  "ja 1b\n"
  : "+r"(y_buf),    // %0
    "+r"(u_buf),    // %1
    "+r"(v_buf),    // %2
    "+r"(rgb_buf),  // %3
    "+r"(width)     // %4
  : "r" (_kCoefficientsRgbY)  // %5
  : "memory", "cc", "r10", "r11", "xmm0", "xmm1", "xmm2"
  );
}
// Converts one row of Y-only samples to 32-bit RGB (x86-64, SSE2), two pixels
// per loop iteration.
//
// Each Y byte indexes the first 256-entry section of _kCoefficientsRgbY
// (8 bytes per entry). The two entries are combined into one register,
// shifted right by 6, packed to unsigned bytes and stored as 8 bytes.
//
// y_buf:   source samples, one byte per pixel.
// rgb_buf: destination, 4 bytes per pixel.
// width:   number of pixels; nothing is written when width <= 0.
//
// NOTE: because the loop consumes two pixels at a time, an odd width reads one
// extra byte from y_buf and writes one extra pixel to rgb_buf.
void FastConvertYToRGB32Row(const uint8* y_buf,  // rdi
                            uint8* rgb_buf,      // rsi
                            int width) {         // rdx
  // The loop below is a do-while: without this guard a width of 0 would still
  // read and write two pixels, and a negative width would keep looping.
  if (width <= 0) {
    return;
  }
  // The pointers and the counter are advanced inside the asm, so they must be
  // read-write ("+r") operands; modifying input-only operands is undefined.
  // "volatile" keeps the statement from being discarded now that it has
  // outputs which are not used afterwards.
  asm volatile(
  "1:"
  "movzb (%0),%%r10\n"
  "movzb 0x1(%0),%%r11\n"
  "movq (%3,%%r10,8),%%xmm2\n"
  "lea 2(%0),%0\n"
  "movq (%3,%%r11,8),%%xmm3\n"
  "shufps $0x44,%%xmm3,%%xmm2\n"
  "psraw $0x6,%%xmm2\n"
  "packuswb %%xmm2,%%xmm2\n"
  "movq %%xmm2,0x0(%1)\n"
  "lea 8(%1),%1\n"
  "sub $0x2,%2\n"
  "ja 1b\n"
  : "+r"(y_buf),    // %0
    "+r"(rgb_buf),  // %1
    "+r"(width)     // %2
  : "r" (_kCoefficientsRgbY)  // %3
  // Only xmm2 and xmm3 are written by the code above.
  : "memory", "cc", "r10", "r11", "xmm2", "xmm3"
  );
}
#elif defined(__i386__) #elif defined(__i386__)
// 32 bit gcc version // 32 bit gcc version
...@@ -104,6 +166,81 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, ...@@ -104,6 +166,81 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"ret\n" "ret\n"
); );
// Converts one row of 4:4:4 YUV to 32-bit RGB (32-bit x86, MMX), one pixel
// per loop iteration. The body is emitted as file-scope assembly, so the
// arguments are read straight from the stack.
// NOTE(review): the routine uses MMX registers and does not execute emms
// before returning; presumably callers are expected to do that - confirm.
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
asm(
".text\n"
// OSX/IOS builds export the symbol with a leading underscore.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYUV444ToRGB32Row\n"
"_FastConvertYUV444ToRGB32Row:\n"
#else
".global FastConvertYUV444ToRGB32Row\n"
"FastConvertYUV444ToRGB32Row:\n"
#endif
// pusha saves eight 32-bit registers (32 bytes); together with the return
// address that puts the first argument at 0x24(%esp).
"pusha\n"
// edx = y_buf, edi = u_buf, esi = v_buf, ebp = rgb_buf, ecx = width.
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
// Per-pixel loop. It is entered unconditionally, so at least one pixel is
// processed regardless of width.
"1:"
// Load the U and V bytes and advance both pointers.
"movzbl (%edi),%eax\n"
"lea 1(%edi),%edi\n"
"movzbl (%esi),%ebx\n"
"lea 1(%esi),%esi\n"
// Table entries are 8 bytes each: the U section starts at byte offset 2048,
// the V section at 4096 and the Y section at 0. The three entries are summed
// with signed saturation.
"movq _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
"lea 1(%edx),%edx\n"
"paddsw _kCoefficientsRgbY(,%eax,8),%mm0\n"
// Shift each 16-bit lane right by 6, then saturate the lanes to unsigned
// bytes.
"psraw $0x6,%mm0\n"
"packuswb %mm0,%mm0\n"
// Store one 4-byte pixel and advance the destination.
"movd %mm0,0x0(%ebp)\n"
"lea 4(%ebp),%ebp\n"
// Decrement the pixel counter; loop while the result is above zero.
"sub $0x1,%ecx\n"
"ja 1b\n"
"popa\n"
"ret\n"
);
// Converts one row of Y-only samples to 32-bit RGB (32-bit x86, MMX), two
// pixels per loop iteration. The body is emitted as file-scope assembly, so
// the arguments are read straight from the stack.
// Because two pixels are handled per iteration, an odd width reads one extra
// byte from y_buf and writes one extra pixel to rgb_buf.
// NOTE(review): the routine uses MMX registers and does not execute emms
// before returning; presumably callers are expected to do that - confirm.
void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width);
asm(
".text\n"
// OSX/IOS builds export the symbol with a leading underscore.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYToRGB32Row\n"
"_FastConvertYToRGB32Row:\n"
#else
".global FastConvertYToRGB32Row\n"
"FastConvertYToRGB32Row:\n"
#endif
// ebx is saved here and restored before ret. With the saved ebx and the
// return address on the stack, the first argument is at 0x8(%esp).
"push %ebx\n"
// eax = y_buf, edx = rgb_buf, ecx = width.
"mov 0x8(%esp),%eax\n"
"mov 0xc(%esp),%edx\n"
"mov 0x10(%esp),%ecx\n"
// Two-pixel loop. It is entered unconditionally, so at least two pixels are
// processed regardless of width.
"1:"
// First pixel: look up the 8-byte table entry for the Y byte, shift each
// 16-bit lane right by 6.
"movzbl (%eax),%ebx\n"
"movq _kCoefficientsRgbY(,%ebx,8),%mm0\n"
"psraw $0x6,%mm0\n"
// Second pixel: same lookup for the next Y byte.
"movzbl 0x1(%eax),%ebx\n"
"movq _kCoefficientsRgbY(,%ebx,8),%mm1\n"
"psraw $0x6,%mm1\n"
// Saturate both results to unsigned bytes: mm0 supplies the low four bytes,
// mm1 the high four.
"packuswb %mm1,%mm0\n"
// Advance the source by two bytes, store eight bytes (two pixels), and
// advance the destination.
"lea 0x2(%eax),%eax\n"
"movq %mm0,(%edx)\n"
"lea 0x8(%edx),%edx\n"
// Decrement the pixel counter by two; loop while the result is above zero.
"sub $0x2,%ecx\n"
"ja 1b\n"
"pop %ebx\n"
"ret\n"
);
#else #else
// C reference code that mimic the YUV assembly. // C reference code that mimic the YUV assembly.
#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
...@@ -158,6 +295,30 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, ...@@ -158,6 +295,30 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
} }
#endif
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width; ++x) {
uint8 u = u_buf[x];
uint8 v = v_buf[x];
uint8 y = y_buf[x];
YuvPixel(y, u, v, rgb_buf);
rgb_buf += 4; // Advance 1 pixel.
}
}
void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width; ++x) {
uint8 y = y_buf[x];
YuvPixel(y, 128, 128, rgb_buf);
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
} // extern "C" } // extern "C"
...@@ -16,14 +16,14 @@ extern "C" { ...@@ -16,14 +16,14 @@ extern "C" {
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \ static_cast<int16>(256 * 64 - 1) \
} }
#define RGBU(i) { \ #define RGBU(i) { \
static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
0, \ 0, \
static_cast<int16>(256 * 64 - 1) \ 0 \
} }
#define RGBV(i) { \ #define RGBV(i) { \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment