Commit 2d9fe082 authored by fbarchard@google.com

direct conversion from NV12 to ARGB

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/645004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@281 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 7c8e16f8
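The headline change: NV12 and NV21 frames now convert straight to ARGB instead of being split to I420 planes first. A minimal sketch of calling the new direct path (the umbrella include and buffer layout here are assumptions for illustration, not part of the commit):

    #include "libyuv.h"  // umbrella header; exact path is an assumption

    // Convert one 640x360 NV12 frame to ARGB with the API added here (sketch).
    void ConvertFrame(const uint8* nv12, uint8* argb) {
      const int width = 640;
      const int height = 360;
      const uint8* src_y = nv12;                    // width x height luma plane
      const uint8* src_uv = nv12 + width * height;  // interleaved UV, half height
      libyuv::NV12ToARGB(src_y, width,
                         src_uv, width,   // UV stride: width/2 pairs * 2 bytes
                         argb, width * 4,
                         width, height);
    }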
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 280
+Version: 281
 License: BSD
 License File: LICENSE
...
@@ -47,16 +47,33 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
                uint8* dst_v, int dst_stride_v,
                int width, int height);

-// Convert NV12 to ARGB. Also used for NV21.
+// Convert NV12 to ARGB.
 int NV12ToARGB(const uint8* src_y, int src_stride_y,
                const uint8* src_uv, int src_stride_uv,
                uint8* dst_frame, int dst_stride_frame,
                int width, int height);

-// Convert NV12 to RGB565. Also used for NV21.
+// Convert NV21 to ARGB.
+int NV21ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_vu, int src_stride_vu,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert M420 to ARGB.
+int M420ToARGB(const uint8* src_m420, int src_stride_m420,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert NV12 to RGB565.
 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
                  const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_frame, int dst_stride_frame,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
                  int width, int height);

+// Convert NV21 to RGB565.
+int NV21ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_uv, int src_stride_uv,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height);
+
 // Convert YUY2 to ARGB.
...
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 280
+#define LIBYUV_VERSION 281
 #endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -367,7 +367,7 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
 // Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
 // easy conversion to I420.
 // M420 format description:
-// M420 is row biplanar 420: 2 rows of Y and 1 row of VU.
+// M420 is row biplanar 420: 2 rows of Y and 1 row of UV.
 // Chroma is half width / half height. (420)
 // src_stride_m420 is row planar. Normally this will be the width in pixels.
 //   The UV plane is half width, but 2 values, so src_stride_m420 applies to
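In other words, the format repeats in groups of three stride-sized rows. A sketch of the resulting row addressing (this mirrors the M420ToARGB loop added later in this commit):

    // Row addressing implied by the description above (a sketch):
    // for picture row y with stride s:
    //   group  = y / 2;                               // 2 Y rows + 1 UV row per group
    //   y_row  = src_m420 + (group * 3 + (y & 1)) * s;
    //   uv_row = src_m420 + (group * 3 + 2) * s;      // interleaved UV, half width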
...
@@ -839,51 +839,191 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
     dst_stride_argb = -dst_stride_argb;
   }
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* argb_buf,
-                        int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#elif defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8) &&
-        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-      I422ToARGBRow = I422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-  int halfwidth = (width + 1) >> 1;
-  void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
-      SplitUV_C;
-#if defined(HAS_SPLITUV_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SplitUV = SplitUV_NEON;
-  }
-#elif defined(HAS_SPLITUV_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) {
-    SplitUV = SplitUV_SSE2;
-  }
-#endif
-  SIMD_ALIGNED(uint8 rowuv[kMaxStride * 2]);
-  for (int y = 0; y < height; ++y) {
-    if ((y & 1) == 0) {
-      // Copy a row of UV.
-      SplitUV(src_uv, rowuv, rowuv + kMaxStride, halfwidth);
-      src_uv += src_stride_uv;
-    }
-    I422ToARGBRow(src_y, rowuv, rowuv + kMaxStride, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-  }
+  void (*NV12ToARGBRow)(const uint8* y_buf,
+                        const uint8* uv_buf,
+                        uint8* rgb_buf,
+                        int width) = NV12ToARGBRow_C;
+#if defined(HAS_NV12TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      }
+    }
+  }
+#endif
+  for (int y = 0; y < height; ++y) {
+    NV12ToARGBRow(src_y, src_uv, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_uv += src_stride_uv;
+    }
+  }
+  return 0;
+}
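The loop above advances src_uv only after odd rows because NV12 chroma is subsampled 2x vertically, so each UV row is shared by a pair of Y rows. The same traversal in index form (a sketch, not the committed code):

    // Equivalent indexing form of the NV12ToARGB loop (sketch):
    for (int y = 0; y < height; ++y) {
      const uint8* uv_row = src_uv + (y >> 1) * src_stride_uv;  // row pair shares UV
      NV12ToARGBRow(src_y + y * src_stride_y, uv_row,
                    dst_argb + y * dst_stride_argb, width);
    }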
+
+// Convert NV21 to ARGB.
+int NV21ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_vu, int src_stride_vu,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  void (*NV21ToARGBRow)(const uint8* y_buf,
+                        const uint8* vu_buf,
+                        uint8* rgb_buf,
+                        int width) = NV21ToARGBRow_C;
+#if defined(HAS_NV21TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+      }
+    }
+  }
+#endif
+  for (int y = 0; y < height; ++y) {
+    NV21ToARGBRow(src_y, src_vu, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_vu += src_stride_vu;
+    }
+  }
+  return 0;
+}
+
+// Convert M420 to ARGB.
+int M420ToARGB(const uint8* src_m420, int src_stride_m420,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  void (*NV12ToARGBRow)(const uint8* y_buf,
+                        const uint8* uv_buf,
+                        uint8* rgb_buf,
+                        int width) = NV12ToARGBRow_C;
+#if defined(HAS_NV12TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      }
+    }
+  }
+#endif
+  for (int y = 0; y < height - 1; y += 2) {
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+    NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
+                  dst_argb + dst_stride_argb, width);
+    dst_argb += dst_stride_argb * 2;
+    src_m420 += src_stride_m420 * 3;
+  }
+  if (height & 1) {
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+  }
+  return 0;
+}
+
+// Convert NV12 to RGB565.
+// TODO(fbarchard): (Re) Optimize for Neon.
+int NV12ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_uv, int src_stride_uv,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height) {
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+    dst_stride_rgb565 = -dst_stride_rgb565;
+  }
+  void (*NV12ToARGBRow)(const uint8* y_buf,
+                        const uint8* uv_buf,
+                        uint8* rgb_buf,
+                        int width) = NV12ToARGBRow_C;
+#if defined(HAS_NV12TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width * 4 <= kMaxStride) {
+    NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+  }
+#endif
+  SIMD_ALIGNED(uint8 row[kMaxStride]);
+  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+      ARGBToRGB565Row_C;
+#if defined(HAS_ARGBTORGB565ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
+    ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
+  }
+#endif
+  for (int y = 0; y < height; ++y) {
+    NV12ToARGBRow(src_y, src_uv, row, width);
+    ARGBToRGB565Row(row, dst_rgb565, width);
+    dst_rgb565 += dst_stride_rgb565;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_uv += src_stride_uv;
+    }
+  }
+  return 0;
+}
+
+// Convert NV21 to RGB565.
+int NV21ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_vu, int src_stride_vu,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height) {
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+    dst_stride_rgb565 = -dst_stride_rgb565;
+  }
+  void (*NV21ToARGBRow)(const uint8* y_buf,
+                        const uint8* uv_buf,
+                        uint8* rgb_buf,
+                        int width) = NV21ToARGBRow_C;
+#if defined(HAS_NV21TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width * 4 <= kMaxStride) {
+    NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+  }
+#endif
+  SIMD_ALIGNED(uint8 row[kMaxStride]);
+  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+      ARGBToRGB565Row_C;
+#if defined(HAS_ARGBTORGB565ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
+    ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
+  }
+#endif
+  for (int y = 0; y < height; ++y) {
+    NV21ToARGBRow(src_y, src_vu, row, width);
+    ARGBToRGB565Row(row, dst_rgb565, width);
+    dst_rgb565 += dst_stride_rgb565;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_vu += src_stride_vu;
+    }
+  }
   return 0;
 }
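Both RGB565 paths convert a row to ARGB in a scratch buffer and then pack it down with ARGBToRGB565Row. The packing is the usual 5:6:5 truncation; a scalar sketch (ARGBToRGB565Row_C's own body is outside this diff):

    // Pack one ARGB pixel (B,G,R byte order in memory) to RGB565 (a sketch):
    static inline uint16 PackRGB565(uint8 b, uint8 g, uint8 r) {
      return static_cast<uint16>((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
    }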
@@ -1020,69 +1160,6 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
   return 0;
 }
-
-// Convert NV12 to RGB565.
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_rgb, int dst_stride_rgb,
-                 int width, int height) {
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
-    dst_stride_rgb = -dst_stride_rgb;
-  }
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToARGBRow = I422ToARGBRow_NEON;
-  }
-#elif defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToARGBRow = I422ToARGBRow_SSSE3;
-  }
-#endif
-  SIMD_ALIGNED(uint8 row[kMaxStride]);
-  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
-      ARGBToRGB565Row_C;
-#if defined(HAS_ARGBTORGB565ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
-    ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
-  }
-#endif
-  int halfwidth = (width + 1) >> 1;
-  void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
-      SplitUV_C;
-#if defined(HAS_SPLITUV_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SplitUV = SplitUV_NEON;
-  }
-#elif defined(HAS_SPLITUV_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) {
-    SplitUV = SplitUV_SSE2;
-  }
-#endif
-  SIMD_ALIGNED(uint8 rowuv[kMaxStride * 2]);
-  for (int y = 0; y < height; ++y) {
-    if ((y & 1) == 0) {
-      // Copy a row of UV.
-      SplitUV(src_uv, rowuv, rowuv + kMaxStride, halfwidth);
-      src_uv += src_stride_uv;
-    }
-    I422ToARGBRow(src_y, rowuv, rowuv + kMaxStride, row, width);
-    ARGBToRGB565Row(row, dst_rgb, width);
-    dst_rgb += dst_stride_rgb;
-    src_y += src_stride_y;
-  }
-  return 0;
-}
 // SetRow8 writes 'count' bytes using a 32 bit value repeated
 // SetRow32 writes 'count' words using a 32 bit value repeated
...
@@ -54,12 +54,14 @@ extern "C" {
 #define HAS_BGRATOYROW_SSSE3
 #define HAS_COPYROW_SSE2
 #define HAS_COPYROW_X86
-#define HAS_I400TOARGBROW_SSE2
-#define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOBGRAROW_SSSE3
 #define HAS_I444TOARGBROW_SSSE3
+#define HAS_I422TOARGBROW_SSSE3
 #define HAS_I411TOARGBROW_SSSE3
+#define HAS_NV12TOARGBROW_SSSE3
+#define HAS_NV21TOARGBROW_SSSE3
+#define HAS_I422TOBGRAROW_SSSE3
+#define HAS_I422TOABGRROW_SSSE3
+#define HAS_I400TOARGBROW_SSE2
 #define HAS_MIRRORROW_SSSE3
 #define HAS_MIRRORROWUV_SSSE3
 #define HAS_ADDROW_SSE2
@@ -220,34 +222,44 @@ void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
-void I422ToARGBRow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width);
-void I422ToBGRARow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width);
-void I422ToABGRRow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width);
-void I444ToARGBRow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width);
-void I411ToARGBRow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width);
+void I444ToARGBRow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* argb_buf,
+                     int width);
+void I422ToARGBRow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* argb_buf,
+                     int width);
+void I411ToARGBRow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* rgb_buf,
+                     int width);
+void NV12ToARGBRow_C(const uint8* y_buf,
+                     const uint8* uv_buf,
+                     uint8* argb_buf,
+                     int width);
+void NV21ToARGBRow_C(const uint8* y_buf,
+                     const uint8* vu_buf,
+                     uint8* argb_buf,
+                     int width);
+void I422ToBGRARow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* bgra_buf,
+                     int width);
+void I422ToABGRRow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* abgr_buf,
+                     int width);
 void YToARGBRow_C(const uint8* y_buf,
@@ -269,6 +281,16 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
 void I411ToARGBRow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
                          uint8* rgb_buf,
                          int width);
+void NV12ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* uv_buf,
+                         uint8* argb_buf,
+                         int width);
+void NV21ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* vu_buf,
+                         uint8* argb_buf,
+                         int width);
@@ -299,6 +321,16 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
 void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
                                    uint8* rgb_buf,
                                    int width);
+void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* uv_buf,
+                                   uint8* argb_buf,
+                                   int width);
+void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* vu_buf,
+                                   uint8* argb_buf,
+                                   int width);
@@ -314,37 +346,16 @@ void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
                                    uint8* abgr_buf,
                                    int width);

-void YToARGBRow_SSE2(const uint8* y_buf,
-                     uint8* argb_buf,
-                     int width);
-
-// ARGB preattenuated alpha blend.
-void ARGBBlendRow_Aligned_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
-                                uint8* dst_argb, int width);
-void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                               uint8* dst_argb, int width);
-void ARGBBlendRow1_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
-                         uint8* dst_argb, int width);
-void ARGBBlendRow1_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                        uint8* dst_argb, int width);
-void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
-                            uint8* dst_argb, int width);
-void ARGBBlendRow_Any_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                           uint8* dst_argb, int width);
-void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                    uint8* dst_argb, int width);
-
-// 'Any' functions handle any size and alignment.
 void I444ToARGBRow_Any_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
-                             uint8* rgb_buf,
+                             uint8* argb_buf,
                              int width);
 void I422ToARGBRow_Any_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
-                             uint8* rgb_buf,
+                             uint8* argb_buf,
                              int width);
 void I411ToARGBRow_Any_SSSE3(const uint8* y_buf,
@@ -353,18 +364,47 @@ void I411ToARGBRow_Any_SSSE3(const uint8* y_buf,
                              uint8* rgb_buf,
                              int width);
+void NV12ToARGBRow_Any_SSSE3(const uint8* y_buf,
+                             const uint8* uv_buf,
+                             uint8* argb_buf,
+                             int width);
+void NV21ToARGBRow_Any_SSSE3(const uint8* y_buf,
+                             const uint8* vu_buf,
+                             uint8* argb_buf,
+                             int width);
 void I422ToBGRARow_Any_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
-                             uint8* rgb_buf,
+                             uint8* bgra_buf,
                              int width);
 void I422ToABGRRow_Any_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
-                             uint8* rgb_buf,
+                             uint8* abgr_buf,
                              int width);
+
+void YToARGBRow_SSE2(const uint8* y_buf,
+                     uint8* argb_buf,
+                     int width);
+
+// ARGB preattenuated alpha blend.
+void ARGBBlendRow_Aligned_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
+                                uint8* dst_argb, int width);
+void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                               uint8* dst_argb, int width);
+void ARGBBlendRow1_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
+                         uint8* dst_argb, int width);
+void ARGBBlendRow1_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                        uint8* dst_argb, int width);
+void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
+                            uint8* dst_argb, int width);
+void ARGBBlendRow_Any_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                           uint8* dst_argb, int width);
+void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
+                    uint8* dst_argb, int width);

 void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
...
@@ -359,6 +359,20 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
       (255u << ashift);
 }

+void I444ToARGBRow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* rgb_buf,
+                     int width) {
+  for (int x = 0; x < width; ++x) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0);
+    y_buf += 1;
+    u_buf += 1;
+    v_buf += 1;
+    rgb_buf += 4;  // Advance 1 pixel.
+  }
+}
+
 // Also used for 420
 void I422ToARGBRow_C(const uint8* y_buf,
                      const uint8* u_buf,
@@ -378,79 +392,97 @@ void I422ToARGBRow_C(const uint8* y_buf,
   }
 }
-void I422ToBGRARow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) {
-  for (int x = 0; x < width - 1; x += 2) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24);
-    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24);
-    y_buf += 2;
-    u_buf += 1;
-    v_buf += 1;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 0, 8, 16, 24);
-  }
-}
-
-void I422ToABGRRow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) {
-  for (int x = 0; x < width - 1; x += 2) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
-    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16);
-    y_buf += 2;
-    u_buf += 1;
-    v_buf += 1;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
-  }
-}
-
-void I444ToARGBRow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) {
-  for (int x = 0; x < width; ++x) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0);
-    y_buf += 1;
-    u_buf += 1;
-    v_buf += 1;
-    rgb_buf += 4;  // Advance 1 pixel.
-  }
-}
-
-void I411ToARGBRow_C(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) {
-  for (int x = 0; x < width - 3; x += 4) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
-    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
-    YuvPixel(y_buf[2], u_buf[0], v_buf[0], rgb_buf + 8, 24, 16, 8, 0);
-    YuvPixel(y_buf[3], u_buf[0], v_buf[0], rgb_buf + 12, 24, 16, 8, 0);
-    y_buf += 4;
-    u_buf += 1;
-    v_buf += 1;
-    rgb_buf += 16;  // Advance 4 pixels.
-  }
-  if (width & 2) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
-    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
-    y_buf += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
-  }
-}
+void I411ToARGBRow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* rgb_buf,
+                     int width) {
+  for (int x = 0; x < width - 3; x += 4) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
+    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
+    YuvPixel(y_buf[2], u_buf[0], v_buf[0], rgb_buf + 8, 24, 16, 8, 0);
+    YuvPixel(y_buf[3], u_buf[0], v_buf[0], rgb_buf + 12, 24, 16, 8, 0);
+    y_buf += 4;
+    u_buf += 1;
+    v_buf += 1;
+    rgb_buf += 16;  // Advance 4 pixels.
+  }
+  if (width & 2) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
+    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
+    y_buf += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
+  }
+}
+
+void NV12ToARGBRow_C(const uint8* y_buf,
+                     const uint8* uv_buf,
+                     uint8* rgb_buf,
+                     int width) {
+  for (int x = 0; x < width - 1; x += 2) {
+    YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0);
+    YuvPixel(y_buf[1], uv_buf[0], uv_buf[1], rgb_buf + 4, 24, 16, 8, 0);
+    y_buf += 2;
+    uv_buf += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0);
+  }
+}
+
+void NV21ToARGBRow_C(const uint8* y_buf,
+                     const uint8* vu_buf,
+                     uint8* rgb_buf,
+                     int width) {
+  for (int x = 0; x < width - 1; x += 2) {
+    YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0);
+    YuvPixel(y_buf[1], vu_buf[1], vu_buf[0], rgb_buf + 4, 24, 16, 8, 0);
+    y_buf += 2;
+    vu_buf += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0);
+  }
+}
+
+void I422ToBGRARow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* rgb_buf,
+                     int width) {
+  for (int x = 0; x < width - 1; x += 2) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24);
+    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24);
+    y_buf += 2;
+    u_buf += 1;
+    v_buf += 1;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 0, 8, 16, 24);
+  }
+}
+
+void I422ToABGRRow_C(const uint8* y_buf,
+                     const uint8* u_buf,
+                     const uint8* v_buf,
+                     uint8* rgb_buf,
+                     int width) {
+  for (int x = 0; x < width - 1; x += 2) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
+    YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16);
+    y_buf += 2;
+    u_buf += 1;
+    v_buf += 1;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
+  }
+}
@@ -728,10 +760,26 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
               rgb_buf + n * 4, width & 7); \
   }

+// Wrappers to handle odd width
+#define Y2NY(NAMEANY, NV12TORGB_SSE, NV12TORGB_C, UV_SHIFT) \
+    void NAMEANY(const uint8* y_buf, \
+                 const uint8* uv_buf, \
+                 uint8* rgb_buf, \
+                 int width) { \
+      int n = width & ~7; \
+      NV12TORGB_SSE(y_buf, uv_buf, rgb_buf, n); \
+      NV12TORGB_C(y_buf + n, \
+                  uv_buf + (n >> UV_SHIFT), \
+                  rgb_buf + n * 4, width & 7); \
+    }
+
 #if defined(HAS_I422TOARGBROW_SSSE3)
 YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 0)
 YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1)
 YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 2)
+Y2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C, 0)
+Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, 0)
 YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
 YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
 #endif
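Y2NY differs from YANY in taking one interleaved UV pointer instead of separate U and V pointers. Expanded by hand for the NV12 case (UV_SHIFT == 0: two UV bytes cover two pixels, so the C tail starts n bytes into the UV row), the macro produces roughly:

    // Hand expansion of Y2NY(NV12ToARGBRow_Any_SSSE3, ...) above (a sketch):
    void NV12ToARGBRow_Any_SSSE3(const uint8* y_buf,
                                 const uint8* uv_buf,
                                 uint8* rgb_buf,
                                 int width) {
      int n = width & ~7;                            // multiple-of-8 prefix for SSSE3
      NV12ToARGBRow_Unaligned_SSSE3(y_buf, uv_buf, rgb_buf, n);
      NV12ToARGBRow_C(y_buf + n, uv_buf + (n >> 0),  // UV advances 1 byte per pixel
                      rgb_buf + n * 4, width & 7);   // C kernel covers the remainder
    }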
...
@@ -1230,6 +1230,18 @@ static const vec8 kUVToG = {
   UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
 };

+static const vec8 kVUToB = {
+  VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
+};
+
+static const vec8 kVUToR = {
+  VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
+};
+
+static const vec8 kVUToG = {
+  VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+};
+
 static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
 static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
 static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
@@ -1265,6 +1277,13 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
     __asm punpckldq  xmm0, xmm0            /* UVUV (upsample) */ \
   }

+// Read 4 UV from NV12, upsample to 8 UV
+#define READNV12 __asm { \
+    __asm movq       xmm0, qword ptr [esi] /* UV */ \
+    __asm lea        esi,  [esi + 8] \
+    __asm punpcklwd  xmm0, xmm0            /* UVUV (upsample) */ \
+  }
+
 // Convert 8 pixels: 8 UV and 8 Y
 #define YUVTORGB __asm { \
     /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
@@ -1293,6 +1312,34 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
     __asm packuswb   xmm2, xmm2            /* R */ \
   }
+// Convert 8 pixels: 8 VU and 8 Y
+#define YVUTORGB __asm { \
+    /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
+    __asm movdqa     xmm1, xmm0 \
+    __asm movdqa     xmm2, xmm0 \
+    __asm pmaddubsw  xmm0, kVUToB          /* scale B UV */ \
+    __asm pmaddubsw  xmm1, kVUToG          /* scale G UV */ \
+    __asm pmaddubsw  xmm2, kVUToR          /* scale R UV */ \
+    __asm psubw      xmm0, kUVBiasB        /* unbias back to signed */ \
+    __asm psubw      xmm1, kUVBiasG \
+    __asm psubw      xmm2, kUVBiasR \
+    /* Step 2: Find Y contribution to 8 R,G,B values */ \
+    __asm movq       xmm3, qword ptr [eax] /* NOLINT */ \
+    __asm lea        eax, [eax + 8] \
+    __asm punpcklbw  xmm3, xmm4 \
+    __asm psubsw     xmm3, kYSub16 \
+    __asm pmullw     xmm3, kYToRgb \
+    __asm paddsw     xmm0, xmm3            /* B += Y */ \
+    __asm paddsw     xmm1, xmm3            /* G += Y */ \
+    __asm paddsw     xmm2, xmm3            /* R += Y */ \
+    __asm psraw      xmm0, 6 \
+    __asm psraw      xmm1, 6 \
+    __asm psraw      xmm2, 6 \
+    __asm packuswb   xmm0, xmm0            /* B */ \
+    __asm packuswb   xmm1, xmm1            /* G */ \
+    __asm packuswb   xmm2, xmm2            /* R */ \
+  }
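YVUTORGB is YUVTORGB with the VU-ordered constants substituted, so pmaddubsw can consume V,U byte pairs as they arrive in NV21 without an extra shuffle. The per-channel fixed-point arithmetic both macros implement, written out (a sketch; the coefficient values are defined earlier in this file):

    // B = clamp8(((Y - 16) * YG + (U * UB + V * VB - BB)) >> 6)
    // G = clamp8(((Y - 16) * YG + (U * UG + V * VG - BG)) >> 6)
    // R = clamp8(((Y - 16) * YG + (U * UR + V * VR - BR)) >> 6)
    // kYSub16 supplies the -16, pmaddubsw/pmullw the products, kUVBias* the
    // bias terms, psraw the >> 6, and packuswb the unsigned clamp to 0..255.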
 // 8 pixels, dest aligned 16.
 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes)
 __declspec(naked) __declspec(align(16))
@@ -1423,6 +1470,82 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
   }
 }
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
+__declspec(naked) __declspec(align(16))
+void NV12ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* uv_buf,
+                         uint8* argb_buf,
+                         int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // UV
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      16
+  convertloop:
+    READNV12
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
+__declspec(naked) __declspec(align(16))
+void NV21ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* uv_buf,
+                         uint8* argb_buf,
+                         int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // VU
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      16
+  convertloop:
+    READNV12
+    YVUTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
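After YUVTORGB/YVUTORGB, the eight B, G and R results sit in separate registers with xmm5 holding 0xFF alpha; the two punpck stages interleave them into BGRA memory order. The weave step in scalar form (a sketch):

    for (int i = 0; i < 8; ++i) {
      argb_buf[4 * i + 0] = b[i];  // punpcklbw xmm0, xmm1 pairs B with G
      argb_buf[4 * i + 1] = g[i];
      argb_buf[4 * i + 2] = r[i];  // punpcklbw xmm2, xmm5 pairs R with alpha
      argb_buf[4 * i + 3] = 0xFF;  // then punpcklwd/punpckhwd produce BGRA
    }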
 // 8 pixels, unaligned.
 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes)
 __declspec(naked) __declspec(align(16))
@@ -1553,6 +1676,83 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
   }
 }
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
+__declspec(naked) __declspec(align(16))
+void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* uv_buf,
+                                   uint8* argb_buf,
+                                   int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // UV
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      16
+  convertloop:
+    READNV12
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
+__declspec(naked) __declspec(align(16))
+void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* uv_buf,
+                                   uint8* argb_buf,
+                                   int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // VU
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      16
+  convertloop:
+    READNV12
+    YVUTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
 __declspec(naked) __declspec(align(16))
 void I422ToBGRARow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
...
@@ -26,7 +26,7 @@
 namespace libyuv {

 #define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \
-TEST_F(libyuvTest, ##FMT_PLANAR##To##FMT_B##_CvsOPT) { \
+TEST_F(libyuvTest, ##FMT_PLANAR##To##FMT_B##_OptVsC) { \
   const int kWidth = 1280; \
   const int kHeight = 720; \
   align_buffer_16(src_y, kWidth * kHeight); \
@@ -88,8 +88,60 @@ TESTPLANARTOB(I411, 4, 1, ARGB, 4)
 TESTPLANARTOB(I422, 2, 1, ARGB, 4)
 TESTPLANARTOB(I444, 1, 1, ARGB, 4)
+#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \
+TEST_F(libyuvTest, ##FMT_PLANAR##To##FMT_B##_OptVsC) { \
+  const int kWidth = 1280; \
+  const int kHeight = 720; \
+  align_buffer_16(src_y, kWidth * kHeight); \
+  align_buffer_16(src_uv, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y * 2); \
+  align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \
+  align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j] = (random() & 0xff); \
+  for (int i = 0; i < kHeight / SUBSAMP_X; ++i) \
+    for (int j = 0; j < kWidth / SUBSAMP_Y * 2; ++j) { \
+      src_uv[(i * kWidth / SUBSAMP_X) * 2 + j] = (random() & 0xff); \
+    } \
+  MaskCpuFlags(kCpuInitialized); \
+  ##FMT_PLANAR##To##FMT_B(src_y, kWidth, \
+                          src_uv, kWidth / SUBSAMP_X * 2, \
+                          dst_argb_c, kWidth * BPP_B, \
+                          kWidth, kHeight); \
+  MaskCpuFlags(-1); \
+  const int runs = 1000; \
+  for (int i = 0; i < runs; ++i) { \
+    ##FMT_PLANAR##To##FMT_B(src_y, kWidth, \
+                            src_uv, kWidth / SUBSAMP_X * 2, \
+                            dst_argb_opt, kWidth * BPP_B, \
+                            kWidth, kHeight); \
+  } \
+  int err = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth * BPP_B; ++j) { \
+      int diff = static_cast<int>(dst_argb_c[i * kWidth * BPP_B + j]) - \
+                 static_cast<int>(dst_argb_opt[i * kWidth * BPP_B + j]); \
+      if (abs(diff) > 2) { \
+        ++err; \
+      } \
+    } \
+  } \
+  EXPECT_EQ(err, 0); \
+  free_aligned_buffer_16(src_y) \
+  free_aligned_buffer_16(src_uv) \
+  free_aligned_buffer_16(dst_argb_c) \
+  free_aligned_buffer_16(dst_argb_opt) \
+}
+
+TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4)
+TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4)
+TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2)
+TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2)
+
 #define TESTATOPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
-TEST_F(libyuvTest, ##FMT_A##To##FMT_PLANAR##_CvsOPT) { \
+TEST_F(libyuvTest, ##FMT_A##To##FMT_PLANAR##_OptVsC) { \
   const int kWidth = 1280; \
   const int kHeight = 720; \
   align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight); \
@@ -171,36 +223,34 @@ TESTATOPLANAR(ARGB, 4, I422, 2, 1)
 //TESTATOPLANAR(ARGB, 4, I444, 1, 1)
 // TODO(fbarchard): Implement and test 411 and 444

-#define TESTATOB(FMT_A, BPP_A, FMT_B, BPP_B) \
-TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \
+#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \
+TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_OptVsC) { \
   const int kWidth = 1280; \
   const int kHeight = 720; \
-  align_buffer_16(src_argb, kWidth * kHeight * BPP_A); \
+  align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight); \
   align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \
   align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \
   srandom(time(NULL)); \
-  for (int i = 0; i < kHeight; ++i) \
-    for (int j = 0; j < kWidth * BPP_A; ++j) \
-      src_argb[(i * kWidth * BPP_A) + j] = (random() & 0xff); \
+  for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \
+    src_argb[i] = (random() & 0xff); \
+  } \
   MaskCpuFlags(kCpuInitialized); \
-  ##FMT_A##To##FMT_B(src_argb, kWidth * BPP_A, \
+  ##FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
                      dst_argb_c, kWidth * BPP_B, \
                      kWidth, kHeight); \
   MaskCpuFlags(-1); \
   const int runs = 1000; \
   for (int i = 0; i < runs; ++i) { \
-    ##FMT_A##To##FMT_B(src_argb, kWidth * BPP_A, \
+    ##FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
                        dst_argb_opt, kWidth * BPP_B, \
                        kWidth, kHeight); \
   } \
   int err = 0; \
-  for (int i = 0; i < kHeight; ++i) { \
-    for (int j = 0; j < kWidth * BPP_B; ++j) { \
-      int diff = static_cast<int>(dst_argb_c[i * kWidth * BPP_B + j]) - \
-                 static_cast<int>(dst_argb_opt[i * kWidth * BPP_B + j]); \
-      if (abs(diff) > 2) \
-        err++; \
-    } \
+  for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \
+    int diff = static_cast<int>(dst_argb_c[i]) - \
+               static_cast<int>(dst_argb_opt[i]); \
+    if (abs(diff) > 2) \
+      err++; \
   } \
   EXPECT_EQ(err, 0); \
   free_aligned_buffer_16(src_argb) \
@@ -208,25 +258,26 @@ TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \
   free_aligned_buffer_16(dst_argb_opt) \
 }
-TESTATOB(ARGB, 4, ARGB, 4)
-TESTATOB(ARGB, 4, BGRA, 4)
-TESTATOB(ARGB, 4, ABGR, 4)
-TESTATOB(ARGB, 4, RAW, 3)
-TESTATOB(ARGB, 4, RGB24, 3)
-TESTATOB(ARGB, 4, RGB565, 2)
-TESTATOB(ARGB, 4, ARGB1555, 2)
-TESTATOB(ARGB, 4, ARGB4444, 2)
-TESTATOB(BGRA, 4, ARGB, 4)
-TESTATOB(ABGR, 4, ARGB, 4)
-TESTATOB(RAW, 3, ARGB, 4)
-TESTATOB(RGB24, 3, ARGB, 4)
-TESTATOB(RGB565, 2, ARGB, 4)
-TESTATOB(ARGB1555, 2, ARGB, 4)
-TESTATOB(ARGB4444, 2, ARGB, 4)
-TESTATOB(YUY2, 2, ARGB, 4)
-TESTATOB(UYVY, 2, ARGB, 4)
+TESTATOB(ARGB, 4, 4, ARGB, 4)
+TESTATOB(ARGB, 4, 4, BGRA, 4)
+TESTATOB(ARGB, 4, 4, ABGR, 4)
+TESTATOB(ARGB, 4, 4, RAW, 3)
+TESTATOB(ARGB, 4, 4, RGB24, 3)
+TESTATOB(ARGB, 4, 4, RGB565, 2)
+TESTATOB(ARGB, 4, 4, ARGB1555, 2)
+TESTATOB(ARGB, 4, 4, ARGB4444, 2)
+TESTATOB(BGRA, 4, 4, ARGB, 4)
+TESTATOB(ABGR, 4, 4, ARGB, 4)
+TESTATOB(RAW, 3, 3, ARGB, 4)
+TESTATOB(RGB24, 3, 3, ARGB, 4)
+TESTATOB(RGB565, 2, 2, ARGB, 4)
+TESTATOB(ARGB1555, 2, 2, ARGB, 4)
+TESTATOB(ARGB4444, 2, 2, ARGB, 4)
+TESTATOB(YUY2, 2, 2, ARGB, 4)
+TESTATOB(UYVY, 2, 2, ARGB, 4)
+TESTATOB(M420, 3 / 2, 1, ARGB, 4)
 TEST_F(libyuvTest, TestAttenuate) {
   SIMD_ALIGNED(uint8 orig_pixels[256][4]);
...