Commit 446fa955 authored by fbarchard@google.com

I422ToRGB565, ARGB4444 and ARGB1555 for AVX2

BUG=403
TESTED=avx2 emulator

Review URL: https://webrtc-codereview.appspot.com/34359004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1293 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent e2f1a754
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1292
Version: 1293
License: BSD
License File: LICENSE
......
......@@ -84,7 +84,6 @@ extern "C" {
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
......@@ -220,6 +219,11 @@ extern "C" {
// Row functions that are enabled whenever I422ToARGBRow_AVX2 is available.
// NOTE(review): the RGB565 / ARGB1555 / ARGB4444 wrappers in this group also
// call ARGBTo*Row_AVX2, and the NV12/NV21 ones call NV12/NV21ToARGBRow_AVX2;
// confirm those rows are available under the same condition.
#if defined(HAS_I422TOARGBROW_AVX2)
#define HAS_YUY2TOARGBROW_AVX2
#define HAS_UYVYTOARGBROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_NV21TORGB565ROW_AVX2
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#endif
// Effects:
......@@ -1047,6 +1051,14 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
int width);
// AVX2 versions of the NV12 / NV21 to RGB565 row converters.
// Despite the parameter name dst_argb, the definitions write RGB565 output,
// advancing the destination by 2 bytes per pixel. width is in pixels.
void NV12ToRGB565Row_AVX2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
int width);
void NV21ToRGB565Row_AVX2(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
int width);
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
int width);
......@@ -1084,16 +1096,31 @@ void I422ToARGB4444Row_SSSE3(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
// AVX2 version of the I422 to ARGB4444 row converter.
// Despite the parameter name dst_argb, the definition writes ARGB4444 output,
// advancing the destination by 2 bytes per pixel. width is in pixels.
void I422ToARGB4444Row_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
// AVX2 version of the I422 to ARGB1555 row converter.
// Despite the parameter name dst_argb, the definition writes ARGB1555 output,
// advancing the destination by 2 bytes per pixel. width is in pixels.
void I422ToARGB1555Row_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
// AVX2 version of the I422 to RGB565 row converter.
// Despite the parameter name dst_argb, the definition writes RGB565 output,
// advancing the destination by 2 bytes per pixel. width is in pixels.
void I422ToRGB565Row_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToRGB24Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1155,6 +1182,14 @@ void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
int width);
// "Any" variants of the AVX2 NV12 / NV21 to RGB565 rows. The converters
// select these when AVX2 is present and switch to the plain _AVX2 rows only
// when width is a multiple of 16.
// NOTE(review): dst_argb presumably receives RGB565 like the non-Any rows;
// confirm against the wrapper macro.
void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
int width);
void NV21ToRGB565Row_Any_AVX2(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
int width);
void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
int width);
......@@ -1187,16 +1222,31 @@ void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
const uint8* src_v,
uint8* dst_rgba,
int width);
// "Any" variant of the AVX2 I422 to ARGB4444 row. The converter selects it
// when AVX2 is present and switches to I422ToARGB4444Row_AVX2 only when width
// is a multiple of 16.
// NOTE(review): dst_rgba presumably receives ARGB4444 output; confirm.
void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
int width);
void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
int width);
// "Any" variant of the AVX2 I422 to ARGB1555 row. The converter selects it
// when AVX2 is present and switches to I422ToARGB1555Row_AVX2 only when width
// is a multiple of 16.
// NOTE(review): dst_rgba presumably receives ARGB1555 output; confirm.
void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
int width);
void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
int width);
// "Any" variant of the AVX2 I422 to RGB565 row. The converter selects it when
// AVX2 is present and switches to I422ToRGB565Row_AVX2 only when width is a
// multiple of 16.
// NOTE(review): dst_rgba presumably receives RGB565 output; confirm.
void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
int width);
void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1292
#define LIBYUV_VERSION 1293
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -843,6 +843,14 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGB1555Row = I422ToARGB1555Row_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
......@@ -896,6 +904,14 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGB4444Row = I422ToARGB4444Row_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
......@@ -948,6 +964,14 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_I422TORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToRGB565Row = I422ToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToRGB565Row = I422ToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_I422TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
......
......@@ -1020,6 +1020,14 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_NV12TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
......@@ -1069,6 +1077,14 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_NV21TORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
NV21ToRGB565Row = NV21ToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_NV21TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
......
......@@ -73,6 +73,14 @@ YANY(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, I422ToRGBARow_C, 1, 4, 15)
#ifdef HAS_I422TOABGRROW_AVX2
YANY(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, I422ToABGRRow_C, 1, 4, 15)
#endif // HAS_I422TOABGRROW_AVX2
// Any-width wrappers for the two-step AVX2 I422 -> 16-bit RGB rows.
//
// The last YANY argument is 15 rather than 7 so that it agrees with the
// IS_ALIGNED(width, 16) test the converters use before selecting the plain
// _AVX2 rows, and with the value used by the other AVX2 wrappers in this
// file. NOTE(review): this assumes the last argument is the remainder mask
// (pixels left for the C row); confirm against the YANY definition.
//
// Each wrapper is gated on its own HAS_ macro, matching the per-format
// checks in the converters, so disabling one format cannot leave a
// dangling reference to another format's wrapper.
#ifdef HAS_I422TOARGB4444ROW_AVX2
YANY(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, I422ToARGB4444Row_C,
     1, 2, 15)
#endif  // HAS_I422TOARGB4444ROW_AVX2
#ifdef HAS_I422TOARGB1555ROW_AVX2
YANY(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, I422ToARGB1555Row_C,
     1, 2, 15)
#endif  // HAS_I422TOARGB1555ROW_AVX2
#ifdef HAS_I422TORGB565ROW_AVX2
YANY(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, I422ToRGB565Row_C,
     1, 2, 15)
#endif  // HAS_I422TORGB565ROW_AVX2
#ifdef HAS_I422TOARGBROW_NEON
YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
......@@ -123,6 +131,12 @@ NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
0, 2, 7)
#endif // HAS_NV12TORGB565ROW_SSSE3
// Any-width wrappers for the AVX2 NV12 / NV21 -> RGB565 rows, each paired
// with its C row. Both are gated on HAS_NV12TORGB565ROW_AVX2.
// NOTE(review): the trailing 15 lines up with the IS_ALIGNED(width, 16) test
// in the converters and is presumably the remainder mask; confirm against the
// NV2NY definition.
#ifdef HAS_NV12TORGB565ROW_AVX2
NV2NY(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, NV12ToRGB565Row_C,
0, 2, 15)
NV2NY(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, NV21ToRGB565Row_C,
0, 2, 15)
#endif // HAS_NV12TORGB565ROW_AVX2
#ifdef HAS_NV12TORGB565ROW_NEON
NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C,
0, 2, 7)
......
......@@ -2125,9 +2125,8 @@ void I422ToUYVYRow_C(const uint8* src_y,
// Maximum temporary width for wrappers to process at a time, in pixels.
#define MAXTWIDTH 2048
#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
#if !defined(_MSC_VER) && defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
void I422ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -2145,9 +2144,9 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y,
width -= twidth;
}
}
#endif // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
#endif
#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -2166,7 +2165,9 @@ void I422ToARGB1555Row_SSSE3(const uint8* src_y,
width -= twidth;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -2185,7 +2186,9 @@ void I422ToARGB4444Row_SSSE3(const uint8* src_y,
width -= twidth;
}
}
#endif
#if defined(HAS_NV12TORGB565ROW_SSSE3)
void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv,
uint8* dst_rgb565, int width) {
// Row buffer for intermediate ARGB pixels.
......@@ -2200,7 +2203,9 @@ void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv,
width -= twidth;
}
}
#endif
#if defined(HAS_NV21TORGB565ROW_SSSE3)
void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu,
uint8* dst_rgb565, int width) {
// Row buffer for intermediate ARGB pixels.
......@@ -2215,7 +2220,9 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu,
width -= twidth;
}
}
#endif
#if defined(HAS_YUY2TOARGBROW_SSSE3)
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) {
// Row buffers for intermediate YUV pixels.
SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
......@@ -2231,7 +2238,9 @@ void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) {
width -= twidth;
}
}
#endif
#if defined(HAS_UYVYTOARGBROW_SSSE3)
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) {
// Row buffers for intermediate YUV pixels.
SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
......@@ -2247,15 +2256,111 @@ void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) {
width -= twidth;
}
}
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
#endif // !defined(LIBYUV_DISABLE_X86)
#if defined(HAS_I422TORGB565ROW_AVX2) && !defined(_MSC_VER)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
// Converts one row of I422 to RGB565 in two passes: I422 -> ARGB into a
// stack buffer, then ARGB -> RGB565 into dst_rgb565. width is in pixels.
void I422ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
                          int width) {
  // Scratch space for up to MAXTWIDTH intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  // Walk the row in pieces no wider than the scratch buffer.
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
    src_y += chunk;
    // U and V advance half as fast as Y.
    src_u += chunk / 2;
    src_v += chunk / 2;
    // 2 output bytes per pixel.
    dst_rgb565 += chunk * 2;
  }
}
#endif
#if defined(HAS_I422TOARGB1555ROW_AVX2)
// Converts one row of I422 to ARGB1555 in two passes: I422 -> ARGB into a
// stack buffer, then ARGB -> ARGB1555 into dst_argb1555. width is in pixels.
void I422ToARGB1555Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
                            int width) {
  // Scratch space for up to MAXTWIDTH intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  // Walk the row in pieces no wider than the scratch buffer.
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, chunk);
    src_y += chunk;
    // U and V advance half as fast as Y.
    src_u += chunk / 2;
    src_v += chunk / 2;
    // 2 output bytes per pixel.
    dst_argb1555 += chunk * 2;
  }
}
#endif
#if defined(HAS_I422TOARGB4444ROW_AVX2)
// Converts one row of I422 to ARGB4444 in two passes: I422 -> ARGB into a
// stack buffer, then ARGB -> ARGB4444 into dst_argb4444. width is in pixels.
void I422ToARGB4444Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
                            int width) {
  // Scratch space for up to MAXTWIDTH intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  // Walk the row in pieces no wider than the scratch buffer.
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, chunk);
    src_y += chunk;
    // U and V advance half as fast as Y.
    src_u += chunk / 2;
    src_v += chunk / 2;
    // 2 output bytes per pixel.
    dst_argb4444 += chunk * 2;
  }
}
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
// Converts one row of NV12 to RGB565 in two passes: NV12 -> ARGB into a
// stack buffer, then ARGB -> RGB565 into dst_rgb565. width is in pixels.
void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv,
                          uint8* dst_rgb565, int width) {
  // Scratch space for up to MAXTWIDTH intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  // Walk the row in pieces no wider than the scratch buffer.
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, chunk);
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
    // src_uv advances at the same byte rate as src_y.
    src_y += chunk;
    src_uv += chunk;
    // 2 output bytes per pixel.
    dst_rgb565 += chunk * 2;
  }
}
#endif
#if defined(HAS_NV21TORGB565ROW_AVX2)
// Converts one row of NV21 to RGB565 in two passes: NV21 -> ARGB into a
// stack buffer, then ARGB -> RGB565 into dst_rgb565. width is in pixels.
void NV21ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_vu,
                          uint8* dst_rgb565, int width) {
  // Scratch space for up to MAXTWIDTH intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  // Walk the row in pieces no wider than the scratch buffer.
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, chunk);
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
    // src_vu advances at the same byte rate as src_y.
    src_y += chunk;
    src_vu += chunk;
    // 2 output bytes per pixel.
    dst_rgb565 += chunk * 2;
  }
}
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) {
// Row buffers for intermediate YUV conversion.
SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
// Row buffers for intermediate YUV pixels.
SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth);
......@@ -2266,12 +2371,14 @@ void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) {
width -= twidth;
}
}
#endif
#if defined(HAS_UYVYTOARGBROW_AVX2)
void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) {
// Row buffers for intermediate YUV conversion.
SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
// Row buffers for intermediate YUV pixels.
SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth);
......@@ -2282,7 +2389,7 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) {
width -= twidth;
}
}
#endif
#endif // !defined(LIBYUV_DISABLE_X86)
void ARGBPolynomialRow_C(const uint8* src_argb,
uint8* dst_argb, const float* poly,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment