Commit 081475b3 authored by Frank Barchard

refactor ARGBToI422 using ARGBToI420 internally

R=harryjin@google.com
BUG=libyuv:546

Review URL: https://codereview.chromium.org/1574253004 .
parent 54bbea17
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1564 Version: 1565
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -93,7 +93,6 @@ extern "C" { ...@@ -93,7 +93,6 @@ extern "C" {
#define HAS_ARGBTORGB24ROW_SSSE3 #define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565DITHERROW_SSE2
#define HAS_ARGBTORGB565ROW_SSE2 #define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3
...@@ -280,7 +279,6 @@ extern "C" { ...@@ -280,7 +279,6 @@ extern "C" {
#define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON
#define HAS_ARGBTORGB565ROW_NEON #define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON #define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOUVROW_NEON
...@@ -648,8 +646,6 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width); ...@@ -648,8 +646,6 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width); void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width); int width);
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width);
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width); int width);
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
...@@ -736,8 +732,6 @@ void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, ...@@ -736,8 +732,6 @@ void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width); int width);
void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width);
void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width); int width);
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
...@@ -788,19 +782,10 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, ...@@ -788,19 +782,10 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb,
void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_C(const uint8* src_argb, void ARGBToUV444Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV411Row_C(const uint8* src_argb, void ARGBToUV411Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVJ422Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1564 #define LIBYUV_VERSION 1565
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
This diff is collapsed.
...@@ -715,16 +715,12 @@ ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) ...@@ -715,16 +715,12 @@ ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31) ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31) ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif #endif
#ifdef HAS_ARGBTOUV422ROW_SSSE3
ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2 #ifdef HAS_YUY2TOUV422ROW_SSE2
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15) ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif #endif
#ifdef HAS_YUY2TOUV422ROW_NEON #ifdef HAS_YUY2TOUV422ROW_NEON
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15)
ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31) ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
......
...@@ -433,28 +433,6 @@ void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ ...@@ -433,28 +433,6 @@ void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
MAKEROWYJ(ARGB, 2, 1, 0, 4) MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ #undef MAKEROWYJ
// Produces one row of horizontally 2:1 subsampled chroma using the "J"
// converters (RGBToUJ / RGBToVJ).
//
// src_argb: row of 4-byte pixels; bytes 0, 1 and 2 of each pixel are read and
//           byte 3 is ignored.
// dst_u/dst_v: receive one byte each per pair of source pixels, plus one
//           extra byte each when width is odd.
// width:    number of source pixels.
void ARGBToUVJ422Row_C(const uint8* src_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* px = src_argb;
  int remaining = width;
  // Full pairs: average the two pixels channel by channel. The shift
  // truncates, so the average rounds down.
  while (remaining > 1) {
    const uint8 c0 = (px[0] + px[4]) >> 1;
    const uint8 c1 = (px[1] + px[5]) >> 1;
    const uint8 c2 = (px[2] + px[6]) >> 1;
    *dst_u++ = RGBToUJ(c2, c1, c0);
    *dst_v++ = RGBToVJ(c2, c1, c0);
    px += 8;
    remaining -= 2;
  }
  // Odd width: the trailing pixel has no partner and is converted as is.
  if (width & 1) {
    const uint8 c0 = px[0];
    const uint8 c1 = px[1];
    const uint8 c2 = px[2];
    *dst_u = RGBToUJ(c2, c1, c0);
    *dst_v = RGBToVJ(c2, c1, c0);
  }
}
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
int x; int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
...@@ -658,28 +636,6 @@ void ARGBToUV444Row_C(const uint8* src_argb, ...@@ -658,28 +636,6 @@ void ARGBToUV444Row_C(const uint8* src_argb,
} }
} }
// Produces one row of horizontally 2:1 subsampled chroma using RGBToU /
// RGBToV.
//
// src_argb: row of 4-byte pixels; bytes 0, 1 and 2 of each pixel are read and
//           byte 3 is ignored.
// dst_u/dst_v: receive one byte each per pair of source pixels, plus one
//           extra byte each when width is odd.
// width:    number of source pixels.
void ARGBToUV422Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  const uint8* px = src_argb;
  int remaining = width;
  // Full pairs: average the two pixels channel by channel. The shift
  // truncates, so the average rounds down.
  while (remaining > 1) {
    const uint8 c0 = (px[0] + px[4]) >> 1;
    const uint8 c1 = (px[1] + px[5]) >> 1;
    const uint8 c2 = (px[2] + px[6]) >> 1;
    *dst_u++ = RGBToU(c2, c1, c0);
    *dst_v++ = RGBToV(c2, c1, c0);
    px += 8;
    remaining -= 2;
  }
  // Odd width: the trailing pixel has no partner and is converted as is.
  if (width & 1) {
    const uint8 c0 = px[0];
    const uint8 c1 = px[1];
    const uint8 c2 = px[2];
    *dst_u = RGBToU(c2, c1, c0);
    *dst_v = RGBToV(c2, c1, c0);
  }
}
void ARGBToUV411Row_C(const uint8* src_argb, void ARGBToUV411Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width) { uint8* dst_u, uint8* dst_v, int width) {
int x; int x;
......
...@@ -1144,59 +1144,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, ...@@ -1144,59 +1144,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
} }
#endif // HAS_ARGBTOUV444ROW_SSSE3 #endif // HAS_ARGBTOUV444ROW_SSSE3
#ifdef HAS_ARGBTOUV422ROW_SSSE3
// Converts one row of ARGB pixels to horizontally 2:1 subsampled U and V.
//
// Each loop iteration reads 64 source bytes (16 pixels) and writes 8 bytes to
// dst_u and 8 bytes to dst_v. The loop body executes before width is tested
// and width is decremented by 16 per pass, so the caller is expected to pass a
// positive multiple of 16.
//
// Register roles:
//   xmm3 = kARGBToV, xmm4 = kARGBToU, xmm5 = kAddUV128 (loaded once)
//   xmm0/xmm2 = pair-averaged pixels -> U, xmm1/xmm6 = copies -> V
//   xmm7 = scratch for the odd-pixel shuffle
//   %2 is turned into (dst_v - dst_u) so only %1 is advanced in the loop.
//
// NOTE(review): movdqa with an "m" operand assumes the three constants are
// 16-byte aligned - confirm at their definitions.
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
                          uint8* dst_u, uint8* dst_v, int width) {
  asm volatile (
    "movdqa %4,%%xmm3 \n"
    "movdqa %5,%%xmm4 \n"
    "movdqa %6,%%xmm5 \n"
    "sub %1,%2 \n"  // %2 = dst_v - dst_u
    LABELALIGN
  "1: \n"
    "movdqu " MEMACCESS(0) ",%%xmm0 \n"
    "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
    "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
    "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
    "lea " MEMLEA(0x40,0) ",%0 \n"
    // Gather even pixels (0x88) and odd pixels (0xdd), then byte-average them:
    // 16 pixels -> 8 averaged pixels in xmm0 (first 4) and xmm2 (last 4).
    "movdqa %%xmm0,%%xmm7 \n"
    "shufps $0x88,%%xmm1,%%xmm0 \n"
    "shufps $0xdd,%%xmm1,%%xmm7 \n"
    "pavgb %%xmm7,%%xmm0 \n"
    "movdqa %%xmm2,%%xmm7 \n"
    "shufps $0x88,%%xmm6,%%xmm2 \n"
    "shufps $0xdd,%%xmm6,%%xmm7 \n"
    "pavgb %%xmm7,%%xmm2 \n"
    // Apply the U coefficients (xmm4) to one copy and V (xmm3) to the other.
    "movdqa %%xmm0,%%xmm1 \n"
    "movdqa %%xmm2,%%xmm6 \n"
    "pmaddubsw %%xmm4,%%xmm0 \n"
    "pmaddubsw %%xmm4,%%xmm2 \n"
    "pmaddubsw %%xmm3,%%xmm1 \n"
    "pmaddubsw %%xmm3,%%xmm6 \n"
    "phaddw %%xmm2,%%xmm0 \n"
    "phaddw %%xmm6,%%xmm1 \n"
    "psraw $0x8,%%xmm0 \n"
    "psraw $0x8,%%xmm1 \n"
    // Low 8 bytes = U, high 8 bytes = V; adding xmm5 biases to unsigned.
    "packsswb %%xmm1,%%xmm0 \n"
    "paddb %%xmm5,%%xmm0 \n"
    "movlps %%xmm0," MEMACCESS(1) " \n"
    MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
    "lea " MEMLEA(0x8,1) ",%1 \n"
    "sub $0x10,%3 \n"
    "jg 1b \n"
  : "+r"(src_argb0), // %0
    "+r"(dst_u), // %1
    "+r"(dst_v), // %2
    "+rm"(width) // %3
  : "m"(kARGBToV), // %4
    "m"(kARGBToU), // %5
    "m"(kAddUV128) // %6
  // xmm3, xmm4 and xmm5 are written by the three movdqa loads above, so they
  // must be declared as clobbered along with the working registers.
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}
#endif // HAS_ARGBTOUV422ROW_SSSE3
void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) { void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"movdqa %4,%%xmm5 \n" "movdqa %4,%%xmm5 \n"
......
...@@ -1374,55 +1374,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, ...@@ -1374,55 +1374,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
); );
} }
// 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16.
// ARM (32-bit) NEON version: each loop pass reads 16 ARGB pixels (two vld4.8
// loads of 8 pixels) and stores 8 U bytes and 8 V bytes. The loop body runs
// before the counter is tested and the counter drops by 16 per pass, so the
// caller is expected to pass a positive multiple of 16.
// The coefficients are written as "N / 2" because they are applied to the
// pairwise sums produced by vpaddl.u8 (two pixels added together) rather than
// to an average.
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
"1: \n"
// The two de-interleaving loads fill the low and high halves of q0..q3, so
// each q register ends up holding one channel for all 16 pixels.
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
MEMACCESS(0)
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
// subs sets the flags consumed by the final bgt; nothing in between
// modifies them.
"subs %3, %3, #16 \n" // 16 processed per loop.
// q8 accumulates U: B * q10 - G * q11 - R * q12, then + q15.
"vmul.s16 q8, q0, q10 \n" // B
"vmls.s16 q8, q1, q11 \n" // G
"vmls.s16 q8, q2, q12 \n" // R
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
// q9 accumulates V: R * q10 - G * q14 - B * q13, then + q15.
"vmul.s16 q9, q2, q10 \n" // R
"vmls.s16 q9, q1, q14 \n" // G
"vmls.s16 q9, q0, q13 \n" // B
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
MEMACCESS(1)
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
MEMACCESS(2)
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3
:
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width) { int width) {
......
...@@ -1477,50 +1477,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, ...@@ -1477,50 +1477,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
// 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16.
// AArch64 NEON version: each loop pass reads 16 ARGB pixels with one ld4 and
// stores 8 U bytes and 8 V bytes. The loop body runs before the counter is
// tested and the counter drops by 16 per pass, so the caller is expected to
// pass a positive multiple of 16.
// The coefficient registers v20..v24 and the bias v25 used below are expected
// to be initialised by RGBTOUV_SETUP_REG; only the tail of that macro is
// visible in this excerpt - confirm against its full definition.
#ifdef HAS_ARGBTOUV422ROW_NEON
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
// Pairwise widening adds: each 16-bit lane is the sum of two adjacent pixels
// of one channel.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
// subs sets the flags consumed by the final b.gt; nothing in between
// modifies them.
"subs %w3, %w3, #16 \n" // 16 processed per loop.
// v3 (the loaded fourth channel is overwritten here) accumulates U:
// B * v20 - G * v21 - R * v22, then + v25.
"mul v3.8h, v0.8h, v20.8h \n" // B
"mls v3.8h, v1.8h, v21.8h \n" // G
"mls v3.8h, v2.8h, v22.8h \n" // R
"add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
// v4 accumulates V: R * v20 - G * v24 - B * v23, then + v25.
"mul v4.8h, v2.8h, v20.8h \n" // R
"mls v4.8h, v1.8h, v24.8h \n" // G
"mls v4.8h, v0.8h, v23.8h \n" // B
"add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned
"uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U
"uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
MEMACCESS(2)
"st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3
:
// v5..v7 are listed as clobbered although the instructions above name only
// v0..v4 and v20..v25.
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_ARGBTOUV422ROW_NEON
// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
#ifdef HAS_ARGBTOUV411ROW_NEON #ifdef HAS_ARGBTOUV411ROW_NEON
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
......
...@@ -1647,64 +1647,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0, ...@@ -1647,64 +1647,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
} }
} }
// MSVC 32-bit inline-assembly version: converts one row of ARGB pixels to
// horizontally 2:1 subsampled U and V.
// The function is naked, so it reads its four arguments directly from the
// stack (offsets include the 4 bytes used by the saved edi) and issues its
// own ret.
// Each loop pass reads 64 source bytes (16 pixels) and stores 8 U bytes and
// 8 V bytes. The loop body runs before ecx is tested and ecx drops by 16 per
// pass, so the caller is expected to pass a positive multiple of 16.
// Register roles: eax = source, edx = dst_u, edi = dst_v - dst_u,
// ecx = remaining width, xmm5 = kAddUV128, xmm6 = kARGBToV, xmm7 = kARGBToU.
__declspec(naked)
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_argb
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // width
movdqa xmm5, xmmword ptr kAddUV128
movdqa xmm6, xmmword ptr kARGBToV
movdqa xmm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
convertloop:
/* step 1 - subsample 16x1 argb pixels to 8x1 */
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
movdqu xmm2, [eax + 32]
movdqu xmm3, [eax + 48]
lea eax, [eax + 64]
// shufps 0x88 gathers the even pixels and 0xdd the odd pixels of each
// 8-pixel group; pavgb then averages each even/odd pair byte by byte.
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88
shufps xmm4, xmm1, 0xdd
pavgb xmm0, xmm4
movdqa xmm4, xmm2
shufps xmm2, xmm3, 0x88
shufps xmm4, xmm3, 0xdd
pavgb xmm2, xmm4
// step 2 - convert to U and V
// from here down is very similar to Y code except
// instead of 16 different pixels, it's 8 pixels of U and 8 of V
movdqa xmm1, xmm0
movdqa xmm3, xmm2
pmaddubsw xmm0, xmm7 // U
pmaddubsw xmm2, xmm7
pmaddubsw xmm1, xmm6 // V
pmaddubsw xmm3, xmm6
phaddw xmm0, xmm2
phaddw xmm1, xmm3
psraw xmm0, 8
psraw xmm1, 8
// after the pack, the low 8 bytes of xmm0 hold U and the high 8 bytes hold V
packsswb xmm0, xmm1
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
sub ecx, 16
jg convertloop
pop edi
ret
}
}
__declspec(naked) __declspec(naked)
void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) { uint8* dst_u, uint8* dst_v, int width) {
......
...@@ -766,8 +766,10 @@ TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) ...@@ -766,8 +766,10 @@ TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
// arm version subsamples by summing 4 pixels then multiplying by matrix with // arm version subsamples by summing 4 pixels then multiplying by matrix with
// 4x smaller coefficients which are rounded to nearest integer. // 4x smaller coefficients which are rounded to nearest integer.
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4) TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 4)
#else #else
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 0)
#endif #endif
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment