Commit d82f4baf authored by Frank Barchard, committed by Frank Barchard

Upstream minor changes: faster tests, faster YUV Rotate180 and Mirror

Bug: libyuv:840, libyuv:849, b/144318948
Change-Id: I303c02ac2b838a09d3e623df7a69ffc085fe3cd2
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1914781
Reviewed-by: Miguel Casas <mcasas@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
parent 6afd9bec
@@ -73,7 +73,7 @@ group("libyuv") {
     deps += [ ":libyuv_mmi" ]
   }
 
-  if (!is_ios) {
+  if (!is_ios && !libyuv_disable_jpeg) {
     # Make sure that clients of libyuv link with libjpeg. This can't go in
     # libyuv_internal because in Windows x64 builds that will generate a clang
     # build of libjpeg, and we don't want two copies.
@@ -150,7 +150,7 @@ static_library("libyuv_internal") {
     configs += [ "//build/config/gcc:symbol_visibility_default" ]
   }
 
-  if (!is_ios) {
+  if (!is_ios && !libyuv_disable_jpeg) {
     defines += [ "HAVE_JPEG" ]
 
     # Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps
...
@@ -734,7 +734,7 @@ void MirrorPlane(const uint8_t* src_y,
 #if defined(HAS_MIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     MirrorRow = MirrorRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
+    if (IS_ALIGNED(width, 32)) {
      MirrorRow = MirrorRow_NEON;
    }
  }
...
@@ -142,7 +142,7 @@ void RotatePlane180(const uint8_t* src,
 #if defined(HAS_MIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     MirrorRow = MirrorRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
+    if (IS_ALIGNED(width, 32)) {
      MirrorRow = MirrorRow_NEON;
    }
  }
@@ -207,11 +207,11 @@ void RotatePlane180(const uint8_t* src,
   // Odd height will harmlessly mirror the middle row twice.
   for (y = 0; y < half_height; ++y) {
-    MirrorRow(src, row, width);      // Mirror first row into a buffer
-    src += src_stride;
+    CopyRow(src, row, width);        // Copy first row into buffer
     MirrorRow(src_bot, dst, width);  // Mirror last row into first row
+    MirrorRow(row, dst_bot, width);  // Mirror buffer into last row
+    src += src_stride;
     dst += dst_stride;
-    CopyRow(row, dst_bot, width);    // Copy first mirrored row into last
     src_bot -= src_stride;
     dst_bot -= dst_stride;
   }
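For reference, the rewritten loop saves the top row into the scratch buffer, mirrors the bottom row into the top position, then mirrors the saved row into the bottom position, walking both ends toward the middle. A minimal scalar sketch of that logic follows; it is an illustration only, not libyuv's implementation, which dispatches to the SIMD row functions selected above.

#include <stdint.h>
#include <string.h>

// Scalar stand-in for the MirrorRow helper named in the hunk above.
static void MirrorRowC(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) {
    dst[x] = src[width - 1 - x];  // reverse the row left-to-right
  }
}

// Rotate one plane by 180 degrees into a separate destination. 'row' is a
// caller-provided scratch buffer of at least 'width' bytes; an odd height
// harmlessly processes the middle row twice, as the comment above notes.
static void RotatePlane180Sketch(const uint8_t* src, int src_stride,
                                 uint8_t* dst, int dst_stride,
                                 int width, int height, uint8_t* row) {
  const uint8_t* src_bot = src + (int64_t)src_stride * (height - 1);
  uint8_t* dst_bot = dst + (int64_t)dst_stride * (height - 1);
  int half_height = (height + 1) >> 1;
  for (int y = 0; y < half_height; ++y) {
    memcpy(row, src, width);          // CopyRow: save the top row
    MirrorRowC(src_bot, dst, width);  // mirror bottom row into top
    MirrorRowC(row, dst_bot, width);  // mirror saved row into bottom
    src += src_stride;
    dst += dst_stride;
    src_bot -= src_stride;
    dst_bot -= dst_stride;
  }
}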
...
@@ -1156,7 +1156,7 @@ ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
 ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
 #endif
 #ifdef HAS_MIRRORROW_NEON
-ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
+ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
 #endif
 #ifdef HAS_MIRRORROW_MSA
 ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
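The last ANY11M argument is the mask of leftover pixels the Any wrapper has to mop up; it rises from 15 to 31 here because the NEON kernel now consumes 32 pixels per iteration, matching the IS_ALIGNED(width, 32) checks earlier in this change. The sketch below illustrates the general bulk-plus-remainder idea behind such wrappers. It is a hypothetical illustration with made-up function names, not the code the ANY11M macro actually expands to.

#include <stdint.h>

// Hypothetical dispatcher in the spirit of MirrorRow_Any_NEON: run the wide
// SIMD kernel on the largest multiple of 32 pixels and let a portable C row
// function finish the 1..31 leftover pixels. Because the output is mirrored,
// the leftmost source pixels land at the rightmost end of the destination.
typedef void (*MirrorRowFn)(const uint8_t* src, uint8_t* dst, int width);

static void MirrorRowAnySketch(const uint8_t* src, uint8_t* dst, int width,
                               MirrorRowFn simd32, MirrorRowFn scalar) {
  int remainder = width & 31;    // pixels the 32-wide kernel cannot cover
  int bulk = width - remainder;  // multiple of 32
  if (remainder) {
    scalar(src, dst + bulk, remainder);  // mirror the short head to the tail
  }
  if (bulk) {
    simd32(src + remainder, dst, bulk);  // mirror the aligned bulk
  }
}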
...
@@ -84,7 +84,7 @@ static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
                                 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
 static const uvec16 kSub128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u,
                                0x8080u, 0x8080u, 0x8080u, 0x8080u};
 #endif  // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
@@ -1101,10 +1101,8 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
       "lea 0x40(%0),%0 \n" \
       "phaddw %%xmm0,%%xmm6 \n" \
       "phaddw %%xmm2,%%xmm1 \n" \
-      "paddw %%" #round \
-      ",%%xmm6 \n" \
-      "paddw %%" #round \
-      ",%%xmm1 \n" \
+      "paddw %%" #round ",%%xmm6 \n" \
+      "paddw %%" #round ",%%xmm1 \n" \
       "psrlw $0x8,%%xmm6 \n" \
       "psrlw $0x8,%%xmm1 \n" \
       "packuswb %%xmm1,%%xmm6 \n" \
@@ -1113,35 +1111,33 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
       "sub $0x10,%2 \n" \
       "jg 1b \n"
 
 #define RGBTOY_AVX2(round) \
       "1: \n" \
       "vmovdqu (%0),%%ymm0 \n" \
       "vmovdqu 0x20(%0),%%ymm1 \n" \
       "vmovdqu 0x40(%0),%%ymm2 \n" \
       "vmovdqu 0x60(%0),%%ymm3 \n" \
       "vpsubb %%ymm5, %%ymm0, %%ymm0 \n" \
       "vpsubb %%ymm5, %%ymm1, %%ymm1 \n" \
       "vpsubb %%ymm5, %%ymm2, %%ymm2 \n" \
       "vpsubb %%ymm5, %%ymm3, %%ymm3 \n" \
       "vpmaddubsw %%ymm0,%%ymm4,%%ymm0 \n" \
       "vpmaddubsw %%ymm1,%%ymm4,%%ymm1 \n" \
       "vpmaddubsw %%ymm2,%%ymm4,%%ymm2 \n" \
       "vpmaddubsw %%ymm3,%%ymm4,%%ymm3 \n" \
       "lea 0x80(%0),%0 \n" \
       "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \
       "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \
-      "vpaddw %%" #round \
-      ",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
-      "vpaddw %%" #round \
-      ",%%ymm2,%%ymm2 \n" \
+      "vpaddw %%" #round ",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
+      "vpaddw %%" #round ",%%ymm2,%%ymm2 \n" \
       "vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
       "vpsrlw $0x8,%%ymm2,%%ymm2 \n" \
       "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \
       "vpermd %%ymm0,%%ymm6,%%ymm0 \n" /* unmutate. */ \
       "vmovdqu %%ymm0,(%1) \n" \
       "lea 0x20(%1),%1 \n" \
       "sub $0x20,%2 \n" \
       "jg 1b \n" \
       "vzeroupper \n"
 
 #ifdef HAS_ARGBTOYROW_SSSE3
...@@ -1152,15 +1148,15 @@ void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { ...@@ -1152,15 +1148,15 @@ void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
"movdqa %4,%%xmm5 \n" "movdqa %4,%%xmm5 \n"
"movdqa %5,%%xmm7 \n" "movdqa %5,%%xmm7 \n"
LABELALIGN RGBTOY(xmm7) LABELALIGN
RGBTOY(xmm7)
: "+r"(src_argb), // %0 : "+r"(src_argb), // %0
"+r"(dst_y), // %1 "+r"(dst_y), // %1
"+r"(width) // %2 "+r"(width) // %2
: "m"(kARGBToY), // %3 : "m"(kARGBToY), // %3
"m"(kSub128), // %4 "m"(kSub128), // %4
"m"(kAddY16) // %5 "m"(kAddY16) // %5
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
"xmm7");
} }
#endif // HAS_ARGBTOYROW_SSSE3 #endif // HAS_ARGBTOYROW_SSSE3
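In these kernels the register handed to RGBTOY / RGBTOY_AVX2 (xmm7/ymm7 loaded from kAddY16 here, xmm5/ymm5 loaded from kSub128 in the J variants below) is the constant the macro adds just before the final psrlw $0x8 and pack. As a rough scalar picture of that add-then-shift fixed-point step, here is the classic studio-range BT.601 luma formula; the weights and bias are an assumption for illustration and are not taken from this diff.

#include <stdint.h>

// Illustrative 8.8 fixed-point luma conversion: weighted sum, add a rounding
// constant, shift right by 8, then add the 16 bias of studio-range Y.
// The 66/129/25 weights are the well-known BT.601 values, used here only as
// an example of the pattern the SIMD macros implement.
static uint8_t RGBToYSketch(uint8_t r, uint8_t g, uint8_t b) {
  int y = 66 * r + 129 * g + 25 * b;  // 8-bit fixed-point weights
  y = (y + 128) >> 8;                 // add rounding constant, then shift
  return (uint8_t)(y + 16);
}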
@@ -1172,7 +1168,8 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
       "movdqa %3,%%xmm4 \n"
       "movdqa %4,%%xmm5 \n"
 
-      LABELALIGN RGBTOY(xmm5)
+      LABELALIGN
+      RGBTOY(xmm5)
       : "+r"(src_argb),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
@@ -1190,7 +1187,8 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
       "movdqa %3,%%xmm4 \n"
       "movdqa %4,%%xmm5 \n"
 
-      LABELALIGN RGBTOY(xmm5)
+      LABELALIGN
+      RGBTOY(xmm5)
       : "+r"(src_rgba),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
@@ -1212,7 +1210,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
       "vbroadcastf128 %5,%%ymm7 \n"
       "vmovdqu %6,%%ymm6 \n"
 
-      LABELALIGN RGBTOY_AVX2(ymm7)
+      LABELALIGN
+      RGBTOY_AVX2(ymm7)
       : "+r"(src_argb),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
@@ -1220,8 +1219,7 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
         "m"(kSub128),           // %4
         "m"(kAddY16),           // %5
         "m"(kPermdARGBToY_AVX)  // %6
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
-        "xmm7");
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
 }
 
 #endif  // HAS_ARGBTOYROW_AVX2
@@ -1234,7 +1232,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
       "vbroadcastf128 %5,%%ymm7 \n"
       "vmovdqu %6,%%ymm6 \n"
 
-      LABELALIGN RGBTOY_AVX2(ymm7)
+      LABELALIGN
+      RGBTOY_AVX2(ymm7)
       : "+r"(src_abgr),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
@@ -1242,8 +1241,7 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
         "m"(kSub128),           // %4
         "m"(kAddY16),           // %5
         "m"(kPermdARGBToY_AVX)  // %6
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
-        "xmm7");
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
 }
 
 #endif  // HAS_ABGRTOYROW_AVX2
@@ -1255,15 +1253,15 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
       "vbroadcastf128 %4,%%ymm5 \n"
       "vmovdqu %5,%%ymm6 \n"
 
-      LABELALIGN RGBTOY_AVX2(ymm5)
+      LABELALIGN
+      RGBTOY_AVX2(ymm5)
       : "+r"(src_argb),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
       : "m"(kARGBToYJ),         // %3
         "m"(kSub128),           // %4
         "m"(kPermdARGBToY_AVX)  // %5
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
-        "xmm7");
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
 }
 
 #endif  // HAS_ARGBTOYJROW_AVX2
@@ -1275,8 +1273,9 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
       "vbroadcastf128 %4,%%ymm5 \n"
       "vmovdqu %5,%%ymm6 \n"
 
-      LABELALIGN RGBTOY_AVX2(
-          ymm5) "vzeroupper \n"
+      LABELALIGN
+      RGBTOY_AVX2(ymm5)
+      "vzeroupper \n"
       : "+r"(src_rgba),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
@@ -1537,7 +1536,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
         "+r"(dst_v),   // %2
         "+rm"(width)   // %3
       : "r"((intptr_t)(src_stride_argb)),  // %4
         "m"(kSub128),           // %5
         "m"(kARGBToVJ),         // %6
         "m"(kARGBToUJ),         // %7
         "m"(kShufARGBToUV_AVX)  // %8
@@ -1607,7 +1606,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
       : "r"((intptr_t)(src_stride_argb)),  // %4
         "m"(kARGBToVJ),  // %5
         "m"(kARGBToUJ),  // %6
         "m"(kSub128)     // %7
       : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
 }
 
 #endif  // HAS_ARGBTOUVJROW_SSSE3
@@ -1676,15 +1675,15 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
       "movdqa %4,%%xmm5 \n"
       "movdqa %5,%%xmm7 \n"
 
-      LABELALIGN RGBTOY(xmm7)
+      LABELALIGN
+      RGBTOY(xmm7)
       : "+r"(src_bgra),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
       : "m"(kBGRAToY),   // %3
         "m"(kSub128),    // %4
         "m"(kAddY16)     // %5
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
-        "xmm7");
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
 }
 
 void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
@@ -1756,15 +1755,15 @@ void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
       "movdqa %4,%%xmm5 \n"
       "movdqa %5,%%xmm7 \n"
 
-      LABELALIGN RGBTOY(xmm7)
+      LABELALIGN
+      RGBTOY(xmm7)
       : "+r"(src_abgr),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
       : "m"(kABGRToY),   // %3
         "m"(kSub128),    // %4
         "m"(kAddY16)     // %5
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
-        "xmm7");
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
 }
 
 void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
@@ -1773,15 +1772,15 @@ void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
       "movdqa %4,%%xmm5 \n"
       "movdqa %5,%%xmm7 \n"
 
-      LABELALIGN RGBTOY(xmm7)
+      LABELALIGN
+      RGBTOY(xmm7)
       : "+r"(src_rgba),  // %0
         "+r"(dst_y),     // %1
         "+r"(width)      // %2
       : "m"(kRGBAToY),   // %3
         "m"(kSub128),    // %4
         "m"(kAddY16)     // %5
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
-        "xmm7");
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
 }
 
 void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
...
(Diff for one additional source file omitted: too large to display.)
@@ -682,22 +682,23 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
 void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
   asm volatile(
       // Start at end of source row.
-      "mov r3, #-16 \n"
       "add %0, %0, %2 \n"
-      "sub %0, #16 \n"
+      "sub %0, %0, #32 \n"  // 32 bytes per loop
 
       "1: \n"
-      "vld1.8 {q0}, [%0], r3 \n"  // src -= 16
-      "subs %2, #16 \n"  // 16 pixels per loop.
-      "vrev64.8 q0, q0 \n"
-      "vst1.8 {d1}, [%1]! \n"  // dst += 16
-      "vst1.8 {d0}, [%1]! \n"
+      "vld1.8 {q1, q2}, [%0], %3 \n"  // src -= 32
+      "subs %2, #32 \n"  // 32 pixels per loop.
+      "vrev64.8 q0, q2 \n"
+      "vrev64.8 q1, q1 \n"
+      "vswp d0, d1 \n"
+      "vswp d2, d3 \n"
+      "vst1.8 {q0, q1}, [%1]! \n"  // dst += 32
       "bgt 1b \n"
       : "+r"(src),   // %0
         "+r"(dst),   // %1
         "+r"(width)  // %2
-      :
-      : "cc", "memory", "r3", "q0");
+      : "r"(-32)  // %3
+      : "cc", "memory", "q0", "q1", "q2");
 }
 
 void MirrorUVRow_NEON(const uint8_t* src_uv,
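The new loop reads 32 bytes per iteration while walking the source backwards, byte-reverses each 16-byte half with vrev64.8 plus a vswp of its d registers, and stores the two halves in swapped order. The end result is simply a byte-reversed row; a minimal scalar reference of that operation (a sketch, not libyuv's MirrorRow_C):

#include <stdint.h>

// Reverse a row of bytes: the last source byte becomes the first output byte.
void MirrorRowSketch(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) {
    dst[x] = src[width - 1 - x];
  }
}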
...
@@ -723,23 +723,29 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
       : "cc", "memory", "v0");
 }
 
+// Shuffle table for reversing the bytes.
+static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
+                                     7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
+
 void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
   asm volatile(
       // Start at end of source row.
+      "ld1 {v3.16b}, [%4] \n"  // shuffler
       "add %0, %0, %w2, sxtw \n"
-      "sub %0, %0, #16 \n"
+      "sub %0, %0, #32 \n"
       "1: \n"
-      "ld1 {v0.16b}, [%0], %3 \n"  // src -= 16
-      "subs %w2, %w2, #16 \n"  // 16 pixels per loop.
-      "rev64 v0.16b, v0.16b \n"
-      "st1 {v0.D}[1], [%1], #8 \n"  // dst += 16
-      "st1 {v0.D}[0], [%1], #8 \n"
+      "ld1 {v1.16b,v2.16b}, [%0], %3 \n"  // src -= 32
+      "subs %w2, %w2, #32 \n"  // 32 pixels per loop.
+      "tbl v1.16b, {v1.16b}, v3.16b \n"
+      "tbl v0.16b, {v2.16b}, v3.16b \n"
+      "st1 {v0.16b, v1.16b}, [%1], #32 \n"  // store 32 pixels
       "b.gt 1b \n"
       : "+r"(src),   // %0
         "+r"(dst),   // %1
         "+r"(width)  // %2
-      : "r"((ptrdiff_t)-16)  // %3
-      : "cc", "memory", "v0");
+      : "r"((ptrdiff_t)-32),  // %3
+        "r"(&kShuffleMirror)  // %4
+      : "cc", "memory", "v0", "v1", "v2", "v3");
 }
 
 void MirrorUVRow_NEON(const uint8_t* src_uv,
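The AArch64 version reaches the same 32-byte reversal with a tbl byte shuffle: kShuffleMirror holds the indices 15..0, so each tbl pulls the 16 bytes of its source register out in reverse order, and the two results are stored swapped. A tiny C model of a single tbl lookup with that table (illustrative only; the function below is hypothetical and not part of libyuv):

#include <stdint.h>

// Model of "tbl vD.16b, {vS.16b}, vIdx.16b": dst[i] = src[idx[i]], with
// out-of-range indices producing 0. With idx = {15, 14, ..., 1, 0} this
// reverses the 16 source bytes.
static void Tbl16Sketch(const uint8_t src[16], const uint8_t idx[16],
                        uint8_t dst[16]) {
  for (int i = 0; i < 16; ++i) {
    dst[i] = (idx[i] < 16) ? src[idx[i]] : 0;
  }
}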
...
@@ -40,9 +40,9 @@
 #endif
 
 // Some functions fail on big endian. Enable these tests on all cpus except
-// PowerPC
-#if !defined(__powerpc__)
-#define LITTLE_ENDIAN_TEST 1
+// PowerPC, but they are not optimized so disabled by default.
+#if !defined(__powerpc__) && defined(ENABLE_SLOW_TESTS)
+#define INTEL_TEST 1
 #endif
 
 namespace libyuv {
@@ -691,7 +691,7 @@ TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1)
 TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1)
 TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
 TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
 TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
 TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
@@ -723,7 +723,7 @@ TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
 TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
 TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
 TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
 TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
 #endif
@@ -876,7 +876,7 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
 TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
 TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
 TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
 #endif
 TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
@@ -1012,7 +1012,7 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
 TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
 TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
 TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
 TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
 #endif
@@ -1022,7 +1022,7 @@ TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
 TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
 TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
 TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
 #endif
 TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
@@ -1200,20 +1200,20 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
 // TODO(fbarchard): make ARM version of C code that matches NEON.
 TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
 TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
 #endif
 TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
 #endif
 TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
 TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
 #endif
 TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
 #endif
 TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
@@ -1226,7 +1226,7 @@ TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 0)
 TESTATOB(RGBA, 4, 4, 1, J400, 1, 1, 1, 0)
 TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
 TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
 #endif
 TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
@@ -1245,7 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0)
 TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
 TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
 TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
 #endif
 TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
@@ -1348,7 +1348,7 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
   TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
                   HEIGHT_B, DIFF)
 
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
 #endif
@@ -2447,7 +2447,7 @@ TEST_F(LibYUVConvertTest, TestDither) {
   TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
                   YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
 
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
 #endif
 
 #define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
@@ -2591,7 +2591,7 @@ TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
 TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
 TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
 TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
 TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
 TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
@@ -2738,7 +2738,7 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
                  _Opt, +, 0, FMT_C, BPP_C)
 
 // Caveat: Destination needs to be 4 bytes
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
 TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
 TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
@@ -2929,7 +2929,7 @@ TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1, 2)
 TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 2)
 TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 2)
 TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 2)
-#ifdef LITTLE_ENDIAN_TEST
+#ifdef INTEL_TEST
 TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
 TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
 TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
...
@@ -306,7 +306,9 @@ TEST_SCALETO(ARGBScale, 320, 240)
 TEST_SCALETO(ARGBScale, 569, 480)
 TEST_SCALETO(ARGBScale, 640, 360)
 TEST_SCALETO(ARGBScale, 1280, 720)
+#ifdef ENABLE_SLOW_TESTS
 TEST_SCALETO(ARGBScale, 1920, 1080)
+#endif  // ENABLE_SLOW_TESTS
 
 #undef TEST_SCALETO1
 #undef TEST_SCALETO
...
@@ -500,7 +500,7 @@ static int I444TestFilter_16(int src_width,
 #define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
 #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
 
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
   TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
     int diff = I420TestFilter( \
         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
@@ -517,7 +517,7 @@ static int I444TestFilter_16(int src_width,
         benchmark_cpu_info_); \
     EXPECT_LE(diff, max_diff); \
   } \
-  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
+  TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_16) { \
     int diff = I420TestFilter_16( \
         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
@@ -525,7 +525,7 @@ static int I444TestFilter_16(int src_width,
         benchmark_cpu_info_); \
     EXPECT_LE(diff, max_diff); \
   } \
-  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
+  TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_16) { \
     int diff = I444TestFilter_16( \
         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
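The new first parameter of TEST_FACTOR1 is pasted onto the generated test name, so callers can pass either nothing or the literal token DISABLED_; googletest compiles tests whose names start with DISABLED_ but skips them unless they are explicitly requested. A small standalone illustration of that mechanism (hypothetical test names; the real macros build the names from the scale factor and filter as shown above):

#include <gtest/gtest.h>

// googletest skips any test whose name begins with DISABLED_ by default.
// Run such tests with --gtest_also_run_disabled_tests, or, in this file,
// build with ENABLE_SLOW_TESTS defined so the prefix argument is empty.
TEST(LibYUVScaleSketch, DISABLED_SlowScaleDownBy2_16) {
  SUCCEED();  // a slow 16-bit scale check would go here
}

TEST(LibYUVScaleSketch, FastScaleDownBy2) {
  SUCCEED();  // unprefixed tests run normally
}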
@@ -536,11 +536,19 @@ static int I444TestFilter_16(int src_width,
 
 // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
 // filtering is different fixed point implementations for SSSE3, Neon and C.
+#ifdef ENABLE_SLOW_TESTS
 #define TEST_FACTOR(name, nom, denom, boxdiff) \
-  TEST_FACTOR1(name, None, nom, denom, 0) \
-  TEST_FACTOR1(name, Linear, nom, denom, 3) \
-  TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
-  TEST_FACTOR1(name, Box, nom, denom, boxdiff)
+  TEST_FACTOR1(, name, None, nom, denom, 0) \
+  TEST_FACTOR1(, name, Linear, nom, denom, 3) \
+  TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
+  TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
+#else
+#define TEST_FACTOR(name, nom, denom, boxdiff) \
+  TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
+  TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
+  TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
+  TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
+#endif
 
 TEST_FACTOR(2, 1, 2, 0)
 TEST_FACTOR(4, 1, 4, 0)
@@ -553,7 +561,7 @@ TEST_FACTOR(3, 1, 3, 0)
 #undef SX
 #undef DX
 
-#define TEST_SCALETO1(name, width, height, filter, max_diff) \
+#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
   TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
     int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
                               height, kFilter##filter, benchmark_iterations_, \
@@ -566,13 +574,13 @@ TEST_FACTOR(3, 1, 3, 0)
                               disable_cpu_flags_, benchmark_cpu_info_); \
     EXPECT_LE(diff, max_diff); \
   } \
-  TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
+  TEST_F(LibYUVScaleTest, DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
     int diff = I420TestFilter_16( \
         benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
     EXPECT_LE(diff, max_diff); \
   } \
-  TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
+  TEST_F(LibYUVScaleTest, DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
     int diff = I444TestFilter_16( \
         benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
@@ -593,7 +601,7 @@ TEST_FACTOR(3, 1, 3, 0)
     EXPECT_LE(diff, max_diff); \
   } \
   TEST_F(LibYUVScaleTest, \
-         I420##name##From##width##x##height##_##filter##_16) { \
+         DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
     int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
                                  Abs(benchmark_height_), kFilter##filter, \
                                  benchmark_iterations_, disable_cpu_flags_, \
@@ -601,7 +609,7 @@ TEST_FACTOR(3, 1, 3, 0)
     EXPECT_LE(diff, max_diff); \
   } \
   TEST_F(LibYUVScaleTest, \
-         I444##name##From##width##x##height##_##filter##_16) { \
+         DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
     int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
                                  Abs(benchmark_height_), kFilter##filter, \
                                  benchmark_iterations_, disable_cpu_flags_, \
@@ -609,19 +617,30 @@ TEST_FACTOR(3, 1, 3, 0)
     EXPECT_LE(diff, max_diff); \
   }
 
+#ifdef ENABLE_SLOW_TESTS
 // Test scale to a specified size with all 4 filters.
 #define TEST_SCALETO(name, width, height) \
-  TEST_SCALETO1(name, width, height, None, 0) \
-  TEST_SCALETO1(name, width, height, Linear, 3) \
-  TEST_SCALETO1(name, width, height, Bilinear, 3) \
-  TEST_SCALETO1(name, width, height, Box, 3)
+  TEST_SCALETO1(, name, width, height, None, 0) \
+  TEST_SCALETO1(, name, width, height, Linear, 3) \
+  TEST_SCALETO1(, name, width, height, Bilinear, 3) \
+  TEST_SCALETO1(, name, width, height, Box, 3)
+#else
+// Test scale to a specified size with all 4 filters.
+#define TEST_SCALETO(name, width, height) \
+  TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
+  TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
+  TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
+  TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
+#endif
 
 TEST_SCALETO(Scale, 1, 1)
 TEST_SCALETO(Scale, 320, 240)
 TEST_SCALETO(Scale, 569, 480)
 TEST_SCALETO(Scale, 640, 360)
 TEST_SCALETO(Scale, 1280, 720)
+#ifdef ENABLE_SLOW_TESTS
 TEST_SCALETO(Scale, 1920, 1080)
+#endif  // ENABLE_SLOW_TESTS
 
 #undef TEST_SCALETO1
 #undef TEST_SCALETO
@@ -879,7 +898,7 @@ static int TestPlaneFilter_16(int src_width,
 #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
 
 #define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
-  TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) { \
+  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
     int diff = TestPlaneFilter_16( \
         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
...