Commit 90335f60 authored by Frank Barchard

bug fix for odd width 16/24 bit to i420

A bug was introduced on ARM when the code for 'any' width switched to
a temporary stack buffer and SIMD.
The C version handles odd width by doing 1 pixel, instead of averaging 2.
The SIMD 'any' version, by contrast, is supposed to replicate the last pixel,
so that the subsampling in NEON averages the pixel with itself, producing
the same result.
The previous version did this, but only for 32-bit ARGB, in order to
avoid introducing issues with subsampled YUY2 source.  This CL adds
replication for 16-bit RGB values.

TBR=harryjin@google.com
BUG=libyuv:510

Review URL: https://codereview.chromium.org/1418983003 .
parent 5bf4de08
...@@ -675,22 +675,22 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3) ...@@ -675,22 +675,22 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
/* repeat last 4 bytes for 422 subsampler */ \ /* repeat last 4 bytes for 422 subsampler */ \
if ((width & 1) && BPP == 4 && DUVSHIFT == 1) { \ if ((width & 1) && BPP == 4 && DUVSHIFT == 1) { \
memcpy(temp + SS(r, UVSHIFT) * BPP, \ memcpy(temp + SS(r, UVSHIFT) * BPP, \
temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \ } \
/* repeat last 4 - 12 bytes for 411 subsampler */ \ /* repeat last 4 - 12 bytes for 411 subsampler */ \
if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) { \ if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) { \
memcpy(temp + SS(r, UVSHIFT) * BPP, \ memcpy(temp + SS(r, UVSHIFT) * BPP, \
temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
memcpy(temp + SS(r, UVSHIFT) * BPP + 4, \ memcpy(temp + SS(r, UVSHIFT) * BPP + BPP, \
temp + SS(r, UVSHIFT) * BPP - BPP, 8); \ temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2); \
} \ } \
if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) { \ if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) { \
memcpy(temp + SS(r, UVSHIFT) * BPP, \ memcpy(temp + SS(r, UVSHIFT) * BPP, \
temp + SS(r, UVSHIFT) * BPP - BPP * 2, 8); \ temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2); \
} \ } \
if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) { \ if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) { \
memcpy(temp + SS(r, UVSHIFT) * BPP, \ memcpy(temp + SS(r, UVSHIFT) * BPP, \
temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \ } \
ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
...@@ -747,11 +747,11 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) ...@@ -747,11 +747,11 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \ SS(r, UVSHIFT) * BPP); \
if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \ if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */\
memcpy(temp + SS(r, UVSHIFT) * BPP, \ memcpy(temp + SS(r, UVSHIFT) * BPP, \
temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \ temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \ } \
ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment