Commit 545a51c1 authored by fbarchard@google.com

Use scale for chroma subsampling to handle an odd source width converted to an even destination width.

BUG=289
TEST=drmemory
R=nfullagar@google.com, ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/4779004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@884 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0014ce00
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 883 Version: 884
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 883 #define LIBYUV_VERSION 884
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -58,6 +58,8 @@ int I420Copy(const uint8* src_y, int src_stride_y, ...@@ -58,6 +58,8 @@ int I420Copy(const uint8* src_y, int src_stride_y,
return 0; return 0;
} }
// 422 chroma is 1/2 width, 1x height
// 420 chroma is 1/2 width, 1/2 height
LIBYUV_API LIBYUV_API
int I422ToI420(const uint8* src_y, int src_stride_y, int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
...@@ -81,73 +83,29 @@ int I422ToI420(const uint8* src_y, int src_stride_y, ...@@ -81,73 +83,29 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
src_stride_u = -src_stride_u; src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v; src_stride_v = -src_stride_v;
} }
int halfwidth = (width + 1) >> 1;
void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) = HalfRow_C;
#if defined(HAS_HALFROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
HalfRow = HalfRow_SSE2;
}
#endif
#if defined(HAS_HALFROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(halfwidth, 32)) {
HalfRow = HalfRow_AVX2;
}
#endif
#if defined(HAS_HALFROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
HalfRow = HalfRow_NEON;
}
#endif
// Copy Y plane // Copy Y plane
if (dst_y) { if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
} }
// SubSample U plane. int halfwidth = (width + 1) >> 1;
int y; int halfheight = (height + 1) >> 1;
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
HalfRow(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane. // Resample U plane.
for (y = 0; y < height - 1; y += 2) { ScalePlane(src_u, src_stride_u, halfwidth, height,
HalfRow(src_v, src_stride_v, dst_v, halfwidth); dst_u, dst_stride_u, halfwidth, halfheight,
src_v += src_stride_v * 2; kFilterBilinear);
dst_v += dst_stride_v;
} // Resample V plane.
if (height & 1) { ScalePlane(src_v, src_stride_v, halfwidth, height,
HalfRow(src_v, 0, dst_v, halfwidth); dst_v, dst_stride_v, halfwidth, halfheight,
} kFilterBilinear);
return 0; return 0;
} }
// Blends 32x2 pixels to 16x1 // 444 chroma is 1x width, 1x height
// source in scale.cc // 420 chroma is 1/2 width, 1/2 height
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SCALEROWDOWN2_NEON
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
#elif !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
#endif
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
LIBYUV_API LIBYUV_API
int I444ToI420(const uint8* src_y, int src_stride_y, int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
...@@ -171,54 +129,27 @@ int I444ToI420(const uint8* src_y, int src_stride_y, ...@@ -171,54 +129,27 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
src_stride_u = -src_stride_u; src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v; src_stride_v = -src_stride_v;
} }
int halfwidth = (width + 1) >> 1;
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_NEON;
}
#elif defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_SSE2;
}
#endif
// Copy Y plane // Copy Y plane
if (dst_y) { if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
} }
// SubSample U plane. int halfwidth = (width + 1) >> 1;
int y; int halfheight = (height + 1) >> 1;
for (y = 0; y < height - 1; y += 2) {
ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
ScaleRowDown2(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane. // Resample U plane.
for (y = 0; y < height - 1; y += 2) { ScalePlane(src_u, src_stride_u, width, height,
ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth); dst_u, dst_stride_u, halfwidth, halfheight,
src_v += src_stride_v * 2; kFilterBilinear);
dst_v += dst_stride_v;
} // Resample V plane.
if (height & 1) { ScalePlane(src_v, src_stride_v, width, height,
ScaleRowDown2(src_v, 0, dst_v, halfwidth); dst_v, dst_stride_v, halfwidth, halfheight,
} kFilterBilinear);
return 0; return 0;
} }
// TODO(fbarchard): Enable bilinear when fast enough or specialized upsampler.
// 411 chroma is 1/4 width, 1x height // 411 chroma is 1/4 width, 1x height
// 420 chroma is 1/2 width, 1/2 height // 420 chroma is 1/2 width, 1/2 height
LIBYUV_API LIBYUV_API
...@@ -254,15 +185,15 @@ int I411ToI420(const uint8* src_y, int src_stride_y, ...@@ -254,15 +185,15 @@ int I411ToI420(const uint8* src_y, int src_stride_y,
int halfheight = (height + 1) >> 1; int halfheight = (height + 1) >> 1;
int quarterwidth = (width + 3) >> 2; int quarterwidth = (width + 3) >> 2;
// Resample U plane from 1/4 width, 1x height to 1/2 width, 1/2 height. // Resample U plane.
ScalePlane(src_u, src_stride_u, quarterwidth, height, ScalePlane(src_u, src_stride_u, quarterwidth, height,
dst_u, dst_stride_u, halfwidth, halfheight, dst_u, dst_stride_u, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
// Resample V plane. // Resample V plane.
ScalePlane(src_v, src_stride_v, quarterwidth, height, ScalePlane(src_v, src_stride_v, quarterwidth, height,
dst_v, dst_stride_v, halfwidth, halfheight, dst_v, dst_stride_v, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
return 0; return 0;
} }
......
...@@ -5389,6 +5389,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -5389,6 +5389,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
// 1 pixel loop \n" // 1 pixel loop \n"
".p2align 2 \n" ".p2align 2 \n"
BUNDLEALIGN
"10: \n" "10: \n"
"cvttps2dq %%xmm2,%%xmm0 \n" "cvttps2dq %%xmm2,%%xmm0 \n"
"packssdw %%xmm0,%%xmm0 \n" "packssdw %%xmm0,%%xmm0 \n"
......
...@@ -106,7 +106,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ ...@@ -106,7 +106,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
} \ } \
} \ } \
} \ } \
EXPECT_LE(max_diff, 1); \ EXPECT_LE(max_diff, 0); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
int abs_diff = \ int abs_diff = \
...@@ -119,7 +119,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ ...@@ -119,7 +119,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
} \ } \
} \ } \
} \ } \
EXPECT_LE(max_diff, 1); \ EXPECT_LE(max_diff, 3); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
int abs_diff = \ int abs_diff = \
...@@ -132,7 +132,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ ...@@ -132,7 +132,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
} \ } \
} \ } \
} \ } \
EXPECT_LE(max_diff, 1); \ EXPECT_LE(max_diff, 3); \
free_aligned_buffer_64(dst_y_c) \ free_aligned_buffer_64(dst_y_c) \
free_aligned_buffer_64(dst_u_c) \ free_aligned_buffer_64(dst_u_c) \
free_aligned_buffer_64(dst_v_c) \ free_aligned_buffer_64(dst_v_c) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment