Commit 2657688e authored by Frank Barchard

Add support for odd height YUVA alpha blending.

R=dhrosa@google.com, harryjin@google.com
BUG=libyuv:527

Review URL: https://codereview.chromium.org/1507683003 .
parent b0b22f88
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1548 Version: 1549
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1548 #define LIBYUV_VERSION 1549
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -651,6 +651,8 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, ...@@ -651,6 +651,8 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
int y; int y;
// Half width/height for UV.
int halfwidth = (width + 1) >> 1;
void (*BlendPlaneRow)(const uint8* src0, const uint8* src1, void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C; const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
...@@ -674,15 +676,11 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, ...@@ -674,15 +676,11 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
dst_y, dst_stride_y, dst_y, dst_stride_y,
width, height); width, height);
// Half width/height for UV.
width = (width + 1) >> 1;
height = (height + 1) >> 1;
#if defined(HAS_BLENDPLANEROW_SSSE3) #if defined(HAS_BLENDPLANEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
// TODO(fbarchard): Implement any versions for odd width. // TODO(fbarchard): Implement any versions for odd width.
// BlendPlaneRow = BlendPlaneRow_Any_SSSE3; // BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(halfwidth, 8)) {
BlendPlaneRow = BlendPlaneRow_SSSE3; BlendPlaneRow = BlendPlaneRow_SSSE3;
} }
} }
...@@ -690,7 +688,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, ...@@ -690,7 +688,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_BLENDPLANEROW_AVX2) #if defined(HAS_BLENDPLANEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
// BlendPlaneRow = BlendPlaneRow_Any_AVX2; // BlendPlaneRow = BlendPlaneRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
BlendPlaneRow = BlendPlaneRow_AVX2; BlendPlaneRow = BlendPlaneRow_AVX2;
} }
} }
...@@ -698,7 +696,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, ...@@ -698,7 +696,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_SCALEROWDOWN2_NEON) #if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_NEON; ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_NEON; ScaleRowDown2 = ScaleRowDown2Box_NEON;
} }
} }
...@@ -706,7 +704,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, ...@@ -706,7 +704,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_SCALEROWDOWN2_SSE2) #if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2; ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_SSE2; ScaleRowDown2 = ScaleRowDown2Box_SSE2;
} }
} }
...@@ -714,20 +712,24 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, ...@@ -714,20 +712,24 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_SCALEROWDOWN2_AVX2) #if defined(HAS_SCALEROWDOWN2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2; ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
ScaleRowDown2 = ScaleRowDown2Box_AVX2; ScaleRowDown2 = ScaleRowDown2Box_AVX2;
} }
} }
#endif #endif
// Row buffer for intermediate alpha pixels. // Row buffer for intermediate alpha pixels.
align_buffer_64(halfalpha, width); align_buffer_64(halfalpha, halfwidth);
for (y = 0; y < height; ++y) { for (y = 0; y < height; y += 2) {
// last row of odd height image use 1 row of alpha instead of 2.
if (y == (height - 1)) {
alpha_stride = 0;
}
// Subsample 2 rows of UV to half width and half height. // Subsample 2 rows of UV to half width and half height.
ScaleRowDown2(alpha, alpha_stride, halfalpha, width); ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
alpha += alpha_stride * 2; alpha += alpha_stride * 2;
BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width); BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width); BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
src_u0 += src_stride_u0; src_u0 += src_stride_u0;
src_u1 += src_stride_u1; src_u1 += src_stride_u1;
dst_u += dst_stride_u; dst_u += dst_stride_u;
......
...@@ -1164,7 +1164,6 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { ...@@ -1164,7 +1164,6 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
} }
#ifdef HAS_BLENDPLANEROW_AVX2 #ifdef HAS_BLENDPLANEROW_AVX2
// TODO(fbarchard): Switch to I420Blend.
static void TestBlendPlaneRow(int width, int height, int benchmark_iterations, static void TestBlendPlaneRow(int width, int height, int benchmark_iterations,
int invert, int off) { int invert, int off) {
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
...@@ -1348,6 +1347,14 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) { ...@@ -1348,6 +1347,14 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1); disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
} }
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 1);
}
#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) #define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
...@@ -1442,6 +1449,14 @@ TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) { ...@@ -1442,6 +1449,14 @@ TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1); disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
} }
TEST_F(LibYUVPlanarTest, I420Blend_Any) {
TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
}
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
}
TEST_F(LibYUVPlanarTest, TestAffine) { TEST_F(LibYUVPlanarTest, TestAffine) {
SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment