Commit a2ea9056 authored by Frank Barchard's avatar Frank Barchard

BlendPlane any width.

Benchmark
out\release\libyuv_unittest --libyuv_width=1279 --libyuv_height=719 --libyuv_repeat=999 --libyuv_flags=-1 --gtest_filter=*Blend* | sortms

Was
I420Blend_Any (2321 ms)
I420Blend_Unaligned (1684 ms)
I420Blend_Opt (1675 ms)
I420Blend_Invert (1653 ms)
BlendPlane_Invert (1556 ms)
BlendPlane_Any (1552 ms)
BlendPlane_Unaligned (1548 ms)
BlendPlane_Opt (1535 ms)
ARGBBlend_Unaligned (659 ms)
ARGBBlend_Any (596 ms)
ARGBBlend_Invert (591 ms)
ARGBBlend_Opt (508 ms)
BlendPlaneRow_Unaligned (186 ms)
BlendPlaneRow_Opt (171 ms)

Now
ARGBBlend_Any (621 ms)
ARGBBlend_Unaligned (585 ms)
ARGBBlend_Invert (564 ms)
ARGBBlend_Opt (512 ms)
I420Blend_Unaligned (347 ms)
I420Blend_Invert (345 ms)
I420Blend_Any (337 ms)
I420Blend_Opt (327 ms)
BlendPlane_Unaligned (187 ms)
BlendPlaneRow_Unaligned (187 ms)
BlendPlane_Invert (186 ms)
BlendPlane_Any (186 ms)
BlendPlaneRow_Opt (173 ms)
BlendPlane_Opt (171 ms)

which is comparable to aligned case
out\release\libyuv_unittest --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1 --gtest_filter=*Blend* | sortms
ARGBBlend_Any (625 ms)
ARGBBlend_Unaligned (602 ms)
ARGBBlend_Invert (508 ms)
ARGBBlend_Opt (506 ms)
I420Blend_Any (353 ms)
I420Blend_Unaligned (322 ms)
I420Blend_Invert (304 ms)
I420Blend_Opt (301 ms)
BlendPlaneRow_Unaligned (188 ms)
BlendPlane_Unaligned (186 ms)
BlendPlane_Invert (185 ms)
BlendPlane_Any (184 ms)
BlendPlaneRow_Opt (173 ms)
BlendPlane_Opt (169 ms)

R=dhrosa@google.com, harryjin@google.com
BUG=libyuv:527

Review URL: https://codereview.chromium.org/1513443002 .
parent dee77a4e
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1550
Version: 1551
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1550
#define LIBYUV_VERSION 1551
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -610,8 +610,7 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_BLENDPLANEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
// TODO(fbarchard): Implement any versions for odd width.
// BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
BlendPlaneRow = BlendPlaneRow_SSSE3;
}
......@@ -619,7 +618,7 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0,
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// BlendPlaneRow = BlendPlaneRow_Any_AVX2;
BlendPlaneRow = BlendPlaneRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
BlendPlaneRow = BlendPlaneRow_AVX2;
}
......@@ -678,8 +677,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_BLENDPLANEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
// TODO(fbarchard): Implement any versions for odd width.
// BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
if (IS_ALIGNED(halfwidth, 8)) {
BlendPlaneRow = BlendPlaneRow_SSSE3;
}
......@@ -687,7 +685,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// BlendPlaneRow = BlendPlaneRow_Any_AVX2;
BlendPlaneRow = BlendPlaneRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
BlendPlaneRow = BlendPlaneRow_AVX2;
}
......
......@@ -83,6 +83,12 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_BLENDPLANEROW_AVX2
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#ifdef HAS_BLENDPLANEROW_SSSE3
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#undef ANY31
// Note that odd width replication includes 444 due to implementation
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment