Commit b8ddb5a2 authored by Frank Barchard

rounding for arm filter

R=wangcheng@google.com, harryjin@google.com
BUG=libyuv:607

Review URL: https://codereview.chromium.org/2093913004 .
parent 1b3e4aee
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1600
Version: 1601
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1600
#define LIBYUV_VERSION 1601
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -417,11 +417,9 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
}
// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__)
// arm uses 16 bit math with truncation.
// TODO(fbarchard): add rounding.
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) (uint8)((int)(a) + \
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) (uint8)((int)(a) + \
......@@ -480,7 +478,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
// Same as 8 bit arm blender but return is cast to uint16
#define BLENDER(a, b, f) (uint16)((int)(a) + \
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) {
......@@ -818,7 +816,7 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
}
}
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=605.
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
#define BLENDERC(a, b, f, s) (uint32)( \
......
......@@ -612,8 +612,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"vmovl.u16 q10, d21 \n"
"vmul.s32 q11, q11, q13 \n"
"vmul.s32 q12, q12, q10 \n"
"vshrn.s32 d18, q11, #16 \n"
"vshrn.s32 d19, q12, #16 \n"
"vrshrn.s32 d18, q11, #16 \n"
"vrshrn.s32 d19, q12, #16 \n"
"vadd.s16 q8, q8, q9 \n"
"vmovn.s16 d6, q8 \n"
......
......@@ -626,8 +626,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"ushll2 v6.4s, v6.8h, #0 \n"
"mul v16.4s, v16.4s, v7.4s \n"
"mul v17.4s, v17.4s, v6.4s \n"
"shrn v6.4h, v16.4s, #16 \n"
"shrn2 v6.8h, v17.4s, #16 \n"
"rshrn v6.4h, v16.4s, #16 \n"
"rshrn2 v6.8h, v17.4s, #16 \n"
"add v4.8h, v4.8h, v6.8h \n"
"xtn v4.8b, v4.8h \n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment