Commit d83f63a3 authored by fbarchard@google.com

Make InterpolateRow, used for scale, handle unaligned memory. Remove HalfRow, which is no longer used.

BUG=367
TESTED=unittest on I422ToI420
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/28639004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1107 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0c603fbc
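
For context on why HalfRow can go: its rounded two-row average is just the 50% case of the general two-row blend that InterpolateRow performs. Below is a minimal C sketch of that equivalence, using hypothetical names (half_row, interpolate_row) rather than libyuv's actual kernels, and assuming a 0..256 blend fraction where 128 means halfway:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Rounded average of two rows: what HalfRow computed. */
static void half_row(const uint8_t* src, ptrdiff_t stride,
                     uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src[x] + src[stride + x] + 1) >> 1);
  }
}

/* General two-row blend; fraction is 0..256, 128 = halfway. */
static void interpolate_row(uint8_t* dst, const uint8_t* src,
                            ptrdiff_t stride, int width, int fraction) {
  int f1 = fraction;
  int f0 = 256 - fraction;
  for (int x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src[x] * f0 + src[stride + x] * f1 + 128) >> 8);
  }
}

int main(void) {
  uint8_t rows[2][8] = {{0, 1, 2, 3, 250, 251, 252, 253},
                        {9, 8, 7, 6, 255, 254, 253, 252}};
  uint8_t a[8], b[8];
  half_row(rows[0], 8, a, 8);
  interpolate_row(b, rows[0], 8, 8, 128);  /* 50% blend */
  for (int i = 0; i < 8; ++i) assert(a[i] == b[i]);
  return 0;
}

With f0 = f1 = 128, (a*128 + b*128 + 128) >> 8 reduces to (a + b + 1) >> 1, so the two loops agree bit for bit, which is what lets one general interpolate kernel replace the dedicated half-row one.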
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1105
+Version: 1106
 License: BSD
 License File: LICENSE

@@ -124,7 +124,6 @@ extern "C" {
 #define HAS_COPYROW_ERMS
 #define HAS_COPYROW_SSE2
 #define HAS_COPYROW_X86
-#define HAS_HALFROW_SSE2
 #define HAS_I400TOARGBROW_SSE2
 #define HAS_I411TOARGBROW_SSSE3
 #define HAS_I422TOARGB1555ROW_SSSE3

@@ -212,7 +211,6 @@ extern "C" {
 #define HAS_ARGBTOUVROW_AVX2
 #define HAS_ARGBTOYJROW_AVX2
 #define HAS_ARGBTOYROW_AVX2
-#define HAS_HALFROW_AVX2
 #define HAS_I422TOARGBROW_AVX2
 #define HAS_INTERPOLATEROW_AVX2
 #define HAS_MERGEUVROW_AVX2

@@ -300,7 +298,6 @@ extern "C" {
 #define HAS_UYVYTOUV422ROW_NEON
 #define HAS_YUY2TOUVROW_NEON
 #define HAS_UYVYTOUVROW_NEON
-#define HAS_HALFROW_NEON
 #define HAS_ARGBTOBAYERROW_NEON
 #define HAS_ARGBTOBAYERGGROW_NEON
 #define HAS_ARGBSHUFFLEROW_NEON

@@ -378,7 +375,6 @@ extern "C" {
 #define HAS_BGRATOUVROW_NEON
 #define HAS_BGRATOYROW_NEON
 #define HAS_COPYROW_NEON
-#define HAS_HALFROW_NEON
 #define HAS_I400TOARGBROW_NEON
 #define HAS_I411TOARGBROW_NEON
 #define HAS_I422TOABGRROW_NEON

@@ -1577,18 +1573,6 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
                              uint8* dst_u, uint8* dst_v, int pix);
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
-               uint8* dst_uv, int pix);
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix);
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix);
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix);
-void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
-                  uint16* dst_uv, int pix);
 void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
                       uint32 selector, int pix);
 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,

@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1105
+#define LIBYUV_VERSION 1106

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT

@@ -1885,17 +1885,17 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
   }
 }

-// Blend 2 rows into 1 for conversions such as I422ToI420.
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
-               uint8* dst_uv, int pix) {
+// Blend 2 rows into 1.
+static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
+                      uint8* dst_uv, int pix) {
   int x;
   for (x = 0; x < pix; ++x) {
     dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
   }
 }

-void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
-                  uint16* dst_uv, int pix) {
+static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
+                         uint16* dst_uv, int pix) {
   int x;
   for (x = 0; x < pix; ++x) {
     dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;

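A small worked note on the rounding in these kernels: the +1 before the shift rounds the average to nearest instead of truncating, so pixels 3 and 4 blend to (3 + 4 + 1) >> 1 = 4, where a plain (3 + 4) >> 1 would give 3. Both the 8-bit and 16-bit variants use the same trick.
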
@@ -1274,30 +1274,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
   );
 }

-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  asm volatile (
-    // change the stride to row 2 pointer
-    "add        %1, %0                         \n"
-  "1:                                          \n"
-    MEMACCESS(0)
-    "vld1.8     {q0}, [%0]!                    \n"  // load row 1 16 pixels.
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop
-    MEMACCESS(1)
-    "vld1.8     {q1}, [%1]!                    \n"  // load row 2 16 pixels.
-    "vrhadd.u8  q0, q1                         \n"  // average row 1 and 2
-    MEMACCESS(2)
-    "vst1.8     {q0}, [%2]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_uv),         // %0
-    "+r"(src_uv_stride),  // %1
-    "+r"(dst_uv),         // %2
-    "+r"(pix)             // %3
-  :
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
 // Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
 void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
                          uint32 selector, int pix) {

@@ -1260,32 +1260,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
 }
 #endif  // HAS_UYVYTOUVROW_NEON

-#ifdef HAS_HALFROW_NEON
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  const uint8* src_uvb = src_uv + src_uv_stride;
-  asm volatile (
-    // change the stride to row 2 pointer
-  "1:                                          \n"
-    MEMACCESS(0)
-    "ld1        {v0.16b}, [%0], #16            \n"  // load row 1 16 pixels.
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop
-    MEMACCESS(1)
-    "ld1        {v1.16b}, [%1], #16            \n"  // load row 2 16 pixels.
-    "urhadd     v0.16b, v0.16b, v1.16b         \n"  // average row 1 and 2
-    MEMACCESS(2)
-    "st1        {v0.16b}, [%2], #16            \n"
-    "b.gt       1b                             \n"
-  : "+r"(src_uv),         // %0
-    "+r"(src_uvb),        // %1
-    "+r"(dst_uv),         // %2
-    "+r"(pix)             // %3
-  :
-  : "cc", "memory", "v0", "v1"  // Clobber List
-  );
-}
-#endif  // HAS_HALFROW_NEON
-
 // Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
 #ifdef HAS_ARGBTOBAYERROW_NEON
 void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,

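The core of both removed loops is a single rounding-halving-add instruction (vrhadd.u8 on 32-bit ARM, urhadd on arm64). A hedged intrinsics sketch of the same operation, not code from this commit, assuming pix is a positive multiple of 16:

#include <arm_neon.h>
#include <stdint.h>

/* Rounded two-row average, 16 pixels per iteration. */
static void half_row_neon(const uint8_t* src_uv, int src_uv_stride,
                          uint8_t* dst_uv, int pix) {
  const uint8_t* src_b = src_uv + src_uv_stride;  // row 2 pointer
  for (int x = 0; x < pix; x += 16) {
    uint8x16_t r0 = vld1q_u8(src_uv + x);         // load row 1, 16 pixels
    uint8x16_t r1 = vld1q_u8(src_b + x);          // load row 2, 16 pixels
    vst1q_u8(dst_uv + x, vrhaddq_u8(r0, r1));     // (a + b + 1) >> 1
  }
}

The vrhaddq_u8 intrinsic compiles to vrhadd.u8 or urhadd depending on target, so one C body covers both deleted assembly variants.
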
(One file's diff is collapsed and not shown.)
@@ -3674,11 +3674,11 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
     align      4
   convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
     lea        eax,  [eax + 32]
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
     lea        edx, [edx + 32]
     sub        ecx, 32
     jg         convertloop

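The movdqa to movdqu switch is the entire fix in this hunk: movdqa faults if its memory operand is not 16-byte aligned, while movdqu accepts any address. In intrinsics terms, a hedged sketch of the same unaligned copy loop, not the commit's code, assuming count is a positive multiple of 32:

#include <emmintrin.h>  // SSE2
#include <stdint.h>

/* Copy 32 bytes per iteration without assuming 16-byte alignment. */
static void copy_row_sse2(const uint8_t* src, uint8_t* dst, int count) {
  for (int i = 0; i < count; i += 32) {
    __m128i x0 = _mm_loadu_si128((const __m128i*)(src + i));       // unaligned load
    __m128i x1 = _mm_loadu_si128((const __m128i*)(src + i + 16));  // unaligned load
    _mm_storeu_si128((__m128i*)(dst + i), x0);                     // unaligned store
    _mm_storeu_si128((__m128i*)(dst + i + 16), x1);                // unaligned store
  }
}
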
@@ -6540,58 +6540,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
 }
 #endif  // HAS_INTERPOLATEROW_SSE2

-__declspec(naked) __declspec(align(16))
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // src_uv_stride
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    sub        edi, eax
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    pavgb      xmm0, [eax + edx]
-    sub        ecx, 16
-    movdqa     [eax + edi], xmm0
-    lea        eax,  [eax + 16]
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-#ifdef HAS_HALFROW_AVX2
-__declspec(naked) __declspec(align(16))
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // src_uv_stride
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    sub        edi, eax
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vpavgb     ymm0, ymm0, [eax + edx]
-    sub        ecx, 32
-    vmovdqu    [eax + edi], ymm0
-    lea        eax,  [eax + 32]
-    jg         convertloop
-
-    pop        edi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_HALFROW_AVX2
-
 __declspec(naked) __declspec(align(16))
 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
                           uint32 selector, int pix) {