Commit f658aebb authored by fbarchard@google.com

Port ARGBToBayer to Neon and attempt a V210 valgrind fix.

BUG=122
TEST=UNITTEST bayer
Review URL: https://webrtc-codereview.appspot.com/873011

git-svn-id: http://libyuv.googlecode.com/svn/trunk@405 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f09570d5
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 404
Version: 405
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_

// Library version number. Defined exactly once: redefining a macro with a
// different replacement list is a constraint violation in C, so the previous
// value (404) must not be left in place next to the current one.
#define LIBYUV_VERSION 405

#endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
......@@ -229,9 +229,9 @@ int I400Copy(const uint8* src_y, int src_stride_y,
// U0Y0V0Y1
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_I42XTOYUY2ROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
__declspec(naked) __declspec(align(16))
static void I42xToYUY2Row_SSE2(const uint8* src_y,
static void I422ToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
......@@ -268,9 +268,9 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
}
}
#define HAS_I42XTOUYVYROW_SSE2
#define HAS_I422TOUYVYROW_SSE2
__declspec(naked) __declspec(align(16))
static void I42xToUYVYRow_SSE2(const uint8* src_y,
static void I422ToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
......@@ -307,8 +307,8 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
}
}
#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__))
#define HAS_I42XTOYUY2ROW_SSE2
static void I42xToYUY2Row_SSE2(const uint8* src_y,
#define HAS_I422TOYUY2ROW_SSE2
static void I422ToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
......@@ -343,8 +343,8 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
);
}
#define HAS_I42XTOUYVYROW_SSE2
static void I42xToUYVYRow_SSE2(const uint8* src_y,
#define HAS_I422TOUYVYROW_SSE2
static void I422ToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
......@@ -380,7 +380,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
}
#endif
static void I42xToYUY2Row_C(const uint8* src_y,
static void I422ToYUY2Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
......@@ -402,7 +402,7 @@ static void I42xToYUY2Row_C(const uint8* src_y,
}
}
static void I42xToUYVYRow_C(const uint8* src_y,
static void I422ToUYVYRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
......@@ -480,20 +480,20 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToYUY2Row = I42xToYUY2Row_SSE2;
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
#endif
for (int y = 0; y < height; ++y) {
I42xToYUY2Row(src_y, src_u, src_y, dst_frame, width);
I422ToYUY2Row(src_y, src_u, src_y, dst_frame, width);
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
......@@ -518,21 +518,21 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToYUY2Row = I42xToYUY2Row_SSE2;
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
#endif
for (int y = 0; y < height - 1; y += 2) {
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
I42xToYUY2Row(src_y + src_stride_y, src_u, src_v,
I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width);
I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
......@@ -540,7 +540,7 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
dst_frame += dst_stride_frame * 2;
}
if (height & 1) {
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width);
}
return 0;
}
......@@ -562,20 +562,20 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
#endif
for (int y = 0; y < height; ++y) {
I42xToUYVYRow(src_y, src_u, src_y, dst_frame, width);
I422ToUYVYRow(src_y, src_u, src_y, dst_frame, width);
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
......@@ -600,21 +600,21 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
#endif
for (int y = 0; y < height - 1; y += 2) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v,
I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width);
I422ToUYVYRow(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
......@@ -622,7 +622,7 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
dst_frame += dst_stride_frame * 2;
}
if (height & 1) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width);
}
return 0;
}
......@@ -633,9 +633,8 @@ int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (width * 16 / 6 > kMaxStride) { // Row buffer of V210 is required.
return -1;
} else if (!src_y || !src_u || !src_v || !dst_frame ||
if (width * 16 / 6 > kMaxStride ||
!src_y || !src_u || !src_v || !dst_frame ||
width <= 0 || height == 0) {
return -1;
}
......@@ -647,25 +646,22 @@ int I420ToV210(const uint8* src_y, int src_stride_y,
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix);
UYVYToV210Row = UYVYToV210Row_C;
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
#endif
for (int y = 0; y < height - 1; y += 2) {
I42xToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame, width);
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame + dst_stride_frame, width);
I422ToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row_C(row, dst_frame, width);
I422ToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width);
UYVYToV210Row_C(row, dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
......@@ -673,8 +669,8 @@ int I420ToV210(const uint8* src_y, int src_stride_y,
dst_frame += dst_stride_frame * 2;
}
if (height & 1) {
I42xToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame, width);
I422ToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row_C(row, dst_frame, width);
}
return 0;
}
......
......@@ -826,7 +826,7 @@ void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
asm volatile (
// change the stride to row 2 pointer
"add %1, %0 \n"
"1: \n"
"1: \n"
"vld1.u8 {q0}, [%0]! \n" // load row 1 16 pixels.
"subs %3, %3, #16 \n" // 16 processed per loop
"vld1.u8 {q1}, [%1]! \n" // load row 2 16 pixels.
......@@ -843,21 +843,23 @@ void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
}
// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
//
// src_argb   Source row; 16 bytes are loaded per loop iteration.
// dst_bayer  Destination row; 4 bytes are stored per loop iteration.
// selector   Four packed byte indices, moved into d2[0] and used as the
//            table-lookup indices into the 16 loaded source bytes.
// pix        Number of pixels; decremented by 4 per iteration.
//
// The scalar copy loop that used to precede the asm has been removed: it
// advanced src_argb and dst_bayer by a whole row, after which the asm would
// have processed another pix pixels beyond the end of both buffers.
//
// NOTE(review): the loop body always runs at least once and steps by 4, so
// this presumably relies on callers passing pix > 0 and a multiple of 4 -
// confirm against the dispatch code.
void ARGBToBayerRow_NEON(const uint8* src_argb,
                         uint8* dst_bayer, uint32 selector, int pix) {
  asm volatile (
    "vmov.u32 d2[0], %2 \n" // selector
    "1: \n"
    "vld1.u8 {q0}, [%0]! \n" // load row 4 pixels.
    "subs %3, %3, #4 \n" // 4 processed per loop
    "vtbl.8 d3, {d0, d1}, d2 \n" // look up 4 pixels
    "vst1.u32 {d3[0]}, [%1]! \n" // store 4.
    "bgt 1b \n"
    : "+r"(src_argb),   // %0
      "+r"(dst_bayer),  // %1
      "+r"(selector),   // %2
      "+r"(pix)         // %3
    :
    : "memory", "cc", "q0", "q1"  // Clobber List
  );
}
#endif // __ARM_NEON__
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment