Commit 42831e0a authored by fbarchard@google.com

Mirror a plane at a time so each can check cpu/alignment independently

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/370001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@148 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ba03e4d9
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 146
Version: 147
License: BSD
License File: LICENSE
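For context, here is a minimal usage sketch of the per-plane mirroring this change introduces. It is not part of the commit; it assumes the libyuv headers export I420Mirror (and that MirrorPlane is visible with the signature added in this change, which may not be the case in the public header at this revision), and the frame dimensions are purely illustrative.

// Minimal sketch (not part of this commit). Assumes libyuv/planar_functions.h
// declares I420Mirror, and that MirrorPlane is visible with the signature
// added in this change; dimensions and strides below are illustrative.
#include <cstdint>
#include <vector>
#include "libyuv/planar_functions.h"

void MirrorExample() {
  const int width = 64, height = 48;
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;

  std::vector<uint8_t> src_y(width * height), dst_y(width * height);
  std::vector<uint8_t> src_u(halfwidth * halfheight), dst_u(halfwidth * halfheight);
  std::vector<uint8_t> src_v(halfwidth * halfheight), dst_v(halfwidth * halfheight);

  // Mirror an I420 frame horizontally; each plane now picks its row function
  // independently based on CPU features and that plane's width/alignment.
  libyuv::I420Mirror(src_y.data(), width,
                     src_u.data(), halfwidth,
                     src_v.data(), halfwidth,
                     dst_y.data(), width,
                     dst_u.data(), halfwidth,
                     dst_v.data(), halfwidth,
                     width, height);

  // Or mirror a single plane directly.
  libyuv::MirrorPlane(src_y.data(), width, dst_y.data(), width, width, height);
}

The point of the change is visible in MirrorPlane in the diff below: the NEON/SSSE3/SSE2 row variants are selected per call, so the chroma planes no longer inherit the luma plane's CPU and alignment decision.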
@@ -278,27 +278,40 @@ int I420Copy(const uint8* src_y, int src_stride_y,
return 0;
}
// Copy ARGB with optional flipping
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
// Mirror a plane of data
void MirrorPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
void (*MirrorRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_NEON;
} else
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
MirrorRow = MirrorRow_SSSE3;
} else
#endif
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_SSE2;
} else
#endif
{
MirrorRow = MirrorRow_C;
}
CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width * 4, height);
return 0;
// Mirror plane
for (int y = 0; y < height; ++y) {
MirrorRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
}
}
// Mirror I420 with optional flipping
int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
@@ -311,13 +324,10 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
width <= 0 || height == 0) {
return -1;
}
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
int halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
@@ -325,60 +335,35 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
void (*ReverseRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_REVERSE_ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(width, 32)) {
ReverseRow = ReverseRow_NEON;
} else
#endif
#if defined(HAS_REVERSE_ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
ReverseRow = ReverseRow_SSSE3;
} else
#endif
#if defined(HAS_REVERSE_ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
ReverseRow = ReverseRow_SSE2;
} else
#endif
{
ReverseRow = ReverseRow_C;
}
// Y Plane
int y;
for (y = 0; y < height; ++y) {
ReverseRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (dst_y) {
MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
// U Plane
for (y = 0; y < halfheight; ++y) {
ReverseRow(src_u, dst_u, halfwidth);
src_u += src_stride_u;
dst_u += dst_stride_u;
MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
return 0;
}
// Copy ARGB with optional flipping
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// V Plane
for (y = 0; y < halfheight; ++y) {
ReverseRow(src_v, dst_v, halfwidth);
src_v += src_stride_v;
dst_v += dst_stride_v;
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width * 4, height);
return 0;
}
@@ -24,7 +24,7 @@ extern "C" {
!defined(YUV_DISABLE_ASM)
// Note static const preferred, but gives internal compiler error on gcc 4.2
// Shuffle table for reversing the bytes of UV channels.
uvec8 kShuffleReverseUV = {
uvec8 kShuffleMirrorUV = {
14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
};
@@ -47,7 +47,7 @@ uvec8 kShuffleReverseUV = {
#endif
#endif
typedef void (*reverse_uv_func)(const uint8*, uint8*, uint8*, int);
typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int);
typedef void (*rotate_uv_wx8_func)(const uint8*, int,
uint8*, int,
uint8*, int, int);
@@ -58,10 +58,10 @@ typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
#ifdef __ARM_NEON__
#define HAS_REVERSE_ROW_NEON
void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
#define HAS_REVERSE_ROW_UV_NEON
void ReverseRowUV_NEON(const uint8* src,
#define HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
#define HAS_MIRRORROW_UV_NEON
void MirrorRowUV_NEON(const uint8* src,
uint8* dst_a, uint8* dst_b,
int width);
#define HAS_TRANSPOSE_WX8_NEON
@@ -852,37 +852,37 @@ void RotatePlane270(const uint8* src, int src_stride,
void RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
void (*ReverseRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_REVERSE_ROW_NEON)
void (*MirrorRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ReverseRow = ReverseRow_NEON;
MirrorRow = MirrorRow_NEON;
} else
#endif
#if defined(HAS_REVERSE_ROW_SSSE3)
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ReverseRow = ReverseRow_SSSE3;
MirrorRow = MirrorRow_SSSE3;
} else
#endif
#if defined(HAS_REVERSE_ROW_SSE2)
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ReverseRow = ReverseRow_SSE2;
MirrorRow = MirrorRow_SSE2;
} else
#endif
{
ReverseRow = ReverseRow_C;
MirrorRow = MirrorRow_C;
}
// Rotate by 180 is a mirror and vertical flip
src += src_stride * (height - 1);
for (int y = 0; y < height; ++y) {
ReverseRow(src, dst, width);
MirrorRow(src, dst, width);
src -= src_stride;
dst += dst_stride;
}
@@ -1004,9 +1004,9 @@ void RotateUV270(const uint8* src, int src_stride,
}
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_REVERSE_ROW_UV_SSSE3
#define HAS_MIRRORROW_UV_SSSE3
__declspec(naked)
void ReverseRowUV_SSSE3(const uint8* src,
void MirrorRowUV_SSSE3(const uint8* src,
uint8* dst_a, uint8* dst_b,
int width) {
__asm {
@@ -1015,7 +1015,7 @@ __asm {
mov edx, [esp + 4 + 8] // dst_a
mov edi, [esp + 4 + 12] // dst_b
mov ecx, [esp + 4 + 16] // width
movdqa xmm5, kShuffleReverseUV
movdqa xmm5, kShuffleMirrorUV
lea eax, [eax + ecx * 2 - 16]
convertloop:
@@ -1035,8 +1035,8 @@ __asm {
#elif (defined(__i386__) || defined(__x86_64__)) && \
!defined(YUV_DISABLE_ASM)
#define HAS_REVERSE_ROW_UV_SSSE3
void ReverseRowUV_SSSE3(const uint8* src,
#define HAS_MIRRORROW_UV_SSSE3
void MirrorRowUV_SSSE3(const uint8* src,
uint8* dst_a, uint8* dst_b,
int width) {
intptr_t temp_width = static_cast<intptr_t>(width);
@@ -1057,7 +1057,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
"+r"(dst_a), // %1
"+r"(dst_b), // %2
"+r"(temp_width) // %3
: "m"(kShuffleReverseUV) // %4
: "m"(kShuffleMirrorUV) // %4
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm5"
@@ -1066,7 +1066,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
}
#endif
static void ReverseRowUV_C(const uint8* src,
static void MirrorRowUV_C(const uint8* src,
uint8* dst_a, uint8* dst_b,
int width) {
int i;
@@ -1083,29 +1083,29 @@ void RotateUV180(const uint8* src, int src_stride,
uint8* dst_b, int dst_stride_b,
int width, int height) {
int i;
reverse_uv_func ReverseRow;
mirror_uv_func MirrorRow;
#if defined(HAS_REVERSE_ROW_UV_NEON)
#if defined(HAS_MIRRORROW_UV_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ReverseRow = ReverseRowUV_NEON;
MirrorRow = MirrorRowUV_NEON;
} else
#endif
#if defined(HAS_REVERSE_ROW_UV_SSSE3)
#if defined(HAS_MIRRORROW_UV_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
ReverseRow = ReverseRowUV_SSSE3;
MirrorRow = MirrorRowUV_SSSE3;
} else
#endif
{
ReverseRow = ReverseRowUV_C;
MirrorRow = MirrorRowUV_C;
}
dst_a += dst_stride_a * (height - 1);
dst_b += dst_stride_b * (height - 1);
for (i = 0; i < height; ++i) {
ReverseRow(src, dst_a, dst_b, width);
MirrorRow(src, dst_a, dst_b, width);
src += src_stride; // down one line at a time
dst_a -= dst_stride_a; // nominally up one line at a time
@@ -19,7 +19,7 @@ extern "C" {
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
asm volatile (
// compute where to start writing destination
"add %1, %2 \n"
@@ -38,7 +38,7 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
"beq 2f \n"
// back of destination by the size of the register that is
// going to be reversed
// going to be mirrored
"sub %1, #16 \n"
// the loop needs to run on blocks of 16. what will be left
@@ -50,12 +50,12 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
"1: \n"
"vld1.8 {q0}, [%0]! \n" // src += 16
// reverse the bytes in the 64 bit segments. unable to reverse
// mirror the bytes in the 64 bit segments. unable to mirror
// the bytes in the entire 128 bits in one go.
"vrev64.8 q0, q0 \n"
// because of the inability to reverse the entire 128 bits
// reverse the writing out of the two 64 bit segments.
// because of the inability to mirror the entire 128 bits
// mirror the writing out of the two 64 bit segments.
"vst1.8 {d1}, [%1]! \n"
"vst1.8 {d0}, [%1], r3 \n" // dst -= 16
@@ -272,7 +272,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
);
}
void ReverseRowUV_NEON(const uint8* src,
void MirrorRowUV_NEON(const uint8* src,
uint8* dst_a, uint8* dst_b,
int width) {
asm volatile (
@@ -291,7 +291,7 @@ void ReverseRowUV_NEON(const uint8* src,
"mov r12, #-8 \n"
// back of destination by the size of the register that is
// going to be reversed
// going to be mirrored
"sub %1, #8 \n"
"sub %2, #8 \n"
@@ -304,7 +304,7 @@ void ReverseRowUV_NEON(const uint8* src,
"1: \n"
"vld2.8 {d0, d1}, [%0]! \n" // src += 16
// reverse the bytes in the 64 bit segments
// mirror the bytes in the 64 bit segments
"vrev64.8 q0, q0 \n"
"vst1.8 {d0}, [%1], r12 \n" // dst_a -= 8
@@ -39,8 +39,8 @@
#define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
#define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
#define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
#define HAS_REVERSE_ROW_SSSE3
#define HAS_REVERSE_ROW_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROW_SSE2
#endif
// The following are available on Windows platforms
@@ -58,7 +58,7 @@
// The following are available on Neon platforms
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_REVERSE_ROW_NEON
#define HAS_MIRRORROW_NEON
#define HAS_FASTCONVERTYUVTOARGBROW_NEON
#define HAS_FASTCONVERTYUVTOBGRAROW_NEON
#define HAS_FASTCONVERTYUVTOABGRROW_NEON
@@ -107,10 +107,10 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width);
void ReverseRow_SSE2(const uint8* src, uint8* dst, int width);
void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
void ReverseRow_C(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
@@ -360,7 +360,7 @@ void FastConvertYToARGBRow_C(const uint8* y_buf,
}
}
void ReverseRow_C(const uint8* src, uint8* dst, int width) {
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
src += width - 1;
for (int i = 0; i < width; ++i) {
dst[i] = src[0];
@@ -644,14 +644,14 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
}
#endif
#ifdef HAS_REVERSE_ROW_SSSE3
#ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes.
CONST uvec8 kShuffleReverse = {
CONST uvec8 kShuffleMirror = {
15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
};
void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = static_cast<intptr_t>(width);
asm volatile (
"movdqa %3,%%xmm5 \n"
@@ -666,7 +666,7 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(temp_width) // %2
: "m"(kShuffleReverse) // %3
: "m"(kShuffleMirror) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm5"
@@ -675,15 +675,15 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
}
#endif
#ifdef HAS_REVERSE_ROW_SSE2
#ifdef HAS_MIRRORROW_SSE2
void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = static_cast<intptr_t>(width);
asm volatile (
"lea -0x10(%0),%0 \n"
"1: \n"
"movdqa (%0,%2),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqu (%0,%2),%%xmm0 \n"
"movdqu %%xmm0,%%xmm1 \n"
"psllw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm1,%%xmm0 \n"
@@ -691,7 +691,7 @@ void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
"pshufhw $0x1b,%%xmm0,%%xmm0 \n"
"pshufd $0x4e,%%xmm0,%%xmm0 \n"
"sub $0x10,%2 \n"
"movdqa %%xmm0,(%1) \n"
"movdqu %%xmm0,(%1) \n"
"lea 0x10(%1),%1 \n"
"ja 1b \n"
: "+r"(src), // %0
@@ -1169,20 +1169,20 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
#endif
#endif
#ifdef HAS_REVERSE_ROW_SSSE3
#ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes.
static const uvec8 kShuffleReverse = {
static const uvec8 kShuffleMirror = {
15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
};
__declspec(naked)
void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
movdqa xmm5, kShuffleReverse
movdqa xmm5, kShuffleMirror
lea eax, [eax - 16]
convertloop:
movdqa xmm0, [eax + ecx]
@@ -1196,18 +1196,20 @@ __asm {
}
#endif
#ifdef HAS_REVERSE_ROW_SSE2
#ifdef HAS_MIRRORROW_SSE2
// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3
// version cannot.
__declspec(naked)
void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
lea eax, [eax - 16]
convertloop:
movdqa xmm0, [eax + ecx]
movdqa xmm1, xmm0 // swap bytes
movdqu xmm0, [eax + ecx]
movdqu xmm1, xmm0 // swap bytes
psllw xmm0, 8
psrlw xmm1, 8
por xmm0, xmm1
@@ -1215,7 +1217,7 @@ __asm {
pshufhw xmm0, xmm0, 0x1b
pshufd xmm0, xmm0, 0x4e // swap qwords
sub ecx, 16
movdqa [edx], xmm0
movdqu [edx], xmm0
lea edx, [edx + 16]
ja convertloop
ret