Commit 42831e0a authored by fbarchard@google.com

Mirror a plane at a time so each plane can check CPU features and alignment independently

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/370001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@148 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ba03e4d9
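
Why per-plane checks matter: with a single dispatch for the whole I420 frame, the strictest plane's width, pointers and strides constrained all three planes; checking each plane separately (as the diff below does) lets a Y plane whose width is a multiple of 16 keep the SIMD row even when the chroma half width is not, and vice versa. A minimal standalone C++ illustration of that width arithmetic (not part of the commit; the %16 test mirrors the IS_ALIGNED(width, 16) checks in the new code):

#include <stdio.h>

int main() {
  // Widths chosen to show the three cases.
  const int widths[] = { 1280, 1296, 1288 };
  for (int i = 0; i < 3; ++i) {
    const int width = widths[i];
    const int halfwidth = (width + 1) >> 1;  // chroma width for I420
    printf("width %4d: Y width multiple of 16: %d, U/V halfwidth %4d multiple of 16: %d\n",
           width, width % 16 == 0, halfwidth, halfwidth % 16 == 0);
  }
  return 0;
}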
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 146
+Version: 147
 License: BSD
 License File: LICENSE
......
@@ -278,27 +278,40 @@ int I420Copy(const uint8* src_y, int src_stride_y,
   return 0;
 }
 
-// Copy ARGB with optional flipping
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int width, int height) {
-  if (!src_argb ||
-      !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
-            width * 4, height);
-  return 0;
-}
-
+// Mirror a plane of data
+void MirrorPlane(const uint8* src_y, int src_stride_y,
+                 uint8* dst_y, int dst_stride_y,
+                 int width, int height) {
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_NEON;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
+    MirrorRow = MirrorRow_SSSE3;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_SSE2;
+  } else
+#endif
+  {
+    MirrorRow = MirrorRow_C;
+  }
+
+  // Mirror plane
+  for (int y = 0; y < height; ++y) {
+    MirrorRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
+
 // Mirror I420 with optional flipping
 int I420Mirror(const uint8* src_y, int src_stride_y,
                const uint8* src_u, int src_stride_u,
                const uint8* src_v, int src_stride_v,
@@ -311,13 +324,10 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
       width <= 0 || height == 0) {
     return -1;
   }
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
-    halfheight = (height + 1) >> 1;
+    int halfheight = (height + 1) >> 1;
     src_y = src_y + (height - 1) * src_stride_y;
     src_u = src_u + (halfheight - 1) * src_stride_u;
     src_v = src_v + (halfheight - 1) * src_stride_v;
@@ -325,60 +335,35 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
     src_stride_u = -src_stride_u;
     src_stride_v = -src_stride_v;
   }
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) &&
-      IS_ALIGNED(width, 32)) {
-    ReverseRow = ReverseRow_NEON;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSE2;
-  } else
-#endif
-  {
-    ReverseRow = ReverseRow_C;
-  }
-  // Y Plane
-  int y;
-  for (y = 0; y < height; ++y) {
-    ReverseRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-  }
-  // U Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_u, dst_u, halfwidth);
-    src_u += src_stride_u;
-    dst_u += dst_stride_u;
-  }
-  // V Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_v, dst_v, halfwidth);
-    src_v += src_stride_v;
-    dst_v += dst_stride_v;
-  }
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (dst_y) {
+    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+  return 0;
+}
+
+// Copy ARGB with optional flipping
+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
+             uint8* dst_argb, int dst_stride_argb,
+             int width, int height) {
+  if (!src_argb ||
+      !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+            width * 4, height);
   return 0;
 }
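
For reference, a hedged usage sketch of I420Mirror after this change. The dst half of the parameter list is cut off in the view above; the ordering used here (src pointer/stride pairs, then dst pairs, then width and height), the header path and the libyuv namespace are the conventional libyuv layout and should be treated as assumptions. Per the new "if (dst_y)" check above, passing NULL for dst_y would skip the Y plane.

#include "libyuv/planar_functions.h"  // assumed header for I420Mirror

// Mirror an I420 frame held in one contiguous buffer (Y, then U, then V).
void MirrorFrame(const unsigned char* src, unsigned char* dst,
                 int width, int height) {
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;
  const unsigned char* src_y = src;
  const unsigned char* src_u = src_y + width * height;
  const unsigned char* src_v = src_u + halfwidth * halfheight;
  unsigned char* dst_y = dst;
  unsigned char* dst_u = dst_y + width * height;
  unsigned char* dst_v = dst_u + halfwidth * halfheight;
  libyuv::I420Mirror(src_y, width, src_u, halfwidth, src_v, halfwidth,
                     dst_y, width, dst_u, halfwidth, dst_v, halfwidth,
                     width, height);
}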
......
@@ -24,7 +24,7 @@ extern "C" {
     !defined(YUV_DISABLE_ASM)
 // Note static const preferred, but gives internal compiler error on gcc 4.2
 // Shuffle table for reversing the bytes of UV channels.
-uvec8 kShuffleReverseUV = {
+uvec8 kShuffleMirrorUV = {
   14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
 };
@@ -47,7 +47,7 @@ uvec8 kShuffleReverseUV = {
 #endif
 #endif
 
-typedef void (*reverse_uv_func)(const uint8*, uint8*, uint8*, int);
+typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int);
 typedef void (*rotate_uv_wx8_func)(const uint8*, int,
                                    uint8*, int,
                                    uint8*, int, int);
@@ -58,10 +58,10 @@ typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
 typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
 
 #ifdef __ARM_NEON__
-#define HAS_REVERSE_ROW_NEON
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_REVERSE_ROW_UV_NEON
-void ReverseRowUV_NEON(const uint8* src,
+#define HAS_MIRRORROW_NEON
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+#define HAS_MIRRORROW_UV_NEON
+void MirrorRowUV_NEON(const uint8* src,
                       uint8* dst_a, uint8* dst_b,
                       int width);
 #define HAS_TRANSPOSE_WX8_NEON
@@ -852,37 +852,37 @@ void RotatePlane270(const uint8* src, int src_stride,
 void RotatePlane180(const uint8* src, int src_stride,
                     uint8* dst, int dst_stride,
                     int width, int height) {
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRow_NEON;
+    MirrorRow = MirrorRow_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
+#if defined(HAS_MIRRORROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
+    MirrorRow = MirrorRow_SSSE3;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSE2)
+#if defined(HAS_MIRRORROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSE2;
+    MirrorRow = MirrorRow_SSE2;
   } else
 #endif
   {
-    ReverseRow = ReverseRow_C;
+    MirrorRow = MirrorRow_C;
   }
 
   // Rotate by 180 is a mirror and vertical flip
   src += src_stride * (height - 1);
 
   for (int y = 0; y < height; ++y) {
-    ReverseRow(src, dst, width);
+    MirrorRow(src, dst, width);
     src -= src_stride;
     dst += dst_stride;
   }
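
The comment in RotatePlane180 above compresses the whole idea: rotating a plane by 180 degrees is a horizontal mirror of every row combined with a vertical flip of the row order, which is exactly the loop structure (start at the last source row, mirror it into the first destination row, walk the source up while the destination walks down). A small self-contained check of that identity (plain C++, not libyuv code):

#include <assert.h>

static void MirrorRowRef(const unsigned char* src, unsigned char* dst, int width) {
  for (int x = 0; x < width; ++x)
    dst[x] = src[width - 1 - x];
}

int main() {
  const int kW = 4, kH = 3;
  unsigned char src[kH * kW], dst[kH * kW];
  for (int i = 0; i < kH * kW; ++i)
    src[i] = static_cast<unsigned char>(i);

  // RotatePlane180 pattern: mirror each row, reading rows bottom-up.
  const unsigned char* s = src + kW * (kH - 1);
  unsigned char* d = dst;
  for (int y = 0; y < kH; ++y) {
    MirrorRowRef(s, d, kW);
    s -= kW;
    d += kW;
  }

  // Compare with the direct definition of a 180 degree rotation.
  for (int y = 0; y < kH; ++y)
    for (int x = 0; x < kW; ++x)
      assert(dst[y * kW + x] == src[(kH - 1 - y) * kW + (kW - 1 - x)]);
  return 0;
}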
@@ -1004,9 +1004,9 @@ void RotateUV270(const uint8* src, int src_stride,
 }
 
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
+#define HAS_MIRRORROW_UV_SSSE3
 __declspec(naked)
-void ReverseRowUV_SSSE3(const uint8* src,
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
 __asm {
@@ -1015,7 +1015,7 @@ __asm {
     mov edx, [esp + 4 + 8]   // dst_a
     mov edi, [esp + 4 + 12]  // dst_b
     mov ecx, [esp + 4 + 16]  // width
-    movdqa xmm5, kShuffleReverseUV
+    movdqa xmm5, kShuffleMirrorUV
     lea eax, [eax + ecx * 2 - 16]
 
  convertloop:
@@ -1035,8 +1035,8 @@ __asm {
 #elif (defined(__i386__) || defined(__x86_64__)) && \
     !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
-void ReverseRowUV_SSSE3(const uint8* src,
+#define HAS_MIRRORROW_UV_SSSE3
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
@@ -1057,7 +1057,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
     "+r"(dst_a),       // %1
     "+r"(dst_b),       // %2
     "+r"(temp_width)   // %3
-  : "m"(kShuffleReverseUV)  // %4
+  : "m"(kShuffleMirrorUV)   // %4
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm5"
@@ -1066,7 +1066,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
 }
 #endif
 
-static void ReverseRowUV_C(const uint8* src,
+static void MirrorRowUV_C(const uint8* src,
                            uint8* dst_a, uint8* dst_b,
                            int width) {
   int i;
@@ -1083,29 +1083,29 @@ void RotateUV180(const uint8* src, int src_stride,
                  uint8* dst_b, int dst_stride_b,
                  int width, int height) {
   int i;
-  reverse_uv_func ReverseRow;
-#if defined(HAS_REVERSE_ROW_UV_NEON)
+  mirror_uv_func MirrorRow;
+#if defined(HAS_MIRRORROW_UV_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRowUV_NEON;
+    MirrorRow = MirrorRowUV_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_UV_SSSE3)
+#if defined(HAS_MIRRORROW_UV_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
-    ReverseRow = ReverseRowUV_SSSE3;
+    MirrorRow = MirrorRowUV_SSSE3;
   } else
 #endif
   {
-    ReverseRow = ReverseRowUV_C;
+    MirrorRow = MirrorRowUV_C;
   }
 
   dst_a += dst_stride_a * (height - 1);
   dst_b += dst_stride_b * (height - 1);
 
   for (i = 0; i < height; ++i) {
-    ReverseRow(src, dst_a, dst_b, width);
+    MirrorRow(src, dst_a, dst_b, width);
     src += src_stride;      // down one line at a time
     dst_a -= dst_stride_a;  // nominally up one line at a time
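
In RotateUV180 the row function works on interleaved UV input: the kShuffleMirrorUV table above (14, 12, ..., 0, then 15, 13, ..., 1) both de-interleaves the pairs and reverses their order, so one pass writes a mirrored U row and a mirrored V row. A scalar sketch of that per-row behavior as read from the shuffle table and the store pattern (illustrative only, not the library's C fallback verbatim):

// src holds `width` interleaved pairs: U0 V0 U1 V1 ...
// dst_a receives the U samples in reverse order, dst_b the V samples.
void MirrorRowUV_Sketch(const unsigned char* src,
                        unsigned char* dst_a, unsigned char* dst_b,
                        int width) {
  for (int i = 0; i < width; ++i) {
    dst_a[i] = src[2 * (width - 1 - i) + 0];  // mirrored U
    dst_b[i] = src[2 * (width - 1 - i) + 1];  // mirrored V
  }
}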
......
@@ -19,7 +19,7 @@ extern "C" {
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
   asm volatile (
     // compute where to start writing destination
     "add %1, %2 \n"
@@ -38,7 +38,7 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
     "beq 2f \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub %1, #16 \n"
 
     // the loop needs to run on blocks of 16. what will be left
@@ -50,12 +50,12 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
   "1: \n"
     "vld1.8 {q0}, [%0]! \n"  // src += 16
 
-    // reverse the bytes in the 64 bit segments. unable to reverse
+    // mirror the bytes in the 64 bit segments. unable to mirror
     // the bytes in the entire 128 bits in one go.
     "vrev64.8 q0, q0 \n"
 
-    // because of the inability to reverse the entire 128 bits
-    // reverse the writing out of the two 64 bit segments.
+    // because of the inability to mirror the entire 128 bits
+    // mirror the writing out of the two 64 bit segments.
     "vst1.8 {d1}, [%1]! \n"
     "vst1.8 {d0}, [%1], r3 \n"  // dst -= 16
@@ -272,7 +272,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
   );
 }
 
-void ReverseRowUV_NEON(const uint8* src,
+void MirrorRowUV_NEON(const uint8* src,
                        uint8* dst_a, uint8* dst_b,
                        int width) {
   asm volatile (
@@ -291,7 +291,7 @@ void ReverseRowUV_NEON(const uint8* src,
     "mov r12, #-8 \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub %1, #8 \n"
     "sub %2, #8 \n"
@@ -304,7 +304,7 @@ void ReverseRowUV_NEON(const uint8* src,
   "1: \n"
     "vld2.8 {d0, d1}, [%0]! \n"  // src += 16
 
-    // reverse the bytes in the 64 bit segments
+    // mirror the bytes in the 64 bit segments
     "vrev64.8 q0, q0 \n"
 
     "vst1.8 {d0}, [%1], r12 \n"  // dst_a -= 8
......
@@ -39,8 +39,8 @@
 #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
 #define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
-#define HAS_REVERSE_ROW_SSSE3
-#define HAS_REVERSE_ROW_SSE2
+#define HAS_MIRRORROW_SSSE3
+#define HAS_MIRRORROW_SSE2
 #endif
 
 // The following are available on Windows platforms
@@ -58,7 +58,7 @@
 // The following are available on Neon platforms
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_NEON
+#define HAS_MIRRORROW_NEON
 #define HAS_FASTCONVERTYUVTOARGBROW_NEON
 #define HAS_FASTCONVERTYUVTOBGRAROW_NEON
 #define HAS_FASTCONVERTYUVTOABGRROW_NEON
@@ -107,10 +107,10 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
 void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width);
 
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width);
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width);
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
-void ReverseRow_C(const uint8* src, uint8* dst, int width);
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+void MirrorRow_C(const uint8* src, uint8* dst, int width);
 
 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
 void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
......
@@ -360,7 +360,7 @@ void FastConvertYToARGBRow_C(const uint8* y_buf,
   }
 }
 
-void ReverseRow_C(const uint8* src, uint8* dst, int width) {
+void MirrorRow_C(const uint8* src, uint8* dst, int width) {
   src += width - 1;
   for (int i = 0; i < width; ++i) {
     dst[i] = src[0];
......
@@ -644,14 +644,14 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSSE3
+#ifdef HAS_MIRRORROW_SSSE3
 // Shuffle table for reversing the bytes.
-CONST uvec8 kShuffleReverse = {
+CONST uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "movdqa %3,%%xmm5 \n"
@@ -666,7 +666,7 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
   : "+r"(src),        // %0
     "+r"(dst),        // %1
     "+r"(temp_width)  // %2
-  : "m"(kShuffleReverse)  // %3
+  : "m"(kShuffleMirror)   // %3
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm5"
@@ -675,15 +675,15 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+#ifdef HAS_MIRRORROW_SSE2
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "lea -0x10(%0),%0 \n"
   "1: \n"
-  "movdqa (%0,%2),%%xmm0 \n"
-  "movdqa %%xmm0,%%xmm1 \n"
+  "movdqu (%0,%2),%%xmm0 \n"
+  "movdqu %%xmm0,%%xmm1 \n"
   "psllw $0x8,%%xmm0 \n"
   "psrlw $0x8,%%xmm1 \n"
   "por %%xmm1,%%xmm0 \n"
@@ -691,7 +691,7 @@ void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
   "pshufhw $0x1b,%%xmm0,%%xmm0 \n"
   "pshufd $0x4e,%%xmm0,%%xmm0 \n"
   "sub $0x10,%2 \n"
-  "movdqa %%xmm0,(%1) \n"
+  "movdqu %%xmm0,(%1) \n"
   "lea 0x10(%1),%1 \n"
   "ja 1b \n"
   : "+r"(src),        // %0
......
@@ -1169,20 +1169,20 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
 #endif
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSSE3
+#ifdef HAS_MIRRORROW_SSSE3
 // Shuffle table for reversing the bytes.
-static const uvec8 kShuffleReverse = {
+static const uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
 __declspec(naked)
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 __asm {
     mov eax, [esp + 4]   // src
     mov edx, [esp + 8]   // dst
     mov ecx, [esp + 12]  // width
-    movdqa xmm5, kShuffleReverse
+    movdqa xmm5, kShuffleMirror
     lea eax, [eax - 16]
 
  convertloop:
    movdqa xmm0, [eax + ecx]
@@ -1196,18 +1196,20 @@ __asm {
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
+#ifdef HAS_MIRRORROW_SSE2
+// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3
+// version can not.
 __declspec(naked)
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
 __asm {
     mov eax, [esp + 4]   // src
     mov edx, [esp + 8]   // dst
    mov ecx, [esp + 12]  // width
    lea eax, [eax - 16]
 
  convertloop:
-    movdqa xmm0, [eax + ecx]
-    movdqa xmm1, xmm0  // swap bytes
+    movdqu xmm0, [eax + ecx]
+    movdqu xmm1, xmm0  // swap bytes
    psllw xmm0, 8
    psrlw xmm1, 8
    por xmm0, xmm1
@@ -1215,7 +1217,7 @@ __asm {
    pshufhw xmm0, xmm0, 0x1b
    pshufd xmm0, xmm0, 0x4e  // swap qwords
    sub ecx, 16
-    movdqa [edx], xmm0
+    movdqu [edx], xmm0
    lea edx, [edx + 16]
    ja convertloop
    ret
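
The SSE2 MirrorRow above gets by without pshufb through three shuffles: swap the two bytes inside each 16-bit word (psllw, psrlw, por), reverse the word order inside each 64-bit half (pshuflw and pshufhw with 0x1b; the pshuflw step falls in lines this view elides), then swap the two halves (pshufd 0x4e). Because it loads and stores with movdqu it also tolerates unaligned buffers, which is why its dispatch only has to check the width. A scalar check that the three steps add up to a full 16-byte reversal (illustrative, not the intrinsic code):

#include <assert.h>

int main() {
  unsigned char in[16], step1[16], step2[16], step3[16];
  for (int i = 0; i < 16; ++i)
    in[i] = static_cast<unsigned char>(i);

  // Step 1 (psllw/psrlw/por): swap the two bytes inside each 16-bit word.
  for (int i = 0; i < 8; ++i) {
    step1[2 * i] = in[2 * i + 1];
    step1[2 * i + 1] = in[2 * i];
  }
  // Step 2 (pshuflw/pshufhw, 0x1b): reverse the word order in each 64-bit half.
  for (int half = 0; half < 2; ++half) {
    for (int i = 0; i < 4; ++i) {
      step2[half * 8 + 2 * i] = step1[half * 8 + 2 * (3 - i)];
      step2[half * 8 + 2 * i + 1] = step1[half * 8 + 2 * (3 - i) + 1];
    }
  }
  // Step 3 (pshufd, 0x4e): swap the two 64-bit halves.
  for (int i = 0; i < 8; ++i) {
    step3[i] = step2[8 + i];
    step3[8 + i] = step2[i];
  }
  // Net effect: the 16 bytes come out fully reversed.
  for (int i = 0; i < 16; ++i)
    assert(step3[i] == in[15 - i]);
  return 0;
}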
......