Commit 3de12ae1 authored by frkoenig@google.com

I420 and NV12 rotate functions.

Consolidate rotate files.  Add unit tests for I420
and NV12 rotate functions.  Fix remaining pitch/stride
references.
Review URL: http://webrtc-codereview.appspot.com/239001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@32 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent a1280730
...@@ -20,14 +20,6 @@ ...@@ -20,14 +20,6 @@
namespace libyuv { namespace libyuv {
// Supported rotation
// Selects the rotation applied by I420Rotate. Each enumerator's numeric
// value is the rotation angle it names (0, 90, 180 or 270).
enum RotationMode {
kRotateNone = 0,  // no rotation; I420Rotate copies the frame
kRotateClockwise = 90,  // I420Rotate dispatches to the 90 rotation
kRotateCounterClockwise = 270,  // I420Rotate dispatches to the 270 rotation
kRotate180 = 180,  // I420Rotate dispatches to the 180 rotation
};
// I420 mirror // I420 mirror
int int
I420Mirror(const uint8* src_yplane, int src_ystride, I420Mirror(const uint8* src_yplane, int src_ystride,
...@@ -50,17 +42,6 @@ I420Crop(uint8* frame, ...@@ -50,17 +42,6 @@ I420Crop(uint8* frame,
int src_width, int src_height, int src_width, int src_height,
int dst_width, int dst_height); int dst_width, int dst_height);
// Rotate I420 frame
// Rotates (or, for kRotateNone, copies) the three source planes into the
// three destination planes according to |mode|.
// Each plane is described by a pointer and a stride; |width| and |height|
// describe the source frame.
// Returns 0 after a rotation, the result of I420Copy for kRotateNone, and
// -1 when |mode| is not one of the RotationMode enumerators.
int
I420Rotate(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_yplane, int dst_ystride,
uint8* dst_uplane, int dst_ustride,
uint8* dst_vplane, int dst_vstride,
int width, int height,
RotationMode mode);
} // namespace libyuv } // namespace libyuv
#endif // INCLUDE_LIBYUV_GENERAL_H_ #endif // INCLUDE_LIBYUV_GENERAL_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_H_
#define INCLUDE_LIBYUV_ROTATE_H_
#include "libyuv/basic_types.h"
namespace libyuv {
// Supported rotation
// Selects the rotation applied by the *Rotate functions below. Each
// enumerator's numeric value is the rotation angle it names.
enum RotationMode {
kRotateNone = 0,  // no rotation; the frame is only copied/converted
kRotateClockwise = 90,  // dispatches to the 90 plane rotation
kRotateCounterClockwise = 270,  // dispatches to the 270 plane rotation
kRotate180 = 180,  // dispatches to the 180 plane rotation
};
// Rotate I420 frame
// |width| and |height| describe the source luma plane; the U and V planes
// are processed at ((width + 1) / 2) x ((height + 1) / 2).
// A negative |height| makes the source be read bottom-up (inverted image).
// Returns 0 after a rotation, the result of I420Copy for kRotateNone, and
// -1 when |mode| is not one of the RotationMode enumerators.
int
I420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
RotationMode mode);
// Split a NV12 input buffer into Y, U, V buffers and
// then rotate the buffers.
// |src_uv| is the interleaved chroma plane; its samples are written
// alternately to |dst_u| and |dst_v|.
// |width| and |height| describe the source luma plane; a negative |height|
// makes the source be read bottom-up (inverted image).
// Returns 0 after a rotation, the result of NV12ToI420 for kRotateNone, and
// -1 when |mode| is not one of the RotationMode enumerators.
int
NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
RotationMode mode);
} // namespace libyuv
#endif // INCLUDE_LIBYUV_ROTATE_H_
...@@ -44,7 +44,6 @@ ...@@ -44,7 +44,6 @@
'source/general.cc', 'source/general.cc',
'source/planar_functions.cc', 'source/planar_functions.cc',
'source/rotate.cc', 'source/rotate.cc',
'source/rotate_deinterleave.cc',
'source/row_table.cc', 'source/row_table.cc',
'source/scale.cc', 'source/scale.cc',
'source/video_common.cc', 'source/video_common.cc',
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include <string.h> // memcpy(), memset() #include <string.h> // memcpy(), memset()
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "rotate.h"
namespace libyuv { namespace libyuv {
...@@ -282,66 +281,4 @@ I420CropPad(const uint8* src_frame, int src_width, ...@@ -282,66 +281,4 @@ I420CropPad(const uint8* src_frame, int src_width,
return 0; return 0;
} }
// Rotates an I420 frame by |mode|, or copies it for kRotateNone.
//
// |width| and |height| are the dimensions of the source Y plane. The U and V
// planes of an I420 frame are subsampled by two in both directions, so they
// are rotated with the rounded-up half dimensions; rotating them with the
// full luma size would read and write past the end of the chroma planes.
//
// Returns 0 after a rotation, the result of I420Copy for kRotateNone, and
// -1 when |mode| is not a RotationMode enumerator.
int
I420Rotate(const uint8* src_yplane, int src_ystride,
           const uint8* src_uplane, int src_ustride,
           const uint8* src_vplane, int src_vstride,
           uint8* dst_yplane, int dst_ystride,
           uint8* dst_uplane, int dst_ustride,
           uint8* dst_vplane, int dst_vstride,
           int width, int height,
           RotationMode mode) {
  // Chroma plane dimensions, rounded up for odd luma sizes.
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;

  switch (mode) {
    case kRotateNone:
      // copy frame
      return I420Copy(src_yplane, src_ystride,
                      src_uplane, src_ustride,
                      src_vplane, src_vstride,
                      dst_yplane, dst_ystride,
                      dst_uplane, dst_ustride,
                      dst_vplane, dst_vstride,
                      width, height);
    case kRotateClockwise:
      Rotate90(src_yplane, src_ystride,
               dst_yplane, dst_ystride,
               width, height);
      Rotate90(src_uplane, src_ustride,
               dst_uplane, dst_ustride,
               halfwidth, halfheight);
      Rotate90(src_vplane, src_vstride,
               dst_vplane, dst_vstride,
               halfwidth, halfheight);
      return 0;
    case kRotateCounterClockwise:
      Rotate270(src_yplane, src_ystride,
                dst_yplane, dst_ystride,
                width, height);
      Rotate270(src_uplane, src_ustride,
                dst_uplane, dst_ustride,
                halfwidth, halfheight);
      Rotate270(src_vplane, src_vstride,
                dst_vplane, dst_vstride,
                halfwidth, halfheight);
      return 0;
    case kRotate180:
      Rotate180(src_yplane, src_ystride,
                dst_yplane, dst_ystride,
                width, height);
      Rotate180(src_uplane, src_ustride,
                dst_uplane, dst_ustride,
                halfwidth, halfheight);
      Rotate180(src_vplane, src_vstride,
                dst_vplane, dst_vstride,
                halfwidth, halfheight);
      return 0;
    default:
      return -1;
  }
}
} // namespace libyuv } // namespace libyuv
...@@ -8,107 +8,135 @@ ...@@ -8,107 +8,135 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "rotate.h" #include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "rotate_priv.h"
namespace libyuv { namespace libyuv {
typedef void (*reverse_uv_func)(const uint8*, uint8*, uint8*, int);
typedef void (*reverse_func)(const uint8*, uint8*, int); typedef void (*reverse_func)(const uint8*, uint8*, int);
typedef void (*rotate_wx8func)(const uint8*, int, uint8*, int, int); typedef void (*rotate_uv_wx8_func)(const uint8*, int,
typedef void (*rotate_wxhfunc)(const uint8*, int, uint8*, int, int, int); uint8*, int,
uint8*, int, int);
typedef void (*rotate_uv_wxh_func)(const uint8*, int,
uint8*, int,
uint8*, int, int, int);
typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
#ifdef __ARM_NEON__ #ifdef __ARM_NEON__
extern "C" { extern "C" {
void RestoreRegisters_NEON(unsigned long long *restore);
void SaveRegisters_NEON(unsigned long long *store);
void ReverseLine_NEON(const uint8* src, uint8* dst, int width); void ReverseLine_NEON(const uint8* src, uint8* dst, int width);
void Transpose_wx8_NEON(const uint8* src, int src_stride, void ReverseLineUV_NEON(const uint8* src,
uint8* dst, int dst_stride, int width); uint8* dst_a, uint8* dst_b,
int width);
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width);
} // extern "C" } // extern "C"
#endif #endif
static void Transpose_wx8_C(const uint8* src, int src_stride, static void TransposeWx8_C(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int w) { int w) {
int i, j; int i, j;
for (i = 0; i < w; ++i) for (i = 0; i < w; ++i)
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
dst[i * dst_stride + j] = src[j * src_stride + i]; dst[i * dst_stride + j] = src[j * src_stride + i];
} }
static void Transpose_wxh_C(const uint8* src, int src_stride, static void TransposeWxH_C(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height) { int width, int height) {
int i, j; int i, j;
for (i = 0; i < width; ++i) for (i = 0; i < width; ++i)
for (j = 0; j < height; ++j) for (j = 0; j < height; ++j)
dst[i * dst_stride + j] = src[j * src_stride + i]; dst[i * dst_stride + j] = src[j * src_stride + i];
} }
void Transpose(const uint8* src, int src_stride, void TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height) { int width, int height) {
int i = height; int i = height;
rotate_wx8func Transpose_wx8; rotate_wx8_func TransposeWx8;
rotate_wxhfunc Transpose_wxh; rotate_wxh_func TransposeWxH;
// do processor detection here. // do processor detection here.
#ifdef __ARM_NEON__ #ifdef __ARM_NEON__
Transpose_wx8 = Transpose_wx8_NEON; TransposeWx8 = TransposeWx8_NEON;
Transpose_wxh = Transpose_wxh_C; TransposeWxH = TransposeWxH_C;
#else #else
Transpose_wx8 = Transpose_wx8_C; TransposeWx8 = TransposeWx8_C;
Transpose_wxh = Transpose_wxh_C; TransposeWxH = TransposeWxH_C;
#endif #endif
// work across the source in 8x8 tiles // work across the source in 8x8 tiles
do { while (i >= 8) {
Transpose_wx8(src, src_stride, dst, dst_stride, width); TransposeWx8(src, src_stride, dst, dst_stride, width);
src += 8 * src_stride; src += 8 * src_stride; // go down 8 rows
dst += 8; dst += 8; // move over 8 columns
i -= 8; i -= 8;
} while (i >= 8); }
// TODO(frkoenig): Have wx4 and maybe wx2 TransposeWxH(src, src_stride, dst, dst_stride, width, i);
Transpose_wxh(src, src_stride, dst, dst_stride, width, i);
} }
void Rotate90(const uint8* src, int src_stride, void RotatePlane90(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height) { int width, int height) {
src += src_stride*(height-1); // Rotate by 90 is a transpose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride; src_stride = -src_stride;
Transpose(src, src_stride, dst, dst_stride, width, height); TransposePlane(src, src_stride, dst, dst_stride, width, height);
} }
void Rotate270(const uint8* src, int src_stride, void RotatePlane270(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height) { int width, int height) {
dst += dst_stride*(width-1); // Rotate by 270 is a transpose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride; dst_stride = -dst_stride;
Transpose(src, src_stride, dst, dst_stride, width, height); TransposePlane(src, src_stride, dst, dst_stride, width, height);
} }
void ReverseLine_C(const uint8* src, uint8* dst, int width) { void ReverseLine_C(const uint8* src, uint8* dst, int width) {
int i; int i;
for (i = 0; i < width; ++i) src += width;
dst[width-1 - i] = src[i]; for (i = 0; i < width; ++i) {
--src;
dst[i] = src[0];
}
} }
void Rotate180(const uint8* src, int src_stride, void RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height) { int width, int height) {
int i; int i;
reverse_func ReverseLine; reverse_func ReverseLine;
// do processor detection here. // TODO(frkoenig): do processor detection here.
#ifdef __ARM_NEON__ #ifdef __ARM_NEON__
ReverseLine = ReverseLine_NEON; ReverseLine = ReverseLine_NEON;
#else #else
ReverseLine = ReverseLine_C; ReverseLine = ReverseLine_C;
#endif #endif
dst += dst_stride*(height-1); // Rotate by 180 is a mirror with the destination
// written in reverse.
dst += dst_stride * (height - 1);
for (i = 0; i < height; ++i) { for (i = 0; i < height; ++i) {
ReverseLine(src, dst, width); ReverseLine(src, dst, width);
...@@ -118,4 +146,269 @@ void Rotate180(const uint8* src, int src_stride, ...@@ -118,4 +146,269 @@ void Rotate180(const uint8* src, int src_stride,
} }
} }
static void TransposeUVWx8_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int w) {
int i, j;
for (i = 0; i < w * 2; i += 2)
for (j = 0; j < 8; ++j) {
dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
}
}
static void TransposeUVWxH_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int w, int h) {
int i, j;
for (i = 0; i < w*2; i += 2)
for (j = 0; j < h; ++j) {
dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
}
}
void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
int i = height;
rotate_uv_wx8_func TransposeWx8;
rotate_uv_wxh_func TransposeWxH;
// do processor detection here.
#ifdef __ARM_NEON__
unsigned long long store_reg[8];
SaveRegisters_NEON(store_reg);
TransposeWx8 = TransposeUVWx8_NEON;
TransposeWxH = TransposeUVWxH_C;
#else
TransposeWx8 = TransposeUVWx8_C;
TransposeWxH = TransposeUVWxH_C;
#endif
// work through the source in 8x8 tiles
while (i >= 8) {
TransposeWx8(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width);
src += 8 * src_stride; // go down 8 rows
dst_a += 8; // move over 8 columns
dst_b += 8; // move over 8 columns
i -= 8;
}
TransposeWxH(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width, i);
#ifdef __ARM_NEON__
RestoreRegisters_NEON(store_reg);
#endif
}
void RotateUV90(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposeUV(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width, height);
}
void RotateUV270(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
dst_a += dst_stride_a * (width - 1);
dst_b += dst_stride_b * (width - 1);
dst_stride_a = -dst_stride_a;
dst_stride_b = -dst_stride_b;
TransposeUV(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width, height);
}
// Reverses one row of |width| interleaved byte pairs while splitting it:
// the last source pair supplies dst_a[0]/dst_b[0], the first source pair
// supplies dst_a[width - 1]/dst_b[width - 1].
static void ReverseLineUV_C(const uint8* src,
                            uint8* dst_a, uint8* dst_b,
                            int width) {
  for (int x = 0; x < width; ++x) {
    const int pair = 2 * (width - 1 - x);  // byte offset of the mirrored pair
    dst_a[x] = src[pair];
    dst_b[x] = src[pair + 1];
  }
}
void RotateUV180(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
int i;
reverse_uv_func ReverseLine;
// TODO(frkoenig) : do processor detection here.
#ifdef __ARM_NEON__
ReverseLine = ReverseLineUV_NEON;
#else
ReverseLine = ReverseLineUV_C;
#endif
dst_a += dst_stride_a * (height - 1);
dst_b += dst_stride_b * (height - 1);
for (i = 0; i < height; ++i) {
ReverseLine(src, dst_a, dst_b, width);
src += src_stride; // down one line at a time
dst_a -= dst_stride_a; // nominally up one line at a time
dst_b -= dst_stride_b; // nominally up one line at a time
}
}
// Rotates an I420 frame by |mode|, or copies it for kRotateNone.
//
// |width| and |height| describe the source Y plane; the U and V planes are
// processed at the rounded-up half dimensions. A negative |height| means the
// source image is inverted: it is then read from its last row upwards.
//
// Returns 0 after a rotation and the result of I420Copy for kRotateNone.
// Returns -1 when a plane pointer is null, |width| is not positive,
// |height| is zero, or |mode| is not a RotationMode enumerator.
int I420Rotate(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height,
               RotationMode mode) {
  // Reject arguments that would otherwise be fed straight into pointer
  // arithmetic by the plane routines.
  if (!src_y || !src_u || !src_v ||
      !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    const int src_halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (src_halfheight - 1) * src_stride_u;
    src_v = src_v + (src_halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // Chroma dimensions, computed once height is known to be positive.
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;

  switch (mode) {
    case kRotateNone:
      // copy frame
      return I420Copy(src_y, src_stride_y,
                      src_u, src_stride_u,
                      src_v, src_stride_v,
                      dst_y, dst_stride_y,
                      dst_u, dst_stride_u,
                      dst_v, dst_stride_v,
                      width, height);
    case kRotateClockwise:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotatePlane90(src_u, src_stride_u,
                    dst_u, dst_stride_u,
                    halfwidth, halfheight);
      RotatePlane90(src_v, src_stride_v,
                    dst_v, dst_stride_v,
                    halfwidth, halfheight);
      return 0;
    case kRotateCounterClockwise:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane270(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane270(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane180(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane180(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}
// Converts an NV12 frame (Y plane plus one interleaved chroma plane) to
// I420 while rotating it by |mode|.
//
// |width| and |height| describe the source Y plane; the chroma plane is
// processed at the rounded-up half dimensions, with the first byte of each
// chroma pair going to |dst_u| and the second to |dst_v|. A negative
// |height| means the source image is inverted: it is then read from its
// last row upwards.
//
// Returns 0 after a rotation and the result of NV12ToI420 for kRotateNone.
// Returns -1 when a plane pointer is null, |width| is not positive,
// |height| is zero, or |mode| is not a RotationMode enumerator.
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
                     const uint8* src_uv, int src_stride_uv,
                     uint8* dst_y, int dst_stride_y,
                     uint8* dst_u, int dst_stride_u,
                     uint8* dst_v, int dst_stride_v,
                     int width, int height,
                     RotationMode mode) {
  // Reject arguments that would otherwise be fed straight into pointer
  // arithmetic by the plane routines.
  if (!src_y || !src_uv ||
      !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    const int src_halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_uv = src_uv + (src_halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }

  // Chroma dimensions, computed once height is known to be positive.
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;

  switch (mode) {
    case kRotateNone:
      // copy frame
      // NOTE(review): src_stride_uv is not passed here; this looks like
      // NV12ToI420 takes a single source stride for both planes, which
      // would be wrong when the two strides differ - confirm against
      // planar_functions.h.
      return NV12ToI420(src_y, src_uv, src_stride_y,
                        dst_y, dst_stride_y,
                        dst_u, dst_stride_u,
                        dst_v, dst_stride_v,
                        width, height);
    case kRotateClockwise:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotateUV90(src_uv, src_stride_uv,
                 dst_u, dst_stride_u,
                 dst_v, dst_stride_v,
                 halfwidth, halfheight);
      return 0;
    case kRotateCounterClockwise:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV270(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV180(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}
} // namespace libyuv } // namespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rotate.h"
namespace libyuv {
// Reverses one interleaved row into two output rows:
// (src, dst_a, dst_b, width).
typedef void (*reverse_func)(const uint8*, uint8*, uint8*, int);
// Transposes an 8-row strip of interleaved data into two outputs:
// (src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width).
typedef void (*rotate_wx8func)(const uint8*, int,
uint8*, int,
uint8*, int, int);
// Transposes an arbitrary number of rows of interleaved data into two
// outputs:
// (src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width, height).
typedef void (*rotate_wxhfunc)(const uint8*, int,
uint8*, int,
uint8*, int, int, int);
#ifdef __ARM_NEON__
// Routines implemented in NEON assembly; only declared when building for
// NEON.
extern "C" {
// Reloads NEON registers from the 8-element buffer filled by
// SaveRegisters_NEON.
void RestoreRegisters_NEON(unsigned long long *restore);
// NEON counterpart of ReverseLine_di_C.
void ReverseLine_di_NEON(const uint8* src,
uint8* dst_a, uint8* dst_b,
int width);
// Stores NEON registers into |store|; callers in this file pass an
// 8-element buffer.
void SaveRegisters_NEON(unsigned long long *store);
// NEON counterpart of Transpose_di_wx8_C.
void Transpose_di_wx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width);
} // extern "C"
#endif
static void Transpose_di_wx8_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int w) {
int i, j;
for (i = 0; i < w*2; i += 2)
for (j = 0; j < 8; ++j) {
dst_a[j + (i>>1)*dst_stride_a] = src[i + j*src_stride];
dst_b[j + (i>>1)*dst_stride_b] = src[i + j*src_stride + 1];
}
}
static void Transpose_di_wxh_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int w, int h) {
int i, j;
for (i = 0; i < w*2; i += 2)
for (j = 0; j < h; ++j) {
dst_a[j + (i>>1)*dst_stride_a] = src[i + j*src_stride];
dst_b[j + (i>>1)*dst_stride_b] = src[i + j*src_stride + 1];
}
}
// Transposes an interleaved two-channel plane into two separate planes.
//
// |width| is halved before use, so the helpers receive a count of byte
// pairs; presumably callers pass the interleaved row width in bytes.
// |height| is the number of source rows. The source is consumed in strips
// of 8 rows; the remaining rows (fewer than 8, possibly none) are handled
// by the generic wxh routine.
void Transpose_deinterleave(const uint8* src, int src_stride,
                            uint8* dst_a, int dst_stride_a,
                            uint8* dst_b, int dst_stride_b,
                            int width, int height) {
  int i = height;
  rotate_wx8func Transpose_wx8;
  rotate_wxhfunc Transpose_wxh;

  // do processor detection here.
#ifdef __ARM_NEON__
  unsigned long long store_reg[8];
  SaveRegisters_NEON(store_reg);
  Transpose_wx8 = Transpose_di_wx8_NEON;
  Transpose_wxh = Transpose_di_wxh_C;
#else
  Transpose_wx8 = Transpose_di_wx8_C;
  Transpose_wxh = Transpose_di_wxh_C;
#endif

  width >>= 1;

  // work across the source in 8x8 tiles
  // The row count is tested before each strip: a do/while here would
  // process a full 8-row strip even when fewer than 8 rows exist, reading
  // past the source and leaving a negative count for the tail call.
  while (i >= 8) {
    Transpose_wx8(src, src_stride,
                  dst_a, dst_stride_a,
                  dst_b, dst_stride_b,
                  width);
    src += 8 * src_stride;  // go down 8 rows
    dst_a += 8;             // move over 8 columns
    dst_b += 8;             // move over 8 columns
    i -= 8;
  }

  Transpose_wxh(src, src_stride,
                dst_a, dst_stride_a,
                dst_b, dst_stride_b,
                width, i);

#ifdef __ARM_NEON__
  RestoreRegisters_NEON(store_reg);
#endif
}
void Rotate90_deinterleave(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
src += src_stride*(height-1);
src_stride = -src_stride;
Transpose_deinterleave(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width, height);
}
void Rotate270_deinterleave(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
dst_a += dst_stride_a*((width>>1)-1);
dst_b += dst_stride_b*((width>>1)-1);
dst_stride_a = -dst_stride_a;
dst_stride_b = -dst_stride_b;
Transpose_deinterleave(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width, height);
}
// Reverses one row of |width| interleaved byte pairs while splitting it:
// source pair x is written to index (width - 1 - x) of dst_a (first byte)
// and dst_b (second byte).
static void ReverseLine_di_C(const uint8* src,
                             uint8* dst_a, uint8* dst_b,
                             int width) {
  for (int x = 0; x < width; ++x) {
    const int mirrored = width - 1 - x;
    dst_a[mirrored] = src[2 * x];
    dst_b[mirrored] = src[2 * x + 1];
  }
}
void Rotate180_deinterleave(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
int i;
reverse_func ReverseLine;
// do processor detection here.
#ifdef __ARM_NEON__
ReverseLine = ReverseLine_di_NEON;
#else
ReverseLine = ReverseLine_di_C;
#endif
dst_a += dst_stride_a*(height-1);
dst_b += dst_stride_b*(height-1);
width >>= 1;
for (i = 0; i < height; ++i) {
ReverseLine(src, dst_a, dst_b, width);
src += src_stride;
dst_a -= dst_stride_a;
dst_b -= dst_stride_b;
}
}
} // namespace libyuv
.global RestoreRegisters_NEON
.global ReverseLine_di_NEON
.global SaveRegisters_NEON
.global Transpose_di_wx8_NEON
.type RestoreRegisters_NEON, function
.type ReverseLine_di_NEON, function
.type SaveRegisters_NEON, function
.type Transpose_di_wx8_NEON, function
@ void SaveRegisters_NEON (unsigned long long* store)
@ Stores NEON registers d8-d15 (eight 64-bit values) to consecutive
@ memory starting at store.
@ r0 unsigned long long* store (advanced past the stored data on return)
SaveRegisters_NEON:
vst1.i64 {d8, d9, d10, d11}, [r0]! @ store d8-d11, r0 += 32
vst1.i64 {d12, d13, d14, d15}, [r0]! @ store d12-d15, r0 += 32
bx lr
@ void RestoreRegisters_NEON (unsigned long long* restore)
@ Loads NEON registers d8-d15 (eight 64-bit values) from consecutive
@ memory starting at restore; the inverse of SaveRegisters_NEON.
@ r0 unsigned long long* restore (advanced past the loaded data on return)
RestoreRegisters_NEON:
vld1.i64 {d8, d9, d10, d11}, [r0]! @ load d8-d11, r0 += 32
vld1.i64 {d12, d13, d14, d15}, [r0]! @ load d12-d15, r0 += 32
bx lr
@ void ReverseLine_di_NEON (const uint8* src,
@ uint8* dst_a,
@ uint8* dst_b,
@ int width)
@ Reverses one row of interleaved byte pairs while splitting it: the
@ first byte of every pair goes to dst_a, the second to dst_b, and both
@ outputs are written from their end towards their start.
@ r0 const uint8* src
@ r1 uint8* dst_a
@ r2 uint8* dst_b
@ r3 width (number of output bytes per destination, i.e. source pairs)
@ NOTE: with width == 0 the residual loop below still executes once,
@ because its counter is only tested after the first store.
ReverseLine_di_NEON:
@ compute where to start writing destination
add r1, r1, r3 @ dst_a + width
add r2, r2, r3 @ dst_b + width
@ work on input segments that are multiples of 16, but
@ width that has been passed is output segments, half
@ the size of input.
@ fewer than 8 output bytes: only the residual loop is needed
lsrs r12, r3, #3
beq .line_residuals
@ the output is written in to two blocks.
mov r12, #-8
@ back of destination by the size of the register that is
@ going to be reversed
sub r1, r1, #8
sub r2, r2, #8
@ the loop needs to run on blocks of 8 output bytes. what will be
@ left over is either a negative number, the residuals that need
@ to be done, or 0. if this isn't subtracted off here the
@ loop will run one extra time.
sub r3, r3, #8
.segments_of_8:
vld2.8 {d0, d1}, [r0]! @ src += 16
@ reverse the bytes in the 64 bit segments
vrev64.8 q0, q0
vst1.8 {d0}, [r1], r12 @ dst_a -= 8
vst1.8 {d1}, [r2], r12 @ dst_b -= 8
subs r3, r3, #8
bge .segments_of_8
@ add 8 back to the counter. if the result is 0 there is no
@ residuals so return
adds r3, r3, #8
bxeq lr
@ undo the 8-byte back-off so r1/r2 point just past the bytes
@ that are still to be written
add r1, r1, #8
add r2, r2, #8
.line_residuals:
mov r12, #-1
sub r1, r1, #1
sub r2, r2, #1
@ do this in neon registers as per
@ http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
.segments_of_2:
vld2.8 {d0[0], d1[0]}, [r0]! @ src += 2
vst1.8 {d0[0]}, [r1], r12 @ dst_a -= 1
vst1.8 {d1[0]}, [r2], r12 @ dst_b -= 1
subs r3, r3, #1
bgt .segments_of_2
bx lr
@ void Transpose_di_wx8_NEON (const uint8* src, int src_pitch,
@                             uint8* dst_a, int dst_pitch_a,
@                             uint8* dst_b, int dst_pitch_b,
@                             int width)
@ Transposes an 8-row strip of interleaved byte pairs, writing the first
@ byte of every pair to dst_a and the second to dst_b. Each source column
@ (one pair per row) becomes one 8-byte row in each destination.
@ r0    const uint8* src
@ r1    int src_pitch
@ r2    uint8* dst_a
@ r3    int dst_pitch_a
@ stack uint8* dst_b
@ stack int dst_pitch_b
@ stack int width (number of pairs per source row)
@ Uses d0-d15, i.e. including d8-d15, without saving them here.
Transpose_di_wx8_NEON:
  push {r4-r9,lr}

  @ seven registers were pushed, so the stack arguments start at sp + 28
  ldr r4, [sp, #28] @ dst_b
  ldr r5, [sp, #32] @ dst_pitch_b
  @ width must live in r8: every counter instruction below uses r8
  ldr r8, [sp, #36] @ width

  @ loops are on blocks of 8. loop will stop when
  @ counter gets to or below 0. starting the counter
  @ at w-8 allow for this
  sub r8, #8

  @ handle 8x8 blocks. this should be the majority of the plane
.loop_8x8:
  mov r9, r0

  @ load 8 rows of 8 pairs, de-interleaved: even d registers get the
  @ first byte of each pair, odd d registers the second
  vld2.8 {d0, d1}, [r9], r1
  vld2.8 {d2, d3}, [r9], r1
  vld2.8 {d4, d5}, [r9], r1
  vld2.8 {d6, d7}, [r9], r1
  vld2.8 {d8, d9}, [r9], r1
  vld2.8 {d10, d11}, [r9], r1
  vld2.8 {d12, d13}, [r9], r1
  vld2.8 {d14, d15}, [r9]

  vtrn.8 q1, q0
  vtrn.8 q3, q2
  vtrn.8 q5, q4
  vtrn.8 q7, q6

  vtrn.16 q1, q3
  vtrn.16 q0, q2
  vtrn.16 q5, q7
  vtrn.16 q4, q6

  vtrn.32 q1, q5
  vtrn.32 q0, q4
  vtrn.32 q3, q7
  vtrn.32 q2, q6

  vrev16.8 q0, q0
  vrev16.8 q1, q1
  vrev16.8 q2, q2
  vrev16.8 q3, q3
  vrev16.8 q4, q4
  vrev16.8 q5, q5
  vrev16.8 q6, q6
  vrev16.8 q7, q7

  mov r9, r2

  vst1.8 {d2}, [r9], r3
  vst1.8 {d0}, [r9], r3
  vst1.8 {d6}, [r9], r3
  vst1.8 {d4}, [r9], r3
  vst1.8 {d10}, [r9], r3
  vst1.8 {d8}, [r9], r3
  vst1.8 {d14}, [r9], r3
  vst1.8 {d12}, [r9]

  mov r9, r4

  vst1.8 {d3}, [r9], r5
  vst1.8 {d1}, [r9], r5
  vst1.8 {d7}, [r9], r5
  vst1.8 {d5}, [r9], r5
  vst1.8 {d11}, [r9], r5
  vst1.8 {d9}, [r9], r5
  vst1.8 {d15}, [r9], r5
  vst1.8 {d13}, [r9]

  add r0, #8*2 @ src += 8*2
  add r2, r3, lsl #3 @ dst_a += 8 * dst_pitch_a
  add r4, r5, lsl #3 @ dst_b += 8 * dst_pitch_b
  subs r8, #8 @ w -= 8
  bge .loop_8x8

  @ add 8 back to counter. if the result is 0 there are
  @ no residuals.
  adds r8, #8
  beq .done

  @ some residual, so between 1 and 7 lines left to transpose
  cmp r8, #2
  blt .block_1x8

  cmp r8, #4
  blt .block_2x8

  @ TODO(frkoenig) : clean this up
.block_4x8:
  mov r9, r0
  @ each row load brings in 4 pairs (8 bytes)
  vld1.64 {d0}, [r9], r1
  vld1.64 {d1}, [r9], r1
  vld1.64 {d2}, [r9], r1
  vld1.64 {d3}, [r9], r1
  vld1.64 {d4}, [r9], r1
  vld1.64 {d5}, [r9], r1
  vld1.64 {d6}, [r9], r1
  vld1.64 {d7}, [r9]

  adr r12, vtbl_4x4_transpose
  vld1.8 {q7}, [r12]

  vtrn.8 q0, q1
  vtrn.8 q2, q3

  vtbl.8 d8, {d0, d1}, d14
  vtbl.8 d9, {d0, d1}, d15
  vtbl.8 d10, {d2, d3}, d14
  vtbl.8 d11, {d2, d3}, d15
  vtbl.8 d12, {d4, d5}, d14
  vtbl.8 d13, {d4, d5}, d15
  vtbl.8 d0, {d6, d7}, d14
  vtbl.8 d1, {d6, d7}, d15

  mov r9, r2

  vst1.32 {d8[0]}, [r9], r3
  vst1.32 {d8[1]}, [r9], r3
  vst1.32 {d9[0]}, [r9], r3
  vst1.32 {d9[1]}, [r9], r3

  @ second half of each dst_a row (source rows 4-7)
  add r9, r2, #4
  vst1.32 {d12[0]}, [r9], r3
  vst1.32 {d12[1]}, [r9], r3
  vst1.32 {d13[0]}, [r9], r3
  vst1.32 {d13[1]}, [r9]

  mov r9, r4

  vst1.32 {d10[0]}, [r9], r5
  vst1.32 {d10[1]}, [r9], r5
  vst1.32 {d11[0]}, [r9], r5
  vst1.32 {d11[1]}, [r9], r5

  @ second half of each dst_b row (source rows 4-7)
  add r9, r4, #4
  vst1.32 {d0[0]}, [r9], r5
  vst1.32 {d0[1]}, [r9], r5
  vst1.32 {d1[0]}, [r9], r5
  vst1.32 {d1[1]}, [r9]

  add r0, #4*2 @ src += 4 * 2
  add r2, r3, lsl #2 @ dst_a += 4 * dst_pitch_a
  add r4, r5, lsl #2 @ dst_b += 4 * dst_pitch_b
  subs r8, #4 @ w -= 4
  beq .done

  @ some residual, check to see if it includes a 2x8 block,
  @ or less
  cmp r8, #2
  blt .block_1x8

.block_2x8:
  mov r9, r0
  vld2.16 {d0[0], d2[0]}, [r9], r1
  vld2.16 {d1[0], d3[0]}, [r9], r1
  vld2.16 {d0[1], d2[1]}, [r9], r1
  vld2.16 {d1[1], d3[1]}, [r9], r1
  vld2.16 {d0[2], d2[2]}, [r9], r1
  vld2.16 {d1[2], d3[2]}, [r9], r1
  vld2.16 {d0[3], d2[3]}, [r9], r1
  vld2.16 {d1[3], d3[3]}, [r9]

  vtrn.8 d0, d1
  vtrn.8 d2, d3

  mov r9, r2

  vst1.64 {d0}, [r9], r3
  vst1.64 {d2}, [r9]

  mov r9, r4

  vst1.64 {d1}, [r9], r5
  vst1.64 {d3}, [r9]

  add r0, #2*2 @ src += 2 * 2
  add r2, r3, lsl #1 @ dst_a += 2 * dst_pitch_a
  add r4, r5, lsl #1 @ dst_b += 2 * dst_pitch_b
  subs r8, #2 @ w -= 2
  beq .done

.block_1x8:
  @ one pair per row: lane n of d0/d1 receives the pair from row n
  vld2.8 {d0[0], d1[0]}, [r0], r1
  vld2.8 {d0[1], d1[1]}, [r0], r1
  vld2.8 {d0[2], d1[2]}, [r0], r1
  vld2.8 {d0[3], d1[3]}, [r0], r1
  vld2.8 {d0[4], d1[4]}, [r0], r1
  vld2.8 {d0[5], d1[5]}, [r0], r1
  vld2.8 {d0[6], d1[6]}, [r0], r1
  vld2.8 {d0[7], d1[7]}, [r0]

  vst1.64 {d0}, [r2]
  vst1.64 {d1}, [r4]

.done:
  pop {r4-r9, pc}
vtbl_4x4_transpose:
.byte 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15
.global RestoreRegisters_NEON
.global ReverseLine_NEON .global ReverseLine_NEON
.global Transpose_wx8_NEON .global ReverseLineUV_NEON
.global SaveRegisters_NEON
.global TransposeWx8_NEON
.global TransposeUVWx8_NEON
.type RestoreRegisters_NEON, function
.type ReverseLine_NEON, function .type ReverseLine_NEON, function
.type Transpose_wx8_NEON, function .type ReverseLineUV_NEON, function
.type SaveRegisters_NEON, function
.type TransposeWx8_NEON, function
.type TransposeUVWx8_NEON, function
@ void ReverseLine_NEON (const uint8* src, uint8* dst, int width) @ void ReverseLine_NEON (const uint8* src, uint8* dst, int width)
@ r0 const uint8* src @ r0 const uint8* src
...@@ -23,7 +31,7 @@ ReverseLine_NEON: ...@@ -23,7 +31,7 @@ ReverseLine_NEON:
@ along with 16 to get the next location. @ along with 16 to get the next location.
mov r3, #-24 mov r3, #-24
beq .line_residuals beq Lline_residuals
@ back of destination by the size of the register that is @ back of destination by the size of the register that is
@ going to be reversed @ going to be reversed
...@@ -35,7 +43,7 @@ ReverseLine_NEON: ...@@ -35,7 +43,7 @@ ReverseLine_NEON:
@ loop will run one extra time. @ loop will run one extra time.
sub r2, #16 sub r2, #16
.segments_of_16: Lsegments_of_16:
vld1.8 {q0}, [r0]! @ src += 16 vld1.8 {q0}, [r0]! @ src += 16
@ reverse the bytes in the 64 bit segments. unable to reverse @ reverse the bytes in the 64 bit segments. unable to reverse
...@@ -48,7 +56,7 @@ ReverseLine_NEON: ...@@ -48,7 +56,7 @@ ReverseLine_NEON:
vst1.8 {d0}, [r1], r3 @ dst -= 16 vst1.8 {d0}, [r1], r3 @ dst -= 16
subs r2, #16 subs r2, #16
bge .segments_of_16 bge Lsegments_of_16
@ add 16 back to the counter. if the result is 0 there is no @ add 16 back to the counter. if the result is 0 there is no
@ residuals so return @ residuals so return
...@@ -57,7 +65,7 @@ ReverseLine_NEON: ...@@ -57,7 +65,7 @@ ReverseLine_NEON:
add r1, #16 add r1, #16
.line_residuals: Lline_residuals:
mov r3, #-3 mov r3, #-3
...@@ -65,38 +73,38 @@ ReverseLine_NEON: ...@@ -65,38 +73,38 @@ ReverseLine_NEON:
subs r2, #2 subs r2, #2
@ check for 16*n+1 scenarios where segments_of_2 should not @ check for 16*n+1 scenarios where segments_of_2 should not
@ be run, but there is something left over. @ be run, but there is something left over.
blt .segment_of_1 blt Lsegment_of_1
@ do this in neon registers as per @ do this in neon registers as per
@ http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ @ http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
.segments_of_2: Lsegments_of_2:
vld2.8 {d0[0], d1[0]}, [r0]! @ src += 2 vld2.8 {d0[0], d1[0]}, [r0]! @ src += 2
vst1.8 {d1[0]}, [r1]! vst1.8 {d1[0]}, [r1]!
vst1.8 {d0[0]}, [r1], r3 @ dst -= 2 vst1.8 {d0[0]}, [r1], r3 @ dst -= 2
subs r2, #2 subs r2, #2
bge .segments_of_2 bge Lsegments_of_2
adds r2, #2 adds r2, #2
bxeq lr bxeq lr
.segment_of_1: Lsegment_of_1:
add r1, #1 add r1, #1
vld1.8 {d0[0]}, [r0] vld1.8 {d0[0]}, [r0]
vst1.8 {d0[0]}, [r1] vst1.8 {d0[0]}, [r1]
bx lr bx lr
@ void Transpose_wx8_NEON (const uint8* src, int src_pitch, @ void TransposeWx8_NEON (const uint8* src, int src_stride,
@ uint8* dst, int dst_pitch, @ uint8* dst, int dst_stride,
@ int w) @ int w)
@ r0 const uint8* src @ r0 const uint8* src
@ r1 int src_pitch @ r1 int src_stride
@ r2 uint8* dst @ r2 uint8* dst
@ r3 int dst_pitch @ r3 int dst_stride
@ stack int w @ stack int w
Transpose_wx8_NEON: TransposeWx8_NEON:
push {r4,r8,r9,lr} push {r4,r8,r9,lr}
ldr r8, [sp, #16] @ width ldr r8, [sp, #16] @ width
...@@ -107,7 +115,7 @@ Transpose_wx8_NEON: ...@@ -107,7 +115,7 @@ Transpose_wx8_NEON:
sub r8, #8 sub r8, #8
@ handle 8x8 blocks. this should be the majority of the plane @ handle 8x8 blocks. this should be the majority of the plane
.loop_8x8: Lloop_8x8:
mov r9, r0 mov r9, r0
vld1.8 {d0}, [r9], r1 vld1.8 {d0}, [r9], r1
...@@ -151,23 +159,23 @@ Transpose_wx8_NEON: ...@@ -151,23 +159,23 @@ Transpose_wx8_NEON:
vst1.8 {d6}, [r9] vst1.8 {d6}, [r9]
add r0, #8 @ src += 8 add r0, #8 @ src += 8
add r2, r3, lsl #3 @ dst += 8 * dst_pitch add r2, r3, lsl #3 @ dst += 8 * dst_stride
subs r8, #8 @ w -= 8 subs r8, #8 @ w -= 8
bge .loop_8x8 bge Lloop_8x8
@ add 8 back to counter. if the result is 0 there are @ add 8 back to counter. if the result is 0 there are
@ no residuals. @ no residuals.
adds r8, #8 adds r8, #8
beq .done beq Ldone
@ some residual, so between 1 and 7 lines left to transpose @ some residual, so between 1 and 7 lines left to transpose
cmp r8, #2 cmp r8, #2
blt .block_1x8 blt Lblock_1x8
cmp r8, #4 cmp r8, #4
blt .block_2x8 blt Lblock_2x8
.block_4x8: Lblock_4x8:
mov r9, r0 mov r9, r0
vld1.32 {d0[0]}, [r9], r1 vld1.32 {d0[0]}, [r9], r1
vld1.32 {d0[1]}, [r9], r1 vld1.32 {d0[1]}, [r9], r1
...@@ -202,16 +210,16 @@ Transpose_wx8_NEON: ...@@ -202,16 +210,16 @@ Transpose_wx8_NEON:
vst1.32 {d1[1]}, [r9] vst1.32 {d1[1]}, [r9]
add r0, #4 @ src += 4 add r0, #4 @ src += 4
add r2, r3, lsl #2 @ dst += 4 * dst_pitch add r2, r3, lsl #2 @ dst += 4 * dst_stride
subs r8, #4 @ w -= 4 subs r8, #4 @ w -= 4
beq .done beq Ldone
@ some residual, check to see if it includes a 2x8 block, @ some residual, check to see if it includes a 2x8 block,
@ or less @ or less
cmp r8, #2 cmp r8, #2
blt .block_1x8 blt Lblock_1x8
.block_2x8: Lblock_2x8:
mov r9, r0 mov r9, r0
vld1.16 {d0[0]}, [r9], r1 vld1.16 {d0[0]}, [r9], r1
vld1.16 {d1[0]}, [r9], r1 vld1.16 {d1[0]}, [r9], r1
...@@ -230,11 +238,11 @@ Transpose_wx8_NEON: ...@@ -230,11 +238,11 @@ Transpose_wx8_NEON:
vst1.64 {d1}, [r9] vst1.64 {d1}, [r9]
add r0, #2 @ src += 2 add r0, #2 @ src += 2
add r2, r3, lsl #1 @ dst += 2 * dst_pitch add r2, r3, lsl #1 @ dst += 2 * dst_stride
subs r8, #2 @ w -= 2 subs r8, #2 @ w -= 2
beq .done beq Ldone
.block_1x8: Lblock_1x8:
vld1.8 {d0[0]}, [r0], r1 vld1.8 {d0[0]}, [r0], r1
vld1.8 {d0[1]}, [r0], r1 vld1.8 {d0[1]}, [r0], r1
vld1.8 {d0[2]}, [r0], r1 vld1.8 {d0[2]}, [r0], r1
...@@ -246,9 +254,310 @@ Transpose_wx8_NEON: ...@@ -246,9 +254,310 @@ Transpose_wx8_NEON:
vst1.64 {d0}, [r2] vst1.64 {d0}, [r2]
.done: Ldone:
pop {r4,r8,r9,pc} pop {r4,r8,r9,pc}
vtbl_4x4_transpose: vtbl_4x4_transpose:
.byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
@ void SaveRegisters_NEON (unsigned long long store)
@ r0 unsigned long long store
@
@ Stores NEON registers d8-d15 (64 bytes in total) to consecutive memory
@ starting at the address held in r0. r0 is post-incremented by each
@ store, so it is left pointing 64 bytes past the start on return.
@ NOTE(review): the prototype comment passes `store` by value, but r0 is
@ used as a base address here - presumably the C declaration takes a
@ pointer to at least 8 unsigned long longs; confirm against the caller.
SaveRegisters_NEON:
vst1.i64 {d8, d9, d10, d11}, [r0]!      @ first 32 bytes: d8-d11
vst1.i64 {d12, d13, d14, d15}, [r0]!    @ next 32 bytes: d12-d15
bx lr
@ void RestoreRegisters_NEON (unsigned long long store)
@ r0 unsigned long long store
@
@ Loads NEON registers d8-d15 (64 bytes in total) from consecutive memory
@ starting at the address held in r0, in the same order in which
@ SaveRegisters_NEON writes them. r0 is post-incremented by each load.
@ NOTE(review): as with SaveRegisters_NEON, r0 is used as a base address
@ although the prototype comment passes `store` by value - confirm the
@ C declaration.
RestoreRegisters_NEON:
vld1.i64 {d8, d9, d10, d11}, [r0]!      @ first 32 bytes: d8-d11
vld1.i64 {d12, d13, d14, d15}, [r0]!    @ next 32 bytes: d12-d15
bx lr
@ void ReverseLineUV_NEON (const uint8* src,
@ uint8* dst_a,
@ uint8* dst_b,
@ int width)
@ r0 const uint8* src
@ r1 uint8* dst_a
@ r2 uint8* dst_b
@ r3 width
@
@ Reads `width` interleaved byte pairs from src and writes them out
@ de-interleaved and in reverse order: the first byte of each pair goes
@ to dst_a and the second to dst_b, with the first pair read landing at
@ index width - 1 of each destination.
@ Scratch registers: r12, q0 (d0, d1). r0-r3 are modified.
@ NOTE(review): with width == 0 control reaches Lsegments_of_1, whose
@ body runs once before the counter is tested, so one pair would be read
@ and one byte written just before each destination - presumably callers
@ never pass 0; confirm.
ReverseLineUV_NEON:
@ compute where to start writing destination
add r1, r1, r3 @ dst_a + width
add r2, r2, r3 @ dst_b + width
@ work on input segments that are multiples of 16 bytes, but
@ the width that has been passed counts output bytes, half
@ the size of the input.
@ fewer than 8 pairs: skip straight to the one-pair-at-a-time loop.
lsrs r12, r3, #3
beq Lline_residuals_di
@ the output is written to two destinations, each stepping
@ backwards by 8 bytes per iteration.
mov r12, #-8
@ back off the destinations by the size of the register that is
@ going to be reversed
sub r1, r1, #8
sub r2, r2, #8
@ the loop needs to run on blocks of 8. what will be left
@ over is either a negative number, the residuals that need
@ to be done, or 0. if this isn't subtracted off here the
@ loop will run one extra time.
sub r3, r3, #8
Lsegments_of_8_di:
@ de-interleaving load: d0 gets the first byte of each of 8 pairs,
@ d1 the second byte.
vld2.8 {d0, d1}, [r0]! @ src += 16
@ reverse the bytes in the 64 bit segments
vrev64.8 q0, q0
vst1.8 {d0}, [r1], r12 @ dst_a -= 8
vst1.8 {d1}, [r2], r12 @ dst_b -= 8
subs r3, r3, #8
bge Lsegments_of_8_di
@ add 8 back to the counter. if the result is 0 there are no
@ residuals so return
adds r3, r3, #8
bxeq lr
@ undo the 8 byte back-off so r1/r2 point one past the bytes
@ that still have to be written.
add r1, r1, #8
add r2, r2, #8
Lline_residuals_di:
@ remaining pairs (1 to 7 after the block loop) are handled one at
@ a time, stepping each destination backwards by one byte.
mov r12, #-1
sub r1, r1, #1
sub r2, r2, #1
@ do this in neon registers as per
@ http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
Lsegments_of_1:
vld2.8 {d0[0], d1[0]}, [r0]! @ src += 2
vst1.8 {d0[0]}, [r1], r12 @ dst_a -= 1
vst1.8 {d1[0]}, [r2], r12 @ dst_b -= 1
subs r3, r3, #1
bgt Lsegments_of_1
bx lr
@ void TransposeUVWx8_NEON (const uint8* src, int src_stride,
@ uint8* dst_a, int dst_stride_a,
@ uint8* dst_b, int dst_stride_b,
@ int width)
@ r0 const uint8* src
@ r1 int src_stride
@ r2 uint8* dst_a
@ r3 int dst_stride_a
@ stack uint8* dst_b
@ stack int dst_stride_b
@ stack int width
@
@ Transposes a strip of 8 source rows holding `width` interleaved byte
@ pairs each. For every pair column, one 8 byte row is written to dst_a
@ (built from the first byte of each pair) and one to dst_b (built from
@ the second byte). Columns are consumed in blocks of 8, then 4, then 2,
@ then 1.
@
@ Register use: r4 = dst_b, r5 = dst_stride_b, r8 = remaining width,
@ r9 = row cursor, r12 = table address. r4-r9 are saved and restored.
@ NEON registers q0-q7 are overwritten. That includes d8-d15, which this
@ routine does not preserve although the ARM procedure call standard
@ treats them as callee-saved; SaveRegisters_NEON / RestoreRegisters_NEON
@ in this file store and load exactly those registers, presumably so the
@ caller can bracket this call - confirm.
@ NOTE(review): the 8x8 loop body executes before the first counter
@ test, so width is presumably required to be >= 8 - confirm with callers.
TransposeUVWx8_NEON:
push {r4-r9,lr}
@ seven registers were pushed (28 bytes), so the stacked arguments
@ start at sp + 28.
ldr r4, [sp, #28] @ dst_b
ldr r5, [sp, #32] @ dst_stride_b
ldr r8, [sp, #36] @ width
@ loops are on blocks of 8. loop will stop when
@ counter gets to or below 0. starting the counter
@ at w-8 allow for this
sub r8, #8
@ handle 8x8 blocks. this should be the majority of the plane
Lloop_8x8_di:
mov r9, r0
@ de-interleaving loads: for each of the 8 rows the even numbered d
@ register receives the first bytes of 8 pairs, the odd numbered d
@ register the second bytes.
vld2.8 {d0, d1}, [r9], r1
vld2.8 {d2, d3}, [r9], r1
vld2.8 {d4, d5}, [r9], r1
vld2.8 {d6, d7}, [r9], r1
vld2.8 {d8, d9}, [r9], r1
vld2.8 {d10, d11}, [r9], r1
vld2.8 {d12, d13}, [r9], r1
vld2.8 {d14, d15}, [r9]
@ transpose in three vtrn stages (8, 16 and 32 bit elements). the q
@ form works on both d halves at once, so both channels are
@ transposed together.
vtrn.8 q1, q0
vtrn.8 q3, q2
vtrn.8 q5, q4
vtrn.8 q7, q6
vtrn.16 q1, q3
vtrn.16 q0, q2
vtrn.16 q5, q7
vtrn.16 q4, q6
vtrn.32 q1, q5
vtrn.32 q0, q4
vtrn.32 q3, q7
vtrn.32 q2, q6
@ swap the two bytes inside every 16 bit element
vrev16.8 q0, q0
vrev16.8 q1, q1
vrev16.8 q2, q2
vrev16.8 q3, q3
vrev16.8 q4, q4
vrev16.8 q5, q5
vrev16.8 q6, q6
vrev16.8 q7, q7
@ even numbered d registers go to dst_a, one 8 byte row each
mov r9, r2
vst1.8 {d2}, [r9], r3
vst1.8 {d0}, [r9], r3
vst1.8 {d6}, [r9], r3
vst1.8 {d4}, [r9], r3
vst1.8 {d10}, [r9], r3
vst1.8 {d8}, [r9], r3
vst1.8 {d14}, [r9], r3
vst1.8 {d12}, [r9]
@ odd numbered d registers go to dst_b, one 8 byte row each
mov r9, r4
vst1.8 {d3}, [r9], r5
vst1.8 {d1}, [r9], r5
vst1.8 {d7}, [r9], r5
vst1.8 {d5}, [r9], r5
vst1.8 {d11}, [r9], r5
vst1.8 {d9}, [r9], r5
vst1.8 {d15}, [r9], r5
vst1.8 {d13}, [r9]
add r0, #8*2 @ src += 8*2
add r2, r3, lsl #3 @ dst_a += 8 * dst_stride_a
add r4, r5, lsl #3 @ dst_b += 8 * dst_stride_b
subs r8, #8 @ w -= 8
bge Lloop_8x8_di
@ add 8 back to counter. if the result is 0 there are
@ no residuals.
adds r8, #8
beq Ldone_di
@ some residual, so between 1 and 7 lines left to transpose
cmp r8, #2
blt Lblock_1x8_di
cmp r8, #4
blt Lblock_2x8_di
@ TODO(frkoenig) : clean this up
Lblock_4x8_di:
mov r9, r0
@ 8 rows of 8 bytes each (4 interleaved pairs per row)
vld1.64 {d0}, [r9], r1
vld1.64 {d1}, [r9], r1
vld1.64 {d2}, [r9], r1
vld1.64 {d3}, [r9], r1
vld1.64 {d4}, [r9], r1
vld1.64 {d5}, [r9], r1
vld1.64 {d6}, [r9], r1
vld1.64 {d7}, [r9]
@ load the 16 byte shuffle table into q7 (d14, d15)
adr r12, vtbl_4x4_transpose_di
vld1.8 {q7}, [r12]
vtrn.8 q0, q1
vtrn.8 q2, q3
@ table lookups reorder each 16 byte register pair using the
@ indices held in d14 / d15
vtbl.8 d8, {d0, d1}, d14
vtbl.8 d9, {d0, d1}, d15
vtbl.8 d10, {d2, d3}, d14
vtbl.8 d11, {d2, d3}, d15
vtbl.8 d12, {d4, d5}, d14
vtbl.8 d13, {d4, d5}, d15
vtbl.8 d0, {d6, d7}, d14
vtbl.8 d1, {d6, d7}, d15
@ dst_a: four rows, written as two 4 byte halves per row
@ (d8/d9 at column 0, d12/d13 at column 4)
mov r9, r2
vst1.32 {d8[0]}, [r9], r3
vst1.32 {d8[1]}, [r9], r3
vst1.32 {d9[0]}, [r9], r3
vst1.32 {d9[1]}, [r9], r3
add r9, r2, #4
vst1.32 {d12[0]}, [r9], r3
vst1.32 {d12[1]}, [r9], r3
vst1.32 {d13[0]}, [r9], r3
vst1.32 {d13[1]}, [r9]
@ dst_b: four rows, written as two 4 byte halves per row
@ (d10/d11 at column 0, d0/d1 at column 4)
mov r9, r4
vst1.32 {d10[0]}, [r9], r5
vst1.32 {d10[1]}, [r9], r5
vst1.32 {d11[0]}, [r9], r5
vst1.32 {d11[1]}, [r9], r5
add r9, r4, #4
vst1.32 {d0[0]}, [r9], r5
vst1.32 {d0[1]}, [r9], r5
vst1.32 {d1[0]}, [r9], r5
vst1.32 {d1[1]}, [r9]
add r0, #4*2 @ src += 4 * 2
add r2, r3, lsl #2 @ dst_a += 4 * dst_stride_a
add r4, r5, lsl #2 @ dst_b += 4 * dst_stride_b
subs r8, #4 @ w -= 4
beq Ldone_di
@ some residual, check to see if it includes a 2x8 block,
@ or less
cmp r8, #2
blt Lblock_1x8_di
Lblock_2x8_di:
mov r9, r0
@ each load takes two 16 bit pairs from one row: the first pair goes
@ to a lane of d0/d1, the second pair to the same lane of d2/d3.
@ rows alternate between the d0/d2 and d1/d3 register sets.
vld2.16 {d0[0], d2[0]}, [r9], r1
vld2.16 {d1[0], d3[0]}, [r9], r1
vld2.16 {d0[1], d2[1]}, [r9], r1
vld2.16 {d1[1], d3[1]}, [r9], r1
vld2.16 {d0[2], d2[2]}, [r9], r1
vld2.16 {d1[2], d3[2]}, [r9], r1
vld2.16 {d0[3], d2[3]}, [r9], r1
vld2.16 {d1[3], d3[3]}, [r9]
@ separate the two channels: afterwards d0/d2 hold the first bytes
@ of the pairs, d1/d3 the second bytes.
vtrn.8 d0, d1
vtrn.8 d2, d3
mov r9, r2
vst1.64 {d0}, [r9], r3
vst1.64 {d2}, [r9]
mov r9, r4
vst1.64 {d1}, [r9], r5
vst1.64 {d3}, [r9]
add r0, #2*2 @ src += 2 * 2
add r2, r3, lsl #1 @ dst_a += 2 * dst_stride_a
add r4, r5, lsl #1 @ dst_b += 2 * dst_stride_b
subs r8, #2 @ w -= 2
beq Ldone_di
Lblock_1x8_di:
@ last pair column: gather one pair from each of the 8 rows, lane by
@ lane. note that r0 itself is advanced here (no row cursor is needed
@ because nothing follows).
vld2.8 {d0[0], d1[0]}, [r0], r1
vld2.8 {d0[1], d1[1]}, [r0], r1
vld2.8 {d0[2], d1[2]}, [r0], r1
vld2.8 {d0[3], d1[3]}, [r0], r1
vld2.8 {d0[4], d1[4]}, [r0], r1
vld2.8 {d0[5], d1[5]}, [r0], r1
vld2.8 {d0[6], d1[6]}, [r0], r1
vld2.8 {d0[7], d1[7]}, [r0]
vst1.64 {d0}, [r2]
vst1.64 {d1}, [r4]
Ldone_di:
pop {r4-r9, pc}
@ byte index table for the vtbl.8 lookups in Lblock_4x8_di (loaded into q7)
vtbl_4x4_transpose_di:
.byte 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15
...@@ -8,39 +8,65 @@ ...@@ -8,39 +8,65 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#ifndef LIBYUV_SOURCE_ROTATE_H_ #ifndef SOURCE_ROTATE_PRIV_H_
#define LIBYUV_SOURCE_ROTATE_H_ #define SOURCE_ROTATE_PRIV_H_
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
namespace libyuv { namespace libyuv {
void Rotate90(const uint8* src, int src_stride, // Rotate planes by 90, 180, 270
void
RotatePlane90(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height); int width, int height);
void Rotate180(const uint8* src, int src_stride,
void
RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height); int width, int height);
void Rotate270(const uint8* src, int src_stride,
void
RotatePlane270(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height); int width, int height);
void Rotate90_deinterleave(const uint8* src, int src_stride, void
uint8* dst_a, int dst_stride_a, RotateUV90(const uint8* src, int src_stride,
uint8* dst_b, int dst_stride_b, uint8* dst_a, int dst_stride_a,
int width, int height); uint8* dst_b, int dst_stride_b,
void Rotate180_deinterleave(const uint8* src, int src_stride, int width, int height);
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, // Rotations for when U and V are interleaved.
int width, int height); // These functions take one input pointer and
void Rotate270_deinterleave(const uint8* src, int src_stride, // split the data into two buffers while
uint8* dst_a, int dst_stride_a, // rotating them.
uint8* dst_b, int dst_stride_b, void
int width, int height); RotateUV180(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
void Transpose(const uint8* src, int src_stride, uint8* dst_b, int dst_stride_b,
int width, int height);
void
RotateUV270(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
// order will result in a rotation by +- 90 degrees.
void
TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride,
int width, int height); int width, int height);
void
TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
} // namespace libyuv } // namespace libyuv
#endif // LIBYUV_SOURCE_ROTATE_H_ #endif // SOURCE_ROTATE_PRIV_H_
...@@ -8,9 +8,11 @@ ...@@ -8,9 +8,11 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "libyuv/rotate.h"
#include "../source/rotate_priv.h"
#include "unit_test.h" #include "unit_test.h"
#include "rotate.h"
#include <stdlib.h> #include <stdlib.h>
#include <time.h>
using namespace libyuv; using namespace libyuv;
...@@ -19,7 +21,7 @@ void print_array(uint8 *array, int w, int h) { ...@@ -19,7 +21,7 @@ void print_array(uint8 *array, int w, int h) {
for (i = 0; i < h; ++i) { for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) for (j = 0; j < w; ++j)
printf("%4d", array[i*w + j]); printf("%4d", (signed char)array[(i * w) + j]);
printf("\n"); printf("\n");
} }
...@@ -39,20 +41,17 @@ TEST_F(libyuvTest, Transpose) { ...@@ -39,20 +41,17 @@ TEST_F(libyuvTest, Transpose) {
ow = ih; ow = ih;
oh = iw; oh = iw;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_1 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_1 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_2 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_2 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i)
input[i] = i; input[i] = i;
output_1[i] = 0;
output_2[i] = 0;
}
Transpose(input, iw, output_1, ow, iw, ih); TransposePlane(input, iw, output_1, ow, iw, ih);
Transpose(output_1, ow, output_2, oh, ow, oh); TransposePlane(output_1, ow, output_2, oh, ow, oh);
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i) {
if (input[i] != output_2[i]) if (input[i] != output_2[i])
err++; err++;
} }
...@@ -76,7 +75,67 @@ TEST_F(libyuvTest, Transpose) { ...@@ -76,7 +75,67 @@ TEST_F(libyuvTest, Transpose) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
// Round-trip check for TransposeUV: an interleaved two-channel buffer is
// split and transposed by TransposeUV, each resulting plane is transposed
// back with TransposePlane, and the result is compared byte for byte with
// the corresponding channel of the input.
TEST_F(libyuvTest, Rotate90) { TEST_F(libyuvTest, TransposeUV) {
int iw, ih, ow, oh;
int err = 0;

// iw is the interleaved row length in bytes (two bytes per pair), so it
// advances in steps of 2. Both loops stop at the first size that fails.
for (iw = 16; iw < _rotate_max_w && !err; iw += 2)
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
int i;
uint8 *input;
uint8 *output_a1, *output_b1;
uint8 *output_a2, *output_b2;
// Each transposed plane is ih wide and (iw / 2) high.
ow = ih;
oh = iw >> 1;
// calloc zero-fills, so bytes the routines never write stay 0.
input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_a1 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_b1 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
// Allocated at the full interleaved size although only the first
// (iw / 2) * ih bytes are compared below.
output_a2 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_b2 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
// First channel counts up, second channel holds the negated count;
// both are truncated to uint8 on assignment.
for (i = 0; i < (iw * ih); i += 2) {
input[i] = i >> 1;
input[i + 1] = -(i >> 1);
}
// Width is passed in pairs (iw >> 1); the source stride stays in bytes.
TransposeUV(input, iw, output_a1, ow, output_b1, ow, iw >> 1, ih);
// Transpose each plane back to its original orientation.
TransposePlane(output_a1, ow, output_a2, oh, ow, oh);
TransposePlane(output_b1, ow, output_b2, oh, ow, oh);
// Pair index i >> 1 in the input corresponds to byte i >> 1 of each
// round-tripped plane.
for (i = 0; i < (iw * ih); i += 2) {
if (input[i] != output_a2[i >> 1])
err++;
if (input[i + 1] != output_b2[i >> 1])
err++;
}
// Dump every buffer on failure to help locate the mismatch.
if (err) {
printf("input %dx%d \n", iw, ih);
print_array(input, iw, ih);
printf("transpose 1\n");
print_array(output_a1, ow, oh);
print_array(output_b1, ow, oh);
printf("transpose 2\n");
print_array(output_a2, oh, ow);
print_array(output_b2, oh, ow);
}
free(input);
free(output_a1);
free(output_b1);
free(output_a2);
free(output_b2);
}
EXPECT_EQ(0, err);
}
TEST_F(libyuvTest, RotatePlane90) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -92,26 +151,21 @@ TEST_F(libyuvTest, Rotate90) { ...@@ -92,26 +151,21 @@ TEST_F(libyuvTest, Rotate90) {
ow = ih; ow = ih;
oh = iw; oh = iw;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_0 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_90 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_180 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_270 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_270 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i)
input[i] = i; input[i] = i;
output_0[i] = 0;
output_90[i] = 0;
output_180[i] = 0;
output_270[i] = 0;
}
Rotate90(input, iw, output_90, ow, iw, ih); RotatePlane90(input, iw, output_90, ow, iw, ih);
Rotate90(output_90, ow, output_180, oh, ow, oh); RotatePlane90(output_90, ow, output_180, oh, ow, oh);
Rotate90(output_180, oh, output_270, ow, oh, ow); RotatePlane90(output_180, oh, output_270, ow, oh, ow);
Rotate90(output_270, ow, output_0, iw, ow, oh); RotatePlane90(output_270, ow, output_0, iw, ow, oh);
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i) {
if (input[i] != output_0[i]) if (input[i] != output_0[i])
err++; err++;
} }
...@@ -143,7 +197,7 @@ TEST_F(libyuvTest, Rotate90) { ...@@ -143,7 +197,7 @@ TEST_F(libyuvTest, Rotate90) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate90Deinterleave) { TEST_F(libyuvTest, RotateUV90) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -159,42 +213,30 @@ TEST_F(libyuvTest, Rotate90Deinterleave) { ...@@ -159,42 +213,30 @@ TEST_F(libyuvTest, Rotate90Deinterleave) {
uint8 *output_180_v; uint8 *output_180_v;
ow = ih; ow = ih;
oh = iw>>1; oh = iw >> 1;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_0_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_0_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_0_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_90_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_90_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_180_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_180_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; i +=2) { for (i = 0; i < (iw * ih); i += 2) {
input[i] = i>>1; input[i] = i >> 1;
input[i+1] = -(i>>1); input[i + 1] = -(i >> 1);
}
for (i = 0; i < ow*oh; ++i) {
output_0_u[i] = 0;
output_0_v[i] = 0;
output_90_u[i] = 0;
output_90_v[i] = 0;
output_180_u[i] = 0;
output_180_v[i] = 0;
} }
Rotate90_deinterleave(input, iw, RotateUV90(input, iw, output_90_u, ow, output_90_v, ow, iw >> 1, ih);
output_90_u, ow,
output_90_v, ow,
iw, ih);
Rotate90(output_90_u, ow, output_180_u, oh, ow, oh); RotatePlane90(output_90_u, ow, output_180_u, oh, ow, oh);
Rotate90(output_90_v, ow, output_180_v, oh, ow, oh); RotatePlane90(output_90_v, ow, output_180_v, oh, ow, oh);
Rotate180(output_180_u, ow, output_0_u, ow, ow, oh); RotatePlane180(output_180_u, ow, output_0_u, ow, ow, oh);
Rotate180(output_180_v, ow, output_0_v, ow, ow, oh); RotatePlane180(output_180_v, ow, output_0_v, ow, ow, oh);
for (i = 0; i < ow*oh; ++i) { for (i = 0; i < (ow * oh); ++i) {
if (output_0_u[i] != (uint8)i) if (output_0_u[i] != (uint8)i)
err++; err++;
if (output_0_v[i] != (uint8)(-i)) if (output_0_v[i] != (uint8)(-i))
...@@ -236,7 +278,7 @@ TEST_F(libyuvTest, Rotate90Deinterleave) { ...@@ -236,7 +278,7 @@ TEST_F(libyuvTest, Rotate90Deinterleave) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate180Deinterleave) { TEST_F(libyuvTest, RotateUV180) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -251,43 +293,31 @@ TEST_F(libyuvTest, Rotate180Deinterleave) { ...@@ -251,43 +293,31 @@ TEST_F(libyuvTest, Rotate180Deinterleave) {
uint8 *output_180_u; uint8 *output_180_u;
uint8 *output_180_v; uint8 *output_180_v;
ow = iw>>1; ow = iw >> 1;
oh = ih; oh = ih;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_0_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_0_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_0_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_90_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_90_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_180_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_180_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; i +=2) { for (i = 0; i < (iw * ih); i += 2) {
input[i] = i>>1; input[i] = i >> 1;
input[i+1] = -(i>>1); input[i + 1] = -(i >> 1);
} }
for (i = 0; i < ow*oh; ++i) { RotateUV180(input, iw, output_180_u, ow, output_180_v, ow, iw >> 1, ih);
output_0_u[i] = 0;
output_0_v[i] = 0;
output_90_u[i] = 0;
output_90_v[i] = 0;
output_180_u[i] = 0;
output_180_v[i] = 0;
}
Rotate180_deinterleave(input, iw, RotatePlane90(output_180_u, ow, output_90_u, oh, ow, oh);
output_180_u, ow, RotatePlane90(output_180_v, ow, output_90_v, oh, ow, oh);
output_180_v, ow,
iw, ih);
Rotate90(output_180_u, ow, output_90_u, oh, ow, oh); RotatePlane90(output_90_u, oh, output_0_u, ow, oh, ow);
Rotate90(output_180_v, ow, output_90_v, oh, ow, oh); RotatePlane90(output_90_v, oh, output_0_v, ow, oh, ow);
Rotate90(output_90_u, oh, output_0_u, ow, oh, ow); for (i = 0; i < (ow * oh); ++i) {
Rotate90(output_90_v, oh, output_0_v, ow, oh, ow);
for (i = 0; i < ow*oh; ++i) {
if (output_0_u[i] != (uint8)i) if (output_0_u[i] != (uint8)i)
err++; err++;
if (output_0_v[i] != (uint8)(-i)) if (output_0_v[i] != (uint8)(-i))
...@@ -329,7 +359,7 @@ TEST_F(libyuvTest, Rotate180Deinterleave) { ...@@ -329,7 +359,7 @@ TEST_F(libyuvTest, Rotate180Deinterleave) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate270Deinterleave) { TEST_F(libyuvTest, RotateUV270) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -345,42 +375,31 @@ TEST_F(libyuvTest, Rotate270Deinterleave) { ...@@ -345,42 +375,31 @@ TEST_F(libyuvTest, Rotate270Deinterleave) {
uint8 *output_180_v; uint8 *output_180_v;
ow = ih; ow = ih;
oh = iw>>1; oh = iw >> 1;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_0_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_0_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_0_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_270_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_270_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_270_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_270_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180_u = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_180_u = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180_v = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_180_v = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; i +=2) { for (i = 0; i < (iw * ih); i += 2) {
input[i] = i>>1; input[i] = i >> 1;
input[i+1] = -(i>>1); input[i + 1] = -(i >> 1);
}
for (i = 0; i < ow*oh; ++i) {
output_0_u[i] = 0;
output_0_v[i] = 0;
output_270_u[i] = 0;
output_270_v[i] = 0;
output_180_u[i] = 0;
output_180_v[i] = 0;
} }
Rotate270_deinterleave(input, iw, RotateUV270(input, iw, output_270_u, ow, output_270_v, ow,
output_270_u, ow, iw >> 1, ih);
output_270_v, ow,
iw, ih);
Rotate270(output_270_u, ow, output_180_u, oh, ow, oh); RotatePlane270(output_270_u, ow, output_180_u, oh, ow, oh);
Rotate270(output_270_v, ow, output_180_v, oh, ow, oh); RotatePlane270(output_270_v, ow, output_180_v, oh, ow, oh);
Rotate180(output_180_u, ow, output_0_u, ow, ow, oh); RotatePlane180(output_180_u, ow, output_0_u, ow, ow, oh);
Rotate180(output_180_v, ow, output_0_v, ow, ow, oh); RotatePlane180(output_180_v, ow, output_0_v, ow, ow, oh);
for (i = 0; i < ow*oh; ++i) { for (i = 0; i < (ow * oh); ++i) {
if (output_0_u[i] != (uint8)i) if (output_0_u[i] != (uint8)i)
err++; err++;
if (output_0_v[i] != (uint8)(-i)) if (output_0_v[i] != (uint8)(-i))
...@@ -422,7 +441,7 @@ TEST_F(libyuvTest, Rotate270Deinterleave) { ...@@ -422,7 +441,7 @@ TEST_F(libyuvTest, Rotate270Deinterleave) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate180) { TEST_F(libyuvTest, RotatePlane180) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -436,20 +455,17 @@ TEST_F(libyuvTest, Rotate180) { ...@@ -436,20 +455,17 @@ TEST_F(libyuvTest, Rotate180) {
ow = iw; ow = iw;
oh = ih; oh = ih;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_0 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_180 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_180 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i)
input[i] = i; input[i] = i;
output_0[i] = 0;
output_180[i] = 0;
}
Rotate180(input, iw, output_180, ow, iw, ih); RotatePlane180(input, iw, output_180, ow, iw, ih);
Rotate180(output_180, ow, output_0, iw, ow, oh); RotatePlane180(output_180, ow, output_0, iw, ow, oh);
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i) {
if (input[i] != output_0[i]) if (input[i] != output_0[i])
err++; err++;
} }
...@@ -473,7 +489,7 @@ TEST_F(libyuvTest, Rotate180) { ...@@ -473,7 +489,7 @@ TEST_F(libyuvTest, Rotate180) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate270) { TEST_F(libyuvTest, RotatePlane270) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -489,26 +505,21 @@ TEST_F(libyuvTest, Rotate270) { ...@@ -489,26 +505,21 @@ TEST_F(libyuvTest, Rotate270) {
ow = ih; ow = ih;
oh = iw; oh = iw;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_0 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_90 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
output_180 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_180 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_270 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_270 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i)
input[i] = i; input[i] = i;
output_0[i] = 0;
output_90[i] = 0;
output_180[i] = 0;
output_270[i] = 0;
}
Rotate270(input, iw, output_270, ow, iw, ih); RotatePlane270(input, iw, output_270, ow, iw, ih);
Rotate270(output_270, ow, output_180, oh, ow, oh); RotatePlane270(output_270, ow, output_180, oh, ow, oh);
Rotate270(output_180, oh, output_90, ow, oh, ow); RotatePlane270(output_180, oh, output_90, ow, oh, ow);
Rotate270(output_90, ow, output_0, iw, ow, oh); RotatePlane270(output_90, ow, output_0, iw, ow, oh);
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i) {
if (input[i] != output_0[i]) if (input[i] != output_0[i])
err++; err++;
} }
...@@ -540,7 +551,7 @@ TEST_F(libyuvTest, Rotate270) { ...@@ -540,7 +551,7 @@ TEST_F(libyuvTest, Rotate270) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate90and270) { TEST_F(libyuvTest, RotatePlane90and270) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -553,20 +564,17 @@ TEST_F(libyuvTest, Rotate90and270) { ...@@ -553,20 +564,17 @@ TEST_F(libyuvTest, Rotate90and270) {
ow = ih; ow = ih;
oh = iw; oh = iw;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_0 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_90 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i)
input[i] = i; input[i] = i;
output_0[i] = 0;
output_90[i] = 0;
}
Rotate90(input, iw, output_90, ow, iw, ih); RotatePlane90(input, iw, output_90, ow, iw, ih);
Rotate270(output_90, ow, output_0, iw, ow, oh); RotatePlane270(output_90, ow, output_0, iw, ow, oh);
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i) {
if (input[i] != output_0[i]) if (input[i] != output_0[i])
err++; err++;
} }
...@@ -590,8 +598,8 @@ TEST_F(libyuvTest, Rotate90and270) { ...@@ -590,8 +598,8 @@ TEST_F(libyuvTest, Rotate90and270) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate90Pitch) { TEST_F(libyuvTest, RotatePlane90Pitch) {
int iw, ih, ow, oh; int iw, ih;
int err = 0; int err = 0;
for (iw = 16; iw < _rotate_max_w && !err; iw += 4) for (iw = 16; iw < _rotate_max_w && !err; iw += 4)
...@@ -600,31 +608,32 @@ TEST_F(libyuvTest, Rotate90Pitch) { ...@@ -600,31 +608,32 @@ TEST_F(libyuvTest, Rotate90Pitch) {
uint8 *input; uint8 *input;
uint8 *output_0; uint8 *output_0;
uint8 *output_90; uint8 *output_90;
ow = ih; int ow = ih;
oh = iw; int oh = iw;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_0 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_90 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_90 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i)
input[i] = i; input[i] = i;
output_0[i] = 0;
output_90[i] = 0;
}
Rotate90(input, iw,
output_90 + (ow>>1), ow, iw>>1, ih>>1);
Rotate90(input + (iw>>1), iw,
output_90 + (ow>>1) + ow*(oh>>1), ow, iw>>1, ih>>1);
Rotate90(input + iw*(ih>>1), iw,
output_90, ow, iw>>1, ih>>1);
Rotate90(input + (iw>>1) + iw*(ih>>1), iw,
output_90 + ow*(oh>>1), ow, iw>>1, ih>>1);
Rotate270(output_90, ih, output_0, iw, ow, oh); RotatePlane90(input, iw,
output_90 + (ow >> 1), ow,
for (i = 0; i < iw*ih; ++i) { iw >> 1, ih >> 1);
RotatePlane90(input + (iw >> 1), iw,
output_90 + (ow >> 1) + ow * (oh >> 1), ow,
iw >> 1, ih >> 1);
RotatePlane90(input + iw * (ih >> 1), iw,
output_90, ow,
iw >> 1, ih >> 1);
RotatePlane90(input + (iw >> 1) + iw * (ih >> 1), iw,
output_90 + ow * (oh >> 1), ow,
iw >> 1, ih >> 1);
RotatePlane270(output_90, ih, output_0, iw, ow, oh);
for (i = 0; i < (iw * ih); ++i) {
if (input[i] != output_0[i]) if (input[i] != output_0[i])
err++; err++;
} }
...@@ -648,7 +657,7 @@ TEST_F(libyuvTest, Rotate90Pitch) { ...@@ -648,7 +657,7 @@ TEST_F(libyuvTest, Rotate90Pitch) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
TEST_F(libyuvTest, Rotate270Pitch) { TEST_F(libyuvTest, RotatePlane270Pitch) {
int iw, ih, ow, oh; int iw, ih, ow, oh;
int err = 0; int err = 0;
...@@ -662,27 +671,29 @@ TEST_F(libyuvTest, Rotate270Pitch) { ...@@ -662,27 +671,29 @@ TEST_F(libyuvTest, Rotate270Pitch) {
ow = ih; ow = ih;
oh = iw; oh = iw;
input = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); input = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_0 = static_cast<uint8*>(malloc(sizeof(uint8)*iw*ih)); output_0 = static_cast<uint8*>(calloc(iw * ih, sizeof(uint8)));
output_270 = static_cast<uint8*>(malloc(sizeof(uint8)*ow*oh)); output_270 = static_cast<uint8*>(calloc(ow * oh, sizeof(uint8)));
for (i = 0; i < iw*ih; ++i) { for (i = 0; i < (iw * ih); ++i)
input[i] = i; input[i] = i;
output_270[i] = 0;
}
Rotate270(input, iw,
output_270 + ow*(oh>>1), ow, iw>>1, ih>>1);
Rotate270(input + (iw>>1), iw,
output_270, ow, iw>>1, ih>>1);
Rotate270(input + iw*(ih>>1), iw,
output_270 + (ow>>1) + ow*(oh>>1), ow, iw>>1, ih>>1);
Rotate270(input + (iw>>1) + iw*(ih>>1), iw,
output_270 + (ow>>1), ow, iw>>1, ih>>1);
Rotate90(output_270, ih, output_0, iw, ow, oh);
for (i = 0; i < iw*ih; ++i) { RotatePlane270(input, iw,
output_270 + ow * (oh >> 1), ow,
iw >> 1, ih >> 1);
RotatePlane270(input + (iw >> 1), iw,
output_270, ow,
iw >> 1, ih >> 1);
RotatePlane270(input + iw * (ih >> 1), iw,
output_270 + (ow >> 1) + ow * (oh >> 1), ow,
iw >> 1, ih >> 1);
RotatePlane270(input + (iw >> 1) + iw * (ih >> 1), iw,
output_270 + (ow >> 1), ow,
iw >> 1, ih >> 1);
RotatePlane90(output_270, ih, output_0, iw, ow, oh);
for (i = 0; i < (iw * ih); ++i) {
if (input[i] != output_0[i]) if (input[i] != output_0[i])
err++; err++;
} }
...@@ -705,3 +716,804 @@ TEST_F(libyuvTest, Rotate270Pitch) { ...@@ -705,3 +716,804 @@ TEST_F(libyuvTest, Rotate270Pitch) {
EXPECT_EQ(0, err); EXPECT_EQ(0, err);
} }
// Round-trip test for I420Rotate: rotating by 90, then 180, then 90 degrees
// must reproduce the source frame. Every plane sits inside a zero-filled
// border and the final comparison walks the complete padded planes, so the
// border bytes are compared as well.
TEST_F(libyuvTest, I420Rotate90) {
  const int luma_w = 1024;
  const int luma_h = 768;
  const int pad = 128;
  const int chroma_w = (luma_w + 1) >> 1;
  const int chroma_h = (luma_h + 1) >> 1;

  // Row strides of the padded planes: upright layout and quarter-turned layout.
  const int y_stride_up = luma_w + (2 * pad);
  const int uv_stride_up = chroma_w + (2 * pad);
  const int y_stride_turned = luma_h + (2 * pad);
  const int uv_stride_turned = chroma_h + (2 * pad);

  // Index of the first image byte, just inside the border, for each layout.
  const int y_first_up = pad * y_stride_up + pad;
  const int uv_first_up = pad * uv_stride_up + pad;
  const int y_first_turned = pad * y_stride_turned + pad;
  const int uv_first_turned = pad * uv_stride_turned + pad;

  // One byte count serves both layouts of a padded plane.
  const int y_bytes = y_stride_up * y_stride_turned;
  const int uv_bytes = uv_stride_up * uv_stride_turned;

  srandom(time(NULL));

  // src: input frame, quarter: after 90, flipped: after a further 180,
  // restored: after the final 90.
  uint8* src_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* src_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* src_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* restored_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* restored_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* restored_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* quarter_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* quarter_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* quarter_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* flipped_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* flipped_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* flipped_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));

  // Randomise the image area only; the border keeps the zeros from calloc.
  for (int row = 0; row < luma_h; ++row) {
    uint8* line = src_y + y_first_up + row * y_stride_up;
    for (int col = 0; col < luma_w; ++col)
      line[col] = random() & 0xff;
  }
  for (int row = 0; row < chroma_h; ++row) {
    uint8* u_line = src_u + uv_first_up + row * uv_stride_up;
    uint8* v_line = src_v + uv_first_up + row * uv_stride_up;
    for (int col = 0; col < chroma_w; ++col) {
      u_line[col] = random() & 0xff;
      v_line[col] = random() & 0xff;
    }
  }

  // Upright source -> quarter-turned layout.
  I420Rotate(src_y + y_first_up, y_stride_up,
             src_u + uv_first_up, uv_stride_up,
             src_v + uv_first_up, uv_stride_up,
             quarter_y + y_first_turned, y_stride_turned,
             quarter_u + uv_first_turned, uv_stride_turned,
             quarter_v + uv_first_turned, uv_stride_turned,
             luma_w, luma_h,
             kRotateClockwise);

  // Half turn; the layout stays quarter-turned, so width/height are swapped.
  I420Rotate(quarter_y + y_first_turned, y_stride_turned,
             quarter_u + uv_first_turned, uv_stride_turned,
             quarter_v + uv_first_turned, uv_stride_turned,
             flipped_y + y_first_turned, y_stride_turned,
             flipped_u + uv_first_turned, uv_stride_turned,
             flipped_v + uv_first_turned, uv_stride_turned,
             luma_h, luma_w,
             kRotate180);

  // Final quarter turn back into the upright layout.
  I420Rotate(flipped_y + y_first_turned, y_stride_turned,
             flipped_u + uv_first_turned, uv_stride_turned,
             flipped_v + uv_first_turned, uv_stride_turned,
             restored_y + y_first_up, y_stride_up,
             restored_u + uv_first_up, uv_stride_up,
             restored_v + uv_first_up, uv_stride_up,
             luma_h, luma_w,
             kRotateClockwise);

  // Count every differing byte across the full padded planes.
  int err = 0;
  for (int k = 0; k < y_bytes; ++k)
    err += (src_y[k] != restored_y[k]) ? 1 : 0;
  for (int k = 0; k < uv_bytes; ++k) {
    err += (src_u[k] != restored_u[k]) ? 1 : 0;
    err += (src_v[k] != restored_v[k]) ? 1 : 0;
  }

  free(src_y);
  free(src_u);
  free(src_v);
  free(restored_y);
  free(restored_u);
  free(restored_v);
  free(quarter_y);
  free(quarter_u);
  free(quarter_v);
  free(flipped_y);
  free(flipped_u);
  free(flipped_v);

  EXPECT_EQ(0, err);
}
// Round-trip test for I420Rotate: rotating by 270, then 180, then 270 degrees
// must reproduce the source frame. Every plane sits inside a zero-filled
// border and the final comparison walks the complete padded planes, so the
// border bytes are compared as well.
TEST_F(libyuvTest, I420Rotate270) {
  const int luma_w = 1024;
  const int luma_h = 768;
  const int pad = 128;
  const int chroma_w = (luma_w + 1) >> 1;
  const int chroma_h = (luma_h + 1) >> 1;

  // Row strides of the padded planes: upright layout and quarter-turned layout.
  const int y_stride_up = luma_w + (2 * pad);
  const int uv_stride_up = chroma_w + (2 * pad);
  const int y_stride_turned = luma_h + (2 * pad);
  const int uv_stride_turned = chroma_h + (2 * pad);

  // Index of the first image byte, just inside the border, for each layout.
  const int y_first_up = pad * y_stride_up + pad;
  const int uv_first_up = pad * uv_stride_up + pad;
  const int y_first_turned = pad * y_stride_turned + pad;
  const int uv_first_turned = pad * uv_stride_turned + pad;

  // One byte count serves both layouts of a padded plane.
  const int y_bytes = y_stride_up * y_stride_turned;
  const int uv_bytes = uv_stride_up * uv_stride_turned;

  srandom(time(NULL));

  // src: input frame, ccw: after 270, flipped: after a further 180,
  // restored: after the final 270.
  uint8* src_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* src_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* src_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* restored_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* restored_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* restored_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* flipped_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* flipped_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* flipped_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* ccw_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* ccw_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* ccw_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));

  // Randomise the image area only; the border keeps the zeros from calloc.
  for (int row = 0; row < luma_h; ++row) {
    uint8* line = src_y + y_first_up + row * y_stride_up;
    for (int col = 0; col < luma_w; ++col)
      line[col] = random() & 0xff;
  }
  for (int row = 0; row < chroma_h; ++row) {
    uint8* u_line = src_u + uv_first_up + row * uv_stride_up;
    uint8* v_line = src_v + uv_first_up + row * uv_stride_up;
    for (int col = 0; col < chroma_w; ++col) {
      u_line[col] = random() & 0xff;
      v_line[col] = random() & 0xff;
    }
  }

  // Upright source -> quarter-turned layout.
  I420Rotate(src_y + y_first_up, y_stride_up,
             src_u + uv_first_up, uv_stride_up,
             src_v + uv_first_up, uv_stride_up,
             ccw_y + y_first_turned, y_stride_turned,
             ccw_u + uv_first_turned, uv_stride_turned,
             ccw_v + uv_first_turned, uv_stride_turned,
             luma_w, luma_h,
             kRotateCounterClockwise);

  // Half turn; the layout stays quarter-turned, so width/height are swapped.
  I420Rotate(ccw_y + y_first_turned, y_stride_turned,
             ccw_u + uv_first_turned, uv_stride_turned,
             ccw_v + uv_first_turned, uv_stride_turned,
             flipped_y + y_first_turned, y_stride_turned,
             flipped_u + uv_first_turned, uv_stride_turned,
             flipped_v + uv_first_turned, uv_stride_turned,
             luma_h, luma_w,
             kRotate180);

  // Final quarter turn back into the upright layout.
  I420Rotate(flipped_y + y_first_turned, y_stride_turned,
             flipped_u + uv_first_turned, uv_stride_turned,
             flipped_v + uv_first_turned, uv_stride_turned,
             restored_y + y_first_up, y_stride_up,
             restored_u + uv_first_up, uv_stride_up,
             restored_v + uv_first_up, uv_stride_up,
             luma_h, luma_w,
             kRotateCounterClockwise);

  // Count every differing byte across the full padded planes.
  int err = 0;
  for (int k = 0; k < y_bytes; ++k)
    err += (src_y[k] != restored_y[k]) ? 1 : 0;
  for (int k = 0; k < uv_bytes; ++k) {
    err += (src_u[k] != restored_u[k]) ? 1 : 0;
    err += (src_v[k] != restored_v[k]) ? 1 : 0;
  }

  free(src_y);
  free(src_u);
  free(src_v);
  free(restored_y);
  free(restored_u);
  free(restored_v);
  free(flipped_y);
  free(flipped_u);
  free(flipped_v);
  free(ccw_y);
  free(ccw_u);
  free(ccw_v);

  EXPECT_EQ(0, err);
}
// Converts a padded NV12 frame to I420 with a 90 degree rotation, rotates the
// result by 270 degrees, and checks the outcome: the luma plane must match the
// source byte for byte, and every chroma byte pair must still satisfy
// u == -v (the relation the source UV plane was filled with), with at least
// one non-zero U byte.
TEST_F(libyuvTest, NV12ToI420Rotate90) {
  const int luma_w = 1024;
  const int luma_h = 768;
  const int pad = 128;
  const int chroma_w = (luma_w + 1) >> 1;
  const int chroma_h = (luma_h + 1) >> 1;

  // Row strides of the padded planes: upright layout and quarter-turned layout.
  const int y_stride_up = luma_w + (2 * pad);
  const int uv_stride_up = chroma_w + (2 * pad);
  const int y_stride_turned = luma_h + (2 * pad);
  const int uv_stride_turned = chroma_h + (2 * pad);
  // Stride of the interleaved source UV plane (two bytes per chroma sample).
  const int uv_in_stride = (2 * chroma_w) + (2 * pad);

  // Index of the first image byte, just inside the border, for each layout.
  const int y_first_up = pad * y_stride_up + pad;
  const int uv_first_up = pad * uv_stride_up + pad;
  const int y_first_turned = pad * y_stride_turned + pad;
  const int uv_first_turned = pad * uv_stride_turned + pad;

  const int y_bytes = y_stride_up * y_stride_turned;
  const int uv_bytes = uv_stride_up * uv_stride_turned;
  const int uv_in_bytes = uv_in_stride * uv_stride_turned;

  srandom(time(NULL));

  // src: NV12 input, turned: I420 after 90, restored: I420 after a further 270.
  uint8* src_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* src_uv = static_cast<uint8*>(calloc(uv_in_bytes, sizeof(uint8)));
  uint8* restored_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* restored_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* restored_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* turned_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* turned_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* turned_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));

  // Randomise the image area only; the border keeps the zeros from calloc.
  for (int row = 0; row < luma_h; ++row) {
    uint8* line = src_y + y_first_up + row * y_stride_up;
    for (int col = 0; col < luma_w; ++col)
      line[col] = random() & 0xff;
  }
  // Each interleaved pair is (r, -r) so the planar output can be validated
  // without keeping a reference copy of the chroma.
  for (int row = 0; row < chroma_h; ++row) {
    uint8* line = src_uv + (row + pad) * uv_in_stride + pad;
    for (int col = 0; col < (2 * chroma_w); col += 2) {
      const uint8 sample = random() & 0x7f;
      line[col] = sample;
      line[col + 1] = -sample;
    }
  }

  // NOTE: the UV plane is addressed with the luma offset and stride. That
  // equals the layout used by the fill loop above because 2 * chroma_w ==
  // luma_w for the even width chosen here.
  NV12ToI420Rotate(src_y + y_first_up, y_stride_up,
                   src_uv + y_first_up, y_stride_up,
                   turned_y + y_first_turned, y_stride_turned,
                   turned_u + uv_first_turned, uv_stride_turned,
                   turned_v + uv_first_turned, uv_stride_turned,
                   luma_w, luma_h,
                   kRotateClockwise);

  // Undo the quarter turn to get back to the upright layout.
  I420Rotate(turned_y + y_first_turned, y_stride_turned,
             turned_u + uv_first_turned, uv_stride_turned,
             turned_v + uv_first_turned, uv_stride_turned,
             restored_y + y_first_up, y_stride_up,
             restored_u + uv_first_up, uv_stride_up,
             restored_v + uv_first_up, uv_stride_up,
             luma_h, luma_w,
             kRotateCounterClockwise);

  int err = 0;
  for (int k = 0; k < y_bytes; ++k)
    err += (src_y[k] != restored_y[k]) ? 1 : 0;

  // Chroma: u must equal -v everywhere, and U must not be entirely zero.
  int nonzero_u = 0;
  for (int k = 0; k < uv_bytes; ++k) {
    const signed char u = static_cast<signed char>(restored_u[k]);
    const signed char v = static_cast<signed char>(restored_v[k]);
    if (u != -v)
      ++err;
    if (restored_u[k] != 0)
      ++nonzero_u;
  }
  if (nonzero_u == 0)
    ++err;

  free(src_y);
  free(src_uv);
  free(restored_y);
  free(restored_u);
  free(restored_v);
  free(turned_y);
  free(turned_u);
  free(turned_v);

  EXPECT_EQ(0, err);
}
// Converts a padded NV12 frame to I420 with a 270 degree rotation, rotates the
// result by 90 degrees, and checks the outcome: the luma plane must match the
// source byte for byte, and every chroma byte pair must still satisfy
// u == -v (the relation the source UV plane was filled with), with at least
// one non-zero U byte.
TEST_F(libyuvTest, NV12ToI420Rotate270) {
  const int luma_w = 1024;
  const int luma_h = 768;
  const int pad = 128;
  const int chroma_w = (luma_w + 1) >> 1;
  const int chroma_h = (luma_h + 1) >> 1;

  // Row strides of the padded planes: upright layout and quarter-turned layout.
  const int y_stride_up = luma_w + (2 * pad);
  const int uv_stride_up = chroma_w + (2 * pad);
  const int y_stride_turned = luma_h + (2 * pad);
  const int uv_stride_turned = chroma_h + (2 * pad);
  // Stride of the interleaved source UV plane (two bytes per chroma sample).
  const int uv_in_stride = (2 * chroma_w) + (2 * pad);

  // Index of the first image byte, just inside the border, for each layout.
  const int y_first_up = pad * y_stride_up + pad;
  const int uv_first_up = pad * uv_stride_up + pad;
  const int y_first_turned = pad * y_stride_turned + pad;
  const int uv_first_turned = pad * uv_stride_turned + pad;

  const int y_bytes = y_stride_up * y_stride_turned;
  const int uv_bytes = uv_stride_up * uv_stride_turned;
  const int uv_in_bytes = uv_in_stride * uv_stride_turned;

  srandom(time(NULL));

  // src: NV12 input, turned: I420 after 270, restored: I420 after a further 90.
  uint8* src_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* src_uv = static_cast<uint8*>(calloc(uv_in_bytes, sizeof(uint8)));
  uint8* restored_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* restored_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* restored_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* turned_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* turned_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* turned_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));

  // Randomise the image area only; the border keeps the zeros from calloc.
  for (int row = 0; row < luma_h; ++row) {
    uint8* line = src_y + y_first_up + row * y_stride_up;
    for (int col = 0; col < luma_w; ++col)
      line[col] = random() & 0xff;
  }
  // Each interleaved pair is (r, -r) so the planar output can be validated
  // without keeping a reference copy of the chroma.
  for (int row = 0; row < chroma_h; ++row) {
    uint8* line = src_uv + (row + pad) * uv_in_stride + pad;
    for (int col = 0; col < (2 * chroma_w); col += 2) {
      const uint8 sample = random() & 0x7f;
      line[col] = sample;
      line[col + 1] = -sample;
    }
  }

  // NOTE: the UV plane is addressed with the luma offset and stride. That
  // equals the layout used by the fill loop above because 2 * chroma_w ==
  // luma_w for the even width chosen here.
  NV12ToI420Rotate(src_y + y_first_up, y_stride_up,
                   src_uv + y_first_up, y_stride_up,
                   turned_y + y_first_turned, y_stride_turned,
                   turned_u + uv_first_turned, uv_stride_turned,
                   turned_v + uv_first_turned, uv_stride_turned,
                   luma_w, luma_h,
                   kRotateCounterClockwise);

  // Undo the quarter turn to get back to the upright layout.
  I420Rotate(turned_y + y_first_turned, y_stride_turned,
             turned_u + uv_first_turned, uv_stride_turned,
             turned_v + uv_first_turned, uv_stride_turned,
             restored_y + y_first_up, y_stride_up,
             restored_u + uv_first_up, uv_stride_up,
             restored_v + uv_first_up, uv_stride_up,
             luma_h, luma_w,
             kRotateClockwise);

  int err = 0;
  for (int k = 0; k < y_bytes; ++k)
    err += (src_y[k] != restored_y[k]) ? 1 : 0;

  // Chroma: u must equal -v everywhere, and U must not be entirely zero.
  int nonzero_u = 0;
  for (int k = 0; k < uv_bytes; ++k) {
    const signed char u = static_cast<signed char>(restored_u[k]);
    const signed char v = static_cast<signed char>(restored_v[k]);
    if (u != -v)
      ++err;
    if (restored_u[k] != 0)
      ++nonzero_u;
  }
  if (nonzero_u == 0)
    ++err;

  free(src_y);
  free(src_uv);
  free(restored_y);
  free(restored_u);
  free(restored_v);
  free(turned_y);
  free(turned_u);
  free(turned_v);

  EXPECT_EQ(0, err);
}
// Converts a padded NV12 frame to I420 with a 180 degree rotation, rotates the
// result by another 180 degrees, and checks the outcome: the luma plane must
// match the source byte for byte, and every chroma byte pair must still
// satisfy u == -v (the relation the source UV plane was filled with), with at
// least one non-zero U byte.
TEST_F(libyuvTest, NV12ToI420Rotate180) {
  const int luma_w = 1024;
  const int luma_h = 768;
  const int pad = 128;
  const int chroma_w = (luma_w + 1) >> 1;
  const int chroma_h = (luma_h + 1) >> 1;

  // A half turn keeps the frame shape, so one set of strides/offsets suffices.
  const int y_stride = luma_w + (2 * pad);
  const int uv_stride = chroma_w + (2 * pad);
  // Stride of the interleaved source UV plane (two bytes per chroma sample).
  const int uv_in_stride = (2 * chroma_w) + (2 * pad);
  const int y_rows = luma_h + (2 * pad);
  const int uv_rows = chroma_h + (2 * pad);

  // Index of the first image byte, just inside the border.
  const int y_first = pad * y_stride + pad;
  const int uv_first = pad * uv_stride + pad;

  const int y_bytes = y_stride * y_rows;
  const int uv_bytes = uv_stride * uv_rows;
  const int uv_in_bytes = uv_in_stride * uv_rows;

  srandom(time(NULL));

  // src: NV12 input, half: I420 after 180, restored: I420 after a further 180.
  uint8* src_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* src_uv = static_cast<uint8*>(calloc(uv_in_bytes, sizeof(uint8)));
  uint8* restored_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* restored_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* restored_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* half_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* half_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* half_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));

  // Randomise the image area only; the border keeps the zeros from calloc.
  for (int row = 0; row < luma_h; ++row) {
    uint8* line = src_y + y_first + row * y_stride;
    for (int col = 0; col < luma_w; ++col)
      line[col] = random() & 0xff;
  }
  // Each interleaved pair is (r, -r) so the planar output can be validated
  // without keeping a reference copy of the chroma.
  for (int row = 0; row < chroma_h; ++row) {
    uint8* line = src_uv + (row + pad) * uv_in_stride + pad;
    for (int col = 0; col < (2 * chroma_w); col += 2) {
      const uint8 sample = random() & 0x7f;
      line[col] = sample;
      line[col + 1] = -sample;
    }
  }

  // NOTE: the UV plane is addressed with the luma offset and stride. That
  // equals the layout used by the fill loop above because 2 * chroma_w ==
  // luma_w for the even width chosen here.
  NV12ToI420Rotate(src_y + y_first, y_stride,
                   src_uv + y_first, y_stride,
                   half_y + y_first, y_stride,
                   half_u + uv_first, uv_stride,
                   half_v + uv_first, uv_stride,
                   luma_w, luma_h,
                   kRotate180);

  // A second half turn should bring the frame back to its original state.
  I420Rotate(half_y + y_first, y_stride,
             half_u + uv_first, uv_stride,
             half_v + uv_first, uv_stride,
             restored_y + y_first, y_stride,
             restored_u + uv_first, uv_stride,
             restored_v + uv_first, uv_stride,
             luma_w, luma_h,
             kRotate180);

  int err = 0;
  for (int k = 0; k < y_bytes; ++k)
    err += (src_y[k] != restored_y[k]) ? 1 : 0;

  // Chroma: u must equal -v everywhere, and U must not be entirely zero.
  int nonzero_u = 0;
  for (int k = 0; k < uv_bytes; ++k) {
    const signed char u = static_cast<signed char>(restored_u[k]);
    const signed char v = static_cast<signed char>(restored_v[k]);
    if (u != -v)
      ++err;
    if (restored_u[k] != 0)
      ++nonzero_u;
  }
  if (nonzero_u == 0)
    ++err;

  free(src_y);
  free(src_uv);
  free(restored_y);
  free(restored_u);
  free(restored_v);
  free(half_y);
  free(half_u);
  free(half_v);

  EXPECT_EQ(0, err);
}
// Exercises negative heights together with rotation. A padded NV12 frame is
// converted to I420 with a 90 degree rotation and height -yh (buffer "a"),
// rotated by 270 degrees with height -yw (buffer "b"), and finally rotated by
// 180 degrees (buffer "c"). The luma of "c" must match the source byte for
// byte; every chroma byte pair of "c" must satisfy u == -v (the relation the
// source UV plane was filled with) and U must not be entirely zero. On
// failure the intermediate planes are dumped with print_array.
TEST_F(libyuvTest, NV12ToI420RotateNegHeight90) {
  int y_err = 0, uv_err = 0;

  const int yw = 1024;
  const int yh = 768;
  const int b = 128;  // border width, in bytes, on every side of each plane
  const int uvw = (yw + 1) >> 1;
  const int uvh = (yh + 1) >> 1;

  // Row strides: "_0" is the upright layout, "_90" the quarter-turned layout.
  const int y_st_0 = yw + (2 * b);
  const int uv_st_0 = uvw + (2 * b);
  const int y_st_90 = yh + (2 * b);
  const int uv_st_90 = uvh + (2 * b);
  // Stride of the interleaved source UV plane (two bytes per chroma sample).
  const int uv_in_st = (2 * uvw) + (2 * b);

  // Index of the first image byte, just inside the border, for each layout.
  const int y_off_0 = b * y_st_0 + b;
  const int uv_off_0 = b * uv_st_0 + b;
  const int y_off_90 = b * y_st_90 + b;
  const int uv_off_90 = b * uv_st_90 + b;

  const int y_plane_size = y_st_0 * y_st_90;
  const int uv_plane_size = uv_st_0 * uv_st_90;
  const int o_uv_plane_size = uv_in_st * uv_st_90;

  srandom(time(NULL));

  uint8* orig_y = static_cast<uint8*>(calloc(y_plane_size, sizeof(uint8)));
  uint8* orig_uv = static_cast<uint8*>(calloc(o_uv_plane_size, sizeof(uint8)));
  uint8* roa_y = static_cast<uint8*>(calloc(y_plane_size, sizeof(uint8)));
  uint8* roa_u = static_cast<uint8*>(calloc(uv_plane_size, sizeof(uint8)));
  uint8* roa_v = static_cast<uint8*>(calloc(uv_plane_size, sizeof(uint8)));
  uint8* rob_y = static_cast<uint8*>(calloc(y_plane_size, sizeof(uint8)));
  uint8* rob_u = static_cast<uint8*>(calloc(uv_plane_size, sizeof(uint8)));
  uint8* rob_v = static_cast<uint8*>(calloc(uv_plane_size, sizeof(uint8)));
  uint8* roc_y = static_cast<uint8*>(calloc(y_plane_size, sizeof(uint8)));
  uint8* roc_u = static_cast<uint8*>(calloc(uv_plane_size, sizeof(uint8)));
  uint8* roc_v = static_cast<uint8*>(calloc(uv_plane_size, sizeof(uint8)));

  // Randomise the image area only; the border keeps the zeros from calloc.
  for (int row = 0; row < yh; ++row) {
    uint8* line = orig_y + y_off_0 + row * y_st_0;
    for (int col = 0; col < yw; ++col)
      line[col] = random() & 0xff;
  }
  // Each interleaved pair is (r, -r) so the planar output can be validated
  // without keeping a reference copy of the chroma.
  for (int row = 0; row < uvh; ++row) {
    uint8* line = orig_uv + (row + b) * uv_in_st + b;
    for (int col = 0; col < (2 * uvw); col += 2) {
      const uint8 random_number = random() & 0x7f;
      line[col] = random_number;
      line[col + 1] = -random_number;
    }
  }

  // NOTE: the UV plane is addressed with the luma offset and stride. That
  // equals the layout used by the fill loop above because 2 * uvw == yw for
  // the even width chosen here.
  NV12ToI420Rotate(orig_y + y_off_0, y_st_0,
                   orig_uv + y_off_0, y_st_0,
                   roa_y + y_off_90, y_st_90,
                   roa_u + uv_off_90, uv_st_90,
                   roa_v + uv_off_90, uv_st_90,
                   yw, -yh,
                   kRotateClockwise);

  I420Rotate(roa_y + y_off_90, y_st_90,
             roa_u + uv_off_90, uv_st_90,
             roa_v + uv_off_90, uv_st_90,
             rob_y + y_off_0, y_st_0,
             rob_u + uv_off_0, uv_st_0,
             rob_v + uv_off_0, uv_st_0,
             yh, -yw,
             kRotateCounterClockwise);

  I420Rotate(rob_y + y_off_0, y_st_0,
             rob_u + uv_off_0, uv_st_0,
             rob_v + uv_off_0, uv_st_0,
             roc_y + y_off_0, y_st_0,
             roc_u + uv_off_0, uv_st_0,
             roc_v + uv_off_0, uv_st_0,
             yw, yh,
             kRotate180);

  for (int i = 0; i < y_plane_size; ++i) {
    if (orig_y[i] != roc_y[i])
      ++y_err;
  }

  // Diagnostics. print_array is assumed to take (buffer, width, height), as
  // the orig_y call suggests -- TODO confirm against its definition. Buffers
  // "b" and "c" are stored upright (stride y_st_0), so they are dumped with
  // the upright dimensions; only "a" uses the quarter-turned layout.
  if (y_err) {
    printf("input %dx%d \n", yw, yh);
    print_array(orig_y, y_st_0, yh + (2 * b));
    printf("rotate a\n");
    print_array(roa_y, y_st_90, y_st_0);
    printf("rotate b\n");
    print_array(rob_y, y_st_0, y_st_90);
    printf("rotate c\n");
    print_array(roc_y, y_st_0, y_st_90);
  }

  // Both chroma checks look at the final output "c": an all-zero result would
  // satisfy u == -v trivially, so also require at least one non-zero U byte.
  int nonzero_cnt = 0;
  for (int i = 0; i < uv_plane_size; ++i) {
    if (static_cast<signed char>(roc_u[i]) !=
        -static_cast<signed char>(roc_v[i]))
      ++uv_err;
    if (roc_u[i] != 0)
      ++nonzero_cnt;
  }
  if (!nonzero_cnt)
    ++uv_err;

  if (uv_err) {
    printf("input %dx%d \n", (2 * uvw), uvh);
    print_array(orig_uv, y_st_0, uvh + (2 * b));
    printf("rotate a\n");
    print_array(roa_u, uv_st_90, uv_st_0);
    print_array(roa_v, uv_st_90, uv_st_0);
    printf("rotate b\n");
    print_array(rob_u, uv_st_0, uv_st_90);
    print_array(rob_v, uv_st_0, uv_st_90);
    printf("rotate c\n");
    print_array(roc_u, uv_st_0, uv_st_90);
    print_array(roc_v, uv_st_0, uv_st_90);
  }

  free(orig_y);
  free(orig_uv);
  free(roa_y);
  free(roa_u);
  free(roa_v);
  free(rob_y);
  free(rob_u);
  free(rob_v);
  free(roc_y);
  free(roc_u);
  free(roc_v);

  EXPECT_EQ(0, y_err + uv_err);
}
// Exercises negative heights together with a half turn. A padded NV12 frame is
// converted to I420 with a 180 degree rotation and height -yh (buffer "a"),
// then rotated by 180 degrees with height -yh again (buffer "b"). The luma of
// "b" must match the source byte for byte; every chroma byte pair of "b" must
// satisfy u == -v (the relation the source UV plane was filled with) and U
// must not be entirely zero. On failure the planes are dumped with
// print_array.
TEST_F(libyuvTest, NV12ToI420RotateNegHeight180) {
  int y_err = 0, uv_err = 0;

  const int luma_w = 1024;
  const int luma_h = 768;
  const int pad = 128;
  const int chroma_w = (luma_w + 1) >> 1;
  const int chroma_h = (luma_h + 1) >> 1;

  // A half turn keeps the frame shape, so one set of strides/offsets suffices.
  const int y_stride = luma_w + (2 * pad);
  const int uv_stride = chroma_w + (2 * pad);
  // Stride of the interleaved source UV plane (two bytes per chroma sample).
  const int uv_in_stride = (2 * chroma_w) + (2 * pad);
  const int y_rows = luma_h + (2 * pad);
  const int uv_rows = chroma_h + (2 * pad);

  // Index of the first image byte, just inside the border.
  const int y_first = pad * y_stride + pad;
  const int uv_first = pad * uv_stride + pad;

  const int y_bytes = y_stride * y_rows;
  const int uv_bytes = uv_stride * uv_rows;
  const int uv_in_bytes = uv_in_stride * uv_rows;

  srandom(time(NULL));

  uint8* src_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* src_uv = static_cast<uint8*>(calloc(uv_in_bytes, sizeof(uint8)));
  uint8* first_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* first_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* first_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* second_y = static_cast<uint8*>(calloc(y_bytes, sizeof(uint8)));
  uint8* second_u = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));
  uint8* second_v = static_cast<uint8*>(calloc(uv_bytes, sizeof(uint8)));

  // Randomise the image area only; the border keeps the zeros from calloc.
  for (int row = 0; row < luma_h; ++row) {
    uint8* line = src_y + y_first + row * y_stride;
    for (int col = 0; col < luma_w; ++col)
      line[col] = random() & 0xff;
  }
  // Each interleaved pair is (r, -r) so the planar output can be validated
  // without keeping a reference copy of the chroma.
  for (int row = 0; row < chroma_h; ++row) {
    uint8* line = src_uv + (row + pad) * uv_in_stride + pad;
    for (int col = 0; col < (2 * chroma_w); col += 2) {
      const uint8 sample = random() & 0x7f;
      line[col] = sample;
      line[col + 1] = -sample;
    }
  }

  // NOTE: the UV plane is addressed with the luma offset and stride. That
  // equals the layout used by the fill loop above because 2 * chroma_w ==
  // luma_w for the even width chosen here.
  NV12ToI420Rotate(src_y + y_first, y_stride,
                   src_uv + y_first, y_stride,
                   first_y + y_first, y_stride,
                   first_u + uv_first, uv_stride,
                   first_v + uv_first, uv_stride,
                   luma_w, -luma_h,
                   kRotate180);

  I420Rotate(first_y + y_first, y_stride,
             first_u + uv_first, uv_stride,
             first_v + uv_first, uv_stride,
             second_y + y_first, y_stride,
             second_u + uv_first, uv_stride,
             second_v + uv_first, uv_stride,
             luma_w, -luma_h,
             kRotate180);

  for (int k = 0; k < y_bytes; ++k)
    y_err += (src_y[k] != second_y[k]) ? 1 : 0;

  // Dump the luma planes when any byte differed.
  if (y_err != 0) {
    printf("input %dx%d \n", luma_w, luma_h);
    print_array(src_y, y_stride, y_rows);
    printf("rotate a\n");
    print_array(first_y, y_stride, y_rows);
    printf("rotate b\n");
    print_array(second_y, y_stride, y_rows);
  }

  // Chroma: u must equal -v everywhere, and U must not be entirely zero.
  int nonzero_u = 0;
  for (int k = 0; k < uv_bytes; ++k) {
    const signed char u = static_cast<signed char>(second_u[k]);
    const signed char v = static_cast<signed char>(second_v[k]);
    if (u != -v)
      ++uv_err;
    if (second_u[k] != 0)
      ++nonzero_u;
  }
  if (nonzero_u == 0)
    ++uv_err;

  // Dump the chroma planes when any check failed.
  if (uv_err != 0) {
    printf("input %dx%d \n", (2 * chroma_w), chroma_h);
    print_array(src_uv, y_stride, uv_rows);
    printf("rotate a\n");
    print_array(first_u, uv_stride, uv_rows);
    print_array(first_v, uv_stride, uv_rows);
    printf("rotate b\n");
    print_array(second_u, uv_stride, uv_rows);
    print_array(second_v, uv_stride, uv_rows);
  }

  free(src_y);
  free(src_uv);
  free(first_y);
  free(first_u);
  free(first_v);
  free(second_y);
  free(second_u);
  free(second_v);

  EXPECT_EQ(0, y_err + uv_err);
}
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#ifndef UINIT_TEST_H_ #ifndef UINIT_TEST_H_
#define UINIT_TEST_H_ #define UINIT_TEST_H_
#include "basic_types.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
class libyuvTest : public ::testing::Test { class libyuvTest : public ::testing::Test {
...@@ -20,8 +19,8 @@ class libyuvTest : public ::testing::Test { ...@@ -20,8 +19,8 @@ class libyuvTest : public ::testing::Test {
virtual void SetUp(); virtual void SetUp();
virtual void TearDown(); virtual void TearDown();
const uint32 _rotate_max_w; const int _rotate_max_w;
const uint32 _rotate_max_h; const int _rotate_max_h;
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment