Commit 8f0b3277 authored by fbarchard@google.com's avatar fbarchard@google.com

ARGBToUV AVX2 functions hooked up.

BUG=none
TESTED=RGB565ToI420
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/46829004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1359 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 9afabe29
......@@ -544,10 +544,12 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
......@@ -839,34 +841,30 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif // HAS_ARGBTOUVROW_SSSE3
#if defined(HAS_ARGBTOYROW_AVX2)
#endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
{
#if !defined(HAS_RGB24TOYROW_NEON)
{
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15;
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
......@@ -899,8 +897,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
}
#if !defined(HAS_RGB24TOYROW_NEON)
free_aligned_buffer_64(row);
#endif
}
#endif
return 0;
}
......@@ -960,66 +958,64 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif // HAS_ARGBTOUVROW_SSSE3
#if defined(HAS_ARGBTOYROW_AVX2)
#endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if !defined(HAS_RAWTOYROW_NEON)
{
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15;
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYROW_NEON)
#if defined(HAS_RAWTOYROW_NEON)
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
#else
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if defined(HAS_RAWTOYROW_NEON)
#if defined(HAS_RAWTOYROW_NEON)
RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
#else
#else
RAWToARGBRow(src_raw, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
#endif
#endif
}
#if !defined(HAS_RAWTOYROW_NEON)
#if !defined(HAS_RAWTOYROW_NEON)
free_aligned_buffer_64(row);
#endif
}
#endif
return 0;
}
......@@ -1083,35 +1079,30 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
{
#if !defined(HAS_RGB565TOYROW_NEON)
{
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15;
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
......@@ -1144,8 +1135,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
}
#if !defined(HAS_RGB565TOYROW_NEON)
free_aligned_buffer_64(row);
#endif
}
#endif
return 0;
}
......@@ -1202,35 +1193,30 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
{
#if !defined(HAS_ARGB1555TOYROW_NEON)
{
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15;
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
......@@ -1264,8 +1250,8 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
}
#if !defined(HAS_ARGB1555TOYROW_NEON)
free_aligned_buffer_64(row);
#endif
}
#endif
return 0;
}
......@@ -1322,35 +1308,31 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
{
#if !defined(HAS_ARGB4444TOYROW_NEON)
{
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15;
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
......@@ -1385,8 +1367,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
}
#if !defined(HAS_ARGB4444TOYROW_NEON)
free_aligned_buffer_64(row);
#endif
}
#endif
return 0;
}
......
......@@ -289,10 +289,12 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
......@@ -339,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
#endif
{
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
......@@ -396,10 +398,12 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
......@@ -446,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
#endif
{
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
......@@ -1112,7 +1116,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
......
......@@ -2040,7 +2040,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
#endif
{
// 3 rows with edges before/after.
const int kRowSize = (width + kEdge + 15) & ~15;
const int kRowSize = (width + kEdge + 31) & ~31;
align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
uint8* row_sobelx = rows;
uint8* row_sobely = rows + kRowSize;
......
......@@ -953,7 +953,6 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
#endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ARGBTOUVJROW_SSSE3
// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
......
......@@ -1161,7 +1161,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
}
#endif // HAS_ARGBTOYROW_AVX2
#ifdef HAS_ARGBTOYROW_AVX2
#ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
__declspec(naked) __declspec(align(32))
void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
......
......@@ -1164,7 +1164,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
const uint8* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15;
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
......@@ -1295,7 +1295,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
const uint16* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15;
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4);
uint16* rowptr = (uint16*)row;
......
......@@ -87,7 +87,7 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
int x, int dx, int y, int dy) {
int j;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
......@@ -353,7 +353,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
......@@ -538,7 +538,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
const uint8* src_row_v = src_v + uv_yi * src_stride_v;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
// Allocate 1 row of ARGB for source conversion.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment