Commit 8f0b3277 authored by fbarchard@google.com

ARGBToUV AVX2 functions hooked up.

BUG=none
TESTED=RGB565ToI420
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/46829004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1359 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 9afabe29
...@@ -544,10 +544,12 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, ...@@ -544,10 +544,12 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
...@@ -839,34 +841,30 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, ...@@ -839,34 +841,30 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif // HAS_ARGBTOUVROW_SSSE3 #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#endif #endif
{
#if !defined(HAS_RGB24TOYROW_NEON) #if !defined(HAS_RGB24TOYROW_NEON)
{
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
...@@ -899,8 +897,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, ...@@ -899,8 +897,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
} }
#if !defined(HAS_RGB24TOYROW_NEON) #if !defined(HAS_RGB24TOYROW_NEON)
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif
} }
#endif
return 0; return 0;
} }
...@@ -960,66 +958,64 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, ...@@ -960,66 +958,64 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif // HAS_ARGBTOUVROW_SSSE3 #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#endif #endif
#if !defined(HAS_RAWTOYROW_NEON)
{ {
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYROW_NEON) #if defined(HAS_RAWTOYROW_NEON)
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw, dst_y, width);
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else #else
RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif #endif
src_raw += src_stride_raw * 2; src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
dst_u += dst_stride_u; dst_u += dst_stride_u;
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
#if defined(HAS_RAWTOYROW_NEON) #if defined(HAS_RAWTOYROW_NEON)
RAWToUVRow(src_raw, 0, dst_u, dst_v, width); RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw, dst_y, width);
#else #else
RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
#endif #endif
} }
#if !defined(HAS_RAWTOYROW_NEON) #if !defined(HAS_RAWTOYROW_NEON)
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif
} }
#endif
return 0; return 0;
} }
...@@ -1083,35 +1079,30 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, ...@@ -1083,35 +1079,30 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#endif #endif
{
#if !defined(HAS_RGB565TOYROW_NEON) #if !defined(HAS_RGB565TOYROW_NEON)
{
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
...@@ -1144,8 +1135,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, ...@@ -1144,8 +1135,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
} }
#if !defined(HAS_RGB565TOYROW_NEON) #if !defined(HAS_RGB565TOYROW_NEON)
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif
} }
#endif
return 0; return 0;
} }
...@@ -1202,35 +1193,30 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, ...@@ -1202,35 +1193,30 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#endif #endif
{
#if !defined(HAS_ARGB1555TOYROW_NEON) #if !defined(HAS_ARGB1555TOYROW_NEON)
{
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
...@@ -1263,9 +1249,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, ...@@ -1263,9 +1249,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
#endif #endif
} }
#if !defined(HAS_ARGB1555TOYROW_NEON) #if !defined(HAS_ARGB1555TOYROW_NEON)
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif
} }
#endif
return 0; return 0;
} }
...@@ -1322,35 +1308,31 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, ...@@ -1322,35 +1308,31 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#endif #endif
{
#if !defined(HAS_ARGB4444TOYROW_NEON) #if !defined(HAS_ARGB4444TOYROW_NEON)
{
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
...@@ -1385,8 +1367,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, ...@@ -1385,8 +1367,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
} }
#if !defined(HAS_ARGB4444TOYROW_NEON) #if !defined(HAS_ARGB4444TOYROW_NEON)
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif
} }
#endif
return 0; return 0;
} }
......
...@@ -289,10 +289,12 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, ...@@ -289,10 +289,12 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
...@@ -339,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, ...@@ -339,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15); uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
...@@ -396,10 +398,12 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, ...@@ -396,10 +398,12 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
...@@ -446,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, ...@@ -446,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15); uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
...@@ -1088,7 +1092,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, ...@@ -1088,7 +1092,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
ARGBToYJRow_C; ARGBToYJRow_C;
if (!src_argb || if (!src_argb ||
...@@ -1112,7 +1116,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, ...@@ -1112,7 +1116,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) #if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2; ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
......
...@@ -2040,7 +2040,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, ...@@ -2040,7 +2040,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
#endif #endif
{ {
// 3 rows with edges before/after. // 3 rows with edges before/after.
const int kRowSize = (width + kEdge + 15) & ~15; const int kRowSize = (width + kEdge + 31) & ~31;
align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
uint8* row_sobelx = rows; uint8* row_sobelx = rows;
uint8* row_sobely = rows + kRowSize; uint8* row_sobely = rows + kRowSize;
......
...@@ -953,7 +953,6 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, ...@@ -953,7 +953,6 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
#endif // HAS_ARGBTOUVROW_AVX2 #endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ARGBTOUVJROW_SSSE3 #ifdef HAS_ARGBTOUVJROW_SSSE3
// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) { uint8* dst_u, uint8* dst_v, int width) {
asm volatile ( asm volatile (
......
...@@ -1161,7 +1161,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { ...@@ -1161,7 +1161,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
} }
#endif // HAS_ARGBTOYROW_AVX2 #endif // HAS_ARGBTOYROW_AVX2
#ifdef HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values. // Convert 32 ARGB pixels (128 bytes) to 32 Y values.
__declspec(naked) __declspec(align(32)) __declspec(naked) __declspec(align(32))
void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
......
...@@ -1164,7 +1164,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height, ...@@ -1164,7 +1164,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
const uint8* src = src_ptr + yi * src_stride; const uint8* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers. // Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15; const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row; uint8* rowptr = row;
...@@ -1295,7 +1295,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, ...@@ -1295,7 +1295,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
const uint16* src = src_ptr + yi * src_stride; const uint16* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers. // Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15; const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4); align_buffer_64(row, kRowSize * 4);
uint16* rowptr = (uint16*)row; uint16* rowptr = (uint16*)row;
......
...@@ -87,7 +87,7 @@ static void ScaleARGBDown4Box(int src_width, int src_height, ...@@ -87,7 +87,7 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
int x, int dx, int y, int dy) { int x, int dx, int y, int dy) {
int j; int j;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16); int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
...@@ -353,7 +353,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ...@@ -353,7 +353,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
const uint8* src = src_argb + yi * src_stride; const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15; const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row; uint8* rowptr = row;
...@@ -538,7 +538,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, ...@@ -538,7 +538,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
const uint8* src_row_v = src_v + uv_yi * src_stride_v; const uint8* src_row_v = src_v + uv_yi * src_stride_v;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15; const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
// Allocate 1 row of ARGB for source conversion. // Allocate 1 row of ARGB for source conversion.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment