Commit f3fb7b69 authored by fbarchard@google.com

Lint cleanup for Neon and Visual C inline.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/464002

git-svn-id: http://libyuv.googlecode.com/svn/trunk@231 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 5b22506b
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 230
Version: 231
License: BSD
License File: LICENSE
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION 230
#define INCLUDE_LIBYUV_VERSION 231
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -78,7 +78,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
movdqa xmm0, [eax]
pavgb xmm0, [eax + edx]
sub ecx, 16
movdqa [eax + edi], xmm0 // NOLINT
movdqa [eax + edi], xmm0
lea eax, [eax + 16]
jg convertloop
pop edi
......@@ -475,7 +475,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
pand xmm2, xmm5 // even bytes are Y
pand xmm3, xmm5
packuswb xmm2, xmm3
movdqa [edx], xmm2 // NOLINT
movdqa [edx], xmm2
lea edx, [edx + 16]
psrlw xmm0, 8 // YUYV -> UVUV
psrlw xmm1, 8
......@@ -483,12 +483,12 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
movdqa xmm1, xmm0
pand xmm0, xmm5 // U
packuswb xmm0, xmm0
movq qword ptr [esi], xmm0 // NOLINT
movq qword ptr [esi], xmm0
lea esi, [esi + 8]
psrlw xmm1, 8 // V
packuswb xmm1, xmm1
sub ecx, 16
movq qword ptr [edi], xmm1 // NOLINT
movq qword ptr [edi], xmm1
lea edi, [edi + 8]
jg convertloop
......
......@@ -283,7 +283,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
// Keep the architecture test parenthesized: && binds tighter than ||, so
// without the grouping YUV_DISABLE_ASM is ignored whenever __x86_64__ is set.
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
......@@ -355,7 +355,9 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
}
#endif
static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
static void I42xToYUY2Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
for (int x = 0; x < width - 1; x += 2) {
dst_frame[0] = src_y[0];
......@@ -375,7 +377,9 @@ static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8*
}
}
static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
static void I42xToUYVYRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
for (int x = 0; x < width - 1; x += 2) {
dst_frame[0] = src_u[0];
......@@ -406,7 +410,7 @@ static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8*
#endif
#ifdef LIBYUV_LITTLE_ENDIAN
#define WRITEWORD(p, v) (*((uint32*) (p))) = v
#define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v
#else
static inline void WRITEWORD(uint8* p, uint32 v) {
......@@ -453,8 +457,8 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToYUY2Row = I42xToYUY2Row_C;
const uint8* src_v, uint8* dst_frame, int width) =
I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
......@@ -489,8 +493,8 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToYUY2Row = I42xToYUY2Row_C;
const uint8* src_v, uint8* dst_frame, int width) =
I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
......@@ -531,8 +535,8 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
......@@ -567,8 +571,8 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
......@@ -598,7 +602,7 @@ int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required
if (width * 16 / 6 > kMaxStride || // Row buffer of V210 is required.
src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
......@@ -614,8 +618,8 @@ int I420ToV210(const uint8* src_y, int src_stride_y,
UYVYToV210Row = UYVYToV210Row_C;
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
const uint8* src_v, uint8* dst_frame, int width) =
I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
......@@ -658,14 +662,14 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToARGBRow = I420ToARGBRow_NEON;
}
} else
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
......@@ -673,11 +677,9 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
}
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
......@@ -706,14 +708,14 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToBGRARow_C;
#if defined(HAS_I420TOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToBGRARow = I420ToBGRARow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToBGRARow = I420ToBGRARow_NEON;
}
} else
}
#elif defined(HAS_I420TOBGRAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToBGRARow = I420ToBGRARow_Any_SSSE3;
......@@ -721,11 +723,9 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
I420ToBGRARow = I420ToBGRARow_SSSE3;
}
} else
#endif
{
I420ToBGRARow = I420ToBGRARow_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
dst_bgra += dst_stride_bgra;
......@@ -754,14 +754,14 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToABGRRow_C;
#if defined(HAS_I420TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToABGRRow = I420ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToABGRRow = I420ToABGRRow_NEON;
}
} else
}
#elif defined(HAS_I420TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToABGRRow = I420ToABGRRow_Any_SSSE3;
......@@ -769,11 +769,9 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
I420ToABGRRow = I420ToABGRRow_SSSE3;
}
} else
#endif
{
I420ToABGRRow = I420ToABGRRow_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_abgr += dst_stride_abgr;
......@@ -802,22 +800,20 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB24Row_C;
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
......@@ -825,11 +821,8 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
} else
#endif
{
ARGBToRGB24Row = ARGBToRGB24Row_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
......@@ -860,22 +853,20 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRAWRow_C;
#if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
......@@ -883,11 +874,8 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
} else
#endif
{
ARGBToRAWRow = ARGBToRAWRow_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
......@@ -918,33 +906,28 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix);
void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix) =
ARGBToRGB565Row_C;
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
}
} else
#endif
{
ARGBToRGB565Row = ARGBToRGB565Row_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
......@@ -975,33 +958,28 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB1555Row_C;
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
}
} else
#endif
{
ARGBToARGB1555Row = ARGBToARGB1555Row_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
......@@ -1032,33 +1010,28 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToARGB4444Row_C;
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
}
} else
#endif
{
ARGBToARGB4444Row = ARGBToARGB4444Row_C;
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
......@@ -1080,7 +1053,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format) {
if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) {
return -1;
}
......
......@@ -48,7 +48,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
// Keep the architecture test parenthesized: && binds tighter than ||, so
// without the grouping YUV_DISABLE_ASM is ignored whenever __x86_64__ is set.
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_ARGBTOBAYERROW_SSSE3
static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
......@@ -141,18 +141,15 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
void (*ARGBToBayerRow)(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix);
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
} else
#endif
{
ARGBToBayerRow = ARGBToBayerRow_C;
}
#endif
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
......@@ -170,7 +167,7 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
return 0;
}
#define AVG(a,b) (((a) + (b)) >> 1)
#define AVG(a, b) (((a) + (b)) >> 1)
static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
......@@ -369,9 +366,10 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
#if defined(HAS_ARGBTOYROW_SSSE3)
......@@ -379,19 +377,13 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
} else
#endif
{
ARGBToYRow = ARGBToYRow_C;
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
} else
#endif
{
ARGBToUVRow = ARGBToUVRow_C;
}
#endif
switch (src_fourcc_bayer) {
case FOURCC_BGGR:
......@@ -456,30 +448,24 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToBayerRow)(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix);
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
} else
#endif
{
ARGBToBayerRow = ARGBToBayerRow_C;
}
#endif
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
......
......@@ -29,7 +29,7 @@ extern "C" {
".private_extern _" #name " \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
// Keep the toolchain test parenthesized: && binds tighter than ||, so without
// the grouping __MINGW32__ alone selects this branch regardless of __i386__.
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
......@@ -42,16 +42,6 @@ extern "C" {
#endif
#endif
typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int);
typedef void (*rotate_uv_wx8_func)(const uint8*, int,
uint8*, int,
uint8*, int, int);
typedef void (*rotate_uv_wxh_func)(const uint8*, int,
uint8*, int,
uint8*, int, int, int);
typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
#ifdef __ARM_NEON__
#define HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
......@@ -291,7 +281,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
ret
}
}
#elif (defined(__i386__) || defined(__x86_64__)) && !defined(YUV_DISABLE_ASM)
// Keep the architecture test parenthesized: && binds tighter than ||, so
// without the grouping YUV_DISABLE_ASM is ignored whenever __i386__ is set.
#elif (defined(__i386__) || defined(__x86_64__)) && !defined(YUV_DISABLE_ASM)
#define HAS_TRANSPOSE_WX8_SSSE3
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
......@@ -501,7 +491,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
"pop %ebx \n"
"ret \n"
);
#elif defined (__x86_64__)
#elif defined(__x86_64__)
// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
#define HAS_TRANSPOSE_WX8_FAST_SSSE3
static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
......@@ -781,45 +771,37 @@ static void TransposeWxH_C(const uint8* src, int src_stride,
void TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
int i = height;
rotate_wx8_func TransposeWx8;
rotate_wxh_func TransposeWxH;
void (*TransposeWx8)(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) = TransposeWx8_C;
#if defined(HAS_TRANSPOSE_WX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeWx8_NEON;
TransposeWxH = TransposeWxH_C;
} else
}
#endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_SSSE3;
}
#endif
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
TransposeWx8 = TransposeWx8_FAST_SSSE3;
TransposeWxH = TransposeWxH_C;
} else
#endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_SSSE3;
TransposeWxH = TransposeWxH_C;
} else
#endif
{
TransposeWx8 = TransposeWx8_C;
TransposeWxH = TransposeWxH_C;
}
#endif
// work across the source in 8x8 tiles
// Work across the source in 8x8 tiles
int i = height;
while (i >= 8) {
TransposeWx8(src, src_stride, dst, dst_stride, width);
src += 8 * src_stride; // go down 8 rows
dst += 8; // move over 8 columns
src += 8 * src_stride; // Go down 8 rows.
dst += 8; // Move over 8 columns.
i -= 8;
}
TransposeWxH(src, src_stride, dst, dst_stride, width, i);
TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
}
void RotatePlane90(const uint8* src, int src_stride,
......@@ -830,7 +812,6 @@ void RotatePlane90(const uint8* src, int src_stride,
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
......@@ -842,26 +823,17 @@ void RotatePlane270(const uint8* src, int src_stride,
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
void RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
void (*MirrorRow)(const uint8* src, uint8* dst, int width);
void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MirrorRow = MirrorRow_NEON;
} else
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSSE3;
} else
}
#endif
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
......@@ -869,11 +841,16 @@ void RotatePlane180(const uint8* src, int src_stride,
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSE2;
} else
}
#endif
{
MirrorRow = MirrorRow_C;
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSSE3;
}
#endif
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) {
......@@ -915,8 +892,7 @@ static void TransposeUVWx8_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
int i;
for (i = 0; i < width; ++i) {
for (int i = 0; i < width; ++i) {
dst_a[0] = src[0 * src_stride + 0];
dst_b[0] = src[0 * src_stride + 1];
dst_a[1] = src[1 * src_stride + 0];
......@@ -943,9 +919,8 @@ static void TransposeUVWxH_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
int i, j;
for (i = 0; i < width * 2; i += 2)
for (j = 0; j < height; ++j) {
for (int i = 0; i < width * 2; i += 2)
for (int j = 0; j < height; ++j) {
dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
}
......@@ -955,47 +930,39 @@ void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
int i = height;
rotate_uv_wx8_func TransposeWx8;
rotate_uv_wxh_func TransposeWxH;
void (*TransposeUVWx8)(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) = TransposeUVWx8_C;
#if defined(HAS_TRANSPOSE_UVWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeUVWx8_NEON;
TransposeWxH = TransposeUVWxH_C;
} else
#endif
#if defined(HAS_TRANSPOSE_UVWX8_SSE2)
TransposeUVWx8 = TransposeUVWx8_NEON;
}
#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
TransposeWx8 = TransposeUVWx8_SSE2;
TransposeWxH = TransposeUVWxH_C;
} else
#endif
{
TransposeWx8 = TransposeUVWx8_C;
TransposeWxH = TransposeUVWxH_C;
TransposeUVWx8 = TransposeUVWx8_SSE2;
}
#endif
// work through the source in 8x8 tiles
// Work through the source in 8x8 tiles.
int i = height;
while (i >= 8) {
TransposeWx8(src, src_stride,
TransposeUVWx8(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width);
src += 8 * src_stride; // go down 8 rows
dst_a += 8; // move over 8 columns
dst_b += 8; // move over 8 columns
src += 8 * src_stride; // Go down 8 rows.
dst_a += 8; // Move over 8 columns.
dst_b += 8; // Move over 8 columns.
i -= 8;
}
TransposeWxH(src, src_stride,
TransposeUVWxH_C(src, src_stride,
dst_a, dst_stride_a,
dst_b, dst_stride_b,
width, i);
}
void RotateUV90(const uint8* src, int src_stride,
......@@ -1031,29 +998,25 @@ void RotateUV180(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height) {
mirror_uv_func MirrorRow;
void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
MirrorRowUV_C;
#if defined(HAS_MIRRORROW_UV_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MirrorRow = MirrorRowUV_NEON;
} else
#endif
#if defined(HAS_MIRRORROW_UV_SSSE3)
MirrorRowUV = MirrorRowUV_NEON;
}
#elif defined(HAS_MIRRORROW_UV_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
MirrorRow = MirrorRowUV_SSSE3;
} else
#endif
{
MirrorRow = MirrorRowUV_C;
MirrorRowUV = MirrorRowUV_SSSE3;
}
#endif
dst_a += dst_stride_a * (height - 1);
dst_b += dst_stride_b * (height - 1);
for (int i = 0; i < height; ++i) {
MirrorRow(src, dst_a, dst_b, width);
MirrorRowUV(src, dst_a, dst_b, width);
src += src_stride;
dst_a -= dst_stride_a;
dst_b -= dst_stride_b;
......
......@@ -471,7 +471,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
dst_argb[3] = 255u;
} else {
*reinterpret_cast<uint32*>(dst_argb) =
*reinterpret_cast<uint32*>(src_argb);
*reinterpret_cast<const uint32*>(src_argb);
}
}
a = src_argb[4 + 3];
......@@ -489,7 +489,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
dst_argb[4 + 3] = 255u;
} else {
*reinterpret_cast<uint32*>(dst_argb + 4) =
*reinterpret_cast<uint32*>(src_argb + 4);
*reinterpret_cast<const uint32*>(src_argb + 4);
}
}
src_argb += 8;
......@@ -512,7 +512,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
dst_argb[3] = 255u;
} else {
*reinterpret_cast<uint32*>(dst_argb) =
*reinterpret_cast<uint32*>(src_argb);
*reinterpret_cast<const uint32*>(src_argb);
}
}
}
......
......@@ -105,7 +105,7 @@ CONST uvec8 kShuffleMaskARGBToRGB24 = {
// Shuffle table for converting ARGB to RAW.
CONST uvec8 kShuffleMaskARGBToRAW = {
2u, 1u,0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
};
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
......@@ -1728,6 +1728,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
);
}
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
uint8* dst_y, int pix) {
asm volatile (
......
......@@ -96,7 +96,7 @@ static const uvec8 kShuffleMaskARGBToRGB24 = {
// Shuffle table for converting ARGB to RAW.
static const uvec8 kShuffleMaskARGBToRAW = {
2u, 1u,0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
};
__declspec(naked)
......@@ -1252,7 +1252,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm psubw xmm1, kUVBiasG \
__asm psubw xmm2, kUVBiasR \
/* Step 2: Find Y contribution to 8 R,G,B values */ \
__asm movq xmm3, qword ptr [eax] \
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \
__asm lea eax, [eax + 8] \
__asm punpcklbw xmm3, xmm4 \
__asm psubsw xmm3, kYSub16 \
......
......@@ -269,14 +269,14 @@ static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride,
}
#define HAS_SCALEROWDOWN38_NEON
const uint8 shuf38[16] __attribute__ ((aligned(16))) =
const uvec8 shuf38 =
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
const uint8 shuf38_2[16] __attribute__ ((aligned(16))) =
const uvec8 shuf38_2 =
{ 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
const unsigned short mult38_div6[8] __attribute__ ((aligned(16))) =
const vec16 mult38_div6 =
{ 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) =
const vec16 mult38_div9 =
{ 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
......@@ -566,11 +566,11 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr,
*/
// Constants for SSE2 code
#elif (defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) && \
// Keep the architecture test parenthesized: && binds tighter than ||, so
// without the grouping YUV_DISABLE_ASM only gates the __x86_64__ case.
#elif (defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) && \
!defined(YUV_DISABLE_ASM)
#if defined(_MSC_VER)
#define TALIGN16(t, var) __declspec(align(16)) t _ ## var
#elif (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && \
// Keep the platform test parenthesized: && binds tighter than ||, so without
// the grouping __APPLE__ or __MINGW32__ alone selects this branch even when
// __i386__ is not defined.
#elif (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && \
defined(__i386__)
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
#else
......@@ -583,7 +583,7 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr,
".private_extern _" #name " \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
// Keep the toolchain test parenthesized: && binds tighter than ||, so without
// the grouping __MINGW32__ alone selects this branch regardless of __i386__.
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
......@@ -1547,7 +1547,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
// Keep the architecture test parenthesized: && binds tighter than ||, so
// without the grouping YUV_DISABLE_ASM is ignored whenever __x86_64__ is set.
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
// GCC versions of row functions are verbatim conversions from Visual C.
// Generated using gcc disassembly on Visual C object file:
......@@ -2910,7 +2910,8 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr,
#endif
// (1-f)a + fb can be replaced with a + f(b-a)
#define BLENDER(a, b, f) ((int)(a) + ((f) * ((int)(b) - (int)(a)) >> 16))
#define BLENDER(a, b, f) (static_cast<int>(a) + \
((f) * (static_cast<int>(b) - static_cast<int>(a)) >> 16))
static void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
......@@ -3067,24 +3068,22 @@ static void ScalePlaneDown2(int src_width, int src_height,
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width) =
filtering ? ScaleRowDown2Int_C : ScaleRowDown2_C;
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON;
} else
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
}
#elif defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Int_SSE2 : ScaleRowDown2_SSE2;
} else
#endif
{
ScaleRowDown2 = filtering ? ScaleRowDown2Int_C : ScaleRowDown2_C;
}
#endif
// TODO(fbarchard): Loop through source height to allow odd height.
for (int y = 0; y < dst_height; ++y) {
ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
......@@ -3107,23 +3106,21 @@ static void ScalePlaneDown4(int src_width, int src_height,
assert(IS_ALIGNED(src_width, 4));
assert(IS_ALIGNED(src_height, 4));
void (*ScaleRowDown4)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Int_C : ScaleRowDown4_C;
#if defined(HAS_SCALEROWDOWN4_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(dst_width, 4)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_NEON : ScaleRowDown4_NEON;
} else
#endif
#if defined(HAS_SCALEROWDOWN4_SSE2)
}
#elif defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2;
} else
#endif
{
ScaleRowDown4 = filtering ? ScaleRowDown4Int_C : ScaleRowDown4_C;
}
#endif
for (int y = 0; y < dst_height; ++y) {
ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += (src_stride << 2);
......@@ -3146,18 +3143,17 @@ static void ScalePlaneDown8(int src_width, int src_height,
assert(IS_ALIGNED(src_width, 8));
assert(IS_ALIGNED(src_height, 8));
void (*ScaleRowDown8)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width) =
filtering && (dst_width <= kMaxOutputWidth) ?
ScaleRowDown8Int_C : ScaleRowDown8_C;
#if defined(HAS_SCALEROWDOWN8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2;
} else
#endif
{
ScaleRowDown8 = filtering && (dst_width <= kMaxOutputWidth) ?
ScaleRowDown8Int_C : ScaleRowDown8_C;
}
#endif
for (int y = 0; y < dst_height; ++y) {
ScaleRowDown8(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += (src_stride << 3);
......@@ -3181,6 +3177,13 @@ static void ScalePlaneDown34(int src_width, int src_height,
uint8* dst_ptr, int dst_width);
void (*ScaleRowDown34_1)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_C;
ScaleRowDown34_1 = ScaleRowDown34_C;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_C;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_C;
}
#if defined(HAS_SCALEROWDOWN34_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
if (!filtering) {
......@@ -3190,36 +3193,27 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_0 = ScaleRowDown34_0_Int_NEON;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_NEON;
}
} else
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSSE3;
}
} else
#endif
#if defined(HAS_SCALEROWDOWN34_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && filtering) {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2;
} else
}
#endif
{
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_C;
ScaleRowDown34_1 = ScaleRowDown34_C;
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_C;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_C;
ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSSE3;
}
}
#endif
for (int y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
......@@ -3272,6 +3266,13 @@ static void ScalePlaneDown38(int src_width, int src_height,
uint8* dst_ptr, int dst_width);
void (*ScaleRowDown38_2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_C;
ScaleRowDown38_2 = ScaleRowDown38_C;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Int_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_C;
}
#if defined(HAS_SCALEROWDOWN38_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
if (!filtering) {
......@@ -3281,9 +3282,8 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Int_NEON;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_NEON;
}
} else
#endif
#if defined(HAS_SCALEROWDOWN38_SSSE3)
}
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) {
......@@ -3293,17 +3293,9 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Int_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_SSSE3;
}
} else
#endif
{
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_C;
ScaleRowDown38_2 = ScaleRowDown38_C;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Int_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_C;
}
}
#endif
for (int y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
......@@ -3426,23 +3418,21 @@ static void ScalePlaneBox(int src_width, int src_height,
} else {
ALIGN16(uint16 row[kMaxInputWidth]);
void (*ScaleAddRows)(const uint8* src_ptr, int src_stride,
uint16* dst_ptr, int src_width, int src_height);
uint16* dst_ptr, int src_width, int src_height)=
ScaleAddRows_C;
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr);
#if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
} else
#endif
{
ScaleAddRows = ScaleAddRows_C;
}
if (dx & 0xffff) {
ScaleAddCols = ScaleAddCols2_C;
} else {
ScaleAddCols = ScaleAddCols1_C;
}
#if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
}
#endif
for (int j = 0; j < dst_height; ++j) {
int iy = y >> 16;
......@@ -3518,27 +3508,25 @@ void ScalePlaneBilinear(int src_width, int src_height,
ALIGN16(uint8 row[kMaxInputWidth + 1]);
void (*ScaleFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
int src_stride,
int dst_width, int source_y_fraction);
int dst_width, int source_y_fraction) =
ScaleFilterRows_C;
#if defined(HAS_SCALEFILTERROWS_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleFilterRows = ScaleFilterRows_NEON;
} else
#endif
#if defined(HAS_SCALEFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleFilterRows = ScaleFilterRows_SSSE3;
} else
}
#endif
#if defined(HAS_SCALEFILTERROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleFilterRows = ScaleFilterRows_SSE2;
} else
}
#endif
{
ScaleFilterRows = ScaleFilterRows_C;
#if defined(HAS_SCALEFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
ScaleFilterRows = ScaleFilterRows_SSSE3;
}
#endif
int dx = (src_width << 16) / dst_width;
int dy = (src_height << 16) / dst_height;
......@@ -3645,7 +3633,7 @@ void ScalePlane(const uint8* src, int src_stride,
// environment variable overrides for testing.
char *filter_override = getenv("LIBYUV_FILTER");
if (filter_override) {
filtering = (FilterMode)atoi(filter_override);
filtering = (FilterMode)atoi(filter_override); // NOLINT
}
#endif
// Use specialized scales to improve performance for common resolutions.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment