Commit c7161d1c authored by fbarchard@google.com

Remove code alignment declspec from Visual C versions for vs2014 compatibility.

BUG=422
TESTED=local vs2013 build still passes.

Review URL: https://webrtc-codereview.appspot.com/45959004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1365 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 1eb51bcf
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1364
Version: 1365
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1364
#define LIBYUV_VERSION 1365
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -19,7 +19,7 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
defined(_MSC_VER) && !defined(__clang__)
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
......@@ -60,7 +60,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752)
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
......@@ -134,7 +134,7 @@ static uvec32 kHashMul3 = {
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
_asm _emit 0x40 _asm _emit reg
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
......@@ -185,7 +185,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
......
......@@ -23,7 +23,7 @@ extern "C" {
#ifdef ENABLE_SCASB
// Multiple of 1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
const uint8* ScanRow_ERMS(const uint8* src, uint32 val, int count) {
__asm {
mov edx, edi
......
......@@ -73,7 +73,7 @@ void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
#if !defined(LIBYUV_DISABLE_X86) && \
defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__)
#define HAS_TRANSPOSE_WX8_SSSE3
__declspec(naked) __declspec(align(16))
__declspec(naked)
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
__asm {
......@@ -165,7 +165,7 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
}
#define HAS_TRANSPOSE_UVWX8_SSE2
__declspec(naked) __declspec(align(16))
__declspec(naked)
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
......
......@@ -147,8 +147,6 @@ static YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
// 64 bit
#if defined(_M_X64)
__declspec(align(16))
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -198,10 +196,8 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
width -= 8;
}
}
// 32 bit
#else // defined(_M_X64)
#ifdef HAS_ARGBTOYROW_SSSE3
// Constants for ARGB.
......@@ -324,7 +320,7 @@ static const uvec8 kShuffleMaskARGBToRAW_0 = {
};
// Duplicates gray value 3 times and fills in alpha opaque.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
__asm {
mov eax, [esp + 4] // src_y
......@@ -353,7 +349,7 @@ void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
#ifdef HAS_J400TOARGBROW_AVX2
// Duplicates gray value 3 times and fills in alpha opaque.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix) {
__asm {
mov eax, [esp + 4] // src_y
......@@ -383,7 +379,7 @@ void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix) {
}
#endif // HAS_J400TOARGBROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
__asm {
mov eax, [esp + 4] // src_rgb24
......@@ -421,7 +417,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
int pix) {
__asm {
......@@ -467,7 +463,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
// v * (256 + 8)
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
// 20 instructions.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
int pix) {
__asm {
......@@ -523,7 +519,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
// v * 256 + v * 8
// v * (256 + 8)
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
__declspec(naked) __declspec(align(16))
__declspec(naked)
void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb,
int pix) {
__asm {
......@@ -574,7 +570,7 @@ void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb,
#endif // HAS_RGB565TOARGBROW_AVX2
#ifdef HAS_ARGB1555TOARGBROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
int pix) {
__asm {
......@@ -624,7 +620,7 @@ void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
#endif // HAS_ARGB1555TOARGBROW_AVX2
#ifdef HAS_ARGB4444TOARGBROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
int pix) {
__asm {
......@@ -660,7 +656,7 @@ void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
#endif // HAS_ARGB4444TOARGBROW_AVX2
// 24 instructions
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
int pix) {
__asm {
......@@ -713,7 +709,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
}
// 18 instructions.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
int pix) {
__asm {
......@@ -751,7 +747,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -789,7 +785,7 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -828,7 +824,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
// 4 pixels
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -866,7 +862,7 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
// 8 pixels
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int pix) {
__asm {
......@@ -912,7 +908,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
}
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int pix) {
__asm {
......@@ -955,7 +951,7 @@ void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
#endif // HAS_ARGBTORGB565DITHERROW_AVX2
// TODO(fbarchard): Improve sign extension/packing.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -996,7 +992,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -1026,7 +1022,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
#ifdef HAS_ARGBTORGB565ROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -1063,7 +1059,7 @@ void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
#endif // HAS_ARGBTORGB565ROW_AVX2
#ifdef HAS_ARGBTOARGB1555ROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -1103,7 +1099,7 @@ void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
#endif // HAS_ARGBTOARGB1555ROW_AVX2
#ifdef HAS_ARGBTOARGB4444ROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
......@@ -1134,7 +1130,7 @@ void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
#endif // HAS_ARGBTOARGB4444ROW_AVX2
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
mov eax, [esp + 4] /* src_argb */
......@@ -1169,7 +1165,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
mov eax, [esp + 4] /* src_argb */
......@@ -1288,7 +1284,7 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
}
#endif // HAS_ARGBTOYJROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
mov eax, [esp + 4] /* src_argb */
......@@ -1321,7 +1317,7 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
mov eax, [esp + 4] /* src_argb */
......@@ -1354,7 +1350,7 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
mov eax, [esp + 4] /* src_argb */
......@@ -1387,7 +1383,7 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
......@@ -1457,7 +1453,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
......@@ -1594,7 +1590,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
}
#endif // HAS_ARGBTOUVROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
......@@ -1651,7 +1647,7 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
......@@ -1709,7 +1705,7 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
......@@ -1779,7 +1775,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
......@@ -1849,7 +1845,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
......@@ -2005,7 +2001,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
#ifdef HAS_I422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2041,7 +2037,7 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
#ifdef HAS_J422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void J422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2077,7 +2073,7 @@ void J422ToARGBRow_AVX2(const uint8* y_buf,
#ifdef HAS_I444TOARGBROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I444ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2113,7 +2109,7 @@ void I444ToARGBRow_AVX2(const uint8* y_buf,
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I411ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2149,7 +2145,7 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
#ifdef HAS_NV12TOARGBROW_AVX2
// 16 pixels.
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void NV12ToARGBRow_AVX2(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
......@@ -2180,7 +2176,7 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
#ifdef HAS_NV21TOARGBROW_AVX2
// 16 pixels.
// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void NV21ToARGBRow_AVX2(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
......@@ -2212,7 +2208,7 @@ void NV21ToARGBRow_AVX2(const uint8* y_buf,
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToBGRARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2258,7 +2254,7 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToRGBARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2304,7 +2300,7 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToABGRRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2542,7 +2538,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2575,7 +2571,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2609,7 +2605,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2643,7 +2639,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
// 8 pixels
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2682,7 +2678,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2716,7 +2712,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// JPeg color space version of I422ToARGB
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void J422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2750,7 +2746,7 @@ void J422ToARGBRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2785,7 +2781,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
......@@ -2813,7 +2809,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
......@@ -2839,7 +2835,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2869,7 +2865,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2900,7 +2896,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -2934,7 +2930,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
#ifdef HAS_I400TOARGBROW_SSE2
// 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I400ToARGBRow_SSE2(const uint8* y_buf,
uint8* rgb_buf,
int width) {
......@@ -2982,7 +2978,7 @@ void I400ToARGBRow_SSE2(const uint8* y_buf,
#ifdef HAS_I400TOARGBROW_AVX2
// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
// note: vpunpcklbw mutates and vpackuswb unmutates.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I400ToARGBRow_AVX2(const uint8* y_buf,
uint8* rgb_buf,
int width) {
......@@ -3037,7 +3033,7 @@ static const uvec8 kShuffleMirror = {
};
// TODO(fbarchard): Replace lea with -16 offset.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3058,7 +3054,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
#endif // HAS_MIRRORROW_SSSE3
#ifdef HAS_MIRRORROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3081,7 +3077,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
#endif // HAS_MIRRORROW_AVX2
#ifdef HAS_MIRRORROW_SSE2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3112,7 +3108,7 @@ static const uvec8 kShuffleMirrorUV = {
14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
};
__declspec(naked) __declspec(align(16))
__declspec(naked)
void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
int width) {
__asm {
......@@ -3142,7 +3138,7 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
#endif // HAS_MIRRORROW_UV_SSSE3
#ifdef HAS_ARGBMIRRORROW_SSE2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3169,7 +3165,7 @@ static const ulvec32 kARGBShuffleMirror_AVX2 = {
7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
};
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3190,7 +3186,7 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
#endif // HAS_ARGBMIRRORROW_AVX2
#ifdef HAS_SPLITUVROW_SSE2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
push edi
......@@ -3228,7 +3224,7 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#endif // HAS_SPLITUVROW_SSE2
#ifdef HAS_SPLITUVROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
push edi
......@@ -3266,7 +3262,7 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#endif // HAS_SPLITUVROW_AVX2
#ifdef HAS_MERGEUVROW_SSE2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
__asm {
......@@ -3297,7 +3293,7 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
#endif // HAS_MERGEUVROW_SSE2
#ifdef HAS_MERGEUVROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
__asm {
......@@ -3331,7 +3327,7 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
#ifdef HAS_COPYROW_SSE2
// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
__asm {
mov eax, [esp + 4] // src
......@@ -3354,7 +3350,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
#ifdef HAS_COPYROW_AVX
// CopyRow copys 'count' bytes using a 32 byte load/store, 64 bytes at time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
__asm {
mov eax, [esp + 4] // src
......@@ -3378,7 +3374,7 @@ void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
#endif // HAS_COPYROW_AVX
// Multiple of 1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
__asm {
mov eax, esi
......@@ -3395,7 +3391,7 @@ void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
// width in pixels
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3431,7 +3427,7 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
// width in pixels
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3460,7 +3456,7 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3498,7 +3494,7 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
// width in pixels
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
......@@ -3530,7 +3526,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
#ifdef HAS_SETROW_X86
// Write 'count' bytes using an 8 bit value repeated.
// Count should be multiple of 4.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SetRow_X86(uint8* dst, uint8 v8, int count) {
__asm {
movzx eax, byte ptr [esp + 8] // v8
......@@ -3547,7 +3543,7 @@ void SetRow_X86(uint8* dst, uint8 v8, int count) {
}
// Write 'count' bytes using an 8 bit value repeated.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SetRow_ERMS(uint8* dst, uint8 v8, int count) {
__asm {
mov edx, edi
......@@ -3561,7 +3557,7 @@ void SetRow_ERMS(uint8* dst, uint8 v8, int count) {
}
// Write 'count' 32 bit values.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
__asm {
mov edx, edi
......@@ -3576,7 +3572,7 @@ void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
#endif // HAS_SETROW_X86
#ifdef HAS_YUY2TOYROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void YUY2ToYRow_AVX2(const uint8* src_yuy2,
uint8* dst_y, int pix) {
__asm {
......@@ -3603,7 +3599,7 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -3647,7 +3643,7 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -3686,7 +3682,7 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void UYVYToYRow_AVX2(const uint8* src_uyvy,
uint8* dst_y, int pix) {
__asm {
......@@ -3711,7 +3707,7 @@ void UYVYToYRow_AVX2(const uint8* src_uyvy,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -3755,7 +3751,7 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -3796,7 +3792,7 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
#endif // HAS_YUY2TOYROW_AVX2
#ifdef HAS_YUY2TOYROW_SSE2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void YUY2ToYRow_SSE2(const uint8* src_yuy2,
uint8* dst_y, int pix) {
__asm {
......@@ -3821,7 +3817,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -3864,7 +3860,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -3900,7 +3896,7 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void UYVYToYRow_SSE2(const uint8* src_uyvy,
uint8* dst_y, int pix) {
__asm {
......@@ -3923,7 +3919,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -3966,7 +3962,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
......@@ -4005,7 +4001,7 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
#ifdef HAS_ARGBBLENDROW_SSE2
// Blend 8 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -4139,7 +4135,7 @@ static const uvec8 kShuffleAlpha = {
// pshufb xmm3, kShuffleAlpha // alpha
// Blend 8 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -4255,7 +4251,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBATTENUATEROW_SSE2
// Attenuate 4 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // src_argb0
......@@ -4304,7 +4300,7 @@ static const uvec8 kShuffleAlpha1 = {
11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
};
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // src_argb0
......@@ -4348,7 +4344,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
static const uvec8 kShuffleAlpha_AVX2 = {
6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u
};
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // src_argb0
......@@ -4385,7 +4381,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
// Unattenuate 4 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
......@@ -4439,7 +4435,7 @@ static const uvec8 kUnattenShuffleAlpha_AVX2 = {
// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
// USE_GATHER is not on by default, due to being a slow instruction.
#ifdef USE_GATHER
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
......@@ -4473,7 +4469,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
}
}
#else // USE_GATHER
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
......@@ -4540,7 +4536,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBGRAYROW_SSSE3
// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
......@@ -4599,7 +4595,7 @@ static const vec8 kARGBToSepiaR = {
};
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] /* dst_argb */
......@@ -4656,7 +4652,7 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
// Same as Sepia except matrix is provided.
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
const int8* matrix_argb, int width) {
__asm {
......@@ -4717,7 +4713,7 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width) {
__asm {
......@@ -4762,7 +4758,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
#ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value) {
__asm {
......@@ -4796,7 +4792,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
#ifdef HAS_ARGBMULTIPLYROW_SSE2
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -4835,7 +4831,7 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBADDROW_SSE2
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
// TODO(fbarchard): Port this to posix, neon and other math functions.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -4883,7 +4879,7 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBSUBTRACTROW_SSE2
// Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -4912,7 +4908,7 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBMULTIPLYROW_AVX2
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -4949,7 +4945,7 @@ void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBADDROW_AVX2
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -4978,7 +4974,7 @@ void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBSUBTRACTROW_AVX2
// Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
......@@ -5010,7 +5006,7 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
// -1 0 1
// -2 0 2
// -1 0 1
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
__asm {
......@@ -5066,7 +5062,7 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
// -1 -2 -1
// 0 0 0
// 1 2 1
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
__asm {
......@@ -5119,7 +5115,7 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
// R = Sobel
// G = Sobel
// B = Sobel
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
__asm {
......@@ -5166,7 +5162,7 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
#ifdef HAS_SOBELTOPLANEROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into a plane.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width) {
__asm {
......@@ -5199,7 +5195,7 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
// R = Sobel X
// G = Sobel
// B = Sobel Y
__declspec(naked) __declspec(align(16))
__declspec(naked)
void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
__asm {
......@@ -5486,7 +5482,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
__declspec(naked) __declspec(align(16))
__declspec(naked)
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width) {
......@@ -5571,7 +5567,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
#ifdef HAS_INTERPOLATEROW_AVX2
// Bilinear filter 32x2 -> 32x1
__declspec(naked) __declspec(align(16))
__declspec(naked)
void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
......@@ -5668,7 +5664,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
#endif // HAS_INTERPOLATEROW_AVX2
// Bilinear filter 16x2 -> 16x1
__declspec(naked) __declspec(align(16))
__declspec(naked)
void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
......@@ -5769,7 +5765,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
#ifdef HAS_INTERPOLATEROW_SSE2
// Bilinear filter 16x2 -> 16x1
__declspec(naked) __declspec(align(16))
__declspec(naked)
void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
......@@ -5876,7 +5872,7 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
#endif // HAS_INTERPOLATEROW_SSE2
// Specialized ARGB to Bayer that just isolates G channel.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) {
__asm {
......@@ -5906,7 +5902,7 @@ void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix) {
__asm {
......@@ -5932,7 +5928,7 @@ void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
}
#ifdef HAS_ARGBSHUFFLEROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix) {
__asm {
......@@ -5960,7 +5956,7 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
}
#endif // HAS_ARGBSHUFFLEROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix) {
__asm {
......@@ -6082,7 +6078,7 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
// UYVY - Macro-pixel = 2 image pixels
// U0Y0V0Y1
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -6119,7 +6115,7 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
}
}
__declspec(naked) __declspec(align(16))
__declspec(naked)
void I422ToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -6157,7 +6153,7 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
}
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width) {
......@@ -6216,7 +6212,7 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBPolynomialRow_AVX2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width) {
......@@ -6256,7 +6252,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
#ifdef HAS_ARGBCOLORTABLEROW_X86
// Transform ARGB pixels with color table.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
int width) {
__asm {
......@@ -6290,7 +6286,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
#ifdef HAS_RGBCOLORTABLEROW_X86
// Transform RGB pixels with color table.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
__asm {
push esi
......@@ -6321,7 +6317,7 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Transform RGB pixels with luma table.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
int width,
const uint8* luma, uint32 lumacoeff) {
......
......@@ -95,7 +95,7 @@ static uvec16 kScaleAb2 =
{ 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
// Reads 32 pixels, throws half away and writes 16 pixels.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -121,7 +121,7 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Blends 32x1 rectangle to 16x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -157,7 +157,7 @@ void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Blends 32x2 rectangle to 16x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -200,7 +200,7 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
#ifdef HAS_SCALEROWDOWN2_AVX2
// Reads 64 pixels, throws half away and writes 32 pixels.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -228,7 +228,7 @@ void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Blends 64x1 rectangle to 32x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -265,7 +265,7 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Blends 64x2 rectangle to 32x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -307,7 +307,7 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
#endif // HAS_SCALEROWDOWN2_AVX2
// Point samples 32 pixels to 8 pixels.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -338,7 +338,7 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Blends 32x4 rectangle to 8x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -401,7 +401,7 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
// Then shuffled to do the scaling.
// Note that movdqa+palign may be better than movdqu.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -448,7 +448,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
// xmm7 kRound34
// Note that movdqa+palign may be better than movdqu.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
......@@ -505,7 +505,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
}
// Note that movdqa+palign may be better than movdqu.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
......@@ -567,7 +567,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
// 3/8 point sampler
// Scale 32 pixels to 12
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
......@@ -598,7 +598,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Scale 16x3 pixels to 6x1 with interpolation
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
......@@ -663,7 +663,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
}
// Scale 16x2 pixels to 6x1 with interpolation
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
......@@ -709,7 +709,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
// Reads 16xN bytes and produces 16 shorts at a time.
// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width,
int src_height) {
......@@ -775,7 +775,7 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
// when drmemory bug fixed.
// https://code.google.com/p/drmemory/issues/detail?id=1396
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
__asm {
......@@ -852,7 +852,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
}
// Reads 16 pixels, duplicates them and writes 32 pixels.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
__asm {
......@@ -877,7 +877,7 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
}
// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
......@@ -902,7 +902,7 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
}
// Blends 8x1 rectangle to 4x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
......@@ -930,7 +930,7 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
}
// Blends 8x2 rectangle to 4x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
......@@ -964,7 +964,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
}
// Reads 4 pixels at a time.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width) {
......@@ -1000,7 +1000,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
}
// Blends four 2x2 to 4x1.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
......@@ -1048,7 +1048,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
}
// Column scaling unfiltered. SSE2 version.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
__asm {
......@@ -1139,7 +1139,7 @@ static uvec8 kShuffleFractions = {
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
};
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
__asm {
......@@ -1210,7 +1210,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
}
// Reads 4 pixels, duplicates them and writes 8 pixels.
__declspec(naked) __declspec(align(16))
__declspec(naked)
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
__asm {
......@@ -1235,7 +1235,7 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
}
// Divide num by div and return as 16.16 fixed point result.
__declspec(naked) __declspec(align(16))
__declspec(naked)
int FixedDiv_X86(int num, int div) {
__asm {
mov eax, [esp + 4] // num
......@@ -1248,7 +1248,7 @@ int FixedDiv_X86(int num, int div) {
}
// Divide num by div and return as 16.16 fixed point result.
__declspec(naked) __declspec(align(16))
__declspec(naked)
int FixedDiv1_X86(int num, int div) {
__asm {
mov eax, [esp + 4] // num
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment