Commit a2142148 authored by Frank Barchard's avatar Frank Barchard Committed by Frank Barchard

Remove x64 native_client macros.

Bug: libyuv:702
Test: try bots pass
Change-Id: I76d74b5f02fe9843418108b84742e2f714d1ab0a
Reviewed-on: https://chromium-review.googlesource.com/855656Reviewed-by: 's avatarFrank Barchard <fbarchard@chromium.org>
parent 00d526d4
...@@ -19,16 +19,19 @@ extern "C" { ...@@ -19,16 +19,19 @@ extern "C" {
#endif #endif
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#endif #endif
// Visual C 2012 required for AVX2. // Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ #if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
_MSC_VER >= 1700 _MSC_VER >= 1700
......
...@@ -22,6 +22,24 @@ namespace libyuv { ...@@ -22,6 +22,24 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// TODO(fbarchard): Move cpu macros to row.h
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBAFFINEROW_SSE2
#endif
// Copy a plane of data. // Copy a plane of data.
LIBYUV_API LIBYUV_API
void CopyPlane(const uint8* src_y, void CopyPlane(const uint8* src_y,
...@@ -763,22 +781,6 @@ int I420Interpolate(const uint8* src0_y, ...@@ -763,22 +781,6 @@ int I420Interpolate(const uint8* src0_y,
int height, int height,
int interpolation); int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBAFFINEROW_SSE2
#endif
// Row function for copying pixels from a source with a slope to a row // Row function for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping. // of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API LIBYUV_API
...@@ -787,6 +789,7 @@ void ARGBAffineRow_C(const uint8* src_argb, ...@@ -787,6 +789,7 @@ void ARGBAffineRow_C(const uint8* src_argb,
uint8* dst_argb, uint8* dst_argb,
const float* uv_dudv, const float* uv_dudv,
int width); int width);
// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h
LIBYUV_API LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, void ARGBAffineRow_SSE2(const uint8* src_argb,
int src_argb_stride, int src_argb_stride,
......
...@@ -19,9 +19,13 @@ extern "C" { ...@@ -19,9 +19,13 @@ extern "C" {
#endif #endif
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer)
...@@ -34,21 +38,18 @@ extern "C" { ...@@ -34,21 +38,18 @@ extern "C" {
#define HAS_TRANSPOSEUVWX8_SSE2 #define HAS_TRANSPOSEUVWX8_SSE2
#endif #endif
// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit: // The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
(defined(__i386__) || \
(defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSEWX8_SSSE3 #define HAS_TRANSPOSEWX8_SSSE3
#endif #endif
// The following are available for 64 bit GCC but not NaCL: // The following are available for 64 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
defined(__x86_64__)
#define HAS_TRANSPOSEWX8_FAST_SSSE3 #define HAS_TRANSPOSEWX8_FAST_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2 #define HAS_TRANSPOSEUVWX8_SSE2
#endif #endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEWX8_NEON
#define HAS_TRANSPOSEUVWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON
......
...@@ -20,28 +20,19 @@ namespace libyuv { ...@@ -20,28 +20,19 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
#define align_buffer_64(var, size) \
uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \
uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64(var) \
free(var##_mem); \
var = 0
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer)
// define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif
#endif #endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif #endif
// clang >= 3.5.0 required for Arm64. // clang >= 3.5.0 required for Arm64.
#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON) #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
...@@ -561,6 +552,16 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601 ...@@ -561,6 +552,16 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601
extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg
extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
#define align_buffer_64(var, size) \
uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \
uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64(var) \
free(var##_mem); \
var = 0
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP #define OMITFP
#else #else
...@@ -573,48 +574,14 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 ...@@ -573,48 +574,14 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
#else #else
#define LABELALIGN #define LABELALIGN
#endif #endif
#if defined(__native_client__) && defined(__x86_64__)
// r14 is used for MEMOP macros. // NaCL macros for GCC x64 - deprecated.
#define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n"
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
#define MEMLEA3(offset, index, scale) #offset "(,%q" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
#offset "(%q" #base ",%q" #index "," #scale ")"
#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
BUNDLELOCK \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
" (%%r15,%%r14),%%" #reg "\n" BUNDLEUNLOCK
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
BUNDLELOCK \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
" %%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
BUNDLELOCK \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
" (%%r15,%%r14),%" #arg "\n" BUNDLEUNLOCK
#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
BUNDLELOCK \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
" (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" BUNDLEUNLOCK
#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
BUNDLELOCK \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #op \
" $" #sel ",%%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK
#else // defined(__native_client__) && defined(__x86_64__)
#define BUNDLEALIGN
#define MEMACCESS(base) "(%" #base ")" #define MEMACCESS(base) "(%" #base ")"
#define MEMACCESS2(offset, base) #offset "(%" #base ")" #define MEMACCESS2(offset, base) #offset "(%" #base ")"
#define MEMLEA(offset, base) #offset "(%" #base ")" #define MEMLEA(offset, base) #offset "(%" #base ")"
#define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")" #define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \ #define MEMLEA4(offset, base, index, scale) \
#offset "(%" #base ",%" #index "," #scale ")" #offset "(%" #base ",%" #index "," #scale ")"
#define MEMMOVESTRING(s, d)
#define MEMSTORESTRING(reg, d)
#define MEMOPREG(opcode, offset, base, index, scale, reg) \ #define MEMOPREG(opcode, offset, base, index, scale, reg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
...@@ -626,7 +593,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 ...@@ -626,7 +593,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
",%%" #reg2 "\n" ",%%" #reg2 "\n"
#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
#op " $" #sel ",%%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n" #op " $" #sel ",%%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
#endif // defined(__native_client__) && defined(__x86_64__)
// Intel Code Analizer markers. Insert IACA_START IACA_END around code to be // Intel Code Analizer markers. Insert IACA_START IACA_END around code to be
// measured and then run with iaca -64 libyuv_unittest. // measured and then run with iaca -64 libyuv_unittest.
......
...@@ -20,16 +20,19 @@ extern "C" { ...@@ -20,16 +20,19 @@ extern "C" {
#endif #endif
#if defined(__pnacl__) || defined(__CLR_VER) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#endif #endif
// GCC >= 4.7.0 required for AVX2. // GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
...@@ -81,7 +84,7 @@ extern "C" { ...@@ -81,7 +84,7 @@ extern "C" {
#endif #endif
// The following are available on Neon platforms: // The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBCOLS_NEON #define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEARGBROWDOWN2_NEON #define HAS_SCALEARGBROWDOWN2_NEON
......
...@@ -23,9 +23,7 @@ extern "C" { ...@@ -23,9 +23,7 @@ extern "C" {
// ARGBScale has a function to copy pixels to a row, striding each source // ARGBScale has a function to copy pixels to a row, striding each source
// pixel by a constant. // pixel by a constant.
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
(defined(__x86_64__) && !defined(__native_client__)) || \
defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2 #define HAS_SCALEARGBROWDOWNEVEN_SSE2
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr,
int src_stride, int src_stride,
...@@ -33,7 +31,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, ...@@ -33,7 +31,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr,
uint8* dst_ptr, uint8* dst_ptr,
int dst_width); int dst_width);
#endif #endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBROWDOWNEVEN_NEON #define HAS_SCALEARGBROWDOWNEVEN_NEON
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, void ScaleARGBRowDownEven_NEON(const uint8* src_ptr,
......
...@@ -1997,7 +1997,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1997,7 +1997,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
[width]"+rm"(width) // %[width] [width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleNV21]"m"(kShuffleNV21) [kShuffleNV21]"m"(kShuffleNV21)
: "memory", "cc", YUVTORGB_REGS // Does not use r14. : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
); );
// clang-format on // clang-format on
...@@ -2025,7 +2025,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, ...@@ -2025,7 +2025,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
[kShuffleYUY2UV]"m"(kShuffleYUY2UV) [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
: "memory", "cc", YUVTORGB_REGS // Does not use r14. : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
); );
// clang-format on // clang-format on
...@@ -2053,7 +2053,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, ...@@ -2053,7 +2053,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYY]"m"(kShuffleUYVYY),
[kShuffleUYVYUV]"m"(kShuffleUYVYUV) [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
: "memory", "cc", YUVTORGB_REGS // Does not use r14. : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
); );
// clang-format on // clang-format on
...@@ -2471,7 +2471,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2471,7 +2471,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb] [dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width] [width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
); );
// clang-format on // clang-format on
...@@ -2505,7 +2505,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2505,7 +2505,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
[width]"+rm"(width) // %[width] [width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleNV21]"m"(kShuffleNV21) [kShuffleNV21]"m"(kShuffleNV21)
: "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
); );
// clang-format on // clang-format on
...@@ -2538,7 +2538,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, ...@@ -2538,7 +2538,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
[kShuffleYUY2UV]"m"(kShuffleYUY2UV) [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
: "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
); );
// clang-format on // clang-format on
...@@ -2571,7 +2571,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, ...@@ -2571,7 +2571,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYY]"m"(kShuffleUYVYY),
[kShuffleUYVYUV]"m"(kShuffleUYVYUV) [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
: "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
); );
// clang-format on // clang-format on
...@@ -3449,7 +3449,7 @@ void CopyRow_AVX(const uint8* src, uint8* dst, int count) { ...@@ -3449,7 +3449,7 @@ void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
// Multiple of 1. // Multiple of 1.
void CopyRow_ERMS(const uint8* src, uint8* dst, int width) { void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
size_t width_tmp = (size_t)(width); size_t width_tmp = (size_t)(width);
asm volatile("rep movsb " MEMMOVESTRING(0, 1) " \n" asm volatile("rep movsb \n"
: "+S"(src), // %0 : "+S"(src), // %0
"+D"(dst), // %1 "+D"(dst), // %1
"+c"(width_tmp) // %2 "+c"(width_tmp) // %2
...@@ -3668,7 +3668,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -3668,7 +3668,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
void SetRow_X86(uint8* dst, uint8 v8, int width) { void SetRow_X86(uint8* dst, uint8 v8, int width) {
size_t width_tmp = (size_t)(width >> 2); size_t width_tmp = (size_t)(width >> 2);
const uint32 v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. const uint32 v32 = v8 * 0x01010101u; // Duplicate byte to all bytes.
asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n" asm volatile("rep stosl \n"
: "+D"(dst), // %0 : "+D"(dst), // %0
"+c"(width_tmp) // %1 "+c"(width_tmp) // %1
: "a"(v32) // %2 : "a"(v32) // %2
...@@ -3677,7 +3677,7 @@ void SetRow_X86(uint8* dst, uint8 v8, int width) { ...@@ -3677,7 +3677,7 @@ void SetRow_X86(uint8* dst, uint8 v8, int width) {
void SetRow_ERMS(uint8* dst, uint8 v8, int width) { void SetRow_ERMS(uint8* dst, uint8 v8, int width) {
size_t width_tmp = (size_t)(width); size_t width_tmp = (size_t)(width);
asm volatile("rep stosb " MEMSTORESTRING(al, 0) " \n" asm volatile("rep stosb \n"
: "+D"(dst), // %0 : "+D"(dst), // %0
"+c"(width_tmp) // %1 "+c"(width_tmp) // %1
: "a"(v8) // %2 : "a"(v8) // %2
...@@ -3686,7 +3686,7 @@ void SetRow_ERMS(uint8* dst, uint8 v8, int width) { ...@@ -3686,7 +3686,7 @@ void SetRow_ERMS(uint8* dst, uint8 v8, int width) {
void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) { void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) {
size_t width_tmp = (size_t)(width); size_t width_tmp = (size_t)(width);
asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n" asm volatile("rep stosl \n"
: "+D"(dst_argb), // %0 : "+D"(dst_argb), // %0
"+c"(width_tmp) // %1 "+c"(width_tmp) // %1
: "a"(v32) // %2 : "a"(v32) // %2
...@@ -5707,7 +5707,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, ...@@ -5707,7 +5707,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr,
// Blend 100 / 0 - Copy row unchanged. // Blend 100 / 0 - Copy row unchanged.
LABELALIGN LABELALIGN
"100: \n" "100: \n"
"rep movsb " MEMMOVESTRING(1,0) " \n" "rep movsb \n"
"jmp 999f \n" "jmp 999f \n"
"99: \n" "99: \n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment