Commit 76e7f104 authored by Frank Barchard, committed by Commit Bot

documentation updates

BUG=None
TEST=Untested

Change-Id: I8ab95654255d1aa9cf05a664ecf59ee6c0757e66
Reviewed-on: https://chromium-review.googlesource.com/434941
Reviewed-by: Henrik Kjellander <kjellander@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@google.com>
parent 0fb56759
...@@ -44,7 +44,7 @@ For Android add `;target_os=['android'];` to your Linux .gclient ...@@ -44,7 +44,7 @@ For Android add `;target_os=['android'];` to your Linux .gclient
"safesync_url": "", "safesync_url": "",
}, },
]; ];
target_os = ["android", "unix"]; target_os = ["android", "linux"];
Then run: Then run:
...@@ -208,7 +208,7 @@ Running test with C code: ...@@ -208,7 +208,7 @@ Running test with C code:
make V=1 -f linux.mk clean make V=1 -f linux.mk clean
make V=1 -f linux.mk CXX=clang++ make V=1 -f linux.mk CXX=clang++
## Building the Library with cmake ## Building the library with cmake
Install cmake: http://www.cmake.org/ Install cmake: http://www.cmake.org/
...@@ -227,7 +227,7 @@ Install cmake: http://www.cmake.org/ ...@@ -227,7 +227,7 @@ Install cmake: http://www.cmake.org/
cmake --build . --config Release cmake --build . --config Release
sudo cmake --build . --target install --config Release sudo cmake --build . --target install --config Release
### Release package ### Build RPM/DEB packages
mkdir out mkdir out
cd out cd out
...@@ -237,8 +237,7 @@ Install cmake: http://www.cmake.org/ ...@@ -237,8 +237,7 @@ Install cmake: http://www.cmake.org/
## Setup for Arm Cross compile ## Setup for Arm Cross compile
See also See also https://www.ccoderun.ca/programming/2015-12-20_CrossCompiling/index.html
https://www.ccoderun.ca/programming/2015-12-20_CrossCompiling/index.html#setup
sudo apt-get install ssh dkms build-essential linux-headers-generic sudo apt-get install ssh dkms build-essential linux-headers-generic
sudo apt-get install kdevelop cmake git subversion sudo apt-get install kdevelop cmake git subversion
......
...@@ -640,6 +640,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -640,6 +640,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int width) {
"pslld $0xa,%%xmm6 \n" "pslld $0xa,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" "pcmpeqb %%xmm7,%%xmm7 \n"
"pslld $0xf,%%xmm7 \n" "pslld $0xf,%%xmm7 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -677,6 +678,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -677,6 +678,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) {
"psllw $0xc,%%xmm4 \n" "psllw $0xc,%%xmm4 \n"
"movdqa %%xmm4,%%xmm3 \n" "movdqa %%xmm4,%%xmm3 \n"
"psrlw $0x8,%%xmm3 \n" "psrlw $0x8,%%xmm3 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -706,6 +708,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { ...@@ -706,6 +708,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"movdqa %3,%%xmm4 \n" "movdqa %3,%%xmm4 \n"
"movdqa %4,%%xmm5 \n" "movdqa %4,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -744,6 +747,7 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { ...@@ -744,6 +747,7 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"movdqa %3,%%xmm4 \n" "movdqa %3,%%xmm4 \n"
"movdqa %4,%%xmm5 \n" "movdqa %4,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -786,6 +790,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { ...@@ -786,6 +790,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n" "vbroadcastf128 %4,%%ymm5 \n"
"vmovdqu %5,%%ymm6 \n" "vmovdqu %5,%%ymm6 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -827,6 +832,7 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { ...@@ -827,6 +832,7 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n" "vbroadcastf128 %4,%%ymm5 \n"
"vmovdqu %5,%%ymm6 \n" "vmovdqu %5,%%ymm6 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -873,6 +879,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, ...@@ -873,6 +879,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0,
"movdqa %6,%%xmm4 \n" "movdqa %6,%%xmm4 \n"
"movdqa %7,%%xmm5 \n" "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -943,6 +950,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, ...@@ -943,6 +950,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0,
"vbroadcastf128 %6,%%ymm6 \n" "vbroadcastf128 %6,%%ymm6 \n"
"vbroadcastf128 %7,%%ymm7 \n" "vbroadcastf128 %7,%%ymm7 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -1006,6 +1014,7 @@ void ARGBToUVJRow_AVX2(const uint8* src_argb0, ...@@ -1006,6 +1014,7 @@ void ARGBToUVJRow_AVX2(const uint8* src_argb0,
"vbroadcastf128 %6,%%ymm6 \n" "vbroadcastf128 %6,%%ymm6 \n"
"vbroadcastf128 %7,%%ymm7 \n" "vbroadcastf128 %7,%%ymm7 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -1070,6 +1079,7 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, ...@@ -1070,6 +1079,7 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
"movdqa %6,%%xmm4 \n" "movdqa %6,%%xmm4 \n"
"movdqa %7,%%xmm5 \n" "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1136,6 +1146,7 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, ...@@ -1136,6 +1146,7 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb,
"movdqa %5,%%xmm4 \n" "movdqa %5,%%xmm4 \n"
"movdqa %6,%%xmm5 \n" "movdqa %6,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1189,6 +1200,7 @@ void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) { ...@@ -1189,6 +1200,7 @@ void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"movdqa %4,%%xmm5 \n" "movdqa %4,%%xmm5 \n"
"movdqa %3,%%xmm4 \n" "movdqa %3,%%xmm4 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1229,6 +1241,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, ...@@ -1229,6 +1241,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0,
"movdqa %6,%%xmm4 \n" "movdqa %6,%%xmm4 \n"
"movdqa %7,%%xmm5 \n" "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1287,6 +1300,7 @@ void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width) { ...@@ -1287,6 +1300,7 @@ void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"movdqa %4,%%xmm5 \n" "movdqa %4,%%xmm5 \n"
"movdqa %3,%%xmm4 \n" "movdqa %3,%%xmm4 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1321,6 +1335,7 @@ void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width) { ...@@ -1321,6 +1335,7 @@ void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"movdqa %4,%%xmm5 \n" "movdqa %4,%%xmm5 \n"
"movdqa %3,%%xmm4 \n" "movdqa %3,%%xmm4 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1361,6 +1376,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, ...@@ -1361,6 +1376,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0,
"movdqa %6,%%xmm4 \n" "movdqa %6,%%xmm4 \n"
"movdqa %7,%%xmm5 \n" "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1425,6 +1441,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, ...@@ -1425,6 +1441,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
"movdqa %6,%%xmm4 \n" "movdqa %6,%%xmm4 \n"
"movdqa %7,%%xmm5 \n" "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -1647,6 +1664,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1647,6 +1664,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV444 READYUV444
...@@ -1676,6 +1694,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, ...@@ -1676,6 +1694,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV422 READYUV422
...@@ -1720,6 +1739,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1720,6 +1739,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV422 READYUV422
...@@ -1750,6 +1770,7 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1750,6 +1770,7 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUVA422 READYUVA422
...@@ -1784,6 +1805,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1784,6 +1805,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READNV12 READNV12
...@@ -1811,6 +1833,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1811,6 +1833,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READNV21 READNV21
...@@ -1838,6 +1861,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, ...@@ -1838,6 +1861,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUY2 READYUY2
...@@ -1865,6 +1889,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, ...@@ -1865,6 +1889,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READUYVY READUYVY
...@@ -1894,6 +1919,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, ...@@ -1894,6 +1919,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV422 READYUV422
...@@ -2003,6 +2029,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, ...@@ -2003,6 +2029,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
"vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" \ "vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" \
"vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" \ "vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" \
"vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n" "vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n"
#define YUVTORGB_AVX2(yuvconstants) \ #define YUVTORGB_AVX2(yuvconstants) \
"vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \ "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \
"vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \ "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \
...@@ -2020,9 +2047,12 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, ...@@ -2020,9 +2047,12 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
"vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
#define YUVTORGB_REGS_AVX2 \ #define YUVTORGB_REGS_AVX2 \
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
#else // Convert 16 pixels: 16 UV and 16 Y. #else // Convert 16 pixels: 16 UV and 16 Y.
#define YUVTORGB_SETUP_AVX2(yuvconstants) #define YUVTORGB_SETUP_AVX2(yuvconstants)
#define YUVTORGB_AVX2(yuvconstants) \ #define YUVTORGB_AVX2(yuvconstants) \
"vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \ "vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \
...@@ -2072,6 +2102,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2072,6 +2102,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV444_AVX2 READYUV444_AVX2
...@@ -2105,6 +2136,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2105,6 +2136,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV422_AVX2 READYUV422_AVX2
...@@ -2140,6 +2172,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, ...@@ -2140,6 +2172,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUVA422_AVX2 READYUVA422_AVX2
...@@ -2179,6 +2212,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, ...@@ -2179,6 +2212,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV422_AVX2 READYUV422_AVX2
...@@ -2221,6 +2255,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2221,6 +2255,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READNV12_AVX2 READNV12_AVX2
...@@ -2253,6 +2288,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2253,6 +2288,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READNV21_AVX2 READNV21_AVX2
...@@ -2285,6 +2321,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, ...@@ -2285,6 +2321,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUY2_AVX2 READYUY2_AVX2
...@@ -2317,6 +2354,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, ...@@ -2317,6 +2354,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READUYVY_AVX2 READUYVY_AVX2
...@@ -2349,6 +2387,7 @@ void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { ...@@ -2349,6 +2387,7 @@ void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) {
"pshufd $0x0,%%xmm3,%%xmm3 \n" "pshufd $0x0,%%xmm3,%%xmm3 \n"
"pcmpeqb %%xmm4,%%xmm4 \n" "pcmpeqb %%xmm4,%%xmm4 \n"
"pslld $0x18,%%xmm4 \n" "pslld $0x18,%%xmm4 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
...@@ -2439,6 +2478,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { ...@@ -2439,6 +2478,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile (
"movdqa %3,%%xmm5 \n" "movdqa %3,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0 MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
...@@ -2462,6 +2502,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -2462,6 +2502,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile (
"vbroadcastf128 %3,%%ymm5 \n" "vbroadcastf128 %3,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0 MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0
...@@ -2495,6 +2536,7 @@ void MirrorUVRow_SSSE3(const uint8* src, ...@@ -2495,6 +2536,7 @@ void MirrorUVRow_SSSE3(const uint8* src,
"movdqa %4,%%xmm1 \n" "movdqa %4,%%xmm1 \n"
"lea " MEMLEA4(-0x10,0,3,2) ",%0 \n" "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -2522,6 +2564,7 @@ void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -2522,6 +2564,7 @@ void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile (
"lea " MEMLEA4(-0x10,0,2,4) ",%0 \n" "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -2548,6 +2591,7 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -2548,6 +2591,7 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile (
"vmovdqu %3,%%ymm5 \n" "vmovdqu %3,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0 VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0
...@@ -2575,6 +2619,7 @@ void SplitUVRow_AVX2(const uint8* src_uv, ...@@ -2575,6 +2619,7 @@ void SplitUVRow_AVX2(const uint8* src_uv,
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpsrlw $0x8,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -2614,6 +2659,7 @@ void SplitUVRow_SSE2(const uint8* src_uv, ...@@ -2614,6 +2659,7 @@ void SplitUVRow_SSE2(const uint8* src_uv,
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -2650,6 +2696,7 @@ void MergeUVRow_AVX2(const uint8* src_u, ...@@ -2650,6 +2696,7 @@ void MergeUVRow_AVX2(const uint8* src_u,
int width) { int width) {
asm volatile ( asm volatile (
"sub %0,%1 \n" "sub %0,%1 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -2683,6 +2730,7 @@ void MergeUVRow_SSE2(const uint8* src_u, ...@@ -2683,6 +2730,7 @@ void MergeUVRow_SSE2(const uint8* src_u,
int width) { int width) {
asm volatile ( asm volatile (
"sub %0,%1 \n" "sub %0,%1 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -2714,6 +2762,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { ...@@ -2714,6 +2762,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
"jne 2f \n" "jne 2f \n"
"test $0xf,%1 \n" "test $0xf,%1 \n"
"jne 2f \n" "jne 2f \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqa " MEMACCESS(0) ",%%xmm0 \n" "movdqa " MEMACCESS(0) ",%%xmm0 \n"
...@@ -2725,6 +2774,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { ...@@ -2725,6 +2774,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
"sub $0x20,%2 \n" "sub $0x20,%2 \n"
"jg 1b \n" "jg 1b \n"
"jmp 9f \n" "jmp 9f \n"
LABELALIGN LABELALIGN
"2: \n" "2: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -2790,6 +2840,7 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -2790,6 +2840,7 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
"pslld $0x18,%%xmm0 \n" "pslld $0x18,%%xmm0 \n"
"pcmpeqb %%xmm1,%%xmm1 \n" "pcmpeqb %%xmm1,%%xmm1 \n"
"psrld $0x8,%%xmm1 \n" "psrld $0x8,%%xmm1 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm2 \n" "movdqu " MEMACCESS(0) ",%%xmm2 \n"
...@@ -2824,6 +2875,7 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -2824,6 +2875,7 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
asm volatile ( asm volatile (
"vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
"vpsrld $0x8,%%ymm0,%%ymm0 \n" "vpsrld $0x8,%%ymm0,%%ymm0 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm1 \n" "vmovdqu " MEMACCESS(0) ",%%ymm1 \n"
...@@ -2883,6 +2935,7 @@ void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) { ...@@ -2883,6 +2935,7 @@ void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {
asm volatile ( asm volatile (
"vmovdqa %3,%%ymm4 \n" "vmovdqa %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n" "vbroadcastf128 %4,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ", %%ymm0 \n" "vmovdqu " MEMACCESS(0) ", %%ymm0 \n"
...@@ -2922,6 +2975,7 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -2922,6 +2975,7 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
"pslld $0x18,%%xmm0 \n" "pslld $0x18,%%xmm0 \n"
"pcmpeqb %%xmm1,%%xmm1 \n" "pcmpeqb %%xmm1,%%xmm1 \n"
"psrld $0x8,%%xmm1 \n" "psrld $0x8,%%xmm1 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movq " MEMACCESS(0) ",%%xmm2 \n" "movq " MEMACCESS(0) ",%%xmm2 \n"
...@@ -2958,6 +3012,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -2958,6 +3012,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
asm volatile ( asm volatile (
"vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
"vpsrld $0x8,%%ymm0,%%ymm0 \n" "vpsrld $0x8,%%ymm0,%%ymm0 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n" "vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n"
...@@ -3018,6 +3073,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width) { ...@@ -3018,6 +3073,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -3048,6 +3104,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, ...@@ -3048,6 +3104,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -3088,6 +3145,7 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, ...@@ -3088,6 +3145,7 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -3148,6 +3206,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, ...@@ -3148,6 +3206,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy,
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -3188,6 +3247,7 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy, ...@@ -3188,6 +3247,7 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
...@@ -3222,6 +3282,7 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) { ...@@ -3222,6 +3282,7 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) {
asm volatile ( asm volatile (
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpsrlw $0x8,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -3254,6 +3315,7 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, ...@@ -3254,6 +3315,7 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpsrlw $0x8,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -3295,6 +3357,7 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, ...@@ -3295,6 +3357,7 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpsrlw $0x8,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -3401,6 +3464,7 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy, ...@@ -3401,6 +3464,7 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpsrlw $0x8,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n" "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
...@@ -4583,7 +4647,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, ...@@ -4583,7 +4647,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row,
"test $0xf,%1 \n" "test $0xf,%1 \n"
"jne 49f \n" "jne 49f \n"
// 4 pixel loop \n" // 4 pixel loop.
LABELALIGN LABELALIGN
"40: \n" "40: \n"
"movdqu " MEMACCESS(0) ",%%xmm2 \n" "movdqu " MEMACCESS(0) ",%%xmm2 \n"
...@@ -4622,7 +4686,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, ...@@ -4622,7 +4686,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row,
"add $0x3,%3 \n" "add $0x3,%3 \n"
"jl 19f \n" "jl 19f \n"
// 1 pixel loop \n" // 1 pixel loop.
LABELALIGN LABELALIGN
"10: \n" "10: \n"
"movd " MEMACCESS(0) ",%%xmm2 \n" "movd " MEMACCESS(0) ",%%xmm2 \n"
...@@ -4676,7 +4740,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, ...@@ -4676,7 +4740,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
"cvtps2dq %%xmm5,%%xmm5 \n" "cvtps2dq %%xmm5,%%xmm5 \n"
"packssdw %%xmm5,%%xmm5 \n" "packssdw %%xmm5,%%xmm5 \n"
// 4 pixel small loop \n" // 4 pixel small loop.
LABELALIGN LABELALIGN
"4: \n" "4: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment