Commit cf101116 authored by Frank Barchard

Remove initialization to zero of output variables for inline assembly.

Inline assembly that uses temporary variables currently initializes
them to 0 and passes them in with the read-write output constraint
"+r". This CL replaces that constraint with "=&r" for most of them,
meaning a write-only output with early clobber (the register may be
written before all inputs have been consumed). This allows the
initialize-to-zero step to be removed, saving 1 instruction.

BUG=libyuv:580
TESTED=local libyuv build on gcc/linux and try bots
R=harryjin@google.com

Review URL: https://codereview.chromium.org/1895743008 .
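To illustrate the constraint change, here is a minimal standalone
sketch. It is not from the libyuv sources: the single x86 "movl"
instruction, the function names, and the printed values are
placeholders chosen only to show the "+r" vs "=&r" difference.

#include <stdio.h>

// Old pattern: "+r" is a read-write operand, so the compiler treats
// tmp as an input as well as an output, and tmp must be initialized
// before the asm block -- the "tmp = 0" costs one extra instruction.
static int old_style(void) {
  int tmp = 0;
  asm volatile("movl $42, %0" : "+r"(tmp));
  return tmp;
}

// New pattern: "=&r" is a write-only, early-clobber output. The
// compiler assumes nothing about tmp's initial value and will not
// place any input operand in the same register, so the
// zero-initialization can be dropped.
static int new_style(void) {
  int tmp;  // intentionally uninitialized; the asm writes it
  asm volatile("movl $42, %0" : "=&r"(tmp));
  return tmp;
}

int main(void) {
  printf("%d %d\n", old_style(), new_style());  // prints "42 42"
  return 0;
}

The early-clobber marker "&" is what makes this safe: with a plain
"=r" the compiler may assign an input to the same register as the
output, which breaks asm that writes the output before its last read
of the inputs.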
parent f160ce90
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1584
+Version: 1585
 License: BSD
 License File: LICENSE
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1584
+#define LIBYUV_VERSION 1585
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
@@ -27,7 +27,7 @@ static uvec8 kVTbl4x4Transpose =
 void TransposeWx8_NEON(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride,
                        int width) {
-  const uint8* src_temp = NULL;
+  const uint8* src_temp;
   asm volatile (
     // loops are on blocks of 8. loop will stop when
     // counter gets to or below 0. starting the counter
@@ -229,7 +229,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
     "4:                                        \n"
-  : "+r"(src_temp),   // %0
+  : "=&r"(src_temp),  // %0
     "+r"(src),        // %1
     "+r"(src_stride), // %2
     "+r"(dst),        // %3
@@ -247,7 +247,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                          uint8* dst_a, int dst_stride_a,
                          uint8* dst_b, int dst_stride_b,
                          int width) {
-  const uint8* src_temp = NULL;
+  const uint8* src_temp;
   asm volatile (
     // loops are on blocks of 8. loop will stop when
     // counter gets to or below 0. starting the counter
@@ -512,7 +512,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
     "4:                                        \n"
-  : "+r"(src_temp),   // %0
+  : "=&r"(src_temp),  // %0
     "+r"(src),        // %1
     "+r"(src_stride), // %2
     "+r"(dst_a),      // %3
@@ -26,7 +26,7 @@ static uvec8 kVTbl4x4Transpose =
 void TransposeWx8_NEON(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride, int width) {
-  const uint8* src_temp = NULL;
+  const uint8* src_temp;
   int64 width64 = (int64) width;  // Work around clang 3.4 warning.
   asm volatile (
     // loops are on blocks of 8. loop will stop when
@@ -235,7 +235,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
     "4:                                        \n"
-  : "+r"(src_temp),  // %0
+  : "=&r"(src_temp), // %0
     "+r"(src),       // %1
     "+r"(dst),       // %2
     "+r"(width64)    // %3
@@ -255,7 +255,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                          uint8* dst_a, int dst_stride_a,
                          uint8* dst_b, int dst_stride_b,
                          int width) {
-  const uint8* src_temp = NULL;
+  const uint8* src_temp;
   int64 width64 = (int64) width;  // Work around clang 3.4 warning.
   asm volatile (
     // loops are on blocks of 8. loop will stop when
@@ -520,7 +520,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
     "4:                                        \n"
-  : "+r"(src_temp),  // %0
+  : "=&r"(src_temp), // %0
     "+r"(src),       // %1
     "+r"(dst_a),     // %2
     "+r"(dst_b),     // %3
@@ -1811,7 +1811,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
                                 uint8* dst_argb,
                                 const struct YuvConstants* yuvconstants,
                                 int width) {
-  int temp = 0;
+  int temp;
   asm volatile (
     YUVTORGB_SETUP(yuvconstants)
     "sub        %[u_buf],%[v_buf]              \n"
@@ -1823,15 +1823,15 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
     STOREARGB
     "subl       $0x8,%[width]                  \n"
     "jg         1b                             \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
+  : [y_buf]"+r"(y_buf),     // %[y_buf]
+    [u_buf]"+r"(u_buf),     // %[u_buf]
+    [v_buf]"+r"(v_buf),     // %[v_buf]
     [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [temp]"+r"(temp),      // %[temp]
+    [temp]"=&r"(temp),      // %[temp]
 #if defined(__i386__) && defined(__pic__)
-    [width]"+m"(width)     // %[width]
+    [width]"+m"(width)      // %[width]
 #else
-    [width]"+rm"(width)    // %[width]
+    [width]"+rm"(width)     // %[width]
 #endif
   : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
   : "memory", "cc", NACL_R14 YUVTORGB_REGS
@@ -3732,7 +3732,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
 // Unattenuate 4 pixels at a time.
 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
                              int width) {
-  uintptr_t alpha = 0;
+  uintptr_t alpha;
   asm volatile (
     // 4 pixel loop.
     LABELALIGN
@@ -3763,10 +3763,10 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
     "lea        " MEMLEA(0x10,1) ",%1          \n"
     "sub        $0x4,%2                        \n"
     "jg         1b                             \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+r"(width),       // %2
-    "+r"(alpha)        // %3
+  : "+r"(src_argb),     // %0
+    "+r"(dst_argb),     // %1
+    "+r"(width),        // %2
+    "=&r"(alpha)        // %3
   : "r"(fixed_invtbl8)  // %4
   : "memory", "cc", NACL_R14
     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
@@ -3782,7 +3782,7 @@ static const uvec8 kUnattenShuffleAlpha_AVX2 = {
 // Unattenuate 8 pixels at a time.
 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
                              int width) {
-  uintptr_t alpha = 0;
+  uintptr_t alpha;
   asm volatile (
     "sub        %0,%1                          \n"
     "vbroadcastf128 %5,%%ymm5                  \n"
@@ -3831,10 +3831,10 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
     "sub        $0x8,%2                        \n"
     "jg         1b                             \n"
     "vzeroupper                                \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+r"(width),       // %2
-    "+r"(alpha)        // %3
+  : "+r"(src_argb),     // %0
+    "+r"(dst_argb),     // %1
+    "+r"(width),        // %2
+    "=&r"(alpha)        // %3
   : "r"(fixed_invtbl8),  // %4
     "m"(kUnattenShuffleAlpha_AVX2)  // %5
   : "memory", "cc", NACL_R14
@@ -4759,7 +4759,7 @@ LIBYUV_API
 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
                         uint8* dst_argb, const float* src_dudv, int width) {
   intptr_t src_argb_stride_temp = src_argb_stride;
-  intptr_t temp = 0;
+  intptr_t temp;
   asm volatile (
     "movq       " MEMACCESS(3) ",%%xmm2        \n"
     "movq       " MEMACCESS2(0x08,3) ",%%xmm7  \n"
@@ -4831,7 +4831,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
     "+r"(dst_argb),  // %2
     "+r"(src_dudv),  // %3
     "+rm"(width),    // %4
-    "+r"(temp)       // %5
+    "=&r"(temp)      // %5
   :
   : "memory", "cc", NACL_R14
     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
@@ -5057,7 +5057,7 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
                          const uint8* shuffler, int width) {
-  uintptr_t pixel_temp = 0u;
+  uintptr_t pixel_temp;
   asm volatile (
     "pxor       %%xmm5,%%xmm5                  \n"
     "mov        " MEMACCESS(4) ",%k2           \n"
@@ -5162,11 +5162,11 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
     "jg         3012b                          \n"
     "99:                                       \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+d"(pixel_temp),  // %2
+  : "+r"(src_argb),     // %0
+    "+r"(dst_argb),     // %1
+    "=&d"(pixel_temp),  // %2
     "+r"(width)         // %3
-  : "r"(shuffler)      // %4
+  : "r"(shuffler)       // %4
   : "memory", "cc", NACL_R14
     "xmm0", "xmm1", "xmm5"
   );
@@ -5343,7 +5343,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
 // Tranform ARGB pixels with color table.
 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
                            int width) {
-  uintptr_t pixel_temp = 0u;
+  uintptr_t pixel_temp;
   asm volatile (
     // 1 pixel loop.
     LABELALIGN
@@ -5363,10 +5363,10 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
     "mov        %b1," MEMACCESS2(-0x1,0) "     \n"
     "dec        %2                             \n"
     "jg         1b                             \n"
-  : "+r"(dst_argb),    // %0
-    "+d"(pixel_temp),  // %1
-    "+r"(width)        // %2
-  : "r"(table_argb)    // %3
+  : "+r"(dst_argb),     // %0
+    "=&d"(pixel_temp),  // %1
+    "+r"(width)         // %2
+  : "r"(table_argb)     // %3
   : "memory", "cc");
 }
 #endif  // HAS_ARGBCOLORTABLEROW_X86
@@ -5374,7 +5374,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
 #ifdef HAS_RGBCOLORTABLEROW_X86
 // Tranform RGB pixels with color table.
 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
-  uintptr_t pixel_temp = 0u;
+  uintptr_t pixel_temp;
   asm volatile (
     // 1 pixel loop.
     LABELALIGN
@@ -5391,10 +5391,10 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
     "mov        %b1," MEMACCESS2(-0x2,0) "     \n"
     "dec        %2                             \n"
     "jg         1b                             \n"
-  : "+r"(dst_argb),    // %0
-    "+d"(pixel_temp),  // %1
-    "+r"(width)        // %2
-  : "r"(table_argb)    // %3
+  : "+r"(dst_argb),     // %0
+    "=&d"(pixel_temp),  // %1
+    "+r"(width)         // %2
+  : "r"(table_argb)     // %3
   : "memory", "cc");
 }
 #endif  // HAS_RGBCOLORTABLEROW_X86
@@ -5404,8 +5404,8 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
                                  int width,
                                  const uint8* luma, uint32 lumacoeff) {
-  uintptr_t pixel_temp = 0u;
-  uintptr_t table_temp = 0u;
+  uintptr_t pixel_temp;
+  uintptr_t table_temp;
   asm volatile (
     "movd       %6,%%xmm3                      \n"
     "pshufd     $0x0,%%xmm3,%%xmm3             \n"
@@ -5487,13 +5487,13 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
     "lea        " MEMLEA(0x10,3) ",%3          \n"
     "sub        $0x4,%4                        \n"
     "jg         1b                             \n"
-  : "+d"(pixel_temp),  // %0
-    "+a"(table_temp),  // %1
-    "+r"(src_argb),    // %2
-    "+r"(dst_argb),    // %3
-    "+rm"(width)       // %4
-  : "r"(luma),         // %5
-    "rm"(lumacoeff)    // %6
+  : "=&d"(pixel_temp),  // %0
+    "=&a"(table_temp),  // %1
+    "+r"(src_argb),     // %2
+    "+r"(dst_argb),     // %3
+    "+rm"(width)        // %4
+  : "r"(luma),          // %5
+    "rm"(lumacoeff)     // %6
   : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"
   );
 }
@@ -498,8 +498,8 @@ void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
 void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                        int width) {
-  int x = 0;
-  int y = 0;
+  int x;
+  int y;
   __asm__ __volatile__ (
     ".set push                                 \n"
     ".set noreorder                            \n"
@@ -579,7 +579,7 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
       [dst_u] "+r" (dst_u),
       [dst_v] "+r" (dst_v),
       [x] "=&r" (x),
-      [y] "+r" (y)
+      [y] "=&r" (y)
     : [width] "r" (width)
     : "t0", "t1", "t2", "t3", "t4",
      "t5", "t7", "t8", "t9"
@@ -316,7 +316,7 @@ void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
 void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
                             uint8* dst_ptr, int dst_width) {
-  intptr_t stridex3 = 0;
+  intptr_t stridex3;
   asm volatile (
     "pcmpeqb    %%xmm4,%%xmm4                  \n"
     "psrlw      $0xf,%%xmm4                    \n"
@@ -361,7 +361,7 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
   : "+r"(src_ptr),   // %0
     "+r"(dst_ptr),   // %1
     "+r"(dst_width), // %2
-    "+r"(stridex3)   // %3
+    "=&r"(stridex3)  // %3
   : "r"((intptr_t)(src_stride))  // %4
   : "memory", "cc", NACL_R14
     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
@@ -824,7 +824,7 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
 // Bilinear column filtering. SSSE3 version.
 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                            int dst_width, int x, int dx) {
-  intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
+  intptr_t x0, x1, temp_pixel;
   asm volatile (
     "movd       %6,%%xmm2                      \n"
     "movd       %7,%%xmm3                      \n"
@@ -880,14 +880,14 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
     "movd       %%xmm0,%k2                     \n"
     "mov        %b2," MEMACCESS(0) "           \n"
     "99:                                       \n"
-  : "+r"(dst_ptr),     // %0
-    "+r"(src_ptr),     // %1
-    "+a"(temp_pixel),  // %2
-    "+r"(x0),          // %3
-    "+r"(x1),          // %4
-    "+rm"(dst_width)   // %5
-  : "rm"(x),           // %6
-    "rm"(dx)           // %7
+  : "+r"(dst_ptr),      // %0
+    "+r"(src_ptr),      // %1
+    "=&a"(temp_pixel),  // %2
+    "=&r"(x0),          // %3
+    "=&r"(x1),          // %4
+    "+rm"(dst_width)    // %5
+  : "rm"(x),            // %6
+    "rm"(dx)            // %7
   : "memory", "cc", NACL_R14
     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
   );
@@ -998,7 +998,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
                                int src_stepx, uint8* dst_argb, int dst_width) {
   intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
-  intptr_t src_stepx_x12 = 0;
+  intptr_t src_stepx_x12;
   asm volatile (
     "lea        " MEMLEA3(0x00,1,4) ",%1       \n"
     "lea        " MEMLEA4(0x00,1,1,2) ",%4     \n"
@@ -1016,11 +1016,11 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
     "lea        " MEMLEA(0x10,2) ",%2          \n"
     "sub        $0x4,%3                        \n"
     "jg         1b                             \n"
-  : "+r"(src_argb),       // %0
-    "+r"(src_stepx_x4),   // %1
-    "+r"(dst_argb),       // %2
-    "+r"(dst_width),      // %3
-    "+r"(src_stepx_x12)   // %4
+  : "+r"(src_argb),       // %0
+    "+r"(src_stepx_x4),   // %1
+    "+r"(dst_argb),       // %2
+    "+r"(dst_width),      // %3
+    "=&r"(src_stepx_x12)  // %4
   :: "memory", "cc", NACL_R14
     "xmm0", "xmm1", "xmm2", "xmm3"
   );
@@ -1032,7 +1032,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
                                   ptrdiff_t src_stride, int src_stepx,
                                   uint8* dst_argb, int dst_width) {
   intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
-  intptr_t src_stepx_x12 = 0;
+  intptr_t src_stepx_x12;
   intptr_t row1 = (intptr_t)(src_stride);
   asm volatile (
     "lea        " MEMLEA3(0x00,1,4) ",%1       \n"
@@ -1061,12 +1061,12 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
     "lea        " MEMLEA(0x10,2) ",%2          \n"
     "sub        $0x4,%3                        \n"
     "jg         1b                             \n"
-  : "+r"(src_argb),        // %0
-    "+r"(src_stepx_x4),    // %1
-    "+r"(dst_argb),        // %2
-    "+rm"(dst_width),      // %3
-    "+r"(src_stepx_x12),   // %4
-    "+r"(row1)             // %5
+  : "+r"(src_argb),        // %0
+    "+r"(src_stepx_x4),    // %1
+    "+r"(dst_argb),        // %2
+    "+rm"(dst_width),      // %3
+    "=&r"(src_stepx_x12),  // %4
+    "+r"(row1)             // %5
   :: "memory", "cc", NACL_R14
     "xmm0", "xmm1", "xmm2", "xmm3"
   );
@@ -1074,7 +1074,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
 void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
                         int dst_width, int x, int dx) {
-  intptr_t x0 = 0, x1 = 0;
+  intptr_t x0, x1;
   asm volatile (
     "movd       %5,%%xmm2                      \n"
     "movd       %6,%%xmm3                      \n"
@@ -1127,8 +1127,8 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
     MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
     "movd       %%xmm0," MEMACCESS(2) "        \n"
     "99:                                       \n"
-  : "+a"(x0),        // %0
-    "+d"(x1),        // %1
+  : "=&a"(x0),       // %0
+    "=&d"(x1),       // %1
     "+r"(dst_argb),  // %2
     "+r"(src_argb),  // %3
     "+r"(dst_width)  // %4
@@ -1179,7 +1179,7 @@ static uvec8 kShuffleFractions = {
 // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
 void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
                                int dst_width, int x, int dx) {
-  intptr_t x0 = 0, x1 = 0;
+  intptr_t x0, x1;
   asm volatile (
     "movdqa     %0,%%xmm4                      \n"
     "movdqa     %1,%%xmm5                      \n"
@@ -1242,8 +1242,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
   : "+r"(dst_argb),    // %0
     "+r"(src_argb),    // %1
     "+rm"(dst_width),  // %2
-    "+r"(x0),          // %3
-    "+r"(x1)           // %4
+    "=&r"(x0),         // %3
+    "=&r"(x1)          // %4
   : "rm"(x),           // %5
     "rm"(dx)           // %6
   : "memory", "cc", NACL_R14
@@ -532,7 +532,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
 void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int src_width, int src_height) {
-  const uint8* src_tmp = NULL;
+  const uint8* src_tmp;
   asm volatile (
     "1:                                        \n"
     "mov        %0, %1                         \n"
@@ -552,12 +552,12 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
     "add        %1, %1, #16                    \n"
     "subs       %4, %4, #16                    \n"  // 16 processed per loop
     "bgt        1b                             \n"
-  : "+r"(src_tmp),     // %0
-    "+r"(src_ptr),     // %1
-    "+r"(dst_ptr),     // %2
-    "+r"(src_stride),  // %3
-    "+r"(src_width),   // %4
-    "+r"(src_height)   // %5
+  : "=&r"(src_tmp),    // %0
+    "+r"(src_ptr),     // %1
+    "+r"(dst_ptr),     // %2
+    "+r"(src_stride),  // %3
+    "+r"(src_width),   // %4
+    "+r"(src_height)   // %5
   :
   : "memory", "cc", "r12", "q0", "q1", "q2", "q3"  // Clobber List
   );
@@ -909,7 +909,7 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
 void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
                         int dst_width, int x, int dx) {
-  int tmp = 0;
+  int tmp;
   const uint8* src_tmp = src_argb;
   asm volatile (
     "1:                                        \n"
@@ -926,13 +926,13 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
     "vst1.32    {q0, q1}, [%0]!                \n"  // store pixels
     "subs       %2, %2, #8                     \n"  // 8 processed per loop
     "bgt        1b                             \n"
-  : "+r"(dst_argb),   // %0
-    "+r"(src_argb),   // %1
-    "+r"(dst_width),  // %2
-    "+r"(x),          // %3
-    "+r"(dx),         // %4
-    "+r"(tmp),        // %5
-    "+r"(src_tmp)     // %6
+  : "+r"(dst_argb),   // %0
+    "+r"(src_argb),   // %1
+    "+r"(dst_width),  // %2
+    "+r"(x),          // %3
+    "+r"(dx),         // %4
+    "=&r"(tmp),       // %5
+    "+r"(src_tmp)     // %6
   :
   : "memory", "cc", "q0", "q1"
   );
@@ -547,7 +547,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
 void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int src_width, int src_height) {
-  const uint8* src_tmp = NULL;
+  const uint8* src_tmp;
   asm volatile (
     "1:                                        \n"
     "mov        %0, %1                         \n"
@@ -567,12 +567,12 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
     "add        %1, %1, #16                    \n"
     "subs       %w4, %w4, #16                  \n"  // 16 processed per loop
     "b.gt       1b                             \n"
-  : "+r"(src_tmp),     // %0
-    "+r"(src_ptr),     // %1
-    "+r"(dst_ptr),     // %2
-    "+r"(src_stride),  // %3
-    "+r"(src_width),   // %4
-    "+r"(src_height)   // %5
+  : "=&r"(src_tmp),    // %0
+    "+r"(src_ptr),     // %1
+    "+r"(dst_ptr),     // %2
+    "+r"(src_stride),  // %3
+    "+r"(src_width),   // %4
+    "+r"(src_height)   // %5
  :
   : "memory", "cc", "w12", "v0", "v1", "v2", "v3"  // Clobber List
   );
@@ -931,7 +931,7 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
   int64 dst_width64 = (int64) dst_width;  // Work around ios 64 bit warning.
   int64 x64 = (int64) x;
   int64 dx64 = (int64) dx;
-  int64 tmp64 = 0;
+  int64 tmp64;
   asm volatile (
     "1:                                        \n"
     LOAD1_DATA32_LANE(v0, 0)
@@ -947,13 +947,13 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
     "st1        {v0.4s, v1.4s}, [%0], #32      \n"  // store pixels
     "subs       %w2, %w2, #8                   \n"  // 8 processed per loop
     "b.gt       1b                             \n"
-  : "+r"(dst_argb),     // %0
-    "+r"(src_argb),     // %1
-    "+r"(dst_width64),  // %2
-    "+r"(x64),          // %3
-    "+r"(dx64),         // %4
-    "+r"(tmp64),        // %5
-    "+r"(src_tmp)       // %6
+  : "+r"(dst_argb),     // %0
+    "+r"(src_argb),     // %1
+    "+r"(dst_width64),  // %2
+    "+r"(x64),          // %3
+    "+r"(dx64),         // %4
+    "=&r"(tmp64),       // %5
+    "+r"(src_tmp)       // %6
   :
   : "memory", "cc", "v0", "v1"
  );