Commit 1f461f73 authored by Frank Barchard's avatar Frank Barchard

remove align directives

R=harryjin@google.com
BUG=none

Review URL: https://webrtc-codereview.appspot.com/54809004.
parent 6e7ef3fd
......@@ -27,7 +27,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
......
......@@ -26,7 +26,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"eor v17.16b, v17.16b, v17.16b \n"
"eor v19.16b, v19.16b, v19.16b \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
......
......@@ -26,7 +26,7 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
asm volatile (
// Read in the data from the source pointer.
// First round of bit swap.
".p2align 2 \n"
LABELALIGN
"1: \n"
"movq (%0),%%xmm0 \n"
"movq (%0,%3),%%xmm1 \n"
......@@ -114,7 +114,7 @@ void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
asm volatile (
// Read in the data from the source pointer.
// First round of bit swap.
".p2align 2 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu (%0,%3),%%xmm1 \n"
......@@ -256,7 +256,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
asm volatile (
// Read in the data from the source pointer.
// First round of bit swap.
".p2align 2 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu (%0,%4),%%xmm1 \n"
......
......@@ -35,7 +35,6 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
"sub %5, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
......@@ -256,7 +255,6 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
"sub %7, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
......
......@@ -141,101 +141,6 @@ static uvec8 kShuffleMaskARGBToRAW_0 = {
};
#endif // HAS_RGB24TOARGBROW_SSSE3
#if defined(TESTING) && defined(__x86_64__)
void TestRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
asm volatile (
".p2align 5 \n"
"mov %%eax,%%eax \n"
"mov %%ebx,%%ebx \n"
"mov %%ecx,%%ecx \n"
"mov %%edx,%%edx \n"
"mov %%esi,%%esi \n"
"mov %%edi,%%edi \n"
"mov %%ebp,%%ebp \n"
"mov %%esp,%%esp \n"
".p2align 5 \n"
"mov %%r8d,%%r8d \n"
"mov %%r9d,%%r9d \n"
"mov %%r10d,%%r10d \n"
"mov %%r11d,%%r11d \n"
"mov %%r12d,%%r12d \n"
"mov %%r13d,%%r13d \n"
"mov %%r14d,%%r14d \n"
"mov %%r15d,%%r15d \n"
".p2align 5 \n"
"lea (%%rax),%%eax \n"
"lea (%%rbx),%%ebx \n"
"lea (%%rcx),%%ecx \n"
"lea (%%rdx),%%edx \n"
"lea (%%rsi),%%esi \n"
"lea (%%rdi),%%edi \n"
"lea (%%rbp),%%ebp \n"
"lea (%%rsp),%%esp \n"
".p2align 5 \n"
"lea (%%r8),%%r8d \n"
"lea (%%r9),%%r9d \n"
"lea (%%r10),%%r10d \n"
"lea (%%r11),%%r11d \n"
"lea (%%r12),%%r12d \n"
"lea (%%r13),%%r13d \n"
"lea (%%r14),%%r14d \n"
"lea (%%r15),%%r15d \n"
".p2align 5 \n"
"lea 0x10(%%rax),%%eax \n"
"lea 0x10(%%rbx),%%ebx \n"
"lea 0x10(%%rcx),%%ecx \n"
"lea 0x10(%%rdx),%%edx \n"
"lea 0x10(%%rsi),%%esi \n"
"lea 0x10(%%rdi),%%edi \n"
"lea 0x10(%%rbp),%%ebp \n"
"lea 0x10(%%rsp),%%esp \n"
".p2align 5 \n"
"lea 0x10(%%r8),%%r8d \n"
"lea 0x10(%%r9),%%r9d \n"
"lea 0x10(%%r10),%%r10d \n"
"lea 0x10(%%r11),%%r11d \n"
"lea 0x10(%%r12),%%r12d \n"
"lea 0x10(%%r13),%%r13d \n"
"lea 0x10(%%r14),%%r14d \n"
"lea 0x10(%%r15),%%r15d \n"
".p2align 5 \n"
"add 0x10,%%eax \n"
"add 0x10,%%ebx \n"
"add 0x10,%%ecx \n"
"add 0x10,%%edx \n"
"add 0x10,%%esi \n"
"add 0x10,%%edi \n"
"add 0x10,%%ebp \n"
"add 0x10,%%esp \n"
".p2align 5 \n"
"add 0x10,%%r8d \n"
"add 0x10,%%r9d \n"
"add 0x10,%%r10d \n"
"add 0x10,%%r11d \n"
"add 0x10,%%r12d \n"
"add 0x10,%%r13d \n"
"add 0x10,%%r14d \n"
"add 0x10,%%r15d \n"
".p2align 2 \n"
"1: \n"
"movq " MEMACCESS(0) ",%%xmm0 \n"
"lea " MEMLEA(0x8,0) ",%0 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
: "memory", "cc", "xmm0", "xmm1", "xmm5"
);
}
#endif // TESTING
#ifdef HAS_J400TOARGBROW_SSE2
void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
asm volatile (
......
......@@ -389,7 +389,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual
".p2align 2 \n"
"1: \n"
"addiu $t4, $t4, -1 \n"
"lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
......@@ -457,7 +456,6 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
"blez $t4, 2f \n"
" addu %[src], %[src], %[width] \n" // src += width
".p2align 2 \n"
"1: \n"
"lw $t0, -16(%[src]) \n" // |3|2|1|0|
"lw $t1, -12(%[src]) \n" // |7|6|5|4|
......@@ -512,7 +510,6 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez %[x], 2f \n"
" addu %[src_uv], %[src_uv], $t4 \n"
".p2align 2 \n"
"1: \n"
"lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
"lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
......@@ -673,7 +670,6 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into argb format
......@@ -735,7 +731,6 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into abgr format
......@@ -797,7 +792,6 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
"lui $s6, 0xff \n"
"ori $s6, 0xff \n" // |00|ff|00|ff|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into bgra format
......@@ -857,7 +851,6 @@ void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
"replv.ph $t0, %[y0_fraction] \n"
"replv.ph $t1, %[source_y_fraction] \n"
".p2align 2 \n"
"1: \n"
"lw $t2, 0(%[src_ptr]) \n"
"lw $t3, 0(%[src_ptr1]) \n"
......
This diff is collapsed.
......@@ -31,7 +31,6 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
......@@ -90,7 +89,6 @@ void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"bltz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
......@@ -188,7 +186,6 @@ void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
......@@ -248,7 +245,6 @@ void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"srl $t9, %[dst_width], 1 \n"
"andi $t8, %[dst_width], 1 \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
......@@ -319,7 +315,6 @@ void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
......@@ -368,7 +363,6 @@ void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n"
"repl.ph $t3, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
......@@ -425,7 +419,6 @@ void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n"
"repl.ph $t2, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
......@@ -477,7 +470,6 @@ void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
......@@ -528,7 +520,6 @@ void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
......@@ -586,7 +577,6 @@ void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
......
......@@ -26,7 +26,6 @@ extern "C" {
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
......@@ -47,7 +46,6 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc
......@@ -73,7 +71,6 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
asm volatile (
// change the stride to row 2 pointer
"add %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
......@@ -101,7 +98,6 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
......@@ -123,7 +119,6 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
const uint8* src_ptr2 = src_ptr + src_stride * 2;
const uint8* src_ptr3 = src_ptr + src_stride * 3;
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load up 16x4
......@@ -162,7 +157,6 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
......@@ -185,7 +179,6 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
......@@ -245,7 +238,6 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
......@@ -300,7 +292,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
asm volatile (
MEMACCESS(3)
"vld1.8 {q3}, [%3] \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0, d1, d2, d3}, [%0]! \n"
......@@ -334,7 +325,6 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
MEMACCESS(7)
"vld1.8 {q15}, [%7] \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
......@@ -450,7 +440,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
MEMACCESS(5)
"vld1.8 {q14}, [%5] \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
......@@ -545,7 +534,6 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
const uint8* src_tmp = NULL;
asm volatile (
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
"mov r12, %5 \n"
......@@ -590,7 +578,6 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int* tmp = dx_offset;
const uint8* src_tmp = src_ptr;
asm volatile (
".p2align 2 \n"
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
......@@ -749,7 +736,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
......@@ -773,7 +759,6 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
......@@ -804,7 +789,6 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
asm volatile (
// change the stride to row 2 pointer
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
......@@ -845,7 +829,6 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width) {
asm volatile (
"mov r12, %3, lsl #2 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], r12 \n"
......@@ -875,7 +858,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
asm volatile (
"mov r12, %4, lsl #2 \n"
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
......@@ -930,7 +912,6 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int tmp = 0;
const uint8* src_tmp = src_argb;
asm volatile (
".p2align 2 \n"
"1: \n"
LOAD1_DATA32_LANE(d0, 0)
LOAD1_DATA32_LANE(d0, 1)
......@@ -974,7 +955,6 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int* tmp = dx_offset;
const uint8* src_tmp = src_argb;
asm volatile (
".p2align 2 \n"
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment