Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
faa78822
Commit
faa78822
authored
Jul 31, 2012
by
Mans Rullgard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ARM: use =const syntax instead of explicit literal pools
Signed-off-by: Mans Rullgard <mans@mansr.com>
parent
99817091
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 35 additions and 74 deletions
+35
-74
simple_idct_arm.S
libavcodec/arm/simple_idct_arm.S
+16
-40
simple_idct_armv5te.S
libavcodec/arm/simple_idct_armv5te.S
+7
-13
simple_idct_armv6.S
libavcodec/arm/simple_idct_armv6.S
+12
-21
No files found.
libavcodec/arm/simple_idct_arm.S
View file @
faa78822
...
...
@@ -25,8 +25,7 @@
#include "libavutil/arm/asm.S"
/* useful constants for the algorithm, they are save in __constant_ptr__ at */
/* the end of the source code.*/
/* useful constants for the algorithm */
#define W1 22725
#define W2 21407
#define W3 19266
...
...
@@ -36,16 +35,6 @@
#define W7 4520
#define MASK_MSHW 0xFFFF0000
/* offsets of the constants in the vector */
#define offW1 0
#define offW2 4
#define offW3 8
#define offW4 12
#define offW5 16
#define offW6 20
#define offW7 24
#define offMASK_MSHW 28
#define ROW_SHIFT 11
#define ROW_SHIFT2MSHW (16-11)
#define COL_SHIFT 20
...
...
@@ -63,7 +52,6 @@ function ff_simple_idct_arm, export=1
stmfd sp!, {r4-r11, r14} @ R14 is also called LR
@@ at this point, R0=block, other registers are free.
add r14, r0, #112 @ R14=&block[8*7], better start from the last row, and decrease the value until row=0, i.e. R12=block.
adr r12, __constant_ptr__ @ R12=__constant_ptr__, the vector containing the constants, probably not necessary to reserve a register for it
@@ add 2 temporary variables in the stack: R0 and R14
sub sp, sp, #8 @ allow 2 local variables
str r0, [sp, #0] @ save block in sp[0]
...
...
@@ -109,13 +97,13 @@ __b_evaluation:
@@ MAC16(b1, -W7, row[3]);
@@ MAC16(b2, -W1, row[3]);
@@ MAC16(b3, -W5, row[3]);
ldr r8,
[r12, #offW1]
@ R8=W1
ldr r8,
=W1
@ R8=W1
mov r2, r2, asr #16 @ R2=ROWr16[3]
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r9,
[r12, #offW3]
@ R9=W3
ldr r10,
[r12, #offW5]
@ R10=W5
ldr r9,
=W3
@ R9=W3
ldr r10,
=W5
@ R10=W5
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r11,
[r12, #offW7]
@ R11=W7
ldr r11,
=W7
@ R11=W7
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if null avoid muls
...
...
@@ -177,14 +165,14 @@ __a_evaluation:
@@ a2 = a0 - W6 * row[2];
@@ a3 = a0 - W2 * row[2];
@@ a0 = a0 + W2 * row[2];
ldr r9,
[r12, #offW4]
@ R9=W4
ldr r9,
=W4
@ R9=W4
mul r6, r9, r6 @ R6=W4*ROWr16[0]
ldr r10,
[r12, #offW6]
@ R10=W6
ldr r10,
=W6
@ R10=W6
ldrsh r4, [r14, #4] @ R4=ROWr16[2] (a3 not defined yet)
add r6, r6, #ROW_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(ROW_SHIFT-1) (a0)
mul r11, r10, r4 @ R11=W6*ROWr16[2]
ldr r8,
[r12, #offW2]
@ R8=W2
ldr r8,
=W2
@ R8=W2
sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
@@ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
@@ if (temp != 0) {}
...
...
@@ -248,7 +236,7 @@ __end_a_evaluation:
add r9, r2, r1 @ R9=a1+b1
@@ put 2 16 bits half-words in a 32bits word
@@ ROWr32[0]=ROWr16[0] | (ROWr16[1]<<16) (only Little Endian compliant then!!!)
ldr r10,
[r12, #offMASK_MSHW]
@ R10=0xFFFF0000
ldr r10,
=MASK_MSHW
@ R10=0xFFFF0000
and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a1+b1)<<5)
mvn r11, r10 @ R11= NOT R10= 0x0000FFFF
and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a0+b0)>>11)
...
...
@@ -319,13 +307,13 @@ __b_evaluation2:
@@ MAC16(b1, -W7, col[8x3]);
@@ MAC16(b2, -W1, col[8x3]);
@@ MAC16(b3, -W5, col[8x3]);
ldr r8,
[r12, #offW1]
@ R8=W1
ldr r8,
=W1
@ R8=W1
ldrsh r7, [r14, #16]
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r9,
[r12, #offW3]
@ R9=W3
ldr r10,
[r12, #offW5]
@ R10=W5
ldr r9,
=W3
@ R9=W3
ldr r10,
=W5
@ R10=W5
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r11,
[r12, #offW7]
@ R11=W7
ldr r11,
=W7
@ R11=W7
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldrsh r2, [r14, #48]
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
...
...
@@ -381,13 +369,13 @@ __a_evaluation2:
@@ a3 = a0 - W2 * row[2];
@@ a0 = a0 + W2 * row[2];
ldrsh r6, [r14, #0]
ldr r9,
[r12, #offW4]
@ R9=W4
ldr r9,
=W4
@ R9=W4
mul r6, r9, r6 @ R6=W4*ROWr16[0]
ldr r10,
[r12, #offW6]
@ R10=W6
ldr r10,
=W6
@ R10=W6
ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet)
add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
mul r11, r10, r4 @ R11=W6*ROWr16[2]
ldr r8,
[r12, #offW2]
@ R8=W2
ldr r8,
=W2
@ R8=W2
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
mul r11, r8, r4 @ R11=W2*ROWr16[2]
...
...
@@ -489,15 +477,3 @@ __end_bef_a_evaluation:
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
bal __end_a_evaluation
.align
__constant_ptr__: @@ see #defines at the beginning of the source code for values.
.word W1
.word W2
.word W3
.word W4
.word W5
.word W6
.word W7
.word MASK_MSHW
libavcodec/arm/simple_idct_armv5te.S
View file @
faa78822
...
...
@@ -37,12 +37,6 @@
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))
.text
.align
w13: .long W13
w26: .long W26
w57: .long W57
function idct_row_armv5te
str lr, [sp, #-4]!
...
...
@@ -58,7 +52,7 @@ function idct_row_armv5te
mov ip, #16384
sub ip, ip, #1 /* ip = W4 */
smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */
ldr ip,
w26
/* ip = W2 | (W6 << 16) */
ldr ip,
=W26
/* ip = W2 | (W6 << 16) */
smultb a2, ip, a4
smulbb lr, ip, a4
add v2, v1, a2
...
...
@@ -66,8 +60,8 @@ function idct_row_armv5te
sub v4, v1, lr
add v1, v1, lr
ldr ip,
w13
/* ip = W1 | (W3 << 16) */
ldr lr,
w57
/* lr = W5 | (W7 << 16) */
ldr ip,
=W13
/* ip = W1 | (W3 << 16) */
ldr lr,
=W57
/* lr = W5 | (W7 << 16) */
smulbt v5, ip, a3
smultt v6, lr, a4
smlatt v5, ip, a4, v5
...
...
@@ -94,7 +88,7 @@ function idct_row_armv5te
smlatt v7, ip, a4, v7
sub fp, fp, a2
ldr ip,
w26
/* ip = W2 | (W6 << 16) */
ldr ip,
=W26
/* ip = W2 | (W6 << 16) */
mov a2, #16384
sub a2, a2, #1 /* a2 = W4 */
smulbb a2, a2, a3 /* a2 = W4*row[4] */
...
...
@@ -178,7 +172,7 @@ endfunc
sub v4, v2, a3
sub v6, v2, a3
add fp, v2, a3
ldr ip, w26
ldr ip, =W26
ldr a4, [a1, #(16*2)]
add v2, v2, a3
...
...
@@ -211,9 +205,9 @@ endfunc
stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp}
ldr ip, w13
ldr ip, =W13
ldr a4, [a1, #(16*1)]
ldr lr, w57
ldr lr, =W57
smulbb v1, ip, a4
smultb v3, ip, a4
smulbb v5, lr, a4
...
...
libavcodec/arm/simple_idct_armv6.S
View file @
faa78822
...
...
@@ -40,15 +40,6 @@
#define W46 (W4 | (W6 << 16))
#define W57 (W5 | (W7 << 16))
.text
.align
w13: .long W13
w26: .long W26
w42: .long W42
w42n: .long W42n
w46: .long W46
w57: .long W57
/*
Compute partial IDCT of single row.
shift = left-shift amount
...
...
@@ -60,12 +51,12 @@ w57: .long W57
Output in registers r4--r11
*/
.macro idct_row shift
ldr lr,
w46
/* lr = W4 | (W6 << 16) */
ldr lr,
=W46
/* lr = W4 | (W6 << 16) */
mov r1, #(1<<(\shift-1))
smlad r4, r2, ip, r1
smlsd r7, r2, ip, r1
ldr ip,
w13
/* ip = W1 | (W3 << 16) */
ldr r10,
w57
/* r10 = W5 | (W7 << 16) */
ldr ip,
=W13
/* ip = W1 | (W3 << 16) */
ldr r10,
=W57
/* r10 = W5 | (W7 << 16) */
smlad r5, r2, lr, r1
smlsd r6, r2, lr, r1
...
...
@@ -78,11 +69,11 @@ w57: .long W57
smlad r8, lr, r10,r8 /* B0 += W5*row[5] + W7*row[7] */
smusdx r10,r3, r1 /* r10 = B2 = W5*row[1] - W1*row[3] */
ldr r3,
w42n
/* r3 = -W4 | (-W2 << 16) */
ldr r3,
=W42n
/* r3 = -W4 | (-W2 << 16) */
smlad r10,lr, r2, r10 /* B2 += W7*row[5] + W3*row[7] */
ldr r2, [r0, #4] /* r2 = row[6,4] */
smlsdx r11,lr, ip, r11 /* B3 += W3*row[5] - W1*row[7] */
ldr ip,
w46
/* ip = W4 | (W6 << 16) */
ldr ip,
=W46
/* ip = W4 | (W6 << 16) */
smlad r9, lr, r1, r9 /* B1 -= W1*row[5] + W5*row[7] */
smlad r5, r2, r3, r5 /* A1 += -W4*row[4] - W2*row[6] */
...
...
@@ -101,12 +92,12 @@ w57: .long W57
Output in registers r4--r11
*/
.macro idct_row4 shift
ldr lr,
w46
/* lr = W4 | (W6 << 16) */
ldr r10,
w57
/* r10 = W5 | (W7 << 16) */
ldr lr,
=W46
/* lr = W4 | (W6 << 16) */
ldr r10,
=W57
/* r10 = W5 | (W7 << 16) */
mov r1, #(1<<(\shift-1))
smlad r4, r2, ip, r1
smlsd r7, r2, ip, r1
ldr ip,
w13
/* ip = W1 | (W3 << 16) */
ldr ip,
=W13
/* ip = W1 | (W3 << 16) */
smlad r5, r2, lr, r1
smlsd r6, r2, lr, r1
smusdx r11,r3, r10 /* r11 = B3 = W7*row[1] - W5*row[3] */
...
...
@@ -205,7 +196,7 @@ function idct_row_armv6
cmpeq lr, r2, lsr #16
beq 1f
push {r1}
ldr ip,
w42
/* ip = W4 | (W2 << 16) */
ldr ip,
=W42
/* ip = W4 | (W2 << 16) */
cmp lr, #0
beq 2f
...
...
@@ -249,7 +240,7 @@ function idct_col_armv6
push {r1, lr}
ldr r2, [r0] /* r2 = row[2,0] */
ldr ip,
w42
/* ip = W4 | (W2 << 16) */
ldr ip,
=W42
/* ip = W4 | (W2 << 16) */
ldr r3, [r0, #8] /* r3 = row[3,1] */
idct_row COL_SHIFT
pop {r1}
...
...
@@ -277,7 +268,7 @@ function idct_col_put_armv6
push {r1, r2, lr}
ldr r2, [r0] /* r2 = row[2,0] */
ldr ip,
w42
/* ip = W4 | (W2 << 16) */
ldr ip,
=W42
/* ip = W4 | (W2 << 16) */
ldr r3, [r0, #8] /* r3 = row[3,1] */
idct_row COL_SHIFT
pop {r1, r2}
...
...
@@ -307,7 +298,7 @@ function idct_col_add_armv6
push {r1, r2, lr}
ldr r2, [r0] /* r2 = row[2,0] */
ldr ip,
w42
/* ip = W4 | (W2 << 16) */
ldr ip,
=W42
/* ip = W4 | (W2 << 16) */
ldr r3, [r0, #8] /* r3 = row[3,1] */
idct_row COL_SHIFT
pop {r1, r2}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment