Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
de99545f
Commit
de99545f
authored
Mar 10, 2013
by
Ronald S. Bultje
Committed by
Michael Niedermayer
Mar 13, 2013
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move arm half-pel assembly from dsputil to hpeldsp.
parent
89f16ded
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
1539 additions
and
1350 deletions
+1539
-1350
Makefile
libavcodec/arm/Makefile
+9
-0
dsputil_arm.S
libavcodec/arm/dsputil_arm.S
+0
-584
dsputil_armv6.S
libavcodec/arm/dsputil_armv6.S
+0
-238
dsputil_init_arm.c
libavcodec/arm/dsputil_init_arm.c
+0
-39
dsputil_init_armv6.c
libavcodec/arm/dsputil_init_armv6.c
+0
-41
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+0
-59
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+0
-388
hpeldsp_arm.S
libavcodec/arm/hpeldsp_arm.S
+611
-0
hpeldsp_arm.h
libavcodec/arm/hpeldsp_arm.h
+29
-0
hpeldsp_armv6.S
libavcodec/arm/hpeldsp_armv6.S
+259
-0
hpeldsp_init_arm.c
libavcodec/arm/hpeldsp_init_arm.c
+68
-0
hpeldsp_init_armv6.c
libavcodec/arm/hpeldsp_init_armv6.c
+66
-0
hpeldsp_init_neon.c
libavcodec/arm/hpeldsp_init_neon.c
+86
-0
hpeldsp_neon.S
libavcodec/arm/hpeldsp_neon.S
+410
-0
hpeldsp.c
libavcodec/hpeldsp.c
+1
-1
No files found.
libavcodec/arm/Makefile
View file @
de99545f
...
...
@@ -31,6 +31,9 @@ OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
OBJS-$(CONFIG_H264PRED)
+=
arm/h264pred_init_arm.o
OBJS-$(CONFIG_H264QPEL)
+=
arm/h264qpel_init_arm.o
OBJS-$(CONFIG_HPELDSP)
+=
arm/hpeldsp_arm.o
\
arm/hpeldsp_init_arm.o
OBJS-$(CONFIG_RV30_DECODER)
+=
arm/rv34dsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv34dsp_init_arm.o
\
arm/rv40dsp_init_arm.o
\
...
...
@@ -58,6 +61,9 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \
arm/dsputil_armv6.o
\
arm/simple_idct_armv6.o
\
ARMV6-OBJS-$(CONFIG_HPELDSP)
+=
arm/hpeldsp_armv6.o
\
arm/hpeldsp_init_armv6.o
VFP-OBJS-$(HAVE_ARMV6)
+=
arm/fmtconvert_vfp.o
NEON-OBJS-$(CONFIG_FFT)
+=
arm/fft_neon.o
\
...
...
@@ -76,6 +82,9 @@ NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o \
NEON-OBJS-$(CONFIG_H264QPEL)
+=
arm/h264qpel_neon.o
\
NEON-OBJS-$(CONFIG_HPELDSP)
+=
arm/hpeldsp_neon.o
\
arm/hpeldsp_init_neon.o
NEON-OBJS-$(CONFIG_AC3DSP)
+=
arm/ac3dsp_neon.o
NEON-OBJS-$(CONFIG_AAC_DECODER)
+=
arm/sbrdsp_neon.o
\
...
...
libavcodec/arm/dsputil_arm.S
View file @
de99545f
...
...
@@ -26,590 +26,6 @@
#define pld @
#endif
@ ----------------------------------------------------------------
@ Helper macros for the ARM half-pel routines below.  The ALIGN_*
@ macros emulate an unaligned load from word-aligned data: each
@ output word is built by shifting a source word right by
@ (\shift * 8) bits and OR-ing in the low bytes of the next word
@ (little-endian byte order).
@ ----------------------------------------------------------------

@ Build 4 destination words from 5 consecutive source words.
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
mov \Rd0, \Rn0, lsr #(\shift * 8)
mov \Rd1, \Rn1, lsr #(\shift * 8)
mov \Rd2, \Rn2, lsr #(\shift * 8)
mov \Rd3, \Rn3, lsr #(\shift * 8)
orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm

@ Build 2 words from 3 consecutive source words, in place:
@ \R0 and \R1 are overwritten, \R2 only supplies trailing bytes.
.macro ALIGN_DWORD shift, R0, R1, R2
mov \R0, \R0, lsr #(\shift * 8)
orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
mov \R1, \R1, lsr #(\shift * 8)
orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm

@ Non-destructive variant of ALIGN_DWORD: 2 destination words from
@ 3 source words, sources left intact (destinations must not alias
@ a source that is still to be read).
.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm

@ Byte-wise average of 8 bytes (two word pairs), rounding up on ties.
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@ Rmask = 0xFEFEFEFE
@ Rn = destroyed
eor \Rd0, \Rn0, \Rm0
eor \Rd1, \Rn1, \Rm1
orr \Rn0, \Rn0, \Rm0
orr \Rn1, \Rn1, \Rm1
and \Rd0, \Rd0, \Rmask
and \Rd1, \Rd1, \Rmask
sub \Rd0, \Rn0, \Rd0, lsr #1
sub \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Byte-wise average of 8 bytes, truncating (no rounding).
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@ (note: the code adds; an earlier comment wrongly said subtract)
@ Rmask = 0xFEFEFEFE
@ Rn = destroyed
eor \Rd0, \Rn0, \Rm0
eor \Rd1, \Rn1, \Rm1
and \Rn0, \Rn0, \Rm0
and \Rn1, \Rn1, \Rm1
and \Rd0, \Rd0, \Rmask
and \Rd1, \Rd1, \Rmask
add \Rd0, \Rn0, \Rd0, lsr #1
add \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Dispatch on the low two bits of the source pointer \reg:
@ word-align \reg, then branch forward to local label 1: (already
@ aligned), 2:, 3: or 4: (misaligned by 1, 2 or 3 bytes).
@ \tmp is clobbered; flags are clobbered.
.macro JMP_ALIGN tmp, reg
ands \tmp, \reg, #3
bic \reg, \reg, #3
beq 1f
subs \tmp, \tmp, #1
beq 2f
subs \tmp, \tmp, #1
beq 3f
b 4f
.endm
@ ----------------------------------------------------------------
.align 5
@ Copy a 16 x h pixel block.  One output row per iteration; cases
@ 2:-4: re-align an unaligned source on the fly with ALIGN_QWORD_D.
@ r0 = block (dst, word aligned), r1 = pixels (src),
@ r2 = line_size, r3 = h (loop counter).
function ff_put_pixels16_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11, lr}
JMP_ALIGN r5, r1 @ r1 is word-aligned after this; branch on old low bits
1: @ source word-aligned: straight 16-byte copy per row
ldm r1, {r4-r7}
add r1, r1, r2
stm r0, {r4-r7}
pld [r1]
subs r3, r3, #1
add r0, r0, r2
bne 1b
pop {r4-r11, pc}
.align 5
2: @ source misaligned by 1 byte: load 5 words, extract 4
ldm r1, {r4-r8}
add r1, r1, r2
ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
pld [r1]
subs r3, r3, #1
stm r0, {r9-r12}
add r0, r0, r2
bne 2b
pop {r4-r11, pc}
.align 5
3: @ misaligned by 2 bytes
ldm r1, {r4-r8}
add r1, r1, r2
ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
pld [r1]
subs r3, r3, #1
stm r0, {r9-r12}
add r0, r0, r2
bne 3b
pop {r4-r11, pc}
.align 5
4: @ misaligned by 3 bytes
ldm r1, {r4-r8}
add r1, r1, r2
ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
pld [r1]
subs r3, r3, #1
stm r0, {r9-r12}
add r0, r0, r2
bne 4b
pop {r4-r11,pc}
endfunc
@ ----------------------------------------------------------------
.align 5
@ Copy an 8 x h pixel block; same alignment-dispatch scheme as the
@ 16-wide version but with 2-word rows (ALIGN_DWORD for re-align).
@ r0 = block (dst), r1 = pixels (src), r2 = line_size, r3 = h.
function ff_put_pixels8_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r5,lr}
JMP_ALIGN r5, r1
1: @ aligned fast path
ldm r1, {r4-r5}
add r1, r1, r2
subs r3, r3, #1
pld [r1]
stm r0, {r4-r5}
add r0, r0, r2
bne 1b
pop {r4-r5,pc}
.align 5
2: @ misaligned by 1 byte
ldm r1, {r4-r5, r12}
add r1, r1, r2
ALIGN_DWORD 1, r4, r5, r12
pld [r1]
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 2b
pop {r4-r5,pc}
.align 5
3: @ misaligned by 2 bytes
ldm r1, {r4-r5, r12}
add r1, r1, r2
ALIGN_DWORD 2, r4, r5, r12
pld [r1]
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 3b
pop {r4-r5,pc}
.align 5
4: @ misaligned by 3 bytes
ldm r1, {r4-r5, r12}
add r1, r1, r2
ALIGN_DWORD 3, r4, r5, r12
pld [r1]
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 4b
pop {r4-r5,pc}
endfunc
@ ----------------------------------------------------------------
.align 5
@ Horizontal half-pel: dst[x] = rounded_avg(src[x], src[x+1]).
@ Each alignment case extracts the row at offset \align and at
@ \align+1 and averages them with RND_AVG32 (r12 = 0xFEFEFEFE mask).
function ff_put_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r10,lr}
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1: @ aligned: r4-r5 = row at 0, r6-r7 = row shifted 1 byte
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
pld [r1]
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 1b
pop {r4-r10,pc}
.align 5
2: @ misaligned by 1: average shift-1 and shift-2 extractions
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
pld [r1]
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 2b
pop {r4-r10,pc}
.align 5
3: @ misaligned by 2: average shift-2 and shift-3 extractions
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
pld [r1]
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 3b
pop {r4-r10,pc}
.align 5
4: @ misaligned by 3: shift-4 is just the next words (r5, r10)
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
pld [r1]
RND_AVG32 r8, r9, r6, r7, r5, r10, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 4b
pop {r4-r10,pc}
endfunc
.align 5
@ Horizontal half-pel, truncating average (no rounding): identical
@ structure to ff_put_pixels8_x2_arm but uses NO_RND_AVG32.
function ff_put_no_rnd_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r10,lr}
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1: @ aligned
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
pld [r1]
NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 1b
pop {r4-r10,pc}
.align 5
2: @ misaligned by 1
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
pld [r1]
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 2b
pop {r4-r10,pc}
.align 5
3: @ misaligned by 2
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
pld [r1]
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 3b
pop {r4-r10,pc}
.align 5
4: @ misaligned by 3
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
pld [r1]
NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 4b
pop {r4-r10,pc}
endfunc
@ ----------------------------------------------------------------
.align 5
@ Vertical half-pel: dst row n = rounded_avg(src row n, src row n+1).
@ Two output rows per loop iteration (r3 = h/2 via lsr #1), reusing
@ the last-loaded source row so each row is read only once.
function ff_put_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr}
mov r3, r3, lsr #1 @ process two rows per iteration; h assumed even
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1: @ aligned source
ldm r1, {r4-r5}
add r1, r1, r2
6: ldm r1, {r6-r7}
add r1, r1, r2
pld [r1]
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
ldm r1, {r4-r5}
add r1, r1, r2
stm r0, {r8-r9}
add r0, r0, r2
pld [r1]
RND_AVG32 r8, r9, r6, r7, r4, r5, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
2: @ source misaligned by 1 byte
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r7, r8, r9
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
subs r3, r3, #1
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
3: @ misaligned by 2 bytes
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r7, r8, r9
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
subs r3, r3, #1
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
4: @ misaligned by 3 bytes
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r7, r8, r9
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
subs r3, r3, #1
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
endfunc
.align 5
@ Vertical half-pel with truncating average: identical structure to
@ ff_put_pixels8_y2_arm but uses NO_RND_AVG32 (two rows/iteration,
@ last-loaded row reused across iterations).
function ff_put_no_rnd_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr}
mov r3, r3, lsr #1 @ two rows per iteration; h assumed even
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1: @ aligned source
ldm r1, {r4-r5}
add r1, r1, r2
6: ldm r1, {r6-r7}
add r1, r1, r2
pld [r1]
NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
ldm r1, {r4-r5}
add r1, r1, r2
stm r0, {r8-r9}
add r0, r0, r2
pld [r1]
NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
2: @ misaligned by 1 byte
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r7, r8, r9
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
subs r3, r3, #1
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
3: @ misaligned by 2 bytes
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r7, r8, r9
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
subs r3, r3, #1
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
4: @ misaligned by 3 bytes
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r7, r8, r9
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
subs r3, r3, #1
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
endfunc
.ltorg
@ ----------------------------------------------------------------
@ One source row's worth of the 2D (xy) half-pel sum.  Each output
@ pixel is (a + b + c + d [+ bias]) >> 2 over a 2x2 neighbourhood;
@ to avoid overflow the low 2 bits (masked with 0x03030303) and the
@ high 6 bits (pre-shifted >> 2) are accumulated separately.
@ Leaves: r8/r9 = low-bit sums, r10/r11 = high-bit sums for this
@ row pair; decrements the row counter r3 (flags set for the loop).
@ \rnd selects the bias constant via "and r14, r14, r14, \rnd #1":
@ lsl -> 0x02020202 (rounding), lsr -> 0x01010101 (no rounding);
@ the tst r3, #1 parity test applies it on alternate rows only.
.macro RND_XY2_IT align, rnd
@ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
@ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
ldm r1, {r6-r8}
.elseif \align == 3
ldm r1, {r5-r7}
.else
ldm r1, {r8-r10}
.endif
add r1, r1, r2
pld [r1]
@ r4/r5 = row at x, r6/r7 = row at x+1 (alignment-dependent shifts)
.if \align == 0
ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
@ note r5 is both destination and source; ALIGN_DWORD_D reads
@ \Rsrc0 before writing \Rdst1, so the ordering is safe
ALIGN_DWORD_D 3, r4, r5, r5, r6, r7
.endif
ldr r14, =0x03030303
tst r3, #1
and r8, r4, r14
and r9, r5, r14
and r10, r6, r14
and r11, r7, r14
it eq
andeq r14, r14, r14, \rnd #1 @ bias constant (see header comment)
add r8, r8, r10
add r9, r9, r11
ldr r12, =0xfcfcfcfc >> 2
itt eq
addeq r8, r8, r14
addeq r9, r9, r14
and r4, r12, r4, lsr #2
and r5, r12, r5, lsr #2
and r6, r12, r6, lsr #2
and r7, r12, r7, lsr #2
add r10, r4, r6
add r11, r5, r7
subs r3, r3, #1
.endm

@ Full body of one xy2 alignment case: prime with one row, then loop
@ combining the previous row's partial sums (kept on the stack) with
@ the next row's.  Output = high sums + ((low sums) >> 2 & 0x0F...).
@ Loops while r3 >= 0 (bge) and returns via pop into pc.
.macro RND_XY2_EXPAND align, rnd
RND_XY2_IT \align, \rnd
6: push {r8-r11} @ save this row's partials for the next iteration
RND_XY2_IT \align, \rnd
pop {r4-r7} @ previous row's partials
add r4, r4, r8
add r5, r5, r9
ldr r14, =0x0f0f0f0f
add r6, r6, r10
add r7, r7, r11
and r4, r14, r4, lsr #2
and r5, r14, r5, lsr #2
add r4, r4, r6
add r5, r5, r7
stm r0, {r4-r5}
add r0, r0, r2
bge 6b
pop {r4-r11,pc}
.endm
.align 5
@ 2D (x+y) half-pel with rounding: dst = (4 neighbours + 2) >> 2.
@ Body is entirely generated by RND_XY2_EXPAND, one instantiation
@ per source alignment (JMP_ALIGN dispatch); "lsl" selects the
@ rounding bias inside RND_XY2_IT.
function ff_put_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr} @ R14 is also called LR
JMP_ALIGN r5, r1
1: RND_XY2_EXPAND 0, lsl
.align 5
2: RND_XY2_EXPAND 1, lsl
.align 5
3: RND_XY2_EXPAND 2, lsl
.align 5
4: RND_XY2_EXPAND 3, lsl
endfunc
.align 5
@ 2D half-pel, truncating variant: same as ff_put_pixels8_xy2_arm
@ but "lsr" selects the smaller bias constant in RND_XY2_IT.
function ff_put_no_rnd_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr}
JMP_ALIGN r5, r1
1: RND_XY2_EXPAND 0, lsr
.align 5
2: RND_XY2_EXPAND 1, lsr
.align 5
3: RND_XY2_EXPAND 2, lsr
.align 5
4: RND_XY2_EXPAND 3, lsr
endfunc
.align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
function ff_add_pixels_clamped_arm, export=1
...
...
libavcodec/arm/dsputil_armv6.S
View file @
de99545f
...
...
@@ -20,244 +20,6 @@
#include "libavutil/arm/asm.S"
@ Synthesize a 16-pixel-wide function from the corresponding 8-wide
@ one: call it on the left half, then tail-call (b) on the right
@ half at offset +8.  \subp is the sub-pel suffix ("" for plain).
.macro call_2x_pixels type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
push {r0-r3, lr}
bl ff_\type\()_pixels8\subp\()_armv6
pop {r0-r3, lr}
add r0, r0, #8
add r1, r1, #8
b ff_\type\()_pixels8\subp\()_armv6
endfunc
.endm
@ Instantiate the 16-wide wrappers actually used below.
call_2x_pixels avg
call_2x_pixels put, _x2
call_2x_pixels put, _y2
call_2x_pixels put, _x2_no_rnd
call_2x_pixels put, _y2_no_rnd
@ Copy a 16 x h block, two rows per iteration (r3 = h must be even).
@ ldr_post / strd_post are project macros (defined elsewhere) —
@ presumably load/store with post-increment by r2; verify in asm.S.
function ff_put_pixels16_armv6, export=1
push {r4-r11}
1:
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
ldr_post r4, r1, r2 @ first word of row, then r1 += line_size
strd r6, r7, [r0, #8]
ldr r9, [r1, #4]
strd_post r4, r5, r0, r2
ldr r10, [r1, #8]
ldr r11, [r1, #12]
ldr_post r8, r1, r2
strd r10, r11, [r0, #8]
subs r3, r3, #2 @ two rows consumed
strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11}
bx lr
endfunc
@ Copy an 8 x h block, two rows per iteration (h must be even).
function ff_put_pixels8_armv6, export=1
push {r4-r7}
1:
ldr r5, [r1, #4]
ldr_post r4, r1, r2 @ row 0 words, r1 advances by line_size
ldr r7, [r1, #4]
strd_post r4, r5, r0, r2
ldr_post r6, r1, r2 @ row 1
subs r3, r3, #2
strd_post r6, r7, r0, r2
bne 1b
pop {r4-r7}
bx lr
endfunc
@ Horizontal half-pel with rounding, ARMv6 SIMD: rounded byte
@ average = uhadd8(a, b) + ((a ^ b) & 0x01010101), since uhadd8
@ truncates and the masked XOR restores the lost rounding bit.
@ Two rows per iteration; r12 = 0x01010101.
function ff_put_pixels8_x2_armv6, export=1
push {r4-r11, lr}
mov r12, #1
orr r12, r12, r12, lsl #8
orr r12, r12, r12, lsl #16 @ r12 = 0x01010101
1:
ldr r4, [r1]
subs r3, r3, #2
ldr r5, [r1, #4]
ldr r7, [r1, #5] @ unaligned load: row shifted one pixel, high word
lsr r6, r4, #8
ldr_pre r8, r1, r2
orr r6, r6, r5, lsl #24 @ r6 = row shifted one pixel, low word
ldr r9, [r1, #4]
ldr r11, [r1, #5]
lsr r10, r8, #8
add r1, r1, r2
orr r10, r10, r9, lsl #24
eor r14, r4, r6
uhadd8 r4, r4, r6 @ truncated average
eor r6, r5, r7
uhadd8 r5, r5, r7
and r14, r14, r12 @ per-byte rounding bits
and r6, r6, r12
uadd8 r4, r4, r14 @ add rounding -> rounded average
eor r14, r8, r10
uadd8 r5, r5, r6
eor r6, r9, r11
uhadd8 r8, r8, r10
and r14, r14, r12
uhadd8 r9, r9, r11
and r6, r6, r12
uadd8 r8, r8, r14
strd_post r4, r5, r0, r2
uadd8 r9, r9, r6
strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11, pc}
endfunc
@ Vertical half-pel with rounding, ARMv6 SIMD: same
@ uhadd8 + ((a^b) & 0x01010101) rounded-average trick as the x2
@ version, averaging consecutive rows.  Two output rows per
@ iteration; the next source row is preloaded into r4-r7 so each
@ row is read once.  r12 = 0x01010101.
function ff_put_pixels8_y2_armv6, export=1
push {r4-r11}
mov r12, #1
orr r12, r12, r12, lsl #8
orr r12, r12, r12, lsl #16 @ r12 = 0x01010101
ldr r4, [r1] @ prime: r4/r5 = row 0, r6/r7 = row 1
ldr r5, [r1, #4]
ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r8, r4, r6
eor r10, r4, r6
uhadd8 r9, r5, r7
eor r11, r5, r7
and r10, r10, r12
ldr_pre r4, r1, r2 @ fetch next row while averaging
uadd8 r8, r8, r10
and r11, r11, r12
uadd8 r9, r9, r11
ldr r5, [r1, #4]
uhadd8 r10, r4, r6
eor r6, r4, r6
uhadd8 r11, r5, r7
and r6, r6, r12
eor r7, r5, r7
uadd8 r10, r10, r6
and r7, r7, r12
ldr_pre r6, r1, r2
uadd8 r11, r11, r7
strd_post r8, r9, r0, r2
ldr r7, [r1, #4]
strd_post r10, r11, r0, r2
bne 1b
pop {r4-r11}
bx lr
endfunc
@ Horizontal half-pel, truncating: plain uhadd8 (halving add rounds
@ toward zero), so no rounding-bit correction is needed.
@ Two rows per iteration.
function ff_put_pixels8_x2_no_rnd_armv6, export=1
push {r4-r9, lr}
1:
subs r3, r3, #2
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r7, [r1, #5] @ row shifted one pixel, high word (unaligned)
ldr_pre r8, r1, r2
ldr r9, [r1, #4]
ldr r14, [r1, #5]
add r1, r1, r2
lsr r6, r4, #8
orr r6, r6, r5, lsl #24 @ row shifted one pixel, low word
lsr r12, r8, #8
orr r12, r12, r9, lsl #24
uhadd8 r4, r4, r6 @ truncated byte average
uhadd8 r5, r5, r7
uhadd8 r8, r8, r12
uhadd8 r9, r9, r14
stm r0, {r4,r5}
add r0, r0, r2
stm r0, {r8,r9}
add r0, r0, r2
bne 1b
pop {r4-r9, pc}
endfunc
@ Vertical half-pel, truncating: uhadd8 of consecutive rows, two
@ output rows per iteration, next source row preloaded each pass.
function ff_put_pixels8_y2_no_rnd_armv6, export=1
push {r4-r9, lr}
ldr r4, [r1] @ prime: r4/r5 = row 0, r6/r7 = row 1
ldr r5, [r1, #4]
ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r8, r4, r6
ldr_pre r4, r1, r2
uhadd8 r9, r5, r7
ldr r5, [r1, #4]
uhadd8 r12, r4, r6
ldr_pre r6, r1, r2
uhadd8 r14, r5, r7
ldr r7, [r1, #4]
stm r0, {r8,r9}
add r0, r0, r2
stm r0, {r12,r14}
add r0, r0, r2
bne 1b
pop {r4-r9, pc}
endfunc
@ Rounded average of an 8 x h source block into the destination:
@ dst = rounded_avg(dst, src), using the uhadd8 + ((a^b) & 0x01..)
@ trick (lr = 0x01010101).  Software-pipelined, two rows per
@ iteration: the second row pair's average is left half-finished at
@ the bottom of the loop and completed either by the next iteration
@ or by the tail at 2: when the counter hits zero.
function ff_avg_pixels8_armv6, export=1
pld [r1, r2]
push {r4-r10, lr}
mov lr, #1
orr lr, lr, lr, lsl #8
orr lr, lr, lr, lsl #16 @ lr = 0x01010101 (rounding-bit mask)
ldrd r4, r5, [r0] @ prime: current dest row
ldr r10, [r1, #4]
ldr_post r9, r1, r2 @ current source row
subs r3, r3, #2
1:
pld [r1, r2]
eor r8, r4, r9
uhadd8 r4, r4, r9 @ truncated avg, row 0
eor r12, r5, r10
ldrd_reg r6, r7, r0, r2 @ next dest row (project macro)
uhadd8 r5, r5, r10
and r8, r8, lr
ldr r10, [r1, #4]
and r12, r12, lr
uadd8 r4, r4, r8 @ + rounding bits -> rounded avg
ldr_post r9, r1, r2
eor r8, r6, r9
uadd8 r5, r5, r12
pld [r1, r2, lsl #1]
eor r12, r7, r10
uhadd8 r6, r6, r9 @ row 1 average started...
strd_post r4, r5, r0, r2
uhadd8 r7, r7, r10
beq 2f @ ...finished in the tail if this was the last pair
and r8, r8, lr
ldrd_reg r4, r5, r0, r2
uadd8 r6, r6, r8
ldr r10, [r1, #4]
and r12, r12, lr
subs r3, r3, #2
uadd8 r7, r7, r12
ldr_post r9, r1, r2
strd_post r6, r7, r0, r2
b 1b
2: @ finish the pipelined final row
and r8, r8, lr
and r12, r12, lr
uadd8 r6, r6, r8
uadd8 r7, r7, r12
strd_post r6, r7, r0, r2
pop {r4-r10, pc}
endfunc
function ff_add_pixels_clamped_armv6, export=1
push {r4-r8,lr}
mov r3, #8
...
...
libavcodec/arm/dsputil_init_arm.c
View file @
de99545f
...
...
@@ -30,24 +30,6 @@ void ff_simple_idct_arm(int16_t *data);
/* Function-pointer hooks saved from the generic DSPContext so the
 * IDCT wrappers below can reuse them. */
static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels,
                                     int line_size);
static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels,
                                     int line_size);

/* 8-pixel-wide half-pel primitives implemented in dsputil_arm.S. */
void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
                                  ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
                                  ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h);
void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);

/* 16-wide versions synthesized from two 8-wide calls. */
CALL_2X_PIXELS(ff_put_pixels16_x2_arm,         ff_put_pixels8_x2_arm,         8)
CALL_2X_PIXELS(ff_put_pixels16_y2_arm,         ff_put_pixels8_y2_arm,         8)
CALL_2X_PIXELS(ff_put_pixels16_xy2_arm,        ff_put_pixels8_xy2_arm,        8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm,  ff_put_no_rnd_pixels8_x2_arm,  8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm,  ff_put_no_rnd_pixels8_y2_arm,  8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm, 8)

void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
                               int line_size);
...
...
@@ -76,7 +58,6 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
av_cold
void
ff_dsputil_init_arm
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
const
int
high_bit_depth
=
avctx
->
bits_per_raw_sample
>
8
;
int
cpu_flags
=
av_get_cpu_flags
();
ff_put_pixels_clamped
=
c
->
put_pixels_clamped
;
...
...
@@ -99,26 +80,6 @@ av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
c
->
add_pixels_clamped
=
ff_add_pixels_clamped_arm
;
if
(
!
high_bit_depth
)
{
c
->
put_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_arm
;
c
->
put_pixels_tab
[
0
][
1
]
=
ff_put_pixels16_x2_arm
;
c
->
put_pixels_tab
[
0
][
2
]
=
ff_put_pixels16_y2_arm
;
c
->
put_pixels_tab
[
0
][
3
]
=
ff_put_pixels16_xy2_arm
;
c
->
put_pixels_tab
[
1
][
0
]
=
ff_put_pixels8_arm
;
c
->
put_pixels_tab
[
1
][
1
]
=
ff_put_pixels8_x2_arm
;
c
->
put_pixels_tab
[
1
][
2
]
=
ff_put_pixels8_y2_arm
;
c
->
put_pixels_tab
[
1
][
3
]
=
ff_put_pixels8_xy2_arm
;
c
->
put_no_rnd_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_arm
;
c
->
put_no_rnd_pixels_tab
[
0
][
1
]
=
ff_put_no_rnd_pixels16_x2_arm
;
c
->
put_no_rnd_pixels_tab
[
0
][
2
]
=
ff_put_no_rnd_pixels16_y2_arm
;
c
->
put_no_rnd_pixels_tab
[
0
][
3
]
=
ff_put_no_rnd_pixels16_xy2_arm
;
c
->
put_no_rnd_pixels_tab
[
1
][
0
]
=
ff_put_pixels8_arm
;
c
->
put_no_rnd_pixels_tab
[
1
][
1
]
=
ff_put_no_rnd_pixels8_x2_arm
;
c
->
put_no_rnd_pixels_tab
[
1
][
2
]
=
ff_put_no_rnd_pixels8_y2_arm
;
c
->
put_no_rnd_pixels_tab
[
1
][
3
]
=
ff_put_no_rnd_pixels8_xy2_arm
;
}
if
(
have_armv5te
(
cpu_flags
))
ff_dsputil_init_armv5te
(
c
,
avctx
);
if
(
have_armv6
(
cpu_flags
))
ff_dsputil_init_armv6
(
c
,
avctx
);
if
(
have_neon
(
cpu_flags
))
ff_dsputil_init_neon
(
c
,
avctx
);
...
...
libavcodec/arm/dsputil_init_armv6.c
View file @
de99545f
...
...
@@ -27,24 +27,6 @@ void ff_simple_idct_armv6(int16_t *data);
/* ARMv6 IDCT entry points (simple_idct_armv6.S). */
void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);

/* ARMv6 pixel copy/average primitives (dsputil_armv6.S); all take
 * (dst, src, line_size, h). */
void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_add_pixels_clamped_armv6(const int16_t *block,
                                 uint8_t *restrict pixels,
                                 int line_size);
...
...
@@ -82,29 +64,6 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx)
c
->
idct_permutation_type
=
FF_LIBMPEG2_IDCT_PERM
;
}
if
(
!
high_bit_depth
)
{
c
->
put_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_armv6
;
c
->
put_pixels_tab
[
0
][
1
]
=
ff_put_pixels16_x2_armv6
;
c
->
put_pixels_tab
[
0
][
2
]
=
ff_put_pixels16_y2_armv6
;
/* c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_armv6; */
c
->
put_pixels_tab
[
1
][
0
]
=
ff_put_pixels8_armv6
;
c
->
put_pixels_tab
[
1
][
1
]
=
ff_put_pixels8_x2_armv6
;
c
->
put_pixels_tab
[
1
][
2
]
=
ff_put_pixels8_y2_armv6
;
/* c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_armv6; */
c
->
put_no_rnd_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_armv6
;
c
->
put_no_rnd_pixels_tab
[
0
][
1
]
=
ff_put_pixels16_x2_no_rnd_armv6
;
c
->
put_no_rnd_pixels_tab
[
0
][
2
]
=
ff_put_pixels16_y2_no_rnd_armv6
;
/* c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_armv6; */
c
->
put_no_rnd_pixels_tab
[
1
][
0
]
=
ff_put_pixels8_armv6
;
c
->
put_no_rnd_pixels_tab
[
1
][
1
]
=
ff_put_pixels8_x2_no_rnd_armv6
;
c
->
put_no_rnd_pixels_tab
[
1
][
2
]
=
ff_put_pixels8_y2_no_rnd_armv6
;
/* c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_armv6; */
c
->
avg_pixels_tab
[
0
][
0
]
=
ff_avg_pixels16_armv6
;
c
->
avg_pixels_tab
[
1
][
0
]
=
ff_avg_pixels8_armv6
;
}
if
(
!
high_bit_depth
)
c
->
get_pixels
=
ff_get_pixels_armv6
;
c
->
add_pixels_clamped
=
ff_add_pixels_clamped_armv6
;
...
...
libavcodec/arm/dsputil_init_neon.c
View file @
de99545f
...
...
@@ -32,33 +32,6 @@ void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
/* NEON entry points (dsputil_neon.S). */
void ff_clear_block_neon(int16_t *block);
void ff_clear_blocks_neon(int16_t *blocks);

/* Pixel copy / half-pel primitives; all take (dst, src, line_size, h). */
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

/* IDCT output helpers. */
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
...
...
@@ -92,38 +65,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
if
(
!
high_bit_depth
)
{
c
->
clear_block
=
ff_clear_block_neon
;
c
->
clear_blocks
=
ff_clear_blocks_neon
;
c
->
put_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_neon
;
c
->
put_pixels_tab
[
0
][
1
]
=
ff_put_pixels16_x2_neon
;
c
->
put_pixels_tab
[
0
][
2
]
=
ff_put_pixels16_y2_neon
;
c
->
put_pixels_tab
[
0
][
3
]
=
ff_put_pixels16_xy2_neon
;
c
->
put_pixels_tab
[
1
][
0
]
=
ff_put_pixels8_neon
;
c
->
put_pixels_tab
[
1
][
1
]
=
ff_put_pixels8_x2_neon
;
c
->
put_pixels_tab
[
1
][
2
]
=
ff_put_pixels8_y2_neon
;
c
->
put_pixels_tab
[
1
][
3
]
=
ff_put_pixels8_xy2_neon
;
c
->
put_no_rnd_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_neon
;
c
->
put_no_rnd_pixels_tab
[
0
][
1
]
=
ff_put_pixels16_x2_no_rnd_neon
;
c
->
put_no_rnd_pixels_tab
[
0
][
2
]
=
ff_put_pixels16_y2_no_rnd_neon
;
c
->
put_no_rnd_pixels_tab
[
0
][
3
]
=
ff_put_pixels16_xy2_no_rnd_neon
;
c
->
put_no_rnd_pixels_tab
[
1
][
0
]
=
ff_put_pixels8_neon
;
c
->
put_no_rnd_pixels_tab
[
1
][
1
]
=
ff_put_pixels8_x2_no_rnd_neon
;
c
->
put_no_rnd_pixels_tab
[
1
][
2
]
=
ff_put_pixels8_y2_no_rnd_neon
;
c
->
put_no_rnd_pixels_tab
[
1
][
3
]
=
ff_put_pixels8_xy2_no_rnd_neon
;
c
->
avg_pixels_tab
[
0
][
0
]
=
ff_avg_pixels16_neon
;
c
->
avg_pixels_tab
[
0
][
1
]
=
ff_avg_pixels16_x2_neon
;
c
->
avg_pixels_tab
[
0
][
2
]
=
ff_avg_pixels16_y2_neon
;
c
->
avg_pixels_tab
[
0
][
3
]
=
ff_avg_pixels16_xy2_neon
;
c
->
avg_pixels_tab
[
1
][
0
]
=
ff_avg_pixels8_neon
;
c
->
avg_pixels_tab
[
1
][
1
]
=
ff_avg_pixels8_x2_neon
;
c
->
avg_pixels_tab
[
1
][
2
]
=
ff_avg_pixels8_y2_neon
;
c
->
avg_pixels_tab
[
1
][
3
]
=
ff_avg_pixels8_xy2_neon
;
c
->
avg_no_rnd_pixels_tab
[
0
]
=
ff_avg_pixels16_neon
;
c
->
avg_no_rnd_pixels_tab
[
1
]
=
ff_avg_pixels16_x2_no_rnd_neon
;
c
->
avg_no_rnd_pixels_tab
[
2
]
=
ff_avg_pixels16_y2_no_rnd_neon
;
c
->
avg_no_rnd_pixels_tab
[
3
]
=
ff_avg_pixels16_xy2_no_rnd_neon
;
}
c
->
add_pixels_clamped
=
ff_add_pixels_clamped_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
de99545f
...
...
@@ -37,394 +37,6 @@ function ff_clear_blocks_neon, export=1
bx lr
endfunc
@ NEON half-pel macro library (removed from dsputil_neon.S by this commit).
@ Each pixelsN* macro expands to the body of a put/avg function with the
@ common signature: r0 = dst block, r1 = src pixels, r2 = line_size, r3 = h.
@ "avg"/"shrn"/"NRND" are helper macros bound by pixfunc below: with \rnd=1
@ they select rounding averages ((a+b+1)>>1), with \rnd=0 truncating ones,
@ and NRND-prefixed instructions are emitted only in the no-rounding variant.

@ Straight 16-byte-wide copy (or average into dst when \avg=1), 4 rows/iter.
.macro pixels16 rnd=1, avg=0
  .if \avg
        mov             r12, r0                 @ second dst pointer for the re-read
  .endif
1:      vld1.8          {q0}, [r1], r2
        vld1.8          {q1}, [r1], r2
        vld1.8          {q2}, [r1], r2
        pld             [r1, r2, lsl #2]
        vld1.8          {q3}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        vld1.8          {q8},  [r12,:128], r2
        vrhadd.u8       q0, q0, q8
        vld1.8          {q9},  [r12,:128], r2
        vrhadd.u8       q1, q1, q9
        vld1.8          {q10}, [r12,:128], r2
        vrhadd.u8       q2, q2, q10
        vld1.8          {q11}, [r12,:128], r2
        vrhadd.u8       q3, q3, q11
  .endif
        subs            r3, r3, #4
        vst1.64         {q0}, [r0,:128], r2
        vst1.64         {q1}, [r0,:128], r2
        vst1.64         {q2}, [r0,:128], r2
        vst1.64         {q3}, [r0,:128], r2
        bne             1b
        bx              lr
.endm

@ 16 wide, horizontal half-pel: average each byte with its right neighbour.
.macro pixels16_x2 rnd=1, avg=0
1:      vld1.8          {d0-d2}, [r1], r2       @ 17 source bytes per row
        vld1.8          {d4-d6}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
        subs            r3, r3, #2
        vext.8          q1, q0, q1, #1          @ row shifted left by one byte
        avg             q0, q0, q1
        vext.8          q3, q2, q3, #1
        avg             q2, q2, q3
  .if \avg
        vld1.8          {q1}, [r0,:128], r2
        vld1.8          {q3}, [r0,:128]
        vrhadd.u8       q0, q0, q1
        vrhadd.u8       q2, q2, q3
        sub             r0, r0, r2
  .endif
        vst1.8          {q0}, [r0,:128], r2
        vst1.8          {q2}, [r0,:128], r2
        bne             1b
        bx              lr
.endm

@ 16 wide, vertical half-pel: average each row with the row below.
.macro pixels16_y2 rnd=1, avg=0
        sub             r3, r3, #2              @ last 2 rows handled after the loop
        vld1.8          {q0}, [r1], r2
        vld1.8          {q1}, [r1], r2
1:      subs            r3, r3, #2
        avg             q2, q0, q1
        vld1.8          {q0}, [r1], r2
        avg             q3, q0, q1
        vld1.8          {q1}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        vld1.8          {q8}, [r0,:128], r2
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q2, q2, q8
        vrhadd.u8       q3, q3, q9
        sub             r0, r0, r2
  .endif
        vst1.8          {q2}, [r0,:128], r2
        vst1.8          {q3}, [r0,:128], r2
        bne             1b
        avg             q2, q0, q1              @ epilogue: final two rows
        vld1.8          {q0}, [r1], r2
        avg             q3, q0, q1
  .if \avg
        vld1.8          {q8}, [r0,:128], r2
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q2, q2, q8
        vrhadd.u8       q3, q3, q9
        sub             r0, r0, r2
  .endif
        vst1.8          {q2}, [r0,:128], r2
        vst1.8          {q3}, [r0,:128], r2
        bx              lr
.endm

@ 16 wide, diagonal half-pel: (a+b+c+d [+2]) >> 2 over a 2x2 neighbourhood,
@ computed as 16-bit horizontal sums of adjacent rows, then narrowed.
.macro pixels16_xy2 rnd=1, avg=0
        sub             r3, r3, #2
        vld1.8          {d0-d2}, [r1], r2
        vld1.8          {d4-d6}, [r1], r2
NRND    vmov.i16        q13, #1                 @ +1 bias only in the no-rnd build
        pld             [r1]
        pld             [r1, r2]
        vext.8          q1, q0, q1, #1
        vext.8          q3, q2, q3, #1
        vaddl.u8        q8, d0, d2              @ horizontal pair sums, row 0
        vaddl.u8        q10, d1, d3
        vaddl.u8        q9, d4, d6              @ horizontal pair sums, row 1
        vaddl.u8        q11, d5, d7
1:      subs            r3, r3, #2
        vld1.8          {d0-d2}, [r1], r2
        vadd.u16        q12, q8, q9
        pld             [r1]
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0, q1, #1
        vadd.u16        q1 , q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1, q1, q13
        shrn            d29, q1, #2
  .if \avg
        vld1.8          {q8}, [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8, d0, d30
        vld1.8          {d2-d4}, [r1], r2
        vaddl.u8        q10, d1, d31
        vst1.8          {q14}, [r0,:128], r2
        vadd.u16        q12, q8, q9
        pld             [r1, r2]
NRND    vadd.u16        q12, q12, q13
        vext.8          q2, q1, q2, #1
        vadd.u16        q0, q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0, q0, q13
        shrn            d31, q0, #2
  .if \avg
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vaddl.u8        q9, d2, d4
        vaddl.u8        q11, d3, d5
        vst1.8          {q15}, [r0,:128], r2
        bgt             1b
        vld1.8          {d0-d2}, [r1], r2       @ epilogue: final two output rows
        vadd.u16        q12, q8, q9
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0, q1, #1
        vadd.u16        q1 , q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1, q1, q13
        shrn            d29, q1, #2
  .if \avg
        vld1.8          {q8}, [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8, d0, d30
        vaddl.u8        q10, d1, d31
        vst1.8          {q14}, [r0,:128], r2
        vadd.u16        q12, q8, q9
NRND    vadd.u16        q12, q12, q13
        vadd.u16        q0, q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0, q0, q13
        shrn            d31, q0, #2
  .if \avg
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vst1.8          {q15}, [r0,:128], r2
        bx              lr
.endm

@ Straight 8-byte-wide copy (or average into dst when \avg=1), 4 rows/iter.
.macro pixels8 rnd=1, avg=0
1:      vld1.8          {d0}, [r1], r2
        vld1.8          {d1}, [r1], r2
        vld1.8          {d2}, [r1], r2
        pld             [r1, r2, lsl #2]
        vld1.8          {d3}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        vld1.8          {d4}, [r0,:64], r2
        vrhadd.u8       d0, d0, d4
        vld1.8          {d5}, [r0,:64], r2
        vrhadd.u8       d1, d1, d5
        vld1.8          {d6}, [r0,:64], r2
        vrhadd.u8       d2, d2, d6
        vld1.8          {d7}, [r0,:64], r2
        vrhadd.u8       d3, d3, d7
        sub             r0, r0, r2, lsl #2      @ rewind dst to row 0
  .endif
        subs            r3, r3, #4
        vst1.8          {d0}, [r0,:64], r2
        vst1.8          {d1}, [r0,:64], r2
        vst1.8          {d2}, [r0,:64], r2
        vst1.8          {d3}, [r0,:64], r2
        bne             1b
        bx              lr
.endm

@ 8 wide, horizontal half-pel.
.macro pixels8_x2 rnd=1, avg=0
1:      vld1.8          {q0}, [r1], r2
        vext.8          d1, d0, d1, #1
        vld1.8          {q1}, [r1], r2
        vext.8          d3, d2, d3, #1
        pld             [r1]
        pld             [r1, r2]
        subs            r3, r3, #2
        vswp            d1, d2                  @ pair (row, shifted row) per q reg
        avg             q0, q0, q1
  .if \avg
        vld1.8          {d4}, [r0,:64], r2
        vld1.8          {d5}, [r0,:64]
        vrhadd.u8       q0, q0, q2
        sub             r0, r0, r2
  .endif
        vst1.8          {d0}, [r0,:64], r2
        vst1.8          {d1}, [r0,:64], r2
        bne             1b
        bx              lr
.endm

@ 8 wide, vertical half-pel.
.macro pixels8_y2 rnd=1, avg=0
        sub             r3, r3, #2              @ last 2 rows handled after the loop
        vld1.8          {d0}, [r1], r2
        vld1.8          {d1}, [r1], r2
1:      subs            r3, r3, #2
        avg             d4, d0, d1
        vld1.8          {d0}, [r1], r2
        avg             d5, d0, d1
        vld1.8          {d1}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        vld1.8          {d2}, [r0,:64], r2
        vld1.8          {d3}, [r0,:64]
        vrhadd.u8       q2, q2, q1
        sub             r0, r0, r2
  .endif
        vst1.8          {d4}, [r0,:64], r2
        vst1.8          {d5}, [r0,:64], r2
        bne             1b
        avg             d4, d0, d1              @ epilogue: final two rows
        vld1.8          {d0}, [r1], r2
        avg             d5, d0, d1
  .if \avg
        vld1.8          {d2}, [r0,:64], r2
        vld1.8          {d3}, [r0,:64]
        vrhadd.u8       q2, q2, q1
        sub             r0, r0, r2
  .endif
        vst1.8          {d4}, [r0,:64], r2
        vst1.8          {d5}, [r0,:64], r2
        bx              lr
.endm

@ 8 wide, diagonal half-pel; same scheme as pixels16_xy2 on d registers.
.macro pixels8_xy2 rnd=1, avg=0
        sub             r3, r3, #2
        vld1.8          {q0}, [r1], r2
        vld1.8          {q1}, [r1], r2
NRND    vmov.i16        q11, #1                 @ +1 bias only in the no-rnd build
        pld             [r1]
        pld             [r1, r2]
        vext.8          d4, d0, d1, #1
        vext.8          d6, d2, d3, #1
        vaddl.u8        q8, d0, d4
        vaddl.u8        q9, d2, d6
1:      subs            r3, r3, #2
        vld1.8          {q0}, [r1], r2
        pld             [r1]
        vadd.u16        q10, q8, q9
        vext.8          d4, d0, d1, #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8, d0, d4
        shrn            d5, q10, #2
        vld1.8          {q1}, [r1], r2
        vadd.u16        q10, q8, q9
        pld             [r1, r2]
  .if \avg
        vld1.8          {d7}, [r0,:64]
        vrhadd.u8       d5, d5, d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5}, [r0,:64], r2
        shrn            d7, q10, #2
  .if \avg
        vld1.8          {d5}, [r0,:64]
        vrhadd.u8       d7, d7, d5
  .endif
        vext.8          d6, d2, d3, #1
        vaddl.u8        q9, d2, d6
        vst1.8          {d7}, [r0,:64], r2
        bgt             1b
        vld1.8          {q0}, [r1], r2          @ epilogue: final two output rows
        vadd.u16        q10, q8, q9
        vext.8          d4, d0, d1, #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8, d0, d4
        shrn            d5, q10, #2
        vadd.u16        q10, q8, q9
  .if \avg
        vld1.8          {d7}, [r0,:64]
        vrhadd.u8       d5, d5, d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5}, [r0,:64], r2
        shrn            d7, q10, #2
  .if \avg
        vld1.8          {d5}, [r0,:64]
        vrhadd.u8       d7, d7, d5
  .endif
        vst1.8          {d7}, [r0,:64], r2
        bx              lr
.endm

@ Instantiate one exported function from a pixelsN* body, binding the
@ avg/shrn/NRND helpers according to \rnd, then discard the helpers.
.macro pixfunc pfx, name, suf, rnd=1, avg=0
  .if \rnd
    .macro avg rd, rn, rm
        vrhadd.u8       \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vrshrn.u16      \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
    .endm
  .else
    .macro avg rd, rn, rm
        vhadd.u8        \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vshrn.u16       \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
        \insn
    .endm
  .endif
function ff_\pfx\name\suf\()_neon, export=1
        \name           \rnd, \avg
endfunc
        .purgem         avg
        .purgem         shrn
        .purgem         NRND
.endm

@ Instantiate both the rounding and the no-rounding variant.
.macro pixfunc2 pfx, name, avg=0
        pixfunc         \pfx, \name, rnd=1, avg=\avg
        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm

@ qpel mc00 = plain copy: set h and fall through into the pixels16 function
@ generated immediately below (no return before it).
function ff_put_h264_qpel16_mc00_neon, export=1
        mov             r3, #16
endfunc

        pixfunc         put_, pixels16,     avg=0
        pixfunc2        put_, pixels16_x2,  avg=0
        pixfunc2        put_, pixels16_y2,  avg=0
        pixfunc2        put_, pixels16_xy2, avg=0

function ff_avg_h264_qpel16_mc00_neon, export=1
        mov             r3, #16
endfunc

        pixfunc         avg_, pixels16,     avg=1
        pixfunc2        avg_, pixels16_x2,  avg=1
        pixfunc2        avg_, pixels16_y2,  avg=1
        pixfunc2        avg_, pixels16_xy2, avg=1

function ff_put_h264_qpel8_mc00_neon, export=1
        mov             r3, #8
endfunc

        pixfunc         put_, pixels8,     avg=0
        pixfunc2        put_, pixels8_x2,  avg=0
        pixfunc2        put_, pixels8_y2,  avg=0
        pixfunc2        put_, pixels8_xy2, avg=0

function ff_avg_h264_qpel8_mc00_neon, export=1
        mov             r3, #8
endfunc

        pixfunc         avg_, pixels8,     avg=1
        pixfunc         avg_, pixels8_x2,  avg=1
        pixfunc         avg_, pixels8_y2,  avg=1
        pixfunc         avg_, pixels8_xy2, avg=1
function ff_put_pixels_clamped_neon, export=1
vld1.16 {d16-d19}, [r0,:128]!
vqmovun.s16 d0, q8
...
...
libavcodec/arm/hpeldsp_arm.S
0 → 100644
View file @
de99545f
@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@
#include "config.h"
#include "libavutil/arm/asm.S"

@ Cores without ARMv5TE have no PLD; turn every "pld" line into a comment.
#if !HAVE_ARMV5TE_EXTERNAL
#define pld @
#endif

@ Funnel-shift a 5-word unaligned source window right by \shift bytes,
@ producing 4 aligned destination words (16 pixels).  Little-endian only.
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov             \Rd0, \Rn0, lsr #(\shift * 8)
        mov             \Rd1, \Rn1, lsr #(\shift * 8)
        mov             \Rd2, \Rn2, lsr #(\shift * 8)
        mov             \Rd3, \Rn3, lsr #(\shift * 8)
        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm

@ In-place funnel shift of a 3-word window into 2 aligned words (8 pixels).
.macro ALIGN_DWORD shift, R0, R1, R2
        mov             \R0, \R0, lsr #(\shift * 8)
        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov             \R1, \R1, lsr #(\shift * 8)
        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm

@ Same as ALIGN_DWORD but with separate destination registers, so the
@ source window can be reused at a different shift.
.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm

@ Per-byte rounding average of two 8-byte rows, SWAR style.
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn = destroy
        eor             \Rd0, \Rn0, \Rm0
        eor             \Rd1, \Rn1, \Rm1
        orr             \Rn0, \Rn0, \Rm0
        orr             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        sub             \Rd0, \Rn0, \Rd0, lsr #1
        sub             \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Per-byte truncating (no-rounding) average of two 8-byte rows.
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn = destroy
        eor             \Rd0, \Rn0, \Rm0
        eor             \Rd1, \Rn1, \Rm1
        and             \Rn0, \Rn0, \Rm0
        and             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        add             \Rd0, \Rn0, \Rd0, lsr #1
        add             \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Dispatch on the low two bits of the source pointer \reg: branch to local
@ label 1/2/3/4 for byte alignment 0/1/2/3, word-aligning \reg itself.
.macro JMP_ALIGN tmp, reg
        ands            \tmp, \reg, #3
        bic             \reg, \reg, #3
        beq             1f
        subs            \tmp, \tmp, #1
        beq             2f
        subs            \tmp, \tmp, #1
        beq             3f
        b               4f
.endm

@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ One 16-byte row per iteration; four code paths by src alignment.
        pld             [r1]
        push            {r4-r11, lr}
        JMP_ALIGN       r5, r1
1:      @ src word-aligned: plain 4-word copy
        ldm             r1, {r4-r7}
        add             r1, r1, r2
        stm             r0, {r4-r7}
        pld             [r1]
        subs            r3, r3, #1
        add             r0, r0, r2
        bne             1b
        pop             {r4-r11, pc}
        .align 5
2:      @ src offset 1: load 5 words, funnel-shift into 4
        ldm             r1, {r4-r8}
        add             r1, r1, r2
        ALIGN_QWORD_D   1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r9-r12}
        add             r0, r0, r2
        bne             2b
        pop             {r4-r11, pc}
        .align 5
3:      @ src offset 2
        ldm             r1, {r4-r8}
        add             r1, r1, r2
        ALIGN_QWORD_D   2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r9-r12}
        add             r0, r0, r2
        bne             3b
        pop             {r4-r11, pc}
        .align 5
4:      @ src offset 3
        ldm             r1, {r4-r8}
        add             r1, r1, r2
        ALIGN_QWORD_D   3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r9-r12}
        add             r0, r0, r2
        bne             4b
        pop             {r4-r11,pc}
endfunc

@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Same structure as ff_put_pixels16_arm, 8 bytes (2 words) per row.
        pld             [r1]
        push            {r4-r5,lr}
        JMP_ALIGN       r5, r1
1:      @ src word-aligned
        ldm             r1, {r4-r5}
        add             r1, r1, r2
        subs            r3, r3, #1
        pld             [r1]
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             1b
        pop             {r4-r5,pc}
        .align 5
2:      @ src offset 1
        ldm             r1, {r4-r5, r12}
        add             r1, r1, r2
        ALIGN_DWORD     1, r4, r5, r12
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             2b
        pop             {r4-r5,pc}
        .align 5
3:      @ src offset 2
        ldm             r1, {r4-r5, r12}
        add             r1, r1, r2
        ALIGN_DWORD     2, r4, r5, r12
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             3b
        pop             {r4-r5,pc}
        .align 5
4:      @ src offset 3
        ldm             r1, {r4-r5, r12}
        add             r1, r1, r2
        ALIGN_DWORD     3, r4, r5, r12
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             4b
        pop             {r4-r5,pc}
endfunc
@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Horizontal half-pel: average src with src+1 using RND_AVG32.
        pld             [r1]
        push            {r4-r10,lr}
        ldr             r12, =0xfefefefe        @ SWAR mask for the averaging macros
        JMP_ALIGN       r5, r1
1:      @ src word-aligned: shift by 1 gives the "src+1" row
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   1, r6, r7, r4, r5, r10
        pld             [r1]
        RND_AVG32       r8, r9, r4, r5, r6, r7, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             1b
        pop             {r4-r10,pc}
        .align 5
2:      @ src offset 1: average shift-1 and shift-2 extractions
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   1, r6, r7, r4, r5, r10
        ALIGN_DWORD_D   2, r8, r9, r4, r5, r10
        pld             [r1]
        RND_AVG32       r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             2b
        pop             {r4-r10,pc}
        .align 5
3:      @ src offset 2
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   2, r6, r7, r4, r5, r10
        ALIGN_DWORD_D   3, r8, r9, r4, r5, r10
        pld             [r1]
        RND_AVG32       r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             3b
        pop             {r4-r10,pc}
        .align 5
4:      @ src offset 3: shift-3 row vs. word-aligned next row
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   3, r6, r7, r4, r5, r10
        pld             [r1]
        RND_AVG32       r8, r9, r6, r7, r5, r10, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             4b
        pop             {r4-r10,pc}
endfunc

        .align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Identical to ff_put_pixels8_x2_arm but with truncating averages.
        pld             [r1]
        push            {r4-r10,lr}
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5, r1
1:
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   1, r6, r7, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r8, r9, r4, r5, r6, r7, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             1b
        pop             {r4-r10,pc}
        .align 5
2:
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   1, r6, r7, r4, r5, r10
        ALIGN_DWORD_D   2, r8, r9, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             2b
        pop             {r4-r10,pc}
        .align 5
3:
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   2, r6, r7, r4, r5, r10
        ALIGN_DWORD_D   3, r8, r9, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             3b
        pop             {r4-r10,pc}
        .align 5
4:
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   3, r6, r7, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r8, r9, r6, r7, r5, r10, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             4b
        pop             {r4-r10,pc}
endfunc
@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Vertical half-pel: average each row with the next; rows are reused
        @ between iterations, so the loop emits two output rows per pass.
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3, r3, lsr #1          @ iterations = h / 2
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5, r1
1:      @ src word-aligned
        ldm             r1, {r4-r5}
        add             r1, r1, r2
6:      ldm             r1, {r6-r7}
        add             r1, r1, r2
        pld             [r1]
        RND_AVG32       r8, r9, r4, r5, r6, r7, r12
        ldm             r1, {r4-r5}
        add             r1, r1, r2
        stm             r0, {r8-r9}
        add             r0, r0, r2
        pld             [r1]
        RND_AVG32       r8, r9, r6, r7, r4, r5, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:      @ src offset 1
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     1, r4, r5, r6
6:      ldm             r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     1, r7, r8, r9
        RND_AVG32       r10, r11, r4, r5, r7, r8, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     1, r4, r5, r6
        subs            r3, r3, #1
        RND_AVG32       r10, r11, r7, r8, r4, r5, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
3:      @ src offset 2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     2, r4, r5, r6
6:      ldm             r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     2, r7, r8, r9
        RND_AVG32       r10, r11, r4, r5, r7, r8, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     2, r4, r5, r6
        subs            r3, r3, #1
        RND_AVG32       r10, r11, r7, r8, r4, r5, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
4:      @ src offset 3
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     3, r4, r5, r6
6:      ldm             r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     3, r7, r8, r9
        RND_AVG32       r10, r11, r4, r5, r7, r8, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     3, r4, r5, r6
        subs            r3, r3, #1
        RND_AVG32       r10, r11, r7, r8, r4, r5, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
endfunc

        .align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Identical to ff_put_pixels8_y2_arm but with truncating averages.
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3, r3, lsr #1
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5, r1
1:
        ldm             r1, {r4-r5}
        add             r1, r1, r2
6:      ldm             r1, {r6-r7}
        add             r1, r1, r2
        pld             [r1]
        NO_RND_AVG32    r8, r9, r4, r5, r6, r7, r12
        ldm             r1, {r4-r5}
        add             r1, r1, r2
        stm             r0, {r8-r9}
        add             r0, r0, r2
        pld             [r1]
        NO_RND_AVG32    r8, r9, r6, r7, r4, r5, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     1, r4, r5, r6
6:      ldm             r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     1, r7, r8, r9
        NO_RND_AVG32    r10, r11, r4, r5, r7, r8, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     1, r4, r5, r6
        subs            r3, r3, #1
        NO_RND_AVG32    r10, r11, r7, r8, r4, r5, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
3:
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     2, r4, r5, r6
6:      ldm             r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     2, r7, r8, r9
        NO_RND_AVG32    r10, r11, r4, r5, r7, r8, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     2, r4, r5, r6
        subs            r3, r3, #1
        NO_RND_AVG32    r10, r11, r7, r8, r4, r5, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
4:
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     3, r4, r5, r6
6:      ldm             r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     3, r7, r8, r9
        NO_RND_AVG32    r10, r11, r4, r5, r7, r8, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     3, r4, r5, r6
        subs            r3, r3, #1
        NO_RND_AVG32    r10, r11, r7, r8, r4, r5, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
endfunc

        .ltorg                                  @ flush literal pool (=0xfefefefe)
@ ----------------------------------------------------------------
@ One row step of the diagonal (xy2) half-pel filter for source byte
@ alignment \align.  \rnd is "lsl" for rounding, "lsr" for no-rounding; it
@ only affects the bias constant built from 0x03030303 on even rows.
.macro RND_XY2_IT align, rnd
        @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
  .if \align == 0
        ldm             r1, {r6-r8}
  .elseif \align == 3
        ldm             r1, {r5-r7}
  .else
        ldm             r1, {r8-r10}
  .endif
        add             r1, r1, r2
        pld             [r1]
  .if \align == 0
        ALIGN_DWORD_D   1, r4, r5, r6, r7, r8
  .elseif \align == 1
        ALIGN_DWORD_D   1, r4, r5, r8, r9, r10
        ALIGN_DWORD_D   2, r6, r7, r8, r9, r10
  .elseif \align == 2
        ALIGN_DWORD_D   2, r4, r5, r8, r9, r10
        ALIGN_DWORD_D   3, r6, r7, r8, r9, r10
  .elseif \align == 3
        ALIGN_DWORD_D   3, r4, r5, r5, r6, r7
  .endif
        ldr             r14, =0x03030303
        tst             r3, #1                  @ bias only added on even counts
        and             r8, r4, r14             @ low 2 bits of each byte
        and             r9, r5, r14
        and             r10, r6, r14
        and             r11, r7, r14
        it              eq
        andeq           r14, r14, r14, \rnd #1  @ 0x02020202 (rnd) / 0x01010101
        add             r8, r8, r10
        add             r9, r9, r11
        ldr             r12, =0xfcfcfcfc >> 2
        itt             eq
        addeq           r8, r8, r14
        addeq           r9, r9, r14
        and             r4, r12, r4, lsr #2     @ high 6 bits of each byte, >>2
        and             r5, r12, r5, lsr #2
        and             r6, r12, r6, lsr #2
        and             r7, r12, r7, lsr #2
        add             r10, r4, r6
        add             r11, r5, r7
        subs            r3, r3, #1
.endm

@ Full xy2 loop body: combine two consecutive RND_XY2_IT row sums into one
@ output row, store it, and repeat until the row counter underflows.
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT      \align, \rnd
6:      push            {r8-r11}                @ keep previous row's partial sums
        RND_XY2_IT      \align, \rnd
        pop             {r4-r7}
        add             r4, r4, r8
        add             r5, r5, r9
        ldr             r14, =0x0f0f0f0f
        add             r6, r6, r10
        add             r7, r7, r11
        and             r4, r14, r4, lsr #2
        and             r5, r14, r5, lsr #2
        add             r4, r4, r6
        add             r5, r5, r7
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bge             6b
        pop             {r4-r11,pc}
.endm

        .align 5
function ff_put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN       r5, r1
1:      RND_XY2_EXPAND  0, lsl
        .align 5
2:      RND_XY2_EXPAND  1, lsl
        .align 5
3:      RND_XY2_EXPAND  2, lsl
        .align 5
4:      RND_XY2_EXPAND  3, lsl
endfunc

        .align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r11,lr}
        JMP_ALIGN       r5, r1
1:      RND_XY2_EXPAND  0, lsr
        .align 5
2:      RND_XY2_EXPAND  1, lsr
        .align 5
3:      RND_XY2_EXPAND  2, lsr
        .align 5
4:      RND_XY2_EXPAND  3, lsr
endfunc
libavcodec/arm/hpeldsp_arm.h
0 → 100644
View file @
de99545f
/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_ARM_HPELDSP_H
#define AVCODEC_ARM_HPELDSP_H

#include "libavcodec/hpeldsp.h"

/* Install the ARMv6 half-pel functions into *c. */
void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags);
/* Install the NEON half-pel functions into *c. */
void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags);

#endif /* AVCODEC_ARM_HPELDSP_H */
libavcodec/arm/hpeldsp_armv6.S
0 → 100644
View file @
de99545f
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/asm.S"
@ Build a 16-pixel-wide function from the matching 8-pixel-wide one:
@ run it on the left half, then tail-call it 8 bytes further right.
.macro call_2x_pixels type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}
        bl              ff_\type\()_pixels8\subp\()_armv6
        pop             {r0-r3, lr}
        add             r0, r0, #8
        add             r1, r1, #8
        b               ff_\type\()_pixels8\subp\()_armv6
endfunc
.endm

call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd
function ff_put_pixels16_armv6, export=1
        @ r0 = dst, r1 = src, r2 = line_size, r3 = h (even)
        @ Straight 16-byte copy, two rows per iteration.
        push            {r4-r11}
1:
        ldr             r5, [r1, #4]
        ldr             r6, [r1, #8]
        ldr             r7, [r1, #12]
        ldr_post        r4, r1, r2              @ load word 0, advance src row
        strd            r6, r7, [r0, #8]
        ldr             r9, [r1, #4]
        strd_post       r4, r5, r0, r2          @ store words 0-1, advance dst row
        ldr             r10, [r1, #8]
        ldr             r11, [r1, #12]
        ldr_post        r8, r1, r2
        strd            r10, r11, [r0, #8]
        subs            r3, r3, #2
        strd_post       r8, r9, r0, r2
        bne             1b
        pop             {r4-r11}
        bx              lr
endfunc

function ff_put_pixels8_armv6, export=1
        @ r0 = dst, r1 = src, r2 = line_size, r3 = h (even)
        @ Straight 8-byte copy, two rows per iteration.
        push            {r4-r7}
1:
        ldr             r5, [r1, #4]
        ldr_post        r4, r1, r2
        ldr             r7, [r1, #4]
        strd_post       r4, r5, r0, r2
        ldr_post        r6, r1, r2
        subs            r3, r3, #2
        strd_post       r6, r7, r0, r2
        bne             1b
        pop             {r4-r7}
        bx              lr
endfunc
function ff_put_pixels8_x2_armv6, export=1
        @ Horizontal half-pel with rounding: uhadd8 gives the truncating
        @ average, the uadd8 of the masked xor adds the rounding carry.
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
1:
        ldr             r4, [r1]
        subs            r3, r3, #2
        ldr             r5, [r1, #4]
        ldr             r7, [r1, #5]
        lsr             r6, r4, #8
        ldr_pre         r8, r1, r2              @ next row, word 0
        orr             r6, r6, r5, lsl #24     @ row shifted by one byte
        ldr             r9, [r1, #4]
        ldr             r11, [r1, #5]
        lsr             r10, r8, #8
        add             r1, r1, r2
        orr             r10, r10, r9, lsl #24
        eor             r14, r4, r6
        uhadd8          r4, r4, r6
        eor             r6, r5, r7
        uhadd8          r5, r5, r7
        and             r14, r14, r12           @ per-byte rounding bits
        and             r6, r6, r12
        uadd8           r4, r4, r14
        eor             r14, r8, r10
        uadd8           r5, r5, r6
        eor             r6, r9, r11
        uhadd8          r8, r8, r10
        and             r14, r14, r12
        uhadd8          r9, r9, r11
        and             r6, r6, r12
        uadd8           r8, r8, r14
        strd_post       r4, r5, r0, r2
        uadd8           r9, r9, r6
        strd_post       r8, r9, r0, r2
        bne             1b
        pop             {r4-r11, pc}
endfunc

function ff_put_pixels8_y2_armv6, export=1
        @ Vertical half-pel with rounding; rows are kept in r4-r7 across
        @ iterations so each source row is loaded only once.
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
        ldr             r4, [r1]
        ldr             r5, [r1, #4]
        ldr_pre         r6, r1, r2
        ldr             r7, [r1, #4]
1:
        subs            r3, r3, #2
        uhadd8          r8, r4, r6
        eor             r10, r4, r6
        uhadd8          r9, r5, r7
        eor             r11, r5, r7
        and             r10, r10, r12
        ldr_pre         r4, r1, r2
        uadd8           r8, r8, r10
        and             r11, r11, r12
        uadd8           r9, r9, r11
        ldr             r5, [r1, #4]
        uhadd8          r10, r4, r6
        eor             r6, r4, r6
        uhadd8          r11, r5, r7
        and             r6, r6, r12
        eor             r7, r5, r7
        uadd8           r10, r10, r6
        and             r7, r7, r12
        ldr_pre         r6, r1, r2
        uadd8           r11, r11, r7
        strd_post       r8, r9, r0, r2
        ldr             r7, [r1, #4]
        strd_post       r10, r11, r0, r2
        bne             1b
        pop             {r4-r11}
        bx              lr
endfunc
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        @ Horizontal half-pel, truncating: uhadd8 alone is (a+b)>>1.
        push            {r4-r9, lr}
1:
        subs            r3, r3, #2
        ldr             r4, [r1]
        ldr             r5, [r1, #4]
        ldr             r7, [r1, #5]
        ldr_pre         r8, r1, r2
        ldr             r9, [r1, #4]
        ldr             r14, [r1, #5]
        add             r1, r1, r2
        lsr             r6, r4, #8
        orr             r6, r6, r5, lsl #24     @ row shifted by one byte
        lsr             r12, r8, #8
        orr             r12, r12, r9, lsl #24
        uhadd8          r4, r4, r6
        uhadd8          r5, r5, r7
        uhadd8          r8, r8, r12
        uhadd8          r9, r9, r14
        stm             r0, {r4,r5}
        add             r0, r0, r2
        stm             r0, {r8,r9}
        add             r0, r0, r2
        bne             1b
        pop             {r4-r9, pc}
endfunc

function ff_put_pixels8_y2_no_rnd_armv6, export=1
        @ Vertical half-pel, truncating; rows carried across iterations.
        push            {r4-r9, lr}
        ldr             r4, [r1]
        ldr             r5, [r1, #4]
        ldr_pre         r6, r1, r2
        ldr             r7, [r1, #4]
1:
        subs            r3, r3, #2
        uhadd8          r8, r4, r6
        ldr_pre         r4, r1, r2
        uhadd8          r9, r5, r7
        ldr             r5, [r1, #4]
        uhadd8          r12, r4, r6
        ldr_pre         r6, r1, r2
        uhadd8          r14, r5, r7
        ldr             r7, [r1, #4]
        stm             r0, {r8,r9}
        add             r0, r0, r2
        stm             r0, {r12,r14}
        add             r0, r0, r2
        bne             1b
        pop             {r4-r9, pc}
endfunc
function ff_avg_pixels8_armv6, export=1
        @ dst = rounding average of dst and src, 8 wide, two rows/iteration.
        @ Software-pipelined: loads for the next pair overlap the averaging
        @ of the current one; label 2 finishes the final pair.
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr, #1
        orr             lr, lr, lr, lsl #8
        orr             lr, lr, lr, lsl #16     @ lr = 0x01010101 (rounding mask)
        ldrd            r4, r5, [r0]            @ dst row 0
        ldr             r10, [r1, #4]
        ldr_post        r9, r1, r2              @ src row 0, advance
        subs            r3, r3, #2
1:
        pld             [r1, r2]
        eor             r8, r4, r9
        uhadd8          r4, r4, r9
        eor             r12, r5, r10
        ldrd_reg        r6, r7, r0, r2          @ dst row 1
        uhadd8          r5, r5, r10
        and             r8, r8, lr
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        uadd8           r4, r4, r8              @ add per-byte rounding carry
        ldr_post        r9, r1, r2
        eor             r8, r6, r9
        uadd8           r5, r5, r12
        pld             [r1, r2, lsl #1]
        eor             r12, r7, r10
        uhadd8          r6, r6, r9
        strd_post       r4, r5, r0, r2
        uhadd8          r7, r7, r10
        beq             2f                      @ h exhausted: finish last row pair
        and             r8, r8, lr
        ldrd_reg        r4, r5, r0, r2
        uadd8           r6, r6, r8
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        subs            r3, r3, #2
        uadd8           r7, r7, r12
        ldr_post        r9, r1, r2
        strd_post       r6, r7, r0, r2
        b               1b
2:
        and             r8, r8, lr
        and             r12, r12, lr
        uadd8           r6, r6, r8
        uadd8           r7, r7, r12
        strd_post       r6, r7, r0, r2
        pop             {r4-r10, pc}
endfunc
libavcodec/arm/hpeldsp_init_arm.c
0 → 100644
View file @
de99545f
/*
* ARM optimized DSP utils
* Copyright (c) 2001 Lionel Ulmer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/cpu.h"
#include "libavcodec/bit_depth_template.c" // for CALL_2X_PIXELS
#include "hpeldsp_arm.h"
/* Plain-ARM half-pel primitives implemented in hpeldsp_arm.S.
 * Common signature: word-aligned dst block, unaligned src pixels,
 * line stride, block height. */
void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);

void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
                                  ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
                                  ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h);

void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);

/* Build the missing 16-pixel-wide variants as two 8-pixel calls. */
CALL_2X_PIXELS(ff_put_pixels16_x2_arm,         ff_put_pixels8_x2_arm,         8)
CALL_2X_PIXELS(ff_put_pixels16_y2_arm,         ff_put_pixels8_y2_arm,         8)
CALL_2X_PIXELS(ff_put_pixels16_xy2_arm,        ff_put_pixels8_xy2_arm,        8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm,  ff_put_no_rnd_pixels8_x2_arm,  8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm,  ff_put_no_rnd_pixels8_y2_arm,  8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm, 8)
/**
 * Fill the half-pel function tables with the baseline ARM implementations,
 * then let the ARMv6 and NEON initializers override entries when the
 * running CPU supports them.
 */
void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags)
{
    int cpu_flags = av_get_cpu_flags();

    /* [0][*] = 16 pixels wide, [1][*] = 8 pixels wide;
     * second index: 0 = copy, 1 = x2, 2 = y2, 3 = xy2. */
    c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_arm;

    c->put_pixels_tab[1][0] = ff_put_pixels8_arm;
    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_arm;
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_arm;
    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_arm;

    /* Plain copies need no rounding, so the [*][0] entries are shared. */
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_arm;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_arm;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_arm;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_arm;

    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_arm;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;

    if (have_armv6(cpu_flags))
        ff_hpeldsp_init_armv6(c, flags);
    if (have_neon(cpu_flags))
        ff_hpeldsp_init_neon(c, flags);
}
libavcodec/arm/hpeldsp_init_armv6.c
0 → 100644
View file @
de99545f
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "hpeldsp_arm.h"
/* ARMv6 half-pel primitives implemented in hpeldsp_armv6.S
 * (dst block, src pixels, line stride, block height). */
void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
/**
 * Install the ARMv6 half-pel functions.  Only entries with an ARMv6
 * implementation are overridden; the xy2 slots (commented out below)
 * keep whatever the baseline init installed.
 */
av_cold void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags)
{
    c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
    /* c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_armv6; */

    c->put_pixels_tab[1][0] = ff_put_pixels8_armv6;
    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_armv6;
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_armv6;
    /* c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_armv6; */

    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_armv6;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_armv6;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_armv6;
    /* c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_armv6; */

    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_armv6;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_armv6;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_armv6;
    /* c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_armv6; */

    c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
    c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
}
libavcodec/arm/hpeldsp_init_neon.c
0 → 100644
View file @
de99545f
/*
* ARM NEON optimised DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "hpeldsp_arm.h"
/*
 * Prototypes for the NEON half-pel primitives implemented in
 * hpeldsp_neon.S.  All share one signature:
 *
 *     void fn(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int h);
 *
 * Suffixes follow the table slots they are assigned to in
 * ff_hpeldsp_init_neon(): "x2"/"y2"/"xy2" are the horizontal/vertical/
 * diagonal half-pel slots, and "_no_rnd" variants go into the
 * non-rounding tables.
 */
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
/**
 * Install the NEON half-pel primitives into an HpelDSPContext.
 *
 * Table layout as used here: the first index selects block width
 * (0 = 16 pixels, 1 = 8 pixels), the second the interpolation slot
 * (0 = none, 1 = x2, 2 = y2, 3 = xy2, matching the function-name
 * suffixes).  Slot 0 of the no-rnd put tables reuses the plain copy,
 * since rounding is irrelevant when no interpolation happens.
 * avg_no_rnd_pixels_tab is single-indexed and is filled with the
 * 16-pixel functions only.
 *
 * @param c     context whose function pointers are replaced
 * @param flags unused here; kept for the common hpeldsp init signature
 */
void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags)
{
    /* rounding put */
    c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
    c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;

    /* non-rounding put; slot 0 is the plain copy */
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;

    /* rounding average */
    c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
    c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
    c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
    c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
    c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;

    /* non-rounding average: 16-pixel widths only */
    c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
    c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
}
libavcodec/arm/hpeldsp_neon.S
0 → 100644
View file @
de99545f
/*
* ARM NEON optimised DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/asm.S"
@ pixels16 — copy a 16-wide block, or (\avg=1) average it into dst.
@ In: r0 = dst (16-byte aligned: note the :128 store hints),
@     r1 = src, r2 = line stride, r3 = h.
@ Four rows are handled per iteration, so h must be a multiple of 4.
@ \rnd is unused here: a straight copy needs no rounding choice.
.macro pixels16 rnd=1, avg=0
.if \avg
mov r12, r0 @ second dst pointer for the read-back pass
.endif
1: vld1.8 {q0}, [r1], r2
vld1.8 {q1}, [r1], r2
vld1.8 {q2}, [r1], r2
pld [r1, r2, lsl #2] @ prefetch upcoming source rows
vld1.8 {q3}, [r1], r2
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
.if \avg
vld1.8 {q8}, [r12,:128], r2
vrhadd.u8 q0, q0, q8 @ per-byte (dst + src + 1) >> 1
vld1.8 {q9}, [r12,:128], r2
vrhadd.u8 q1, q1, q9
vld1.8 {q10}, [r12,:128], r2
vrhadd.u8 q2, q2, q10
vld1.8 {q11}, [r12,:128], r2
vrhadd.u8 q3, q3, q11
.endif
subs r3, r3, #4 @ four rows consumed per iteration
vst1.64 {q0}, [r0,:128], r2
vst1.64 {q1}, [r0,:128], r2
vst1.64 {q2}, [r0,:128], r2
vst1.64 {q3}, [r0,:128], r2
bne 1b
bx lr
.endm
@ pixels16_x2 — 16-wide horizontal half-pel: each output byte averages
@ src[x] and src[x+1].  "avg" resolves to vrhadd.u8 (rounding) or
@ vhadd.u8 (truncating) depending on \rnd — see pixfunc below.
@ Two rows per iteration, so h must be even.
.macro pixels16_x2 rnd=1, avg=0
1: vld1.8 {d0-d2}, [r1], r2 @ 24 bytes: 16 pixels plus the x+1 overlap
vld1.8 {d4-d6}, [r1], r2
pld [r1]
pld [r1, r2]
subs r3, r3, #2
vext.8 q1, q0, q1, #1 @ q1 = row shifted left by one pixel
avg q0, q0, q1
vext.8 q3, q2, q3, #1
avg q2, q2, q3
.if \avg
vld1.8 {q1}, [r0,:128], r2 @ avg_ flavour: blend with existing dst
vld1.8 {q3}, [r0,:128]
vrhadd.u8 q0, q0, q1
vrhadd.u8 q2, q2, q3
sub r0, r0, r2 @ rewind dst after the read-back
.endif
vst1.8 {q0}, [r0,:128], r2
vst1.8 {q2}, [r0,:128], r2
bne 1b
bx lr
.endm
@ pixels16_y2 — 16-wide vertical half-pel: averages each source row
@ with the next one ("avg" = vrhadd/vhadd per \rnd, see pixfunc).
@ Two output rows per iteration; the final pair is peeled off after
@ the loop, which needs one extra source row (h+1 rows read in total).
.macro pixels16_y2 rnd=1, avg=0
sub r3, r3, #2 @ last two output rows handled after the loop
vld1.8 {q0}, [r1], r2
vld1.8 {q1}, [r1], r2
1: subs r3, r3, #2
avg q2, q0, q1
vld1.8 {q0}, [r1], r2 @ reload alternates q0/q1 as the "previous" row
avg q3, q0, q1
vld1.8 {q1}, [r1], r2
pld [r1]
pld [r1, r2]
.if \avg
vld1.8 {q8}, [r0,:128], r2 @ avg_ flavour: blend with existing dst
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q2, q2, q8
vrhadd.u8 q3, q3, q9
sub r0, r0, r2
.endif
vst1.8 {q2}, [r0,:128], r2
vst1.8 {q3}, [r0,:128], r2
bne 1b
@ epilogue: final two output rows (one more source row loaded)
avg q2, q0, q1
vld1.8 {q0}, [r1], r2
avg q3, q0, q1
.if \avg
vld1.8 {q8}, [r0,:128], r2
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q2, q2, q8
vrhadd.u8 q3, q3, q9
sub r0, r0, r2
.endif
vst1.8 {q2}, [r0,:128], r2
vst1.8 {q3}, [r0,:128], r2
bx lr
.endm
@ pixels16_xy2 — 16-wide diagonal half-pel: 4-point average of the
@ horizontal pixel pair on two adjacent rows, i.e. sum >> 2.  The
@ rounding flavour uses vrshrn (shrn); the no-rnd flavour assembles the
@ NRND-guarded "+1" bias adds and a truncating vshrn instead (see
@ pixfunc).  The widened horizontal pair sums of the previous row are
@ carried in q8-q11 so each source row is loaded and widened once.
@ Two output rows per iteration; the final pair is peeled after the loop.
.macro pixels16_xy2 rnd=1, avg=0
sub r3, r3, #2 @ last output rows handled after the loop
vld1.8 {d0-d2}, [r1], r2
vld1.8 {d4-d6}, [r1], r2
NRND vmov.i16 q13, #1 @ no-rnd bias constant
pld [r1]
pld [r1, r2]
vext.8 q1, q0, q1, #1 @ rows shifted by one pixel
vext.8 q3, q2, q3, #1
vaddl.u8 q8, d0, d2 @ widen + horizontal pair sums, row 0
vaddl.u8 q10, d1, d3
vaddl.u8 q9, d4, d6 @ same for row 1
vaddl.u8 q11, d5, d7
1: subs r3, r3, #2
vld1.8 {d0-d2}, [r1], r2
vadd.u16 q12, q8, q9 @ combine the two rows' pair sums
pld [r1]
NRND vadd.u16 q12, q12, q13
vext.8 q15, q0, q1, #1
vadd.u16 q1 , q10, q11
shrn d28, q12, #2 @ narrow back to bytes, >>2
NRND vadd.u16 q1, q1, q13
shrn d29, q1, #2
.if \avg
vld1.8 {q8}, [r0,:128] @ avg_ flavour: blend with existing dst
vrhadd.u8 q14, q14, q8
.endif
vaddl.u8 q8, d0, d30 @ pair sums for the newly loaded row
vld1.8 {d2-d4}, [r1], r2
vaddl.u8 q10, d1, d31
vst1.8 {q14}, [r0,:128], r2
vadd.u16 q12, q8, q9
pld [r1, r2]
NRND vadd.u16 q12, q12, q13
vext.8 q2, q1, q2, #1
vadd.u16 q0, q10, q11
shrn d30, q12, #2
NRND vadd.u16 q0, q0, q13
shrn d31, q0, #2
.if \avg
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q15, q15, q9
.endif
vaddl.u8 q9, d2, d4
vaddl.u8 q11, d3, d5
vst1.8 {q15}, [r0,:128], r2
bgt 1b
@ epilogue: final two output rows, one more source row required
vld1.8 {d0-d2}, [r1], r2
vadd.u16 q12, q8, q9
NRND vadd.u16 q12, q12, q13
vext.8 q15, q0, q1, #1
vadd.u16 q1 , q10, q11
shrn d28, q12, #2
NRND vadd.u16 q1, q1, q13
shrn d29, q1, #2
.if \avg
vld1.8 {q8}, [r0,:128]
vrhadd.u8 q14, q14, q8
.endif
vaddl.u8 q8, d0, d30
vaddl.u8 q10, d1, d31
vst1.8 {q14}, [r0,:128], r2
vadd.u16 q12, q8, q9
NRND vadd.u16 q12, q12, q13
vadd.u16 q0, q10, q11
shrn d30, q12, #2
NRND vadd.u16 q0, q0, q13
shrn d31, q0, #2
.if \avg
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q15, q15, q9
.endif
vst1.8 {q15}, [r0,:128], r2
bx lr
.endm
@ pixels8 — 8-wide copy, or (\avg=1) average into dst.  Mirrors
@ pixels16 but works on d registers with 8-byte-aligned dst (:64).
@ Four rows per iteration, so h must be a multiple of 4.
.macro pixels8 rnd=1, avg=0
1: vld1.8 {d0}, [r1], r2
vld1.8 {d1}, [r1], r2
vld1.8 {d2}, [r1], r2
pld [r1, r2, lsl #2] @ prefetch upcoming source rows
vld1.8 {d3}, [r1], r2
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
.if \avg
vld1.8 {d4}, [r0,:64], r2 @ read existing dst rows
vrhadd.u8 d0, d0, d4
vld1.8 {d5}, [r0,:64], r2
vrhadd.u8 d1, d1, d5
vld1.8 {d6}, [r0,:64], r2
vrhadd.u8 d2, d2, d6
vld1.8 {d7}, [r0,:64], r2
vrhadd.u8 d3, d3, d7
sub r0, r0, r2, lsl #2 @ rewind dst by 4 rows after the read-back
.endif
subs r3, r3, #4
vst1.8 {d0}, [r0,:64], r2
vst1.8 {d1}, [r0,:64], r2
vst1.8 {d2}, [r0,:64], r2
vst1.8 {d3}, [r0,:64], r2
bne 1b
bx lr
.endm
@ pixels8_x2 — 8-wide horizontal half-pel.  Two rows are packed into
@ one q register (via vswp) so a single "avg" (vrhadd/vhadd per \rnd,
@ see pixfunc) covers the pair.  h must be even.
.macro pixels8_x2 rnd=1, avg=0
1: vld1.8 {q0}, [r1], r2 @ 16 bytes: 8 pixels plus the x+1 overlap
vext.8 d1, d0, d1, #1 @ shifted copy of row 0
vld1.8 {q1}, [r1], r2
vext.8 d3, d2, d3, #1 @ shifted copy of row 1
pld [r1]
pld [r1, r2]
subs r3, r3, #2
vswp d1, d2 @ q0 = both originals, q1 = both shifted copies
avg q0, q0, q1
.if \avg
vld1.8 {d4}, [r0,:64], r2 @ avg_ flavour: blend with existing dst
vld1.8 {d5}, [r0,:64]
vrhadd.u8 q0, q0, q2
sub r0, r0, r2
.endif
vst1.8 {d0}, [r0,:64], r2
vst1.8 {d1}, [r0,:64], r2
bne 1b
bx lr
.endm
@ pixels8_y2 — 8-wide vertical half-pel; same structure as pixels16_y2
@ (two rows per iteration, final pair peeled after the loop, h+1
@ source rows read in total).
.macro pixels8_y2 rnd=1, avg=0
sub r3, r3, #2 @ last two output rows handled after the loop
vld1.8 {d0}, [r1], r2
vld1.8 {d1}, [r1], r2
1: subs r3, r3, #2
avg d4, d0, d1
vld1.8 {d0}, [r1], r2 @ reload alternates d0/d1 as the "previous" row
avg d5, d0, d1
vld1.8 {d1}, [r1], r2
pld [r1]
pld [r1, r2]
.if \avg
vld1.8 {d2}, [r0,:64], r2 @ avg_ flavour: blend with existing dst
vld1.8 {d3}, [r0,:64]
vrhadd.u8 q2, q2, q1
sub r0, r0, r2
.endif
vst1.8 {d4}, [r0,:64], r2
vst1.8 {d5}, [r0,:64], r2
bne 1b
@ epilogue: final two output rows
avg d4, d0, d1
vld1.8 {d0}, [r1], r2
avg d5, d0, d1
.if \avg
vld1.8 {d2}, [r0,:64], r2
vld1.8 {d3}, [r0,:64]
vrhadd.u8 q2, q2, q1
sub r0, r0, r2
.endif
vst1.8 {d4}, [r0,:64], r2
vst1.8 {d5}, [r0,:64], r2
bx lr
.endm
@ pixels8_xy2 — 8-wide diagonal half-pel, same scheme as pixels16_xy2:
@ widened horizontal pair sums of the previous row are carried in
@ q8/q9, NRND-guarded lines only assemble in the no-rnd variants, and
@ shrn selects rounding vs truncating narrowing (see pixfunc).
.macro pixels8_xy2 rnd=1, avg=0
sub r3, r3, #2 @ last output rows handled after the loop
vld1.8 {q0}, [r1], r2
vld1.8 {q1}, [r1], r2
NRND vmov.i16 q11, #1 @ no-rnd bias constant
pld [r1]
pld [r1, r2]
vext.8 d4, d0, d1, #1 @ rows shifted by one pixel
vext.8 d6, d2, d3, #1
vaddl.u8 q8, d0, d4 @ widen + horizontal pair sums, row 0
vaddl.u8 q9, d2, d6 @ same for row 1
1: subs r3, r3, #2
vld1.8 {q0}, [r1], r2
pld [r1]
vadd.u16 q10, q8, q9 @ combine the two rows' pair sums
vext.8 d4, d0, d1, #1
NRND vadd.u16 q10, q10, q11
vaddl.u8 q8, d0, d4 @ pair sums for the newly loaded row
shrn d5, q10, #2 @ narrow back to bytes, >>2
vld1.8 {q1}, [r1], r2
vadd.u16 q10, q8, q9
pld [r1, r2]
.if \avg
vld1.8 {d7}, [r0,:64] @ avg_ flavour: blend with existing dst
vrhadd.u8 d5, d5, d7
.endif
NRND vadd.u16 q10, q10, q11
vst1.8 {d5}, [r0,:64], r2
shrn d7, q10, #2
.if \avg
vld1.8 {d5}, [r0,:64]
vrhadd.u8 d7, d7, d5
.endif
vext.8 d6, d2, d3, #1
vaddl.u8 q9, d2, d6
vst1.8 {d7}, [r0,:64], r2
bgt 1b
@ epilogue: final two output rows, one more source row required
vld1.8 {q0}, [r1], r2
vadd.u16 q10, q8, q9
vext.8 d4, d0, d1, #1
NRND vadd.u16 q10, q10, q11
vaddl.u8 q8, d0, d4
shrn d5, q10, #2
vadd.u16 q10, q8, q9
.if \avg
vld1.8 {d7}, [r0,:64]
vrhadd.u8 d5, d5, d7
.endif
NRND vadd.u16 q10, q10, q11
vst1.8 {d5}, [r0,:64], r2
shrn d7, q10, #2
.if \avg
vld1.8 {d5}, [r0,:64]
vrhadd.u8 d7, d7, d5
.endif
vst1.8 {d7}, [r0,:64], r2
bx lr
.endm
@ pixfunc — instantiate one exported function ff_<pfx><name><suf>_neon
@ from one of the pixels* body macros above.  Before expanding the
@ body it defines the three helper macros the bodies rely on, keyed on
@ \rnd:
@   avg  -> vrhadd.u8 (rounding) or vhadd.u8 (truncating) average
@   shrn -> vrshrn.u16 (rounding) or vshrn.u16 (truncating) narrow
@   NRND -> swallows its instruction (rnd) or emits it (no-rnd)
@ and purges them afterwards so the next instantiation can redefine
@ them with the other flavour.
.macro pixfunc pfx, name, suf, rnd=1, avg=0
.if \rnd
.macro avg rd, rn, rm
vrhadd.u8 \rd, \rn, \rm
.endm
.macro shrn rd, rn, rm
vrshrn.u16 \rd, \rn, \rm
.endm
.macro NRND insn:vararg
.endm
.else
.macro avg rd, rn, rm
vhadd.u8 \rd, \rn, \rm
.endm
.macro shrn rd, rn, rm
vshrn.u16 \rd, \rn, \rm
.endm
.macro NRND insn:vararg
\insn
.endm
.endif
function ff_\pfx\name\suf\()_neon, export=1
\name \rnd, \avg
endfunc
.purgem avg @ allow redefinition by the next pixfunc expansion
.purgem shrn
.purgem NRND
.endm
@ pixfunc2 — emit both the rounding and the _no_rnd flavour of a body.
.macro pixfunc2 pfx, name, avg=0
pixfunc \pfx, \name, rnd=1, avg=\avg
pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm
@ ff_put_h264_qpel16_mc00_neon — full-pel 16x16 "qpel" copy.  It only
@ sets the row count and then deliberately falls through into
@ ff_put_pixels16_neon, which is emitted immediately below (endfunc
@ inserts no return instruction here).
function ff_put_h264_qpel16_mc00_neon, export=1
mov r3, #16 @ h = 16, then fall through
endfunc
@ Instantiate the 16-wide "put" functions (plain copy has no no-rnd
@ flavour; the interpolating bodies get both via pixfunc2).
pixfunc put_, pixels16, avg=0
pixfunc2 put_, pixels16_x2, avg=0
pixfunc2 put_, pixels16_y2, avg=0
pixfunc2 put_, pixels16_xy2, avg=0
@ ff_avg_h264_qpel16_mc00_neon — full-pel 16x16 "qpel" average; sets
@ h = 16 and falls through into ff_avg_pixels16_neon emitted below.
function ff_avg_h264_qpel16_mc00_neon, export=1
mov r3, #16 @ h = 16, then fall through
endfunc
@ Instantiate the 16-wide "avg" functions (rounding + no-rnd where
@ pixfunc2 is used).
pixfunc avg_, pixels16, avg=1
pixfunc2 avg_, pixels16_x2, avg=1
pixfunc2 avg_, pixels16_y2, avg=1
pixfunc2 avg_, pixels16_xy2, avg=1
@ ff_put_h264_qpel8_mc00_neon — full-pel 8x8 "qpel" copy; sets h = 8
@ and falls through into ff_put_pixels8_neon emitted below.
function ff_put_h264_qpel8_mc00_neon, export=1
mov r3, #8 @ h = 8, then fall through
endfunc
@ Instantiate the 8-wide "put" functions.
pixfunc put_, pixels8, avg=0
pixfunc2 put_, pixels8_x2, avg=0
pixfunc2 put_, pixels8_y2, avg=0
pixfunc2 put_, pixels8_xy2, avg=0
@ ff_avg_h264_qpel8_mc00_neon — full-pel 8x8 "qpel" average; sets
@ h = 8 and falls through into ff_avg_pixels8_neon emitted below.
function ff_avg_h264_qpel8_mc00_neon, export=1
mov r3, #8 @ h = 8, then fall through
endfunc
@ Instantiate the 8-wide "avg" functions.  Note: plain pixfunc is used
@ throughout, so no _no_rnd avg variants exist at 8-pixel width.
pixfunc avg_, pixels8, avg=1
pixfunc avg_, pixels8_x2, avg=1
pixfunc avg_, pixels8_y2, avg=1
pixfunc avg_, pixels8_xy2, avg=1
libavcodec/hpeldsp.c
View file @
de99545f
...
...
@@ -54,8 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext* c, int flags)
hpel_funcs
(
avg_no_rnd
,,
16
);
if
(
ARCH_X86
)
ff_hpeldsp_init_x86
(
c
,
flags
);
#if 0
if
(
ARCH_ARM
)
ff_hpeldsp_init_arm
(
c
,
flags
);
#if 0
if (HAVE_VIS) ff_hpeldsp_init_vis (c, flags);
if (ARCH_ALPHA) ff_hpeldsp_init_alpha (c, flags);
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment