Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
edbf0fff
Commit
edbf0fff
authored
May 01, 2017
by
Alexandra Hájková
Committed by
Martin Storsjö
May 01, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
hevc: Add NEON add_residual for bitdepth 10
Signed-off-by:
Martin Storsjö
<
martin@martin.st
>
parent
81a4cb8e
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
102 additions
and
0 deletions
+102
-0
hevc_idct.S
libavcodec/arm/hevc_idct.S
+89
-0
hevcdsp_init_arm.c
libavcodec/arm/hevcdsp_init_arm.c
+13
-0
No files found.
libavcodec/arm/hevc_idct.S
View file @
edbf0fff
...
...
@@ -30,6 +30,13 @@ const trans, align=4
.short 57, 43, 25, 9
endconst
@ clip10 in1, in2, c1, c2
@ Clamp every signed 16-bit lane of the two vector registers in1 and in2
@ to the range [c1, c2], in place.
@   in1, in2 : vectors to clamp (overwritten with the clamped result)
@   c1       : vector holding the lower bound in each lane
@   c2       : vector holding the upper bound in each lane
@ The callers in this file pass c1 = 0 and c2 = 0x3FF.
@ The two vmax and the two vmin are interleaved so that consecutive
@ instructions work on different registers.
.macro clip10 in1, in2, c1, c2
@ Raise lanes that are below the lower bound.
vmax.s16 \in1, \in1, \c1
vmax.s16 \in2, \in2, \c1
@ Lower lanes that are above the upper bound.
vmin.s16 \in1, \in1, \c2
vmin.s16 \in2, \in2, \c2
.endm
function ff_hevc_add_residual_4x4_8_neon, export=1
vld1.16 {q0-q1}, [r1, :128]
vld1.32 d4[0], [r0, :32], r2
...
...
@@ -50,6 +57,25 @@ function ff_hevc_add_residual_4x4_8_neon, export=1
bx lr
endfunc
@ void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                       ptrdiff_t stride)
@ Adds a 4x4 block of 16-bit residual coefficients to a 4x4 block of
@ 16-bit samples and clamps every result to [0, 0x3FF].
@ In:    r0 = destination block (each row is read and written as one
@             64-bit access carrying a :64 alignment hint)
@        r1 = coefficients, 16 halfwords read with a :128 alignment hint
@        r2 = byte distance between consecutive destination rows
@ Out:   the four destination rows are overwritten with the clamped sums
@ Write: r0, r12, q0-q3, q12, q13
function ff_hevc_add_residual_4x4_10_neon, export=1
@ r12 walks the rows for reading; r0 is kept untouched for the stores.
mov r12, r0
@ q0 = coefficient rows 0-1, q1 = coefficient rows 2-3.
vld1.16 {q0-q1}, [r1, :128]
@ q2 (d4, d5) = destination rows 0-1, q3 (d6, d7) = destination rows 2-3.
vld1.16 d4, [r12, :64], r2
vld1.16 d5, [r12, :64], r2
vld1.16 d6, [r12, :64], r2
@ Rows 0-1 are complete, so they are summed while row 3 is still loading.
vqadd.s16 q0, q2
vld1.16 d7, [r12, :64], r2
@ q12 = lower clamp bound (0 in every lane).
vmov.s16 q12, #0
vqadd.s16 q1, q3
@ q13 = upper clamp bound: the bitwise NOT of 0xFC00 is 0x03FF per lane.
vmvn.s16 q13, #0xFC00 @ vmov.s16 #0x3FF
clip10 q0, q1, q12, q13
@ Write the four clamped rows back, stepping r0 by the stride each time.
vst1.16 d0, [r0, :64], r2
vst1.16 d1, [r0, :64], r2
vst1.16 d2, [r0, :64], r2
vst1.16 d3, [r0, :64], r2
bx lr
endfunc
function ff_hevc_add_residual_8x8_8_neon, export=1
add r12, r0, r2
add r2, r2, r2
...
...
@@ -70,6 +96,25 @@ function ff_hevc_add_residual_8x8_8_neon, export=1
bx lr
endfunc
@ void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                       ptrdiff_t stride)
@ Adds an 8x8 block of 16-bit residual coefficients to an 8x8 block of
@ 16-bit samples and clamps every result to [0, 0x3FF].
@ Two rows are processed per loop iteration.
@ In:    r0 = destination block (rows accessed with a :128 alignment hint)
@        r1 = coefficients, read sequentially with a :128 alignment hint
@        r2 = byte distance between consecutive destination rows
@ Out:   destination rows are overwritten with the clamped sums
@ Write: r0-r3, r12, q0, q1, q8, q9, q12, q13, condition flags
function ff_hevc_add_residual_8x8_10_neon, export=1
@ r0 addresses the even rows and r12 the odd rows; doubling r2 makes
@ each pointer step over two rows at a time.
add r12, r0, r2
add r2, r2, r2
@ r3 = number of rows still to process.
mov r3, #8
@ q12 / q13 = lower (0) and upper (0x3FF) clamp bounds, set once.
vmov.s16 q12, #0
vmvn.s16 q13, #0xFC00 @ vmov.s16 #0x3FF
@ The counter is decremented at the top of the loop; the flags it sets
@ are consumed by the bne at the bottom.
1: subs r3, #2
@ q0 / q1 = coefficients for the even / odd row; r1 advances by 32 bytes.
vld1.16 {q0-q1}, [r1, :128]!
@ The destination loads are interleaved with the saturating adds.
vld1.16 {q8}, [r0, :128]
vqadd.s16 q0, q8
vld1.16 {q9}, [r12, :128]
vqadd.s16 q1, q9
clip10 q0, q1, q12, q13
@ Store both rows and move each pointer down by two rows.
vst1.16 {q0}, [r0, :128], r2
vst1.16 {q1}, [r12, :128], r2
bne 1b
bx lr
endfunc
function ff_hevc_add_residual_16x16_8_neon, export=1
mov r3, #16
add r12, r0, r2
...
...
@@ -97,6 +142,29 @@ function ff_hevc_add_residual_16x16_8_neon, export=1
bx lr
endfunc
@ void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                         ptrdiff_t stride)
@ Adds a 16x16 block of 16-bit residual coefficients to a 16x16 block of
@ 16-bit samples and clamps every result to [0, 0x3FF].
@ Two rows (2 x 32 bytes) are processed per loop iteration.
@ In:    r0 = destination block (rows accessed with a :128 alignment hint)
@        r1 = coefficients, read sequentially with a :128 alignment hint
@        r2 = byte distance between consecutive destination rows
@ Out:   destination rows are overwritten with the clamped sums
@ Write: r0-r3, r12, q0-q3, q8-q13, condition flags
function ff_hevc_add_residual_16x16_10_neon, export=1
@ r3 = number of rows still to process.
mov r3, #16
@ q12 / q13 = lower (0) and upper (0x3FF) clamp bounds, set once.
vmov.s16 q12, #0
vmvn.s16 q13, #0xFC00 @ vmov.s16 #0x3FF
@ r0 addresses the even rows and r12 the odd rows; doubling r2 makes
@ each pointer step over two rows at a time.
add r12, r0, r2
add r2, r2, r2
@ The flags set here are consumed by the bne at the bottom of the loop.
1: subs r3, #2
@ q8-q9 = even destination row, q0-q1 = its coefficients.
vld1.16 {q8-q9}, [r0, :128]
vld1.16 {q0, q1}, [r1, :128]!
vqadd.s16 q0, q8
@ q10-q11 = odd destination row, q2-q3 = its coefficients; these loads
@ are interleaved with the adds for the even row.
vld1.16 {q10-q11}, [r12, :128]
vqadd.s16 q1, q9
vld1.16 {q2, q3}, [r1, :128]!
vqadd.s16 q2, q10
vqadd.s16 q3, q11
clip10 q0, q1, q12, q13
clip10 q2, q3, q12, q13
@ Store both rows and move each pointer down by two rows.
vst1.16 {q0-q1}, [r0, :128], r2
vst1.16 {q2-q3}, [r12, :128], r2
bne 1b
bx lr
endfunc
function ff_hevc_add_residual_32x32_8_neon, export=1
vpush {q4-q7}
add r12, r0, r2
...
...
@@ -137,6 +205,27 @@ function ff_hevc_add_residual_32x32_8_neon, export=1
bx lr
endfunc
@ void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                         ptrdiff_t stride)
@ Adds a 32x32 block of 16-bit residual coefficients to a 32x32 block of
@ 16-bit samples and clamps every result to [0, 0x3FF].
@ One row (64 bytes) is processed per loop iteration, as two 32-byte
@ halves.
@ In:    r0 = destination block (accessed with a :128 alignment hint)
@        r1 = coefficients, read sequentially, 64 bytes per row
@        r2 = byte distance between consecutive destination rows
@ Out:   destination rows are overwritten with the clamped sums
@ Write: r0, r1, r3, r12, q0-q3, q8-q13, condition flags
function ff_hevc_add_residual_32x32_10_neon, export=1
@ r3 = number of rows still to process.
mov r3, #32
@ r0 addresses the first 32 bytes of a row and r12 the second 32 bytes
@ of the same row, so r2 is used unmodified as the per-row step.
add r12, r0, #32
@ q12 / q13 = lower (0) and upper (0x3FF) clamp bounds, set once.
vmov.s16 q12, #0
vmvn.s16 q13, #0xFC00 @ vmov.s16 #0x3FF
@ The flags set here are consumed by the bne at the bottom of the loop.
1: subs r3, #1
@ q0-q3 = the 32 coefficients of this row; r1 is written back past them.
vldm r1!, {q0-q3}
@ q8-q9 = first half of the destination row, q10-q11 = second half.
vld1.16 {q8, q9}, [r0, :128]
vld1.16 {q10, q11}, [r12, :128]
vqadd.s16 q0, q8
vqadd.s16 q1, q9
vqadd.s16 q2, q10
vqadd.s16 q3, q11
clip10 q0, q1, q12, q13
clip10 q2, q3, q12, q13
@ Store both halves and move both pointers down by one row.
vst1.16 {q0-q1}, [r0, :128], r2
vst1.16 {q2-q3}, [r12, :128], r2
bne 1b
bx lr
endfunc
.macro idct_4x4_dc bitdepth
function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
ldrsh r1, [r0]
...
...
libavcodec/arm/hevcdsp_init_arm.c
View file @
edbf0fff
...
...
@@ -28,12 +28,20 @@
void
ff_hevc_add_residual_4x4_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_4x4_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_8x8_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_8x8_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_16x16_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_16x16_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_32x32_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_32x32_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_idct_4x4_dc_8_neon
(
int16_t
*
coeffs
);
void
ff_hevc_idct_8x8_dc_8_neon
(
int16_t
*
coeffs
);
...
...
@@ -72,6 +80,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
c
->
idct
[
2
]
=
ff_hevc_idct_16x16_8_neon
;
}
if
(
bit_depth
==
10
)
{
c
->
add_residual
[
0
]
=
ff_hevc_add_residual_4x4_10_neon
;
c
->
add_residual
[
1
]
=
ff_hevc_add_residual_8x8_10_neon
;
c
->
add_residual
[
2
]
=
ff_hevc_add_residual_16x16_10_neon
;
c
->
add_residual
[
3
]
=
ff_hevc_add_residual_32x32_10_neon
;
c
->
idct_dc
[
0
]
=
ff_hevc_idct_4x4_dc_10_neon
;
c
->
idct_dc
[
1
]
=
ff_hevc_idct_8x8_dc_10_neon
;
c
->
idct_dc
[
2
]
=
ff_hevc_idct_16x16_dc_10_neon
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment