Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
baf6b738
Commit
baf6b738
authored
Sep 28, 2011
by
Mans Rullgard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ARM: NEON optimised vector_fmac_scalar()
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
a92a1b93
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
0 deletions
+51
-0
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+3
-0
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+48
-0
No files found.
libavcodec/arm/dsputil_init_neon.c
View file @
baf6b738
...
@@ -143,6 +143,8 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
...
@@ -143,6 +143,8 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
const
float
*
src1
,
const
float
*
win
,
int
len
);
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_vector_fmul_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
void
ff_vector_fmul_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
int
len
);
void
ff_vector_fmac_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
void
ff_butterflies_float_neon
(
float
*
v1
,
float
*
v2
,
int
len
);
void
ff_butterflies_float_neon
(
float
*
v1
,
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
...
@@ -305,6 +307,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
...
@@ -305,6 +307,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
vector_fmul
=
ff_vector_fmul_neon
;
c
->
vector_fmul
=
ff_vector_fmul_neon
;
c
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
c
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
c
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
c
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
c
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
baf6b738
...
@@ -587,6 +587,54 @@ NOVFP vdup.32 q8, r2
...
@@ -587,6 +587,54 @@ NOVFP vdup.32 q8, r2
.unreq len
.unreq len
endfunc
endfunc
function ff_vector_fmac_scalar_neon, export=1
VFP len .req r2
VFP acc .req r3
NOVFP len .req r3
NOVFP acc .req r2
VFP vdup.32 q15, d0[0]
NOVFP vdup.32 q15, r2
bics r12, len, #15
mov acc, r0
beq 3f
vld1.32 {q0}, [r1,:128]!
vld1.32 {q8}, [acc,:128]!
vld1.32 {q1}, [r1,:128]!
vld1.32 {q9}, [acc,:128]!
1: vmla.f32 q8, q0, q15
vld1.32 {q2}, [r1,:128]!
vld1.32 {q10}, [acc,:128]!
vmla.f32 q9, q1, q15
vld1.32 {q3}, [r1,:128]!
vld1.32 {q11}, [acc,:128]!
vmla.f32 q10, q2, q15
vst1.32 {q8}, [r0,:128]!
vmla.f32 q11, q3, q15
vst1.32 {q9}, [r0,:128]!
subs r12, r12, #16
beq 2f
vld1.32 {q0}, [r1,:128]!
vld1.32 {q8}, [acc,:128]!
vst1.32 {q10}, [r0,:128]!
vld1.32 {q1}, [r1,:128]!
vld1.32 {q9}, [acc,:128]!
vst1.32 {q11}, [r0,:128]!
b 1b
2: vst1.32 {q10}, [r0,:128]!
vst1.32 {q11}, [r0,:128]!
ands len, len, #15
it eq
bxeq lr
3: vld1.32 {q0}, [r1,:128]!
vld1.32 {q8}, [acc,:128]!
vmla.f32 q8, q0, q15
vst1.32 {q8}, [r0,:128]!
subs len, len, #4
bgt 3b
bx lr
.unreq len
endfunc
function ff_butterflies_float_neon, export=1
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]
vld1.32 {q1},[r1,:128]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment