Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
55aa03b9
Commit
55aa03b9
authored
Jan 20, 2013
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
floatdsp: move vector_fmul_add from dsputil to avfloatdsp.
parent
0881cbf3
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
131 additions
and
108 deletions
+131
-108
aacsbr.c
libavcodec/aacsbr.c
+5
-5
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+0
-3
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+0
-27
dsputil.c
libavcodec/dsputil.c
+0
-7
dsputil.h
libavcodec/dsputil.h
+0
-2
float_altivec.c
libavcodec/ppc/float_altivec.c
+0
-25
wmadec.c
libavcodec/wmadec.c
+4
-4
dsputil.asm
libavcodec/x86/dsputil.asm
+0
-28
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-7
float_dsp_init_neon.c
libavutil/arm/float_dsp_init_neon.c
+4
-0
float_dsp_neon.S
libavutil/arm/float_dsp_neon.S
+27
-0
float_dsp.c
libavutil/float_dsp.c
+9
-0
float_dsp.h
libavutil/float_dsp.h
+18
-0
float_dsp_altivec.c
libavutil/ppc/float_dsp_altivec.c
+24
-0
float_dsp_altivec.h
libavutil/ppc/float_dsp_altivec.h
+4
-0
float_dsp_init.c
libavutil/ppc/float_dsp_init.c
+1
-0
float_dsp.asm
libavutil/x86/float_dsp.asm
+28
-0
float_dsp_init.c
libavutil/x86/float_dsp_init.c
+7
-0
No files found.
libavcodec/aacsbr.c
View file @
55aa03b9
...
@@ -1172,8 +1172,8 @@ static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct,
...
@@ -1172,8 +1172,8 @@ static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct,
* Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
* Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
* (14496-3 sp04 p206)
* (14496-3 sp04 p206)
*/
*/
static
void
sbr_qmf_synthesis
(
DSPContext
*
dsp
,
FFTContext
*
mdct
,
static
void
sbr_qmf_synthesis
(
FFTContext
*
mdct
,
SBRDSPContext
*
sbrdsp
,
AVFloatDSPContext
*
f
dsp
,
SBRDSPContext
*
sbrdsp
,
AVFloatDSPContext
*
dsp
,
float
*
out
,
float
X
[
2
][
38
][
64
],
float
*
out
,
float
X
[
2
][
38
][
64
],
float
mdct_buf
[
2
][
64
],
float
mdct_buf
[
2
][
64
],
float
*
v0
,
int
*
v_off
,
const
unsigned
int
div
)
float
*
v0
,
int
*
v_off
,
const
unsigned
int
div
)
...
@@ -1204,7 +1204,7 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
...
@@ -1204,7 +1204,7 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
mdct
->
imdct_half
(
mdct
,
mdct_buf
[
1
],
X
[
1
][
i
]);
mdct
->
imdct_half
(
mdct
,
mdct_buf
[
1
],
X
[
1
][
i
]);
sbrdsp
->
qmf_deint_bfly
(
v
,
mdct_buf
[
1
],
mdct_buf
[
0
]);
sbrdsp
->
qmf_deint_bfly
(
v
,
mdct_buf
[
1
],
mdct_buf
[
0
]);
}
}
fdsp
->
vector_fmul
(
out
,
v
,
sbr_qmf_window
,
64
>>
div
);
dsp
->
vector_fmul
(
out
,
v
,
sbr_qmf_window
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
192
>>
div
),
sbr_qmf_window
+
(
64
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
192
>>
div
),
sbr_qmf_window
+
(
64
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
256
>>
div
),
sbr_qmf_window
+
(
128
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
256
>>
div
),
sbr_qmf_window
+
(
128
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
448
>>
div
),
sbr_qmf_window
+
(
192
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
448
>>
div
),
sbr_qmf_window
+
(
192
>>
div
),
out
,
64
>>
div
);
...
@@ -1702,13 +1702,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
...
@@ -1702,13 +1702,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
nch
=
2
;
nch
=
2
;
}
}
sbr_qmf_synthesis
(
&
ac
->
dsp
,
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
sbr_qmf_synthesis
(
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
L
,
sbr
->
X
[
0
],
sbr
->
qmf_filter_scratch
,
L
,
sbr
->
X
[
0
],
sbr
->
qmf_filter_scratch
,
sbr
->
data
[
0
].
synthesis_filterbank_samples
,
sbr
->
data
[
0
].
synthesis_filterbank_samples
,
&
sbr
->
data
[
0
].
synthesis_filterbank_samples_offset
,
&
sbr
->
data
[
0
].
synthesis_filterbank_samples_offset
,
downsampled
);
downsampled
);
if
(
nch
==
2
)
if
(
nch
==
2
)
sbr_qmf_synthesis
(
&
ac
->
dsp
,
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
sbr_qmf_synthesis
(
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
R
,
sbr
->
X
[
1
],
sbr
->
qmf_filter_scratch
,
R
,
sbr
->
X
[
1
],
sbr
->
qmf_filter_scratch
,
sbr
->
data
[
1
].
synthesis_filterbank_samples
,
sbr
->
data
[
1
].
synthesis_filterbank_samples
,
&
sbr
->
data
[
1
].
synthesis_filterbank_samples_offset
,
&
sbr
->
data
[
1
].
synthesis_filterbank_samples_offset
,
...
...
libavcodec/arm/dsputil_init_neon.c
View file @
55aa03b9
...
@@ -146,8 +146,6 @@ void ff_butterflies_float_neon(float *v1, float *v2, int len);
...
@@ -146,8 +146,6 @@ void ff_butterflies_float_neon(float *v1, float *v2, int len);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_add_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_clipf_neon
(
float
*
dst
,
const
float
*
src
,
float
min
,
float
max
,
void
ff_vector_clipf_neon
(
float
*
dst
,
const
float
*
src
,
float
min
,
float
max
,
int
len
);
int
len
);
...
@@ -301,7 +299,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
...
@@ -301,7 +299,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_neon
;
c
->
vector_clipf
=
ff_vector_clipf_neon
;
c
->
vector_clipf
=
ff_vector_clipf_neon
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_neon
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
55aa03b9
...
@@ -580,33 +580,6 @@ function ff_vector_fmul_reverse_neon, export=1
...
@@ -580,33 +580,6 @@ function ff_vector_fmul_reverse_neon, export=1
bx lr
bx lr
endfunc
endfunc
function ff_vector_fmul_add_neon, export=1
ldr r12, [sp]
vld1.32 {q0-q1}, [r1,:128]!
vld1.32 {q8-q9}, [r2,:128]!
vld1.32 {q2-q3}, [r3,:128]!
vmul.f32 q10, q0, q8
vmul.f32 q11, q1, q9
1: vadd.f32 q12, q2, q10
vadd.f32 q13, q3, q11
pld [r1, #16]
pld [r2, #16]
pld [r3, #16]
subs r12, r12, #8
beq 2f
vld1.32 {q0}, [r1,:128]!
vld1.32 {q8}, [r2,:128]!
vmul.f32 q10, q0, q8
vld1.32 {q1}, [r1,:128]!
vld1.32 {q9}, [r2,:128]!
vmul.f32 q11, q1, q9
vld1.32 {q2-q3}, [r3,:128]!
vst1.32 {q12-q13},[r0,:128]!
b 1b
2: vst1.32 {q12-q13},[r0,:128]!
bx lr
endfunc
function ff_vector_clipf_neon, export=1
function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1]
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
VFP vdup.32 q0, d0[0]
...
...
libavcodec/dsputil.c
View file @
55aa03b9
...
@@ -2360,12 +2360,6 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, const float *sr
...
@@ -2360,12 +2360,6 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, const float *sr
dst
[
i
]
=
src0
[
i
]
*
src1
[
-
i
];
dst
[
i
]
=
src0
[
i
]
*
src1
[
-
i
];
}
}
static
void
vector_fmul_add_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
){
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
dst
[
i
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
];
}
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
)
int
len
)
{
{
...
@@ -2714,7 +2708,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
...
@@ -2714,7 +2708,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
add_8x8basis
=
add_8x8basis_c
;
c
->
add_8x8basis
=
add_8x8basis_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_add
=
vector_fmul_add_c
;
c
->
vector_clipf
=
vector_clipf_c
;
c
->
vector_clipf
=
vector_clipf_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
...
...
libavcodec/dsputil.h
View file @
55aa03b9
...
@@ -342,8 +342,6 @@ typedef struct DSPContext {
...
@@ -342,8 +342,6 @@ typedef struct DSPContext {
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
/**
/**
...
...
libavcodec/ppc/float_altivec.c
View file @
55aa03b9
...
@@ -51,32 +51,7 @@ static void vector_fmul_reverse_altivec(float *dst, const float *src0,
...
@@ -51,32 +51,7 @@ static void vector_fmul_reverse_altivec(float *dst, const float *src0,
}
}
}
}
static
void
vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
)
{
int
i
;
vector
float
d
,
s0
,
s1
,
s2
,
t0
,
t1
,
edges
;
vector
unsigned
char
align
=
vec_lvsr
(
0
,
dst
),
mask
=
vec_lvsl
(
0
,
dst
);
for
(
i
=
0
;
i
<
len
-
3
;
i
+=
4
)
{
t0
=
vec_ld
(
0
,
dst
+
i
);
t1
=
vec_ld
(
15
,
dst
+
i
);
s0
=
vec_ld
(
0
,
src0
+
i
);
s1
=
vec_ld
(
0
,
src1
+
i
);
s2
=
vec_ld
(
0
,
src2
+
i
);
edges
=
vec_perm
(
t1
,
t0
,
mask
);
d
=
vec_madd
(
s0
,
s1
,
s2
);
t1
=
vec_perm
(
d
,
edges
,
align
);
t0
=
vec_perm
(
edges
,
d
,
align
);
vec_st
(
t1
,
15
,
dst
+
i
);
vec_st
(
t0
,
0
,
dst
+
i
);
}
}
void
ff_float_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
void
ff_float_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
{
c
->
vector_fmul_reverse
=
vector_fmul_reverse_altivec
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_altivec
;
c
->
vector_fmul_add
=
vector_fmul_add_altivec
;
}
}
libavcodec/wmadec.c
View file @
55aa03b9
...
@@ -379,16 +379,16 @@ static void wma_window(WMACodecContext *s, float *out)
...
@@ -379,16 +379,16 @@ static void wma_window(WMACodecContext *s, float *out)
block_len
=
s
->
block_len
;
block_len
=
s
->
block_len
;
bsize
=
s
->
frame_len_bits
-
s
->
block_len_bits
;
bsize
=
s
->
frame_len_bits
-
s
->
block_len_bits
;
s
->
dsp
.
vector_fmul_add
(
out
,
in
,
s
->
windows
[
bsize
],
s
->
f
dsp
.
vector_fmul_add
(
out
,
in
,
s
->
windows
[
bsize
],
out
,
block_len
);
out
,
block_len
);
}
else
{
}
else
{
block_len
=
1
<<
s
->
prev_block_len_bits
;
block_len
=
1
<<
s
->
prev_block_len_bits
;
n
=
(
s
->
block_len
-
block_len
)
/
2
;
n
=
(
s
->
block_len
-
block_len
)
/
2
;
bsize
=
s
->
frame_len_bits
-
s
->
prev_block_len_bits
;
bsize
=
s
->
frame_len_bits
-
s
->
prev_block_len_bits
;
s
->
dsp
.
vector_fmul_add
(
out
+
n
,
in
+
n
,
s
->
windows
[
bsize
],
s
->
f
dsp
.
vector_fmul_add
(
out
+
n
,
in
+
n
,
s
->
windows
[
bsize
],
out
+
n
,
block_len
);
out
+
n
,
block_len
);
memcpy
(
out
+
n
+
block_len
,
in
+
n
+
block_len
,
n
*
sizeof
(
float
));
memcpy
(
out
+
n
+
block_len
,
in
+
n
+
block_len
,
n
*
sizeof
(
float
));
}
}
...
...
libavcodec/x86/dsputil.asm
View file @
55aa03b9
...
@@ -604,34 +604,6 @@ VECTOR_FMUL_REVERSE
...
@@ -604,34 +604,6 @@ VECTOR_FMUL_REVERSE
INIT_YMM
avx
INIT_YMM
avx
VECTOR_FMUL_REVERSE
VECTOR_FMUL_REVERSE
;-----------------------------------------------------------------------------
; vector_fmul_add(float *dst, const float *src0, const float *src1,
; const float *src2, int len)
;-----------------------------------------------------------------------------
%macro
VECTOR_FMUL_ADD
0
cglobal
vector_fmul_add
,
5
,
5
,
2
,
dst
,
src0
,
src1
,
src2
,
len
lea
lenq
,
[
lend
*
4
-
2
*
mmsize
]
ALIGN
16
.
loop
:
mova
m0
,
[
src0q
+
lenq
]
mova
m1
,
[
src0q
+
lenq
+
mmsize
]
mulps
m0
,
m0
,
[
src1q
+
lenq
]
mulps
m1
,
m1
,
[
src1q
+
lenq
+
mmsize
]
addps
m0
,
m0
,
[
src2q
+
lenq
]
addps
m1
,
m1
,
[
src2q
+
lenq
+
mmsize
]
mova
[
dstq
+
lenq
]
,
m0
mova
[
dstq
+
lenq
+
mmsize
]
,
m1
sub
lenq
,
2
*
mmsize
jge
.
loop
REP_RET
%endmacro
INIT_XMM
sse
VECTOR_FMUL_ADD
INIT_YMM
avx
VECTOR_FMUL_ADD
; %1 = aligned/unaligned
; %1 = aligned/unaligned
%macro
BSWAP_LOOPS
1
%macro
BSWAP_LOOPS
1
mov
r3
,
r2
mov
r3
,
r2
...
...
libavcodec/x86/dsputil_mmx.c
View file @
55aa03b9
...
@@ -1853,11 +1853,6 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
...
@@ -1853,11 +1853,6 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
void
ff_vector_fmul_reverse_avx
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_reverse_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_add_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_fmul_add_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_clip_int32_mmx
(
int32_t
*
dst
,
const
int32_t
*
src
,
void
ff_vector_clip_int32_mmx
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
void
ff_vector_clip_int32_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
void
ff_vector_clip_int32_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
...
@@ -2141,7 +2136,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
...
@@ -2141,7 +2136,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#if HAVE_YASM
#if HAVE_YASM
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_sse
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_sse
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_sse
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
#endif
/* HAVE_YASM */
#endif
/* HAVE_YASM */
...
@@ -2295,7 +2289,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
...
@@ -2295,7 +2289,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
}
}
}
}
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_avx
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_avx
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_avx
;
#endif
/* HAVE_AVX_EXTERNAL */
#endif
/* HAVE_AVX_EXTERNAL */
}
}
...
...
libavutil/arm/float_dsp_init_neon.c
View file @
55aa03b9
...
@@ -35,10 +35,14 @@ void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
...
@@ -35,10 +35,14 @@ void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_vector_fmul_add_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
{
{
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_neon
;
}
}
libavutil/arm/float_dsp_neon.S
View file @
55aa03b9
...
@@ -193,3 +193,30 @@ function ff_vector_fmul_window_neon, export=1
...
@@ -193,3 +193,30 @@ function ff_vector_fmul_window_neon, export=1
vst1.32 {d22,d23},[ip,:128], r5
vst1.32 {d22,d23},[ip,:128], r5
pop {r4,r5,pc}
pop {r4,r5,pc}
endfunc
endfunc
function ff_vector_fmul_add_neon, export=1
ldr r12, [sp]
vld1.32 {q0-q1}, [r1,:128]!
vld1.32 {q8-q9}, [r2,:128]!
vld1.32 {q2-q3}, [r3,:128]!
vmul.f32 q10, q0, q8
vmul.f32 q11, q1, q9
1: vadd.f32 q12, q2, q10
vadd.f32 q13, q3, q11
pld [r1, #16]
pld [r2, #16]
pld [r3, #16]
subs r12, r12, #8
beq 2f
vld1.32 {q0}, [r1,:128]!
vld1.32 {q8}, [r2,:128]!
vmul.f32 q10, q0, q8
vld1.32 {q1}, [r1,:128]!
vld1.32 {q9}, [r2,:128]!
vmul.f32 q11, q1, q9
vld1.32 {q2-q3}, [r3,:128]!
vst1.32 {q12-q13},[r0,:128]!
b 1b
2: vst1.32 {q12-q13},[r0,:128]!
bx lr
endfunc
libavutil/float_dsp.c
View file @
55aa03b9
...
@@ -71,6 +71,14 @@ static void vector_fmul_window_c(float *dst, const float *src0,
...
@@ -71,6 +71,14 @@ static void vector_fmul_window_c(float *dst, const float *src0,
}
}
}
}
static
void
vector_fmul_add_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
){
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
dst
[
i
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
];
}
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
{
{
fdsp
->
vector_fmul
=
vector_fmul_c
;
fdsp
->
vector_fmul
=
vector_fmul_c
;
...
@@ -78,6 +86,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
...
@@ -78,6 +86,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
fdsp
->
vector_dmul_scalar
=
vector_dmul_scalar_c
;
fdsp
->
vector_dmul_scalar
=
vector_dmul_scalar_c
;
fdsp
->
vector_fmul_window
=
vector_fmul_window_c
;
fdsp
->
vector_fmul_window
=
vector_fmul_window_c
;
fdsp
->
vector_fmul_add
=
vector_fmul_add_c
;
#if ARCH_ARM
#if ARCH_ARM
ff_float_dsp_init_arm
(
fdsp
);
ff_float_dsp_init_arm
(
fdsp
);
...
...
libavutil/float_dsp.h
View file @
55aa03b9
...
@@ -100,6 +100,24 @@ typedef struct AVFloatDSPContext {
...
@@ -100,6 +100,24 @@ typedef struct AVFloatDSPContext {
*/
*/
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
const
float
*
src1
,
const
float
*
win
,
int
len
);
/**
* Calculate the product of two vectors of floats, add a third vector of
* floats and store the result in a vector of floats.
*
* @param dst output vector
* constraints: 32-byte aligned
* @param src0 first input vector
* constraints: 32-byte aligned
* @param src1 second input vector
* constraints: 32-byte aligned
* @param src2 third input vector
* constraints: 32-byte aligned
* @param len number of elements in the input
* constraints: multiple of 16
*/
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
}
AVFloatDSPContext
;
}
AVFloatDSPContext
;
/**
/**
...
...
libavutil/ppc/float_dsp_altivec.c
View file @
55aa03b9
...
@@ -69,3 +69,27 @@ void ff_vector_fmul_window_altivec(float *dst, const float *src0,
...
@@ -69,3 +69,27 @@ void ff_vector_fmul_window_altivec(float *dst, const float *src0,
vec_st
(
t1
,
j
,
dst
);
vec_st
(
t1
,
j
,
dst
);
}
}
}
}
void
ff_vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
)
{
int
i
;
vector
float
d
,
s0
,
s1
,
s2
,
t0
,
t1
,
edges
;
vector
unsigned
char
align
=
vec_lvsr
(
0
,
dst
),
mask
=
vec_lvsl
(
0
,
dst
);
for
(
i
=
0
;
i
<
len
-
3
;
i
+=
4
)
{
t0
=
vec_ld
(
0
,
dst
+
i
);
t1
=
vec_ld
(
15
,
dst
+
i
);
s0
=
vec_ld
(
0
,
src0
+
i
);
s1
=
vec_ld
(
0
,
src1
+
i
);
s2
=
vec_ld
(
0
,
src2
+
i
);
edges
=
vec_perm
(
t1
,
t0
,
mask
);
d
=
vec_madd
(
s0
,
s1
,
s2
);
t1
=
vec_perm
(
d
,
edges
,
align
);
t0
=
vec_perm
(
edges
,
d
,
align
);
vec_st
(
t1
,
15
,
dst
+
i
);
vec_st
(
t0
,
0
,
dst
+
i
);
}
}
libavutil/ppc/float_dsp_altivec.h
View file @
55aa03b9
...
@@ -28,4 +28,8 @@ extern void ff_vector_fmul_window_altivec(float *dst, const float *src0,
...
@@ -28,4 +28,8 @@ extern void ff_vector_fmul_window_altivec(float *dst, const float *src0,
const
float
*
src1
,
const
float
*
win
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
int
len
);
extern
void
ff_vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
#endif
/* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */
#endif
/* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */
libavutil/ppc/float_dsp_init.c
View file @
55aa03b9
...
@@ -32,6 +32,7 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
...
@@ -32,6 +32,7 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
return
;
return
;
fdsp
->
vector_fmul
=
ff_vector_fmul_altivec
;
fdsp
->
vector_fmul
=
ff_vector_fmul_altivec
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_altivec
;
if
(
!
bit_exact
)
{
if
(
!
bit_exact
)
{
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_altivec
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_altivec
;
...
...
libavutil/x86/float_dsp.asm
View file @
55aa03b9
...
@@ -162,3 +162,31 @@ VECTOR_DMUL_SCALAR
...
@@ -162,3 +162,31 @@ VECTOR_DMUL_SCALAR
INIT_YMM
avx
INIT_YMM
avx
VECTOR_DMUL_SCALAR
VECTOR_DMUL_SCALAR
%endif
%endif
;-----------------------------------------------------------------------------
; vector_fmul_add(float *dst, const float *src0, const float *src1,
; const float *src2, int len)
;-----------------------------------------------------------------------------
%macro
VECTOR_FMUL_ADD
0
cglobal
vector_fmul_add
,
5
,
5
,
2
,
dst
,
src0
,
src1
,
src2
,
len
lea
lenq
,
[
lend
*
4
-
2
*
mmsize
]
ALIGN
16
.
loop
:
mova
m0
,
[
src0q
+
lenq
]
mova
m1
,
[
src0q
+
lenq
+
mmsize
]
mulps
m0
,
m0
,
[
src1q
+
lenq
]
mulps
m1
,
m1
,
[
src1q
+
lenq
+
mmsize
]
addps
m0
,
m0
,
[
src2q
+
lenq
]
addps
m1
,
m1
,
[
src2q
+
lenq
+
mmsize
]
mova
[
dstq
+
lenq
]
,
m0
mova
[
dstq
+
lenq
+
mmsize
]
,
m1
sub
lenq
,
2
*
mmsize
jge
.
loop
REP_RET
%endmacro
INIT_XMM
sse
VECTOR_FMUL_ADD
INIT_YMM
avx
VECTOR_FMUL_ADD
libavutil/x86/float_dsp_init.c
View file @
55aa03b9
...
@@ -41,6 +41,11 @@ extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
...
@@ -41,6 +41,11 @@ extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
extern
void
ff_vector_dmul_scalar_avx
(
double
*
dst
,
const
double
*
src
,
extern
void
ff_vector_dmul_scalar_avx
(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
double
mul
,
int
len
);
void
ff_vector_fmul_add_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_fmul_add_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
#if HAVE_6REGS && HAVE_INLINE_ASM
#if HAVE_6REGS && HAVE_INLINE_ASM
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
const
float
*
src1
,
const
float
*
win
,
...
@@ -123,6 +128,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
...
@@ -123,6 +128,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp
->
vector_fmul
=
ff_vector_fmul_sse
;
fdsp
->
vector_fmul
=
ff_vector_fmul_sse
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_sse
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_sse
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_sse
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_sse
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_sse
;
}
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_sse2
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_sse2
;
...
@@ -131,5 +137,6 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
...
@@ -131,5 +137,6 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp
->
vector_fmul
=
ff_vector_fmul_avx
;
fdsp
->
vector_fmul
=
ff_vector_fmul_avx
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_avx
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_avx
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_avx
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_avx
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_avx
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment