Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
284ea790
Commit
284ea790
authored
Sep 22, 2012
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
parent
1e276553
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
93 additions
and
83 deletions
+93
-83
aacdec.c
libavcodec/aacdec.c
+6
-6
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+0
-3
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+0
-38
dsputil.c
libavcodec/dsputil.c
+0
-9
dsputil.h
libavcodec/dsputil.h
+0
-10
libmp3lame.c
libavcodec/libmp3lame.c
+7
-7
wmaenc.c
libavcodec/wmaenc.c
+1
-1
wmaprodec.c
libavcodec/wmaprodec.c
+13
-9
float_dsp_init_neon.c
libavutil/arm/float_dsp_init_neon.c
+4
-0
float_dsp_neon.S
libavutil/arm/float_dsp_neon.S
+38
-0
float_dsp.c
libavutil/float_dsp.c
+9
-0
float_dsp.h
libavutil/float_dsp.h
+15
-0
No files found.
libavcodec/aacdec.c
View file @
284ea790
...
@@ -1360,7 +1360,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
...
@@ -1360,7 +1360,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
band_energy
=
ac
->
dsp
.
scalarproduct_float
(
cfo
,
cfo
,
off_len
);
band_energy
=
ac
->
dsp
.
scalarproduct_float
(
cfo
,
cfo
,
off_len
);
scale
=
sf
[
idx
]
/
sqrtf
(
band_energy
);
scale
=
sf
[
idx
]
/
sqrtf
(
band_energy
);
ac
->
dsp
.
vector_fmul_scalar
(
cfo
,
cfo
,
scale
,
off_len
);
ac
->
f
dsp
.
vector_fmul_scalar
(
cfo
,
cfo
,
scale
,
off_len
);
}
}
}
else
{
}
else
{
const
float
*
vq
=
ff_aac_codebook_vector_vals
[
cbt_m1
];
const
float
*
vq
=
ff_aac_codebook_vector_vals
[
cbt_m1
];
...
@@ -1506,7 +1506,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
...
@@ -1506,7 +1506,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
}
}
}
while
(
len
-=
2
);
}
while
(
len
-=
2
);
ac
->
dsp
.
vector_fmul_scalar
(
cfo
,
cfo
,
sf
[
idx
],
off_len
);
ac
->
f
dsp
.
vector_fmul_scalar
(
cfo
,
cfo
,
sf
[
idx
],
off_len
);
}
}
}
}
...
@@ -1730,10 +1730,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p
...
@@ -1730,10 +1730,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p
c
*=
1
-
2
*
cpe
->
ms_mask
[
idx
];
c
*=
1
-
2
*
cpe
->
ms_mask
[
idx
];
scale
=
c
*
sce1
->
sf
[
idx
];
scale
=
c
*
sce1
->
sf
[
idx
];
for
(
group
=
0
;
group
<
ics
->
group_len
[
g
];
group
++
)
for
(
group
=
0
;
group
<
ics
->
group_len
[
g
];
group
++
)
ac
->
dsp
.
vector_fmul_scalar
(
coef1
+
group
*
128
+
offsets
[
i
],
ac
->
f
dsp
.
vector_fmul_scalar
(
coef1
+
group
*
128
+
offsets
[
i
],
coef0
+
group
*
128
+
offsets
[
i
],
coef0
+
group
*
128
+
offsets
[
i
],
scale
,
scale
,
offsets
[
i
+
1
]
-
offsets
[
i
]);
offsets
[
i
+
1
]
-
offsets
[
i
]);
}
}
}
else
{
}
else
{
int
bt_run_end
=
sce1
->
band_type_run_end
[
idx
];
int
bt_run_end
=
sce1
->
band_type_run_end
[
idx
];
...
...
libavcodec/arm/dsputil_init_neon.c
View file @
284ea790
...
@@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
...
@@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_vector_fmul_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
void
ff_butterflies_float_neon
(
float
*
v1
,
float
*
v2
,
int
len
);
void
ff_butterflies_float_neon
(
float
*
v1
,
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
...
@@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
...
@@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
}
}
c
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
c
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
c
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
284ea790
...
@@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1
...
@@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1
endfunc
endfunc
#endif
#endif
/*
 * ff_vector_fmul_scalar_neon: multiply a vector of floats by a scalar,
 * i.e. dst[i] = src[i] * mul.
 *
 * r0 is used as the store pointer (dst) and r1 as the load pointer (src).
 * The scalar and the length arrive in different registers depending on
 * which of the VFP/NOVFP prefixed lines is assembled (d0[0] and r2, or
 * r2 and r3). Those macros are defined elsewhere; presumably they select
 * the hard-float vs. soft-float calling convention (TODO confirm).
 *
 * Every load/store carries a :128 alignment qualifier, so both pointers
 * must be 16-byte aligned. Elements are handled four at a time, so len is
 * expected to be a multiple of 4.
 */
function ff_vector_fmul_scalar_neon, export=1
VFP len .req r2
NOVFP len .req r3
/* Broadcast the scalar multiplier into all four lanes of q8. */
VFP vdup.32 q8, d0[0]
NOVFP vdup.32 q8, r2
/*
 * r12 = len with the low four bits cleared (number of elements covered by
 * whole groups of 16). If that is zero, skip the unrolled loop entirely.
 */
bics r12, len, #15
beq 3f
/* Preload the first 8 floats before entering the unrolled loop. */
vld1.32 {q0},[r1,:128]!
vld1.32 {q1},[r1,:128]!
/*
 * Unrolled loop: each pass multiplies 16 floats (q0-q3). The loads of
 * q2/q3, and of q0/q1 for the following pass, are interleaved with the
 * multiplies and stores.
 */
1: vmul.f32 q0, q0, q8
vld1.32 {q2},[r1,:128]!
vmul.f32 q1, q1, q8
vld1.32 {q3},[r1,:128]!
vmul.f32 q2, q2, q8
vst1.32 {q0},[r0,:128]!
vmul.f32 q3, q3, q8
vst1.32 {q1},[r0,:128]!
/* One group of 16 consumed; leave the loop when none remain. */
subs r12, r12, #16
beq 2f
vld1.32 {q0},[r1,:128]!
vst1.32 {q2},[r0,:128]!
vld1.32 {q1},[r1,:128]!
vst1.32 {q3},[r0,:128]!
b 1b
/* Loop exit: store the q2/q3 results still pending from the last pass. */
2: vst1.32 {q2},[r0,:128]!
vst1.32 {q3},[r0,:128]!
/* Keep only the remainder (len mod 16); return if nothing is left. */
ands len, len, #15
it eq
bxeq lr
/* Tail loop: 4 floats per pass until the remaining count is <= 0. */
3: vld1.32 {q0},[r1,:128]!
vmul.f32 q0, q0, q8
vst1.32 {q0},[r0,:128]!
subs len, len, #4
bgt 3b
bx lr
.unreq len
endfunc
function ff_butterflies_float_neon, export=1
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]
vld1.32 {q1},[r1,:128]
...
...
libavcodec/dsputil.c
View file @
284ea790
...
@@ -2392,14 +2392,6 @@ static void vector_fmul_window_c(float *dst, const float *src0,
...
@@ -2392,14 +2392,6 @@ static void vector_fmul_window_c(float *dst, const float *src0,
}
}
}
}
/**
 * Plain C implementation of the vector-by-scalar multiply:
 * writes src[k] * mul to dst[k] for every k in [0, len).
 * Nothing is written when len is zero or negative.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int pos = 0;

    while (pos < len) {
        dst[pos] = src[pos] * mul;
        pos++;
    }
}
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
)
int
len
)
{
{
...
@@ -2869,7 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
...
@@ -2869,7 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
scalarproduct_float
=
ff_scalarproduct_float_c
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
butterflies_float_interleave
=
butterflies_float_interleave_c
;
c
->
butterflies_float_interleave
=
butterflies_float_interleave_c
;
c
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
c
->
shrink
[
0
]
=
av_image_copy_plane
;
c
->
shrink
[
0
]
=
av_image_copy_plane
;
c
->
shrink
[
1
]
=
ff_shrink22
;
c
->
shrink
[
1
]
=
ff_shrink22
;
...
...
libavcodec/dsputil.h
View file @
284ea790
...
@@ -382,16 +382,6 @@ typedef struct DSPContext {
...
@@ -382,16 +382,6 @@ typedef struct DSPContext {
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
/**
* Multiply a vector of floats by a scalar float. Source and
* destination vectors must overlap exactly or not at all.
* @param dst result vector, 16-byte aligned
* @param src input vector, 16-byte aligned
* @param mul scalar value
* @param len length of vector, multiple of 4
*/
void
(
*
vector_fmul_scalar
)(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
/**
/**
* Calculate the scalar product of two vectors of floats.
* Calculate the scalar product of two vectors of floats.
* @param v1 first vector, 16-byte aligned
* @param v1 first vector, 16-byte aligned
...
...
libavcodec/libmp3lame.c
View file @
284ea790
...
@@ -28,12 +28,12 @@
...
@@ -28,12 +28,12 @@
#include "libavutil/channel_layout.h"
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "libavutil/common.h"
#include "libavutil/float_dsp.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "avcodec.h"
#include "audio_frame_queue.h"
#include "audio_frame_queue.h"
#include "dsputil.h"
#include "internal.h"
#include "internal.h"
#include "mpegaudio.h"
#include "mpegaudio.h"
#include "mpegaudiodecheader.h"
#include "mpegaudiodecheader.h"
...
@@ -50,7 +50,7 @@ typedef struct LAMEContext {
...
@@ -50,7 +50,7 @@ typedef struct LAMEContext {
int
reservoir
;
int
reservoir
;
float
*
samples_flt
[
2
];
float
*
samples_flt
[
2
];
AudioFrameQueue
afq
;
AudioFrameQueue
afq
;
DSPContext
dsp
;
AVFloatDSPContext
f
dsp
;
}
LAMEContext
;
}
LAMEContext
;
...
@@ -167,7 +167,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
...
@@ -167,7 +167,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
error
;
goto
error
;
ff_dsputil_init
(
&
s
->
dsp
,
avctx
);
avpriv_float_dsp_init
(
&
s
->
fdsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
return
0
;
return
0
;
error:
error:
...
@@ -205,10 +205,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
...
@@ -205,10 +205,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
return
AVERROR
(
EINVAL
);
return
AVERROR
(
EINVAL
);
}
}
for
(
ch
=
0
;
ch
<
avctx
->
channels
;
ch
++
)
{
for
(
ch
=
0
;
ch
<
avctx
->
channels
;
ch
++
)
{
s
->
dsp
.
vector_fmul_scalar
(
s
->
samples_flt
[
ch
],
s
->
f
dsp
.
vector_fmul_scalar
(
s
->
samples_flt
[
ch
],
(
const
float
*
)
frame
->
data
[
ch
],
(
const
float
*
)
frame
->
data
[
ch
],
32768
.
0
f
,
32768
.
0
f
,
FFALIGN
(
frame
->
nb_samples
,
8
));
FFALIGN
(
frame
->
nb_samples
,
8
));
}
}
ENCODE_BUFFER
(
lame_encode_buffer_float
,
float
,
s
->
samples_flt
);
ENCODE_BUFFER
(
lame_encode_buffer_float
,
float
,
s
->
samples_flt
);
break
;
break
;
...
...
libavcodec/wmaenc.c
View file @
284ea790
...
@@ -111,7 +111,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame)
...
@@ -111,7 +111,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame)
for
(
ch
=
0
;
ch
<
avctx
->
channels
;
ch
++
)
{
for
(
ch
=
0
;
ch
<
avctx
->
channels
;
ch
++
)
{
memcpy
(
s
->
output
,
s
->
frame_out
[
ch
],
window_len
*
sizeof
(
*
s
->
output
));
memcpy
(
s
->
output
,
s
->
frame_out
[
ch
],
window_len
*
sizeof
(
*
s
->
output
));
s
->
dsp
.
vector_fmul_scalar
(
s
->
frame_out
[
ch
],
audio
[
ch
],
n
,
len
);
s
->
f
dsp
.
vector_fmul_scalar
(
s
->
frame_out
[
ch
],
audio
[
ch
],
n
,
len
);
s
->
dsp
.
vector_fmul_reverse
(
&
s
->
output
[
window_len
],
s
->
frame_out
[
ch
],
win
,
len
);
s
->
dsp
.
vector_fmul_reverse
(
&
s
->
output
[
window_len
],
s
->
frame_out
[
ch
],
win
,
len
);
s
->
fdsp
.
vector_fmul
(
s
->
frame_out
[
ch
],
s
->
frame_out
[
ch
],
win
,
len
);
s
->
fdsp
.
vector_fmul
(
s
->
frame_out
[
ch
],
s
->
frame_out
[
ch
],
win
,
len
);
mdct
->
mdct_calc
(
mdct
,
s
->
coefs
[
ch
],
s
->
output
);
mdct
->
mdct_calc
(
mdct
,
s
->
coefs
[
ch
],
s
->
output
);
...
...
libavcodec/wmaprodec.c
View file @
284ea790
...
@@ -86,6 +86,7 @@
...
@@ -86,6 +86,7 @@
* subframe in order to reconstruct the output samples.
* subframe in order to reconstruct the output samples.
*/
*/
#include "libavutil/float_dsp.h"
#include "libavutil/intfloat.h"
#include "libavutil/intfloat.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
#include "avcodec.h"
...
@@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx {
...
@@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx {
AVCodecContext
*
avctx
;
///< codec context for av_log
AVCodecContext
*
avctx
;
///< codec context for av_log
AVFrame
frame
;
///< AVFrame for decoded output
AVFrame
frame
;
///< AVFrame for decoded output
DSPContext
dsp
;
///< accelerated DSP functions
DSPContext
dsp
;
///< accelerated DSP functions
AVFloatDSPContext
fdsp
;
uint8_t
frame_data
[
MAX_FRAMESIZE
+
uint8_t
frame_data
[
MAX_FRAMESIZE
+
FF_INPUT_BUFFER_PADDING_SIZE
];
///< compressed frame data
FF_INPUT_BUFFER_PADDING_SIZE
];
///< compressed frame data
PutBitContext
pb
;
///< context for filling the frame_data buffer
PutBitContext
pb
;
///< context for filling the frame_data buffer
...
@@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
...
@@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
s
->
avctx
=
avctx
;
s
->
avctx
=
avctx
;
ff_dsputil_init
(
&
s
->
dsp
,
avctx
);
ff_dsputil_init
(
&
s
->
dsp
,
avctx
);
avpriv_float_dsp_init
(
&
s
->
fdsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
init_put_bits
(
&
s
->
pb
,
s
->
frame_data
,
MAX_FRAMESIZE
);
init_put_bits
(
&
s
->
pb
,
s
->
frame_data
,
MAX_FRAMESIZE
);
avctx
->
sample_fmt
=
AV_SAMPLE_FMT_FLTP
;
avctx
->
sample_fmt
=
AV_SAMPLE_FMT_FLTP
;
...
@@ -1008,12 +1012,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
...
@@ -1008,12 +1012,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
}
}
}
else
if
(
s
->
avctx
->
channels
==
2
)
{
}
else
if
(
s
->
avctx
->
channels
==
2
)
{
int
len
=
FFMIN
(
sfb
[
1
],
s
->
subframe_len
)
-
sfb
[
0
];
int
len
=
FFMIN
(
sfb
[
1
],
s
->
subframe_len
)
-
sfb
[
0
];
s
->
dsp
.
vector_fmul_scalar
(
ch_data
[
0
]
+
sfb
[
0
],
s
->
f
dsp
.
vector_fmul_scalar
(
ch_data
[
0
]
+
sfb
[
0
],
ch_data
[
0
]
+
sfb
[
0
],
ch_data
[
0
]
+
sfb
[
0
],
181
.
0
/
128
,
len
);
181
.
0
/
128
,
len
);
s
->
dsp
.
vector_fmul_scalar
(
ch_data
[
1
]
+
sfb
[
0
],
s
->
f
dsp
.
vector_fmul_scalar
(
ch_data
[
1
]
+
sfb
[
0
],
ch_data
[
1
]
+
sfb
[
0
],
ch_data
[
1
]
+
sfb
[
0
],
181
.
0
/
128
,
len
);
181
.
0
/
128
,
len
);
}
}
}
}
}
}
...
@@ -1259,9 +1263,9 @@ static int decode_subframe(WMAProDecodeCtx *s)
...
@@ -1259,9 +1263,9 @@ static int decode_subframe(WMAProDecodeCtx *s)
s
->
channel
[
c
].
scale_factor_step
;
s
->
channel
[
c
].
scale_factor_step
;
const
float
quant
=
pow
(
10
.
0
,
exp
/
20
.
0
);
const
float
quant
=
pow
(
10
.
0
,
exp
/
20
.
0
);
int
start
=
s
->
cur_sfb_offsets
[
b
];
int
start
=
s
->
cur_sfb_offsets
[
b
];
s
->
dsp
.
vector_fmul_scalar
(
s
->
tmp
+
start
,
s
->
f
dsp
.
vector_fmul_scalar
(
s
->
tmp
+
start
,
s
->
channel
[
c
].
coeffs
+
start
,
s
->
channel
[
c
].
coeffs
+
start
,
quant
,
end
-
start
);
quant
,
end
-
start
);
}
}
/** apply imdct (imdct_half == DCTIV with reverse) */
/** apply imdct (imdct_half == DCTIV with reverse) */
...
...
libavutil/arm/float_dsp_init_neon.c
View file @
284ea790
...
@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
...
@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
void
ff_vector_fmac_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
void
ff_vector_fmac_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
int
len
);
void
ff_vector_fmul_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
{
{
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
}
}
libavutil/arm/float_dsp_neon.S
View file @
284ea790
...
@@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2
...
@@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2
bx lr
bx lr
.unreq len
.unreq len
endfunc
endfunc
/*
 * ff_vector_fmul_scalar_neon: multiply a vector of floats by a scalar,
 * i.e. dst[i] = src[i] * mul.
 *
 * r0 is used as the store pointer (dst) and r1 as the load pointer (src).
 * The scalar and the length arrive in different registers depending on
 * which of the VFP/NOVFP prefixed lines is assembled (d0[0] and r2, or
 * r2 and r3). Those macros are defined elsewhere; presumably they select
 * the hard-float vs. soft-float calling convention (TODO confirm).
 *
 * Every load/store carries a :128 alignment qualifier, so both pointers
 * must be 16-byte aligned. Elements are handled four at a time, so len is
 * expected to be a multiple of 4.
 */
function ff_vector_fmul_scalar_neon, export=1
VFP len .req r2
NOVFP len .req r3
/* Broadcast the scalar multiplier into all four lanes of q8. */
VFP vdup.32 q8, d0[0]
NOVFP vdup.32 q8, r2
/*
 * r12 = len with the low four bits cleared (number of elements covered by
 * whole groups of 16). If that is zero, skip the unrolled loop entirely.
 */
bics r12, len, #15
beq 3f
/* Preload the first 8 floats before entering the unrolled loop. */
vld1.32 {q0},[r1,:128]!
vld1.32 {q1},[r1,:128]!
/*
 * Unrolled loop: each pass multiplies 16 floats (q0-q3). The loads of
 * q2/q3, and of q0/q1 for the following pass, are interleaved with the
 * multiplies and stores.
 */
1: vmul.f32 q0, q0, q8
vld1.32 {q2},[r1,:128]!
vmul.f32 q1, q1, q8
vld1.32 {q3},[r1,:128]!
vmul.f32 q2, q2, q8
vst1.32 {q0},[r0,:128]!
vmul.f32 q3, q3, q8
vst1.32 {q1},[r0,:128]!
/* One group of 16 consumed; leave the loop when none remain. */
subs r12, r12, #16
beq 2f
vld1.32 {q0},[r1,:128]!
vst1.32 {q2},[r0,:128]!
vld1.32 {q1},[r1,:128]!
vst1.32 {q3},[r0,:128]!
b 1b
/* Loop exit: store the q2/q3 results still pending from the last pass. */
2: vst1.32 {q2},[r0,:128]!
vst1.32 {q3},[r0,:128]!
/* Keep only the remainder (len mod 16); return if nothing is left. */
ands len, len, #15
it eq
bxeq lr
/* Tail loop: 4 floats per pass until the remaining count is <= 0. */
3: vld1.32 {q0},[r1,:128]!
vmul.f32 q0, q0, q8
vst1.32 {q0},[r0,:128]!
subs len, len, #4
bgt 3b
bx lr
.unreq len
endfunc
libavutil/float_dsp.c
View file @
284ea790
...
@@ -36,10 +36,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
...
@@ -36,10 +36,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
dst
[
i
]
+=
src
[
i
]
*
mul
;
dst
[
i
]
+=
src
[
i
]
*
mul
;
}
}
/**
 * Plain C implementation of the vector-by-scalar multiply:
 * writes src[k] * mul to dst[k] for every k in [0, len).
 * Nothing is written when len is zero or negative.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int pos = 0;

    while (pos < len) {
        dst[pos] = src[pos] * mul;
        pos++;
    }
}
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
{
{
fdsp
->
vector_fmul
=
vector_fmul_c
;
fdsp
->
vector_fmul
=
vector_fmul_c
;
fdsp
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
fdsp
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
#if ARCH_ARM
#if ARCH_ARM
ff_float_dsp_init_arm
(
fdsp
);
ff_float_dsp_init_arm
(
fdsp
);
...
...
libavutil/float_dsp.h
View file @
284ea790
...
@@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext {
...
@@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext {
*/
*/
void
(
*
vector_fmac_scalar
)(
float
*
dst
,
const
float
*
src
,
float
mul
,
void
(
*
vector_fmac_scalar
)(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
int
len
);
/**
* Multiply a vector of floats by a scalar float. Source and
* destination vectors must overlap exactly or not at all.
*
* @param dst result vector
* constraints: 16-byte aligned
* @param src input vector
* constraints: 16-byte aligned
* @param mul scalar value
* @param len length of vector
* constraints: multiple of 4
*/
void
(
*
vector_fmul_scalar
)(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
}
AVFloatDSPContext
;
}
AVFloatDSPContext
;
/**
/**
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment