Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
0265aec5
Commit
0265aec5
authored
Jan 09, 2017
by
Matthieu Bouron
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swresample/aarch64: add ff_resample_common_apply_filter_{x4,x8}_{float,s16}_neon
parent
2eaee6e7
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
202 additions
and
2 deletions
+202
-2
Makefile
libswresample/aarch64/Makefile
+4
-2
resample.S
libswresample/aarch64/resample.S
+76
-0
resample_init.c
libswresample/aarch64/resample_init.c
+120
-0
resample.h
libswresample/resample.h
+1
-0
resample_dsp.c
libswresample/resample_dsp.c
+1
-0
No files found.
libswresample/aarch64/Makefile
View file @
0265aec5
OBJS
+=
aarch64/audio_convert_init.o
OBJS
+=
aarch64/audio_convert_init.o
\
aarch64/resample_init.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST)
+=
aarch64/neontest.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST)
+=
aarch64/neontest.o
NEON-OBJS
+=
aarch64/audio_convert_neon.o
NEON-OBJS
+=
aarch64/audio_convert_neon.o
\
aarch64/resample.o
libswresample/aarch64/resample.S
0 → 100644
View file @
0265aec5
/*
* Copyright (c) 2017 Matthieu Bouron <matthieu.bouron gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
/*
 * void ff_resample_common_apply_filter_x4_float_neon(float *acc, const float *src,
 *                                                    const float *filter, int length);
 *
 * Multiply-accumulates src[] with filter[], four floats per iteration, and
 * stores the scalar total to *acc.
 *
 * In:       x0 = acc, x1 = src, x2 = filter, w3 = length
 * Note:     the loop body runs before the counter is tested, so at least four
 *           elements are always read; the C caller in resample_init.c passes a
 *           length that is a multiple of 4 and >= 4.
 * Clobbers: x0, x1, x2 (post-incremented), w3, v0, v16, v17, flags
 */
function ff_resample_common_apply_filter_x4_float_neon, export=1
        movi            v0.4S, #0                       // v0 = four partial sums, cleared
1:      ld1             {v17.4S}, [x2], #16             // v17 = filter[0..3]; filter += 4
        ld1             {v16.4S}, [x1], #16             // v16 = src[0..3];    src    += 4
        subs            w3, w3, #4                      // length -= 4 (flags consumed by b.gt; fmla leaves them alone)
        fmla            v0.4S, v16.4S, v17.4S           // v0 += src[0..3] * filter[0..3]
        b.gt            1b                              // keep going while length > 0
        faddp           v0.4S, v0.4S, v0.4S             // pairwise add: 4 partial sums -> 2
        faddp           v0.4S, v0.4S, v0.4S             // pairwise add: 2 -> 1, total in lane 0
        st1             {v0.S}[0], [x0], #4             // *acc = total
        ret
endfunc
/*
 * void ff_resample_common_apply_filter_x8_float_neon(float *acc, const float *src,
 *                                                    const float *filter, int length);
 *
 * Multiply-accumulates src[] with filter[], eight floats per iteration, and
 * stores the scalar total to *acc.
 *
 * In:       x0 = acc, x1 = src, x2 = filter, w3 = length
 * Note:     the loop body runs before the counter is tested, so at least eight
 *           elements are always read; the C caller in resample_init.c passes a
 *           length that is a multiple of 8 and >= 8.
 * Clobbers: x0, x1, x2 (post-incremented), w3, v0-v4, flags
 */
function ff_resample_common_apply_filter_x8_float_neon, export=1
        movi            v0.4S, #0                       // accumulator
1:      ld1             {v1.4S}, [x1], #16              // src[0..3]
        ld1             {v2.4S}, [x2], #16              // filter[0..3]
        ld1             {v3.4S}, [x1], #16              // src[4..7]
        ld1             {v4.4S}, [x2], #16              // filter[4..7]
        fmla            v0.4S, v1.4S, v2.4S             // accumulator += src[0..3] * filter[0..3]
        fmla            v0.4S, v3.4S, v4.4S             // accumulator += src[4..7] * filter[4..7]
        subs            w3, w3, #8                      // filter_length -= 8
        b.gt            1b                              // loop while filter_length > 0
        faddp           v0.4S, v0.4S, v0.4S             // pairwise add: 4 partial sums -> 2
        faddp           v0.4S, v0.4S, v0.4S             // pairwise add: 2 -> 1, total in lane 0
        st1             {v0.S}[0], [x0], #4             // write accumulator to *acc
        ret                                             // required: without it execution falls through into the next function
endfunc
/*
 * void ff_resample_common_apply_filter_x4_s16_neon(int32_t *acc, const int16_t *src,
 *                                                  const int16_t *filter, int length);
 *
 * Multiply-accumulates src[] with filter[], four 16-bit samples per iteration,
 * widening each product to 32 bits, and stores the 32-bit total to *acc.
 *
 * In:       x0 = acc, x1 = src, x2 = filter, w3 = length
 * Note:     the loop body runs before the counter is tested, so at least four
 *           elements are always read; the C caller in resample_init.c passes a
 *           length that is a multiple of 4 and >= 4.
 * Clobbers: x0, x1, x2 (post-incremented), w3, v0, v16, v17, flags
 */
function ff_resample_common_apply_filter_x4_s16_neon, export=1
        movi            v0.4S, #0                       // v0 = four 32-bit partial sums, cleared
1:      ld1             {v17.4H}, [x2], #8              // v17 = filter[0..3]; filter += 4
        ld1             {v16.4H}, [x1], #8              // v16 = src[0..3];    src    += 4
        subs            w3, w3, #4                      // length -= 4 (flags consumed by b.gt; smlal leaves them alone)
        smlal           v0.4S, v16.4H, v17.4H           // v0 += (int32) src[0..3] * filter[0..3]
        b.gt            1b                              // keep going while length > 0
        addp            v0.4S, v0.4S, v0.4S             // pairwise add: 4 partial sums -> 2
        addp            v0.4S, v0.4S, v0.4S             // pairwise add: 2 -> 1, total in lane 0
        st1             {v0.S}[0], [x0], #4             // *acc = total
        ret
endfunc
/*
 * void ff_resample_common_apply_filter_x8_s16_neon(int32_t *acc, const int16_t *src,
 *                                                  const int16_t *filter, int length);
 *
 * Multiply-accumulates src[] with filter[], eight 16-bit samples per iteration,
 * widening each product to 32 bits, and stores the 32-bit total to *acc.
 *
 * In:       x0 = acc, x1 = src, x2 = filter, w3 = length
 * Note:     the loop body runs before the counter is tested, so at least eight
 *           elements are always read; the C caller in resample_init.c passes a
 *           length that is a multiple of 8 and >= 8.
 * Clobbers: x0, x1, x2 (post-incremented), w3, v0, v16, v17, flags
 */
function ff_resample_common_apply_filter_x8_s16_neon, export=1
        movi            v0.4S, #0                       // v0 = four 32-bit partial sums, cleared
1:      ld1             {v17.8H}, [x2], #16             // v17 = filter[0..7]; filter += 8
        ld1             {v16.8H}, [x1], #16             // v16 = src[0..7];    src    += 8
        subs            w3, w3, #8                      // length -= 8 (flags consumed by b.gt; smlal/smlal2 leave them alone)
        smlal           v0.4S, v16.4H, v17.4H           // v0 += (int32) src[0..3] * filter[0..3]  (low halves)
        smlal2          v0.4S, v16.8H, v17.8H           // v0 += (int32) src[4..7] * filter[4..7]  (high halves)
        b.gt            1b                              // keep going while length > 0
        addp            v0.4S, v0.4S, v0.4S             // pairwise add: 4 partial sums -> 2
        addp            v0.4S, v0.4S, v0.4S             // pairwise add: 2 -> 1, total in lane 0
        st1             {v0.S}[0], [x0], #4             // *acc = total
        ret
endfunc
libswresample/aarch64/resample_init.c
0 → 100644
View file @
0265aec5
/*
* Audio resampling
*
* Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/avassert.h"
#include "libavutil/aarch64/cpu.h"
#include "libswresample/resample.h"
/*
 * DECLARE_RESAMPLE_COMMON_TEMPLATE(TYPE, DELEM, FELEM, FELEM2, OUT)
 *
 * Expands to:
 *   - prototypes for the two assembly kernels
 *     ff_resample_common_apply_filter_{x4,x8}_<TYPE>_neon(), and
 *   - static int ff_resample_common_<TYPE>_neon(c, dest, source, n, update_ctx).
 *
 * TYPE   suffix used to build the function names
 * DELEM  element type of the source and destination buffers
 * FELEM  element type of the filter bank
 * FELEM2 accumulator type
 * OUT    macro OUT(d, v) that stores accumulator v into destination element d
 *
 * The generated function produces n output samples. For each one it selects
 * the filter row for the current phase (c->filter_bank + c->filter_alloc * index),
 * lets the x8 kernel handle the largest multiple-of-8 prefix of the filter
 * (or, when the filter is shorter than 8 taps, the x4 kernel handle the
 * multiple-of-4 prefix), finishes the remaining taps in C, and stores the
 * result through OUT(). It then advances frac/index by dst_incr_mod/dst_incr_div
 * and moves sample_index forward once for every phase_count that index exceeds.
 *
 * When update_ctx is non-zero the final frac and index are written back to c.
 * Returns sample_index, the number of whole source samples stepped over.
 *
 * The macro body deliberately ends without a trailing backslash so that the
 * directive on the following line is not spliced into it.
 */
#define DECLARE_RESAMPLE_COMMON_TEMPLATE(TYPE, DELEM, FELEM, FELEM2, OUT)                         \
                                                                                                  \
void ff_resample_common_apply_filter_x4_##TYPE##_neon(FELEM2 *acc, const DELEM *src,              \
                                                      const FELEM *filter, int length);           \
                                                                                                  \
void ff_resample_common_apply_filter_x8_##TYPE##_neon(FELEM2 *acc, const DELEM *src,              \
                                                      const FELEM *filter, int length);           \
                                                                                                  \
static int ff_resample_common_##TYPE##_neon(ResampleContext *c, void *dest, const void *source,   \
                                            int n, int update_ctx)                                \
{                                                                                                 \
    DELEM *dst = dest;                                                                            \
    const DELEM *src = source;                                                                    \
    int dst_index;                                                                                \
    int index = c->index;                                                                         \
    int frac  = c->frac;                                                                          \
    int sample_index = 0;                                                                         \
    /* Largest prefixes of the filter that the x4 / x8 kernels can consume. */                    \
    int x4_aligned_filter_length = c->filter_length & ~3;                                         \
    int x8_aligned_filter_length = c->filter_length & ~7;                                         \
                                                                                                  \
    /* Normalise index into [0, phase_count), stepping over whole source samples. */              \
    while (index >= c->phase_count) {                                                             \
        sample_index++;                                                                           \
        index -= c->phase_count;                                                                  \
    }                                                                                             \
                                                                                                  \
    for (dst_index = 0; dst_index < n; dst_index++) {                                             \
        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;                     \
                                                                                                  \
        FELEM2 val = 0;                                                                           \
        int i = 0;                                                                                \
        if (x8_aligned_filter_length >= 8) {                                                      \
            ff_resample_common_apply_filter_x8_##TYPE##_neon(&val, &src[sample_index],            \
                                                             filter, x8_aligned_filter_length);   \
            i += x8_aligned_filter_length;                                                        \
                                                                                                  \
        } else if (x4_aligned_filter_length >= 4) {                                               \
            ff_resample_common_apply_filter_x4_##TYPE##_neon(&val, &src[sample_index],            \
                                                             filter, x4_aligned_filter_length);   \
            i += x4_aligned_filter_length;                                                        \
        }                                                                                         \
        /* Scalar tail for the taps the kernel did not cover. */                                  \
        for (; i < c->filter_length; i++) {                                                       \
            val += src[sample_index + i] * (FELEM2)filter[i];                                     \
        }                                                                                         \
        OUT(dst[dst_index], val);                                                                 \
                                                                                                  \
        frac  += c->dst_incr_mod;                                                                 \
        index += c->dst_incr_div;                                                                 \
        if (frac >= c->src_incr) {                                                                \
            frac -= c->src_incr;                                                                  \
            index++;                                                                              \
        }                                                                                         \
                                                                                                  \
        while (index >= c->phase_count) {                                                         \
            sample_index++;                                                                       \
            index -= c->phase_count;                                                              \
        }                                                                                         \
    }                                                                                             \
                                                                                                  \
    if (update_ctx) {                                                                             \
        c->frac  = frac;                                                                          \
        c->index = index;                                                                         \
    }                                                                                             \
                                                                                                  \
    return sample_index;                                                                          \
}
#define OUT(d, v) d = v /* float variant: the accumulator is stored unchanged */
DECLARE_RESAMPLE_COMMON_TEMPLATE(float, float, float, float, OUT)
#undef OUT

/* s16 variant: add 1<<14, shift right by 15, then store av_clip_int16() of the result */
#define OUT(d, v) (v) = ((v) + (1<<(14)))>>15; (d) = av_clip_int16(v)
DECLARE_RESAMPLE_COMMON_TEMPLATE(s16, int16_t, int16_t, int32_t, OUT)
#undef OUT
/*
 * Install the NEON resample_common implementation that matches c->format.
 *
 * Nothing is changed when the CPU flags do not report NEON, or when the
 * format is neither AV_SAMPLE_FMT_FLTP nor AV_SAMPLE_FMT_S16P.
 */
av_cold void swri_resample_dsp_aarch64_init(ResampleContext *c)
{
    int flags = av_get_cpu_flags();

    if (have_neon(flags)) {
        if (c->format == AV_SAMPLE_FMT_FLTP)
            c->dsp.resample_common = ff_resample_common_float_neon;
        else if (c->format == AV_SAMPLE_FMT_S16P)
            c->dsp.resample_common = ff_resample_common_s16_neon;
    }
}
libswresample/resample.h
View file @
0265aec5
...
@@ -63,5 +63,6 @@ typedef struct ResampleContext {
...
@@ -63,5 +63,6 @@ typedef struct ResampleContext {
void
swri_resample_dsp_init
(
ResampleContext
*
c
);
void
swri_resample_dsp_init
(
ResampleContext
*
c
);
void
swri_resample_dsp_x86_init
(
ResampleContext
*
c
);
void
swri_resample_dsp_x86_init
(
ResampleContext
*
c
);
void
swri_resample_dsp_arm_init
(
ResampleContext
*
c
);
void
swri_resample_dsp_arm_init
(
ResampleContext
*
c
);
void
swri_resample_dsp_aarch64_init
(
ResampleContext
*
c
);
#endif
/* SWRESAMPLE_RESAMPLE_H */
#endif
/* SWRESAMPLE_RESAMPLE_H */
libswresample/resample_dsp.c
View file @
0265aec5
...
@@ -70,4 +70,5 @@ void swri_resample_dsp_init(ResampleContext *c)
...
@@ -70,4 +70,5 @@ void swri_resample_dsp_init(ResampleContext *c)
if
(
ARCH_X86
)
swri_resample_dsp_x86_init
(
c
);
if
(
ARCH_X86
)
swri_resample_dsp_x86_init
(
c
);
else
if
(
ARCH_ARM
)
swri_resample_dsp_arm_init
(
c
);
else
if
(
ARCH_ARM
)
swri_resample_dsp_arm_init
(
c
);
else
if
(
ARCH_AARCH64
)
swri_resample_dsp_aarch64_init
(
c
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment