Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
f1efbca5
Commit
f1efbca5
authored
Mar 11, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ac3enc: add SIMD-optimized shifting functions for use with the fixed-point AC3 encoder.
parent
323e6fea
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
120 additions
and
37 deletions
+120
-37
ac3dsp.c
libavcodec/ac3dsp.c
+37
-0
ac3dsp.h
libavcodec/ac3dsp.h
+22
-0
ac3enc_fixed.c
libavcodec/ac3enc_fixed.c
+4
-37
ac3dsp.asm
libavcodec/x86/ac3dsp.asm
+45
-0
ac3dsp_mmx.c
libavcodec/x86/ac3dsp_mmx.c
+12
-0
No files found.
libavcodec/ac3dsp.c
View file @
f1efbca5
...
@@ -50,10 +50,47 @@ static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
...
@@ -50,10 +50,47 @@ static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
return
v
;
return
v
;
}
}
/**
 * Left-shift each value in an array of int16_t by a specified amount.
 *
 * Every element is shifted independently and in place; bits shifted out of
 * the 16-bit value are discarded.
 *
 * @param src   array to shift in place
 * @param len   number of values in the array
 * @param shift left shift amount, expected to be in the range [0,15]
 */
static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    /* Operate on the int16_t elements directly instead of going through a
     * uint32_t alias: accessing the buffer through an incompatible pointer
     * type violates the strict-aliasing rule. Doing the shift on an unsigned
     * value also avoids left-shifting a negative number; the conversion back
     * keeps only the low 16 bits. */
    for (i = 0; i < len; i++)
        src[i] = (int16_t)(uint16_t)((unsigned int)(uint16_t)src[i] << shift);
}
/**
 * Right-shift each value in an array of int32_t by a specified amount.
 *
 * The array is modified in place. Exactly len elements are touched, so an
 * empty array or a length that is not a multiple of 8 is handled safely.
 *
 * @param src   array to shift in place
 * @param len   number of values in the array
 * @param shift right shift amount, expected to be in the range [0,31]
 */
static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    /* A bounded loop replaces the unrolled do/while: with an unsigned
     * counter, "len -= 8" wraps around instead of going negative, so any
     * length that is not a positive multiple of 8 would run past the end
     * of the buffer. */
    for (i = 0; i < len; i++)
        src[i] >>= shift;
}
av_cold
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
)
av_cold
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
)
{
{
c
->
ac3_exponent_min
=
ac3_exponent_min_c
;
c
->
ac3_exponent_min
=
ac3_exponent_min_c
;
c
->
ac3_max_msb_abs_int16
=
ac3_max_msb_abs_int16_c
;
c
->
ac3_max_msb_abs_int16
=
ac3_max_msb_abs_int16_c
;
c
->
ac3_lshift_int16
=
ac3_lshift_int16_c
;
c
->
ac3_rshift_int32
=
ac3_rshift_int32_c
;
if
(
HAVE_MMX
)
if
(
HAVE_MMX
)
ff_ac3dsp_init_x86
(
c
);
ff_ac3dsp_init_x86
(
c
);
...
...
libavcodec/ac3dsp.h
View file @
f1efbca5
...
@@ -46,6 +46,28 @@ typedef struct AC3DSPContext {
...
@@ -46,6 +46,28 @@ typedef struct AC3DSPContext {
* @return a value with the same MSB as max(abs(src[]))
* @return a value with the same MSB as max(abs(src[]))
*/
*/
int
(
*
ac3_max_msb_abs_int16
)(
const
int16_t
*
src
,
int
len
);
int
(
*
ac3_max_msb_abs_int16
)(
const
int16_t
*
src
,
int
len
);
/**
* Left-shift each value in an array of int16_t by a specified amount.
* @param src input array
* constraints: align 16
* @param len number of values in the array
* constraints: multiple of 32 greater than 0
* @param shift left shift amount
* constraints: range [0,15]
*/
void
(
*
ac3_lshift_int16
)(
int16_t
*
src
,
unsigned
int
len
,
unsigned
int
shift
);
/**
* Right-shift each value in an array of int32_t by a specified amount.
* @param src input array
* constraints: align 16
* @param len number of values in the array
* constraints: multiple of 16 greater than 0
* @param shift right shift amount
* constraints: range [0,31]
*/
void
(
*
ac3_rshift_int32
)(
int32_t
*
src
,
unsigned
int
len
,
unsigned
int
shift
);
}
AC3DSPContext
;
}
AC3DSPContext
;
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
);
void
ff_ac3dsp_init
(
AC3DSPContext
*
c
);
...
...
libavcodec/ac3enc_fixed.c
View file @
f1efbca5
...
@@ -277,40 +277,6 @@ static int log2_tab(AC3EncodeContext *s, int16_t *src, int len)
...
@@ -277,40 +277,6 @@ static int log2_tab(AC3EncodeContext *s, int16_t *src, int len)
}
}
/**
 * Shift every element of an int16_t array left by a fixed number of bits.
 * A shift amount of zero leaves the array untouched.
 * @param tab    array to modify in place
 * @param n      number of values in the array
 * @param lshift number of bits to shift each value left
 */
static void lshift_tab(int16_t *tab, int n, unsigned int lshift)
{
    int16_t *cur = tab;
    int remaining;

    /* Nothing to do for a zero shift. */
    if (lshift == 0)
        return;

    for (remaining = n; remaining > 0; remaining--, cur++)
        *cur = *cur << lshift;
}
/**
 * Shift every element of an int32_t array right by a fixed number of bits.
 * A shift amount of zero leaves the array untouched.
 * @param src   array to modify in place
 * @param len   number of values in the array
 * @param shift number of bits to shift each value right
 */
static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
                               unsigned int shift)
{
    int32_t *cur = src;
    unsigned int remaining = len;

    /* Nothing to do for a zero shift. */
    if (shift == 0)
        return;

    while (remaining--) {
        *cur >>= shift;
        cur++;
    }
}
/**
/**
* Normalize the input samples to use the maximum available precision.
* Normalize the input samples to use the maximum available precision.
* This assumes signed 16-bit input samples.
* This assumes signed 16-bit input samples.
...
@@ -320,7 +286,8 @@ static void ac3_rshift_int32_c(int32_t *src, unsigned int len, unsigned int shif
...
@@ -320,7 +286,8 @@ static void ac3_rshift_int32_c(int32_t *src, unsigned int len, unsigned int shif
static
int
normalize_samples
(
AC3EncodeContext
*
s
)
static
int
normalize_samples
(
AC3EncodeContext
*
s
)
{
{
int
v
=
14
-
log2_tab
(
s
,
s
->
windowed_samples
,
AC3_WINDOW_SIZE
);
int
v
=
14
-
log2_tab
(
s
,
s
->
windowed_samples
,
AC3_WINDOW_SIZE
);
lshift_tab
(
s
->
windowed_samples
,
AC3_WINDOW_SIZE
,
v
);
if
(
v
>
0
)
s
->
ac3dsp
.
ac3_lshift_int16
(
s
->
windowed_samples
,
AC3_WINDOW_SIZE
,
v
);
/* +6 to right-shift from 31-bit to 25-bit */
/* +6 to right-shift from 31-bit to 25-bit */
return
v
+
6
;
return
v
+
6
;
}
}
...
@@ -336,8 +303,8 @@ static void scale_coefficients(AC3EncodeContext *s)
...
@@ -336,8 +303,8 @@ static void scale_coefficients(AC3EncodeContext *s)
for
(
blk
=
0
;
blk
<
AC3_MAX_BLOCKS
;
blk
++
)
{
for
(
blk
=
0
;
blk
<
AC3_MAX_BLOCKS
;
blk
++
)
{
AC3Block
*
block
=
&
s
->
blocks
[
blk
];
AC3Block
*
block
=
&
s
->
blocks
[
blk
];
for
(
ch
=
0
;
ch
<
s
->
channels
;
ch
++
)
{
for
(
ch
=
0
;
ch
<
s
->
channels
;
ch
++
)
{
ac3_rshift_int32_c
(
block
->
mdct_coef
[
ch
],
AC3_MAX_COEFS
,
s
->
ac3dsp
.
ac3_rshift_int32
(
block
->
mdct_coef
[
ch
],
AC3_MAX_COEFS
,
block
->
coeff_shift
[
ch
]);
block
->
coeff_shift
[
ch
]);
}
}
}
}
}
}
...
...
libavcodec/x86/ac3dsp.asm
View file @
f1efbca5
...
@@ -133,3 +133,48 @@ INIT_XMM
...
@@ -133,3 +133,48 @@ INIT_XMM
AC3_MAX_MSB_ABS_INT16
sse2
,
min_max
AC3_MAX_MSB_ABS_INT16
sse2
,
min_max
%define
ABS2
ABS2_SSSE3
%define
ABS2
ABS2_SSSE3
AC3_MAX_MSB_ABS_INT16
ssse3
,
or_abs
AC3_MAX_MSB_ABS_INT16
ssse3
,
or_abs
;-----------------------------------------------------------------------------
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
;
; Emits one in-place array shift function named ac3_<%1>shift_int<%2>_<%4>.
;   %1 = direction letter used in the symbol name (l or r)
;   %2 = element width in bits (16 or 32); also used to convert the bytes
;        processed per iteration into an element count
;   %3 = packed shift instruction applied to each vector
;   %4 = instruction-set suffix used in the symbol name
; Each loop iteration handles four vectors of mmsize bytes.
; NOTE(review): there is no tail handling, so len presumably has to be a
; multiple of the per-iteration element count - confirm against the
; constraints documented for the callers.
;-----------------------------------------------------------------------------
%macro
AC3_SHIFT
4
; l/r, 16/32, shift instruction, instruction set
; NOTE(review): the numeric cglobal arguments (3, 3, 5) are presumably the
; x86inc argument / GPR / vector-register counts; m0-m4 are used below.
cglobal
ac3_
%1
shift_int
%2
_
%4
,
3
,
3
,
5
,
src
,
len
,
shift
; m0 holds the shift count; it is the count operand of every %3 below
movd
m0
,
shiftd
.
loop
:
; load four consecutive vectors starting at src
mova
m1
,
[
srcq
]
mova
m2
,
[
srcq
+
mmsize
]
mova
m3
,
[
srcq
+
mmsize
*
2
]
mova
m4
,
[
srcq
+
mmsize
*
3
]
; shift every element of the four vectors by the count in m0
%3
m1
,
m0
%3
m2
,
m0
%3
m3
,
m0
%3
m4
,
m0
; write the shifted vectors back to the same locations (in-place operation)
mova
[
srcq
]
,
m1
mova
[
srcq
+
mmsize
]
,
m2
mova
[
srcq
+
mmsize
*
2
]
,
m3
mova
[
srcq
+
mmsize
*
3
]
,
m4
; advance the pointer by the four vectors (4*mmsize bytes) just processed
add
srcq
,
mmsize
*
4
; 4*mmsize bytes hold mmsize*32/%2 elements of %2 bits each; subtract that
; from the remaining element count
sub
lend
,
mmsize
*
32
/
%2
; keep looping while the remaining count is still above zero (unsigned)
ja
.
loop
; .end is not referenced anywhere inside this macro
.
end
:
REP_RET
%endmacro
;-----------------------------------------------------------------------------
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
INIT_MMX
AC3_SHIFT
l
,
16
,
psllw
,
mmx
INIT_XMM
AC3_SHIFT
l
,
16
,
psllw
,
sse2
;-----------------------------------------------------------------------------
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
INIT_MMX
AC3_SHIFT
r
,
32
,
psrad
,
mmx
INIT_XMM
AC3_SHIFT
r
,
32
,
psrad
,
sse2
libavcodec/x86/ac3dsp_mmx.c
View file @
f1efbca5
...
@@ -32,6 +32,12 @@ extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
...
@@ -32,6 +32,12 @@ extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
extern
int
ff_ac3_max_msb_abs_int16_sse2
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_sse2
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_ssse3
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_ssse3
(
const
int16_t
*
src
,
int
len
);
extern
void
ff_ac3_lshift_int16_mmx
(
int16_t
*
src
,
unsigned
int
len
,
unsigned
int
shift
);
extern
void
ff_ac3_lshift_int16_sse2
(
int16_t
*
src
,
unsigned
int
len
,
unsigned
int
shift
);
extern
void
ff_ac3_rshift_int32_mmx
(
int32_t
*
src
,
unsigned
int
len
,
unsigned
int
shift
);
extern
void
ff_ac3_rshift_int32_sse2
(
int32_t
*
src
,
unsigned
int
len
,
unsigned
int
shift
);
av_cold
void
ff_ac3dsp_init_x86
(
AC3DSPContext
*
c
)
av_cold
void
ff_ac3dsp_init_x86
(
AC3DSPContext
*
c
)
{
{
int
mm_flags
=
av_get_cpu_flags
();
int
mm_flags
=
av_get_cpu_flags
();
...
@@ -40,6 +46,8 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
...
@@ -40,6 +46,8 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_mmx
;
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_mmx
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_mmx
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_mmx
;
c
->
ac3_lshift_int16
=
ff_ac3_lshift_int16_mmx
;
c
->
ac3_rshift_int32
=
ff_ac3_rshift_int32_mmx
;
}
}
if
(
mm_flags
&
AV_CPU_FLAG_MMX2
&&
HAVE_MMX2
)
{
if
(
mm_flags
&
AV_CPU_FLAG_MMX2
&&
HAVE_MMX2
)
{
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_mmxext
;
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_mmxext
;
...
@@ -48,6 +56,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
...
@@ -48,6 +56,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
&&
HAVE_SSE
)
{
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
&&
HAVE_SSE
)
{
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_sse2
;
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_sse2
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_sse2
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_sse2
;
if
(
!
(
mm_flags
&
AV_CPU_FLAG_SSE2SLOW
))
{
c
->
ac3_lshift_int16
=
ff_ac3_lshift_int16_sse2
;
c
->
ac3_rshift_int32
=
ff_ac3_rshift_int32_sse2
;
}
}
}
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
&&
HAVE_SSSE3
)
{
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
&&
HAVE_SSSE3
)
{
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_ssse3
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_ssse3
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment