Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
e6e98234
Commit
e6e98234
authored
Mar 20, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add apply_window_int16() to DSPContext with x86-optimized versions and use it
in the ac3_fixed encoder.
parent
e971d813
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
210 additions
and
36 deletions
+210
-36
ac3enc.c
libavcodec/ac3enc.c
+1
-1
ac3enc_fixed.c
libavcodec/ac3enc_fixed.c
+2
-8
ac3enc_float.c
libavcodec/ac3enc_float.c
+2
-2
ac3tab.c
libavcodec/ac3tab.c
+1
-1
dsputil.c
libavcodec/dsputil.c
+14
-0
dsputil.h
libavcodec/dsputil.h
+14
-0
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+38
-2
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+126
-0
ac3_fixed
tests/ref/acodec/ac3_fixed
+1
-1
rm
tests/ref/lavf/rm
+1
-1
ac3_rm
tests/ref/seek/ac3_rm
+10
-20
No files found.
libavcodec/ac3enc.c
View file @
e6e98234
...
...
@@ -167,7 +167,7 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
static
void
mdct512
(
AC3MDCTContext
*
mdct
,
CoefType
*
out
,
SampleType
*
in
);
static
void
apply_window
(
DSPContext
*
dsp
,
SampleType
*
output
,
const
SampleType
*
input
,
const
SampleType
*
window
,
int
n
);
const
SampleType
*
window
,
unsigned
int
le
n
);
static
int
normalize_samples
(
AC3EncodeContext
*
s
);
...
...
libavcodec/ac3enc_fixed.c
View file @
e6e98234
...
...
@@ -252,15 +252,9 @@ static void mdct512(AC3MDCTContext *mdct, int32_t *out, int16_t *in)
* Apply KBD window to input samples prior to MDCT.
*/
static
void
apply_window
(
DSPContext
*
dsp
,
int16_t
*
output
,
const
int16_t
*
input
,
const
int16_t
*
window
,
int
n
)
const
int16_t
*
window
,
unsigned
int
le
n
)
{
int
i
;
int
n2
=
n
>>
1
;
for
(
i
=
0
;
i
<
n2
;
i
++
)
{
output
[
i
]
=
MUL16
(
input
[
i
],
window
[
i
])
>>
15
;
output
[
n
-
i
-
1
]
=
MUL16
(
input
[
n
-
i
-
1
],
window
[
i
])
>>
15
;
}
dsp
->
apply_window_int16
(
output
,
input
,
window
,
len
);
}
...
...
libavcodec/ac3enc_float.c
View file @
e6e98234
...
...
@@ -83,9 +83,9 @@ static void mdct512(AC3MDCTContext *mdct, float *out, float *in)
* Apply KBD window to input samples prior to MDCT.
*/
static
void
apply_window
(
DSPContext
*
dsp
,
float
*
output
,
const
float
*
input
,
const
float
*
window
,
int
n
)
const
float
*
window
,
unsigned
int
le
n
)
{
dsp
->
vector_fmul
(
output
,
input
,
window
,
n
);
dsp
->
vector_fmul
(
output
,
input
,
window
,
le
n
);
}
...
...
libavcodec/ac3tab.c
View file @
e6e98234
...
...
@@ -141,7 +141,7 @@ const uint8_t ff_ac3_rematrix_band_tab[5] = { 13, 25, 37, 61, 253 };
/* AC-3 MDCT window */
/* MDCT window */
const
int16_t
ff_ac3_window
[
AC3_WINDOW_SIZE
/
2
]
=
{
DECLARE_ALIGNED
(
16
,
const
int16_t
,
ff_ac3_window
)
[
AC3_WINDOW_SIZE
/
2
]
=
{
4
,
7
,
12
,
16
,
21
,
28
,
34
,
42
,
51
,
61
,
72
,
84
,
97
,
111
,
127
,
145
,
164
,
184
,
207
,
231
,
257
,
285
,
315
,
347
,
...
...
libavcodec/dsputil.c
View file @
e6e98234
...
...
@@ -3890,6 +3890,19 @@ static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, co
return
res
;
}
/**
 * Portable C version of the symmetric 16-bit window.
 *
 * Only the first len/2 window coefficients are read. Coefficient k scales
 * both sample k and its mirror sample len-1-k. Each product has 1<<14 added
 * and is then shifted right by 15 before being stored.
 *
 * @param output destination samples (len entries are written when len is even)
 * @param input  source samples
 * @param window window coefficients, at least len/2 entries
 * @param len    full window length in samples
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int k = 0;

    while (k < half) {
        const int16_t coef        = window[k];
        const unsigned int mirror = len - k - 1;

        /* front sample first, then its mirror, as in the reference order */
        output[k]      = (MUL16(input[k],      coef) + (1 << 14)) >> 15;
        output[mirror] = (MUL16(input[mirror], coef) + (1 << 14)) >> 15;
        k++;
    }
}
#define W0 2048
#define W1 2841
/* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676
/* 2048*sqrt (2)*cos (2*pi/16) */
...
...
@@ -4364,6 +4377,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
vector_clipf
=
vector_clipf_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
c
->
apply_window_int16
=
apply_window_int16_c
;
c
->
scalarproduct_float
=
scalarproduct_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
...
...
libavcodec/dsputil.h
View file @
e6e98234
...
...
@@ -524,6 +524,20 @@ typedef struct DSPContext {
*/
int32_t
(
*
scalarproduct_and_madd_int16
)(
int16_t
*
v1
/*align 16*/
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
len
,
int
mul
);
/**
 * Apply symmetric window in 16-bit fixed-point.
 *
 * The window array holds one half of the window; each coefficient is
 * applied to one sample in the first half of the input and to the
 * mirrored sample in the second half.
 *
 * @param output destination array
 *               constraints: 16-byte aligned
 * @param input  source array
 *               constraints: 16-byte aligned
 * @param window window array
 *               constraints: 16-byte aligned, at least len/2 elements
 * @param len    full window length
 *               constraints: multiple of ? greater than zero
 *               NOTE(review): the x86 loops added with this pointer step
 *               mmsize bytes per half per iteration, which suggests len
 *               must be a multiple of 16 for the XMM versions -- confirm
 *               and replace the '?' above.
 */
void (*apply_window_int16)(int16_t *output, const int16_t *input,
                           const int16_t *window, unsigned int len);
/* rv30 functions */
qpel_mc_func
put_rv30_tpel_pixels_tab
[
4
][
16
];
qpel_mc_func
avg_rv30_tpel_pixels_tab
[
4
][
16
];
...
...
libavcodec/x86/dsputil_mmx.c
View file @
e6e98234
...
...
@@ -2388,6 +2388,20 @@ int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int or
int32_t
ff_scalarproduct_and_madd_int16_mmx2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_sse2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_ssse3
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
/*
 * x86 implementations of DSPContext.apply_window_int16.
 * The matching symbols are generated by the APPLY_WINDOW_INT16 macro in
 * dsputil_yasm.asm. dsputil_init_mmx() picks the "_ba" variants when
 * CODEC_FLAG_BITEXACT is set; per the assembly comments, the plain mmxext and
 * sse2 variants do not round and are not bit-identical to the C version.
 */
void ff_apply_window_int16_mmxext    (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_mmxext_ba (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_sse2      (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_sse2_ba   (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_ssse3     (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void
ff_add_hfyu_median_prediction_mmx2
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
);
int
ff_add_hfyu_left_prediction_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
left
);
int
ff_add_hfyu_left_prediction_sse4
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
left
);
...
...
@@ -2749,6 +2763,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_YASM
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_mmx2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_mmx2
;
if
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_mmxext_ba
;
}
else
{
c
->
apply_window_int16
=
ff_apply_window_int16_mmxext
;
}
#endif
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE
){
...
...
@@ -2771,13 +2790,30 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_YASM
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_sse2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_sse2
;
if
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_sse2_ba
;
}
else
{
if
(
!
(
mm_flags
&
AV_CPU_FLAG_SSE2SLOW
))
{
c
->
apply_window_int16
=
ff_apply_window_int16_sse2
;
}
}
c
->
emulated_edge_mc
=
emulated_edge_mc_sse
;
c
->
gmc
=
gmc_sse
;
#endif
}
if
((
mm_flags
&
AV_CPU_FLAG_SSSE3
)
&&
!
(
mm_flags
&
(
AV_CPU_FLAG_SSE42
|
AV_CPU_FLAG_3DNOW
))
&&
HAVE_YASM
)
// cachesplit
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_ssse3
;
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
)
{
#if HAVE_YASM
if
(
mm_flags
&
AV_CPU_FLAG_ATOM
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_ssse3_atom
;
}
else
{
c
->
apply_window_int16
=
ff_apply_window_int16_ssse3
;
}
if
(
!
(
mm_flags
&
(
AV_CPU_FLAG_SSE42
|
AV_CPU_FLAG_3DNOW
)))
{
// cachesplit
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_ssse3
;
}
#endif
}
}
if
(
CONFIG_ENCODERS
)
...
...
libavcodec/x86/dsputil_yasm.asm
View file @
e6e98234
...
...
@@ -27,6 +27,8 @@ pb_zzzzzzzz77777777: times 8 db -1
pb_7
:
times
8
db
7
pb_zzzz3333zzzzbbbb
:
db
-
1
,
-
1
,
-
1
,
-
1
,
3
,
3
,
3
,
3
,
-
1
,
-
1
,
-
1
,
-
1
,
11
,
11
,
11
,
11
pb_zz11zz55zz99zzdd
:
db
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
,
5
,
5
,
-
1
,
-
1
,
9
,
9
,
-
1
,
-
1
,
13
,
13
pb_revwords
:
db
14
,
15
,
12
,
13
,
10
,
11
,
8
,
9
,
6
,
7
,
4
,
5
,
2
,
3
,
0
,
1
pd_16384
:
times
4
dd
16384
section
.
text
align
=
16
...
...
@@ -202,6 +204,130 @@ SCALARPRODUCT_LOOP 0
RET
;-----------------------------------------------------------------------------
; void ff_apply_window_int16(int16_t *output, const int16_t *input,
; const int16_t *window, unsigned int len)
;-----------------------------------------------------------------------------
; Reverse the order of the four 16-bit words in MMX register %1.
; Accepts an optional second argument, which is ignored, so it can be invoked
; with the same argument list as the SSSE3 variant.
%macro REVERSE_WORDS_MMXEXT 1-2
    pshufw   %1, %1, 0x1B       ; selector 00 01 10 11b -> words 3,2,1,0
%endmacro
; Reverse the order of the eight 16-bit words in XMM register %1.
; Accepts an optional second argument, which is ignored, so it can be invoked
; with the same argument list as the SSSE3 variant.
%macro REVERSE_WORDS_SSE2 1-2
    pshuflw  %1, %1, 0x1B       ; reverse the four words of the low quadword
    pshufhw  %1, %1, 0x1B       ; reverse the four words of the high quadword
    pshufd   %1, %1, 0x4E       ; selector 01 00 11 10b -> swap the two quadwords
%endmacro
; Reverse the 16-bit words of %1 with a single byte shuffle.
; %2 must hold the byte-shuffle control; APPLY_WINDOW_INT16 passes m5, which
; it loads from pb_revwords for the ssse3 build.
%macro REVERSE_WORDS_SSSE3 2
    pshufb   %1, %2
%endmacro
; dst = (dst * src) >> 15
; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
; in from the pmullw result.
; No rounding term is added, so the result is truncated.
%macro MUL16FIXED_MMXEXT 3 ; dst, src, temp
    mova     %3, %1             ; temp = copy of dst before it is overwritten
    pmulhw   %1, %2             ; dst  = high 16 bits of each signed product
    pmullw   %3, %2             ; temp = low 16 bits of each product
    psrlw    %3, 15             ; temp = bit 15 of each product, moved to bit 0
    psllw    %1, 1              ; dst  = high half shifted left by one
    por      %1, %3             ; merge the recovered bit into bit 0 of dst
%endmacro
; dst = ((dst * src) + (1<<14)) >> 15
; Single-instruction rounded multiply; the third argument exists only so the
; argument list matches MUL16FIXED_MMXEXT.
; NOTE(review): no %define in the visible part of this file maps MUL16FIXED to
; this macro; the SSSE3 path of APPLY_WINDOW_INT16 issues pmulhrsw directly.
%macro MUL16FIXED_SSSE3 3 ; dst, src, unused
    pmulhrsw %1, %2
%endmacro
; Emit one apply_window_int16_<%1> function.
; The loop body is chosen at assembly time: %2 selects the 32-bit
; "bit exact" path, otherwise %3 selects the pmulhrsw path, otherwise the
; MUL16FIXED path is used. REVERSE_WORDS and MUL16FIXED must be %define'd
; by the caller before the macro is invoked.
%macro APPLY_WINDOW_INT16 3 ; %1=instruction set, %2=mmxext/sse2 bit exact version, %3=has_ssse3
cglobal apply_window_int16_%1, 4,5,6, output, input, window, offset, offset2
    ; The 4th argument (len, in samples) is used as a byte offset. Since the
    ; samples are 2 bytes wide, byte offset len is the first sample of the
    ; second half. offset2 starts one vector below that point.
    lea     offset2q, [offsetq-mmsize]
%if %2
    mova    m5, [pd_16384]      ; rounding constant added before the >>15
%elifidn %1, ssse3
    mova    m5, [pb_revwords]   ; pshufb control used by REVERSE_WORDS
    ALIGN 16
%endif
.loop:
%if %2
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
    mova    m3, [windowq+offset2q]
    mova    m4, [inputq+offset2q]
    ; m0/m2 are zeroed, so after unpacking each dword is {0, window word}.
    ; m1 is deliberately not cleared: its stale words are multiplied by
    ; those zero words in pmaddwd and contribute nothing to the sum.
    pxor    m0, m0
    punpcklwd m0, m3
    punpcklwd m1, m4
    pmaddwd m0, m1              ; low words: window * input as 32-bit
    paddd   m0, m5
    psrad   m0, 15
    pxor    m2, m2
    punpckhwd m2, m3
    punpckhwd m1, m4
    pmaddwd m2, m1              ; high words: window * input as 32-bit
    paddd   m2, m5
    psrad   m2, 15
    packssdw m0, m2             ; back to 16-bit words
    mova    [outputq+offset2q], m0
    ; second half: same window vector, word order reversed
    REVERSE_WORDS m3
    mova    m4, [inputq+offsetq]
    pxor    m0, m0
    punpcklwd m0, m3
    punpcklwd m1, m4
    pmaddwd m0, m1
    paddd   m0, m5
    psrad   m0, 15
    pxor    m2, m2
    punpckhwd m2, m3
    punpckhwd m1, m4
    pmaddwd m2, m1
    paddd   m2, m5
    psrad   m2, 15
    packssdw m0, m2
    mova    [outputq+offsetq], m0
%elif %3
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The ssse3 version is bit-identical.
    mova    m0, [windowq+offset2q]
    mova    m1, [inputq+offset2q]
    pmulhrsw m1, m0             ; first half: input * window, rounded
    REVERSE_WORDS m0, m5        ; m5 is ignored by the 1-2 argument variants
    pmulhrsw m0, [inputq+offsetq] ; second half, result left in m0
    mova    [outputq+offset2q], m1
    mova    [outputq+offsetq], m0
%else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
    ; therefore are not bit-identical to the C version.
    mova    m0, [windowq+offset2q]
    mova    m1, [inputq+offset2q]
    mova    m2, [inputq+offsetq]
    MUL16FIXED m1, m0, m3       ; first half; m3 is scratch
    REVERSE_WORDS m0
    MUL16FIXED m2, m0, m3       ; second half with the reversed window
    mova    [outputq+offset2q], m1
    mova    [outputq+offsetq], m2
%endif
    ; offset walks forward through the second half while offset2 walks
    ; backward through the first; the loop ends when the subtraction borrows.
    add     offsetd, mmsize
    sub     offset2d, mmsize
    jae .loop
    REP_RET
%endmacro
; Instantiate every variant. Arguments are: name suffix, bit-exact flag,
; has-ssse3 flag. The REVERSE_WORDS / MUL16FIXED definitions in effect at each
; invocation decide which helper macros the generated function uses.

; MMX registers: plain (truncating) and bit-exact versions
INIT_MMX
%define REVERSE_WORDS REVERSE_WORDS_MMXEXT
%define MUL16FIXED MUL16FIXED_MMXEXT
APPLY_WINDOW_INT16 mmxext,     0, 0
APPLY_WINDOW_INT16 mmxext_ba,  1, 0

; XMM registers; MUL16FIXED is still MUL16FIXED_MMXEXT from above
INIT_XMM
%define REVERSE_WORDS REVERSE_WORDS_SSE2
APPLY_WINDOW_INT16 sse2,       0, 0
APPLY_WINDOW_INT16 sse2_ba,    1, 0
; ssse3_atom: pmulhrsw multiply, but word reversal via the SSE2 shuffles
APPLY_WINDOW_INT16 ssse3_atom, 0, 1
; ssse3: pmulhrsw multiply and pshufb word reversal
%define REVERSE_WORDS REVERSE_WORDS_SSSE3
APPLY_WINDOW_INT16 ssse3,      0, 1
; void add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
cglobal
add_hfyu_median_prediction_mmx2
,
6
,
6
,
0
,
dst
,
top
,
diff
,
w
,
left
,
left_top
...
...
tests/ref/acodec/ac3_fixed
View file @
e6e98234
b3a8f0a8809a58b2ece90744f06fff96
*./tests/data/acodec/ac3.rm
346073c97eada69330f61e103a170ca1
*./tests/data/acodec/ac3.rm
98751 ./tests/data/acodec/ac3.rm
tests/ref/lavf/rm
View file @
e6e98234
7
da378131db880bcf2e58305d54418ec
*./tests/data/lavf/lavf.rm
7
b7ede9548a09346675edad36acfbf19
*./tests/data/lavf/lavf.rm
346706 ./tests/data/lavf/lavf.rm
tests/ref/seek/ac3_rm
View file @
e6e98234
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st:-1 flags:0 ts:-1.000000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st:-1 flags:1 ts: 1.894167
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st: 0 flags:0 ts: 0.788000
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st:-1 flags:1 ts: 1.894167
ret:-1 st: 0 flags:0 ts: 0.788000
ret: 0 st: 0 flags:1 ts:-0.317000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st:-1 flags:0 ts: 2.576668
ret: 0 st: 0 flags:1 dts:524.800000 pts:524.800000 pos: 6155 size: 244
ret:-1 st:-1 flags:0 ts: 2.576668
ret:-1 st:-1 flags:1 ts: 1.470835
ret: 0 st: 0 flags:0 ts: 0.365000
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st: 0 flags:0 ts: 0.365000
ret: 0 st: 0 flags:1 ts:-0.741000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:0 ts: 2.153336
ret: 0 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:0 ts:-0.058000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st: 0 flags:1 ts: 2.836000
ret: 0 st: 0 flags:1 dts: 2.681000 pts: 2.681000 pos: 44105 size: 558
ret:-1 st: 0 flags:1 ts: 2.836000
ret:-1 st:-1 flags:0 ts: 1.730004
ret: 0 st:-1 flags:1 ts: 0.624171
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:1 ts: 0.624171
ret: 0 st: 0 flags:0 ts:-0.482000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st: 0 flags:1 ts: 2.413000
ret:-1 st:-1 flags:0 ts: 1.306672
ret: 0 st:-1 flags:1 ts: 0.200839
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:1 ts: 0.200839
ret: 0 st: 0 flags:0 ts:-0.905000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st: 0 flags:1 ts: 1.989000
ret: 0 st:-1 flags:0 ts: 0.883340
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st:-1 flags:0 ts: 0.883340
ret: 0 st:-1 flags:1 ts:-0.222493
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st: 0 flags:0 ts: 2.672000
ret:-1 st: 0 flags:1 ts: 1.566000
ret: 0 st:-1 flags:0 ts: 0.460008
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st:-1 flags:0 ts: 0.460008
ret: 0 st:-1 flags:1 ts:-0.645825
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment