Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
9d06037d
Commit
9d06037d
authored
Oct 30, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
twinvq: add SSE/AVX optimized sum/difference stereo interleaving
parent
7b966566
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
101 additions
and
18 deletions
+101
-18
dsputil.c
libavcodec/dsputil.c
+13
-0
dsputil.h
libavcodec/dsputil.h
+17
-0
twinvq.c
libavcodec/twinvq.c
+16
-18
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+7
-0
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+48
-0
No files found.
libavcodec/dsputil.c
View file @
9d06037d
...
@@ -2509,6 +2509,18 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
...
@@ -2509,6 +2509,18 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
}
}
}
}
/**
 * Portable C version of the sum/difference interleave.
 *
 * For each index k in [0, len) the sum src0[k] + src1[k] is stored at
 * dst[2 * k] and the difference src0[k] - src1[k] at dst[2 * k + 1].
 * Nothing is written when len is zero or negative.
 *
 * @param dst  output vector, receives 2 * len floats
 * @param src0 first input vector, len floats are read
 * @param src1 second input vector, len floats are read
 * @param len  number of elements in each input vector
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    float *out = dst;
    int k = 0;

    while (k < len) {
        /* Load both inputs up front; each feeds the sum and the difference. */
        const float a = src0[k];
        const float b = src1[k];

        *out++ = a + b; /* even slot: sum */
        *out++ = a - b; /* odd slot: difference */
        k++;
    }
}
static
float
scalarproduct_float_c
(
const
float
*
v1
,
const
float
*
v2
,
int
len
)
static
float
scalarproduct_float_c
(
const
float
*
v1
,
const
float
*
v2
,
int
len
)
{
{
float
p
=
0
.
0
;
float
p
=
0
.
0
;
...
@@ -3036,6 +3048,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
...
@@ -3036,6 +3048,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
vector_clip_int32
=
vector_clip_int32_c
;
c
->
vector_clip_int32
=
vector_clip_int32_c
;
c
->
scalarproduct_float
=
scalarproduct_float_c
;
c
->
scalarproduct_float
=
scalarproduct_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
butterflies_float_interleave
=
butterflies_float_interleave_c
;
c
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
c
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
c
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
c
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
...
...
libavcodec/dsputil.h
View file @
9d06037d
...
@@ -453,6 +453,23 @@ typedef struct DSPContext {
...
@@ -453,6 +453,23 @@ typedef struct DSPContext {
*/
*/
void
(
*
butterflies_float
)(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
);
void
(
*
butterflies_float
)(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
);
/**
* Calculate the sum and difference of two vectors of floats and interleave
* results into a separate output vector of floats, with each sum
* positioned before the corresponding difference.
*
* @param dst output vector
* constraints: 16-byte aligned
* @param src0 first input vector
* constraints: 32-byte aligned
* @param src1 second input vector
* constraints: 32-byte aligned
* @param len number of elements in the input
* constraints: multiple of 8
*/
void
(
*
butterflies_float_interleave
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* (I)DCT */
/* (I)DCT */
void
(
*
fdct
)(
DCTELEM
*
block
/* align 16*/
);
void
(
*
fdct
)(
DCTELEM
*
block
/* align 16*/
);
void
(
*
fdct248
)(
DCTELEM
*
block
/* align 16*/
);
void
(
*
fdct248
)(
DCTELEM
*
block
/* align 16*/
);
...
...
libavcodec/twinvq.c
View file @
9d06037d
...
@@ -665,8 +665,9 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
...
@@ -665,8 +665,9 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
float
*
out
)
float
*
out
)
{
{
const
ModeTab
*
mtab
=
tctx
->
mtab
;
const
ModeTab
*
mtab
=
tctx
->
mtab
;
int
size1
,
size2
;
float
*
prev_buf
=
tctx
->
prev_frame
+
tctx
->
last_block_pos
[
0
];
float
*
prev_buf
=
tctx
->
prev_frame
+
tctx
->
last_block_pos
[
0
];
int
i
,
j
;
int
i
;
for
(
i
=
0
;
i
<
tctx
->
avctx
->
channels
;
i
++
)
{
for
(
i
=
0
;
i
<
tctx
->
avctx
->
channels
;
i
++
)
{
imdct_and_window
(
tctx
,
ftype
,
wtype
,
imdct_and_window
(
tctx
,
ftype
,
wtype
,
...
@@ -675,27 +676,24 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
...
@@ -675,27 +676,24 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
i
);
i
);
}
}
size2
=
tctx
->
last_block_pos
[
0
];
size1
=
mtab
->
size
-
size2
;
if
(
tctx
->
avctx
->
channels
==
2
)
{
if
(
tctx
->
avctx
->
channels
==
2
)
{
for
(
i
=
0
;
i
<
mtab
->
size
-
tctx
->
last_block_pos
[
0
];
i
++
)
{
tctx
->
dsp
.
butterflies_float_interleave
(
out
,
prev_buf
,
float
f1
=
prev_buf
[
i
];
&
prev_buf
[
2
*
mtab
->
size
],
float
f2
=
prev_buf
[
2
*
mtab
->
size
+
i
];
size1
);
out
[
2
*
i
]
=
f1
+
f2
;
out
[
2
*
i
+
1
]
=
f1
-
f2
;
out
+=
2
*
size1
;
}
for
(
j
=
0
;
i
<
mtab
->
size
;
j
++
,
i
++
)
{
tctx
->
dsp
.
butterflies_float_interleave
(
out
,
tctx
->
curr_frame
,
float
f1
=
tctx
->
curr_frame
[
j
];
&
tctx
->
curr_frame
[
2
*
mtab
->
size
],
float
f2
=
tctx
->
curr_frame
[
2
*
mtab
->
size
+
j
];
size2
);
out
[
2
*
i
]
=
f1
+
f2
;
out
[
2
*
i
+
1
]
=
f1
-
f2
;
}
}
else
{
}
else
{
memcpy
(
out
,
prev_buf
,
memcpy
(
out
,
prev_buf
,
size1
*
sizeof
(
*
out
));
(
mtab
->
size
-
tctx
->
last_block_pos
[
0
])
*
sizeof
(
*
out
));
out
+=
mtab
->
size
-
tctx
->
last_block_pos
[
0
]
;
out
+=
size1
;
memcpy
(
out
,
tctx
->
curr_frame
,
memcpy
(
out
,
tctx
->
curr_frame
,
size2
*
sizeof
(
*
out
));
(
tctx
->
last_block_pos
[
0
])
*
sizeof
(
*
out
));
}
}
}
}
...
...
libavcodec/x86/dsputil_mmx.c
View file @
9d06037d
...
@@ -2424,6 +2424,11 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min
...
@@ -2424,6 +2424,11 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min
void
ff_vector_clip_int32_sse4
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
void
ff_vector_clip_int32_sse4
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
int32_t
max
,
unsigned
int
len
);
extern
void
ff_butterflies_float_interleave_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
extern
void
ff_butterflies_float_interleave_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
dsputil_init_mmx
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
void
dsputil_init_mmx
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
{
int
mm_flags
=
av_get_cpu_flags
();
int
mm_flags
=
av_get_cpu_flags
();
...
@@ -2868,6 +2873,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2868,6 +2873,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
vector_clipf
=
vector_clipf_sse
;
c
->
vector_clipf
=
vector_clipf_sse
;
#if HAVE_YASM
#if HAVE_YASM
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
c
->
butterflies_float_interleave
=
ff_butterflies_float_interleave_sse
;
#endif
#endif
}
}
if
(
HAVE_AMD3DNOW
&&
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
if
(
HAVE_AMD3DNOW
&&
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
...
@@ -2925,6 +2931,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2925,6 +2931,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
put_h264_chroma_pixels_tab
[
0
]
=
ff_put_h264_chroma_mc8_10_avx
;
c
->
put_h264_chroma_pixels_tab
[
0
]
=
ff_put_h264_chroma_mc8_10_avx
;
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_10_avx
;
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_10_avx
;
}
}
c
->
butterflies_float_interleave
=
ff_butterflies_float_interleave_avx
;
}
}
#endif
#endif
}
}
...
...
libavcodec/x86/dsputil_yasm.asm
View file @
9d06037d
...
@@ -1129,3 +1129,51 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
...
@@ -1129,3 +1129,51 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
%else
%else
VECTOR_CLIP_INT32
6
,
1
,
0
,
0
VECTOR_CLIP_INT32
6
,
1
,
0
,
0
%endif
%endif
;-----------------------------------------------------------------------------
; void ff_butterflies_float_interleave(float *dst, const float *src0,
;                                      const float *src1, int len);
;
; Writes src0[i] + src1[i] followed by src0[i] - src1[i] for each element,
; i.e. dst holds interleaved sum/difference pairs and is twice as long as
; either input.
;
; The macro body is written against mmsize so the same code can be assembled
; for different vector widths. Each loop iteration consumes mmsize bytes from
; each input and there is no tail handling, so len is expected to be a
; multiple of the number of floats per vector register.
;-----------------------------------------------------------------------------
%macro
BUTTERFLIES_FLOAT_INTERLEAVE
0
; 4 arguments, 4 general-purpose registers, 3 vector registers (m0-m2).
cglobal
butterflies_float_interleave
,
4
,
4
,
3
,
dst
,
src0
,
src1
,
len
; len is a 32-bit int; sign-extend it so it can be used as a 64-bit offset.
%ifdef
ARCH_X86_64
movsxd
lenq
,
lend
%endif
; Nothing to do for len == 0: skip straight to the return.
test
lenq
,
lenq
jz
.
end
; Convert the element count into a byte count (4 bytes per float).
shl
lenq
,
2
; Point all three pointers at the END of their buffers; dst advances by
; 2 * len bytes because it receives two floats per input element.
lea
src0q
,
[
src0q
+
lenq
]
lea
src1q
,
[
src1q
+
lenq
]
lea
dstq
,
[
dstq
+
2
*
lenq
]
; Negate the byte count so it serves as a negative offset that counts up to
; zero; the loop's add then sets the flags used for the exit test.
neg
lenq
.
loop
:
; Load one vector from each input.
mova
m0
,
[
src0q
+
lenq
]
mova
m1
,
[
src1q
+
lenq
]
; m2 = src0 - src1 (differences), m0 = src0 + src1 (sums).
subps
m2
,
m0
,
m1
addps
m0
,
m0
,
m1
; Interleave sums with differences: m1 gets the pairs built from the low
; elements, m0 the pairs built from the high elements.
unpcklps
m1
,
m0
,
m2
unpckhps
m0
,
m0
,
m2
%if
cpuflag
(
avx
)
; With 256-bit registers the unpack instructions work within each 128-bit
; lane, so the four 128-bit pieces are stored individually in output order:
; low lane of m1, low lane of m0, high lane of m1, high lane of m0.
vextractf128
[
dstq
+
2
*
lenq
]
,
m1
,
0
vextractf128
[
dstq
+
2
*
lenq
+
16
]
,
m0
,
0
vextractf128
[
dstq
+
2
*
lenq
+
32
]
,
m1
,
1
vextractf128
[
dstq
+
2
*
lenq
+
48
]
,
m0
,
1
%else
; Non-AVX build: the two result registers are already in output order.
mova
[
dstq
+
2
*
lenq
]
,
m1
mova
[
dstq
+
2
*
lenq
+
mmsize
]
,
m0
%endif
; Advance by one vector's worth of input bytes; keep looping while the
; offset is still negative.
add
lenq
,
mmsize
jl
.
loop
; 256-bit build only: zero the upper register halves before returning.
%if
mmsize
==
32
vzeroupper
RET
%endif
; Early-exit target for len == 0; also the fall-through return when the
; block above is not assembled.
.
end
:
REP_RET
%endmacro
INIT_XMM
sse
BUTTERFLIES_FLOAT_INTERLEAVE
INIT_YMM
avx
BUTTERFLIES_FLOAT_INTERLEAVE
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment