Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
4e8e2624
Commit
4e8e2624
authored
Oct 10, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm
parent
185142a5
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
65 additions
and
60 deletions
+65
-60
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+0
-8
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+46
-0
fmtconvert_mmx.c
libavcodec/x86/fmtconvert_mmx.c
+7
-52
x86util.asm
libavutil/x86/x86util.asm
+12
-0
No files found.
libavcodec/x86/dsputil_yasm.asm
View file @
4e8e2624
...
@@ -1055,14 +1055,6 @@ emu_edge mmx
...
@@ -1055,14 +1055,6 @@ emu_edge mmx
; int32_t max, unsigned int len)
; int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro
SPLATD_MMX
1
punpckldq
%1
,
%1
%endmacro
%macro
SPLATD_SSE2
1
pshufd
%1
,
%1
,
0
%endmacro
%macro
VECTOR_CLIP_INT32
4
%macro
VECTOR_CLIP_INT32
4
cglobal
vector_clip_int32_
%1
,
5
,
5
,
%2
,
dst
,
src
,
min
,
max
,
len
cglobal
vector_clip_int32_
%1
,
5
,
5
,
%2
,
dst
,
src
,
min
,
max
,
len
%ifidn
%1
,
sse2
%ifidn
%1
,
sse2
...
...
libavcodec/x86/fmtconvert.asm
View file @
4e8e2624
...
@@ -24,6 +24,52 @@
...
@@ -24,6 +24,52 @@
SECTION_TEXT
SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
%macro
INT32_TO_FLOAT_FMUL_SCALAR
2
%ifdef
ARCH_X86_64
cglobal
int32_to_float_fmul_scalar_
%1
,
3
,
3
,
%2
,
dst
,
src
,
len
%else
cglobal
int32_to_float_fmul_scalar_
%1
,
4
,
4
,
%2
,
dst
,
src
,
mul
,
len
movss
m0
,
mulm
%endif
SPLATD
m0
shl
lenq
,
2
add
srcq
,
lenq
add
dstq
,
lenq
neg
lenq
.
loop
:
%ifidn
%1
,
sse2
cvtdq2ps
m1
,
[
srcq
+
lenq
]
cvtdq2ps
m2
,
[
srcq
+
lenq
+
16
]
%else
cvtpi2ps
m1
,
[
srcq
+
lenq
]
cvtpi2ps
m3
,
[
srcq
+
lenq
+
8
]
cvtpi2ps
m2
,
[
srcq
+
lenq
+
16
]
cvtpi2ps
m4
,
[
srcq
+
lenq
+
24
]
movlhps
m1
,
m3
movlhps
m2
,
m4
%endif
mulps
m1
,
m0
mulps
m2
,
m0
mova
[
dstq
+
lenq
]
,
m1
mova
[
dstq
+
lenq
+
16
]
,
m2
add
lenq
,
32
jl
.
loop
REP_RET
%endmacro
INIT_XMM
%define
SPLATD
SPLATD_SSE
%define
movdqa
movaps
INT32_TO_FLOAT_FMUL_SCALAR
sse
,
5
%undef
movdqa
%define
SPLATD
SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR
sse2
,
3
%undef
SPLATD
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
...
...
libavcodec/x86/fmtconvert_mmx.c
View file @
4e8e2624
...
@@ -26,52 +26,11 @@
...
@@ -26,52 +26,11 @@
#include "libavutil/x86_cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/fmtconvert.h"
#include "libavcodec/fmtconvert.h"
static
void
int32_to_float_fmul_scalar_sse
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
)
{
x86_reg
i
=
-
4
*
len
;
__asm__
volatile
(
"movss %3, %%xmm4
\n
"
"shufps $0, %%xmm4, %%xmm4
\n
"
"1:
\n
"
"cvtpi2ps (%2,%0), %%xmm0
\n
"
"cvtpi2ps 8(%2,%0), %%xmm1
\n
"
"cvtpi2ps 16(%2,%0), %%xmm2
\n
"
"cvtpi2ps 24(%2,%0), %%xmm3
\n
"
"movlhps %%xmm1, %%xmm0
\n
"
"movlhps %%xmm3, %%xmm2
\n
"
"mulps %%xmm4, %%xmm0
\n
"
"mulps %%xmm4, %%xmm2
\n
"
"movaps %%xmm0, (%1,%0)
\n
"
"movaps %%xmm2, 16(%1,%0)
\n
"
"add $32, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
)
:
"r"
(
dst
+
len
),
"r"
(
src
+
len
),
"m"
(
mul
)
);
}
static
void
int32_to_float_fmul_scalar_sse2
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
)
{
x86_reg
i
=
-
4
*
len
;
__asm__
volatile
(
"movss %3, %%xmm4
\n
"
"shufps $0, %%xmm4, %%xmm4
\n
"
"1:
\n
"
"cvtdq2ps (%2,%0), %%xmm0
\n
"
"cvtdq2ps 16(%2,%0), %%xmm1
\n
"
"mulps %%xmm4, %%xmm0
\n
"
"mulps %%xmm4, %%xmm1
\n
"
"movaps %%xmm0, (%1,%0)
\n
"
"movaps %%xmm1, 16(%1,%0)
\n
"
"add $32, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
)
:
"r"
(
dst
+
len
),
"r"
(
src
+
len
),
"m"
(
mul
)
);
}
#if HAVE_YASM
#if HAVE_YASM
void
ff_int32_to_float_fmul_scalar_sse
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
);
void
ff_int32_to_float_fmul_scalar_sse2
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
);
void
ff_float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
...
@@ -204,8 +163,8 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
...
@@ -204,8 +163,8 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
{
int
mm_flags
=
av_get_cpu_flags
();
int
mm_flags
=
av_get_cpu_flags
();
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
#if HAVE_YASM
#if HAVE_YASM
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
c
->
float_interleave
=
float_interleave_mmx
;
c
->
float_interleave
=
float_interleave_mmx
;
if
(
HAVE_AMD3DNOW
&&
mm_flags
&
AV_CPU_FLAG_3DNOW
)
{
if
(
HAVE_AMD3DNOW
&&
mm_flags
&
AV_CPU_FLAG_3DNOW
)
{
...
@@ -219,21 +178,17 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
...
@@ -219,21 +178,17 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dn2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dn2
;
}
}
}
}
#endif
if
(
HAVE_SSE
&&
mm_flags
&
AV_CPU_FLAG_SSE
)
{
if
(
HAVE_SSE
&&
mm_flags
&
AV_CPU_FLAG_SSE
)
{
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse
;
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse
;
#if HAVE_YASM
c
->
float_to_int16
=
ff_float_to_int16_sse
;
c
->
float_to_int16
=
ff_float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
float_interleave
=
float_interleave_sse
;
c
->
float_interleave
=
float_interleave_sse
;
#endif
}
}
if
(
HAVE_SSE
&&
mm_flags
&
AV_CPU_FLAG_SSE2
)
{
if
(
HAVE_SSE
&&
mm_flags
&
AV_CPU_FLAG_SSE2
)
{
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse2
;
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse2
;
#if HAVE_YASM
c
->
float_to_int16
=
ff_float_to_int16_sse2
;
c
->
float_to_int16
=
ff_float_to_int16_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
#endif
}
}
}
}
#endif
}
}
libavutil/x86/x86util.asm
View file @
4e8e2624
...
@@ -536,6 +536,18 @@
...
@@ -536,6 +536,18 @@
%endif
%endif
%endmacro
%endmacro
%macro
SPLATD_MMX
1
punpckldq
%1
,
%1
%endmacro
%macro
SPLATD_SSE
1
shufps
%1
,
%1
,
0
%endmacro
%macro
SPLATD_SSE2
1
pshufd
%1
,
%1
,
0
%endmacro
%macro
CLIPW
3
;(dst, min, max)
%macro
CLIPW
3
;(dst, min, max)
pmaxsw
%1
,
%2
pmaxsw
%1
,
%2
pminsw
%1
,
%3
pminsw
%1
,
%3
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment