Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
e9930883
Commit
e9930883
authored
Oct 22, 2017
by
Martin Vignali
Committed by
James Darnley
Oct 29, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
libavcodec/bswapdsp : add AVX2 func for bswap_buf (swap uint32_t)
parent
9b0510a8
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
13 deletions
+38
-13
bswapdsp.asm
libavcodec/x86/bswapdsp.asm
+35
-13
bswapdsp_init.c
libavcodec/x86/bswapdsp_init.c
+3
-0
No files found.
libavcodec/x86/bswapdsp.asm
View file @
e9930883
...
...
@@ -35,14 +35,18 @@ SECTION .text
mov
r3d
,
r2d
sar
r2d
,
3
jz
.
left4_
%1
%if
cpuflag
(
avx2
)
sar
r2d
,
1
jz
.
left8_
%1
%endif
.
loop8_
%1
:
mov%1
m0
,
[
r1
+
0
]
mov%1
m1
,
[
r1
+
16
]
%if
cpuflag
(
ssse3
)
mov%1
m1
,
[
r1
+
mmsize
]
%if
cpuflag
(
ssse3
)
||
cpuflag
(
avx2
)
pshufb
m0
,
m2
pshufb
m1
,
m2
mov%1
[
r0
+
0
]
,
m0
mov%1
[
r0
+
16
]
,
m1
mov%1
[
r0
+
mmsize
]
,
m1
%else
pshuflw
m0
,
m0
,
10110001
b
pshuflw
m1
,
m1
,
10110001
b
...
...
@@ -59,18 +63,29 @@ SECTION .text
mov%1
[
r0
+
0
]
,
m2
mov%1
[
r0
+
16
]
,
m3
%endif
add
r0
,
3
2
add
r1
,
3
2
add
r0
,
mmsize
*
2
add
r1
,
mmsize
*
2
dec
r2d
jnz
.
loop8_
%1
%if
cpuflag
(
avx2
)
.
left8_
%1
:
mov
r2d
,
r3d
test
r3d
,
8
jz
.
left4_
%1
mov%1
m0
,
[r1]
pshufb
m0
,
m2
mov%1
[
r0
+
0
]
,
m0
add
r1
,
mmsize
add
r0
,
mmsize
%endif
.
left4_
%1
:
mov
r2d
,
r3d
test
r3d
,
4
jz
.
left
mov%1
m0
,
[r1]
mov%1
x
m0
,
[r1]
%if
cpuflag
(
ssse3
)
pshufb
m0
,
m2
mov%1
[r0],
m0
pshufb
xm0
,
x
m2
mov%1
[r0],
x
m0
%else
pshuflw
m0
,
m0
,
10110001
b
pshufhw
m0
,
m0
,
10110001
b
...
...
@@ -86,16 +101,20 @@ SECTION .text
; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
%macro
BSWAP32_BUF
0
%if
cpuflag
(
ssse3
)
%if
cpuflag
(
ssse3
)
||
cpuflag
(
avx2
)
cglobal
bswap32_buf
,
3
,
4
,
3
mov
r3
,
r1
%if
cpuflag
(
avx2
)
vbroadcasti128
m2
,
[
pb_bswap32
]
%else
mova
m2
,
[
pb_bswap32
]
%endif
%else
cglobal
bswap32_buf
,
3
,
4
,
5
mov
r3
,
r1
%endif
or
r3
,
r0
test
r3
,
15
test
r3
,
mmsize
-
1
jz
.
start_align
BSWAP_LOOPS
u
jmp
.
left
...
...
@@ -105,9 +124,9 @@ cglobal bswap32_buf, 3,4,5
%if
cpuflag
(
ssse3
)
test
r2d
,
2
jz
.
left1
movq
m0
,
[r1]
pshufb
m0
,
m2
movq
[r0],
m0
movq
x
m0
,
[r1]
pshufb
xm0
,
x
m2
movq
[r0],
x
m0
add
r1
,
8
add
r0
,
8
.
left1
:
...
...
@@ -137,3 +156,6 @@ BSWAP32_BUF
INIT_XMM
ssse3
BSWAP32_BUF
INIT_YMM
avx2
BSWAP32_BUF
libavcodec/x86/bswapdsp_init.c
View file @
e9930883
...
...
@@ -25,6 +25,7 @@
void
ff_bswap32_buf_sse2
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_ssse3
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_avx2
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
av_cold
void
ff_bswapdsp_init_x86
(
BswapDSPContext
*
c
)
{
...
...
@@ -34,4 +35,6 @@ av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
c
->
bswap_buf
=
ff_bswap32_buf_sse2
;
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
c
->
bswap_buf
=
ff_bswap32_buf_ssse3
;
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
c
->
bswap_buf
=
ff_bswap32_buf_avx2
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment