Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
07a566e7
Commit
07a566e7
authored
Apr 22, 2018
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale/swscale_unscaled : add X86_64 (SSE2 and AVX) for uyvyto422
and checkasm test
parent
e6e46258
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
219 additions
and
0 deletions
+219
-0
rgb2rgb.c
libswscale/x86/rgb2rgb.c
+19
-0
rgb_2_rgb.asm
libswscale/x86/rgb_2_rgb.asm
+150
-0
sw_rgb.c
tests/checkasm/sw_rgb.c
+50
-0
No files found.
libswscale/x86/rgb2rgb.c
View file @
07a566e7
...
...
@@ -150,6 +150,15 @@ void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size)
void
ff_shuffle_bytes_3012_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
void
ff_shuffle_bytes_3210_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
#if ARCH_X86_64
void
ff_uyvytoyuv422_sse2
(
uint8_t
*
ydst
,
uint8_t
*
udst
,
uint8_t
*
vdst
,
const
uint8_t
*
src
,
int
width
,
int
height
,
int
lumStride
,
int
chromStride
,
int
srcStride
);
void
ff_uyvytoyuv422_avx
(
uint8_t
*
ydst
,
uint8_t
*
udst
,
uint8_t
*
vdst
,
const
uint8_t
*
src
,
int
width
,
int
height
,
int
lumStride
,
int
chromStride
,
int
srcStride
);
#endif
av_cold
void
rgb2rgb_init_x86
(
void
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -167,6 +176,11 @@ av_cold void rgb2rgb_init_x86(void)
rgb2rgb_init_avx
();
#endif
/* HAVE_INLINE_ASM */
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
#if ARCH_X86_64
uyvytoyuv422
=
ff_uyvytoyuv422_sse2
;
#endif
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
shuffle_bytes_0321
=
ff_shuffle_bytes_0321_ssse3
;
shuffle_bytes_2103
=
ff_shuffle_bytes_2103_ssse3
;
...
...
@@ -174,4 +188,9 @@ av_cold void rgb2rgb_init_x86(void)
shuffle_bytes_3012
=
ff_shuffle_bytes_3012_ssse3
;
shuffle_bytes_3210
=
ff_shuffle_bytes_3210_ssse3
;
}
if
(
EXTERNAL_AVX
(
cpu_flags
))
{
#if ARCH_X86_64
uyvytoyuv422
=
ff_uyvytoyuv422_avx
;
#endif
}
}
libswscale/x86/rgb_2_rgb.asm
View file @
07a566e7
...
...
@@ -32,6 +32,16 @@ pb_shuffle3210: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
SECTION
.
text
;------------------------------------------------------------------------------
; RSHIFT_COPY dst, src, shift
; Puts a right-shifted copy of src into dst and leaves src untouched.
; With AVX the three-operand psrldq (whole-register byte shift) does it in one
; instruction; otherwise src is copied first and shifted in place with RSHIFT.
; NOTE(review): RSHIFT is defined outside this view - presumably the same byte
; shift as psrldq; confirm against its definition.
;------------------------------------------------------------------------------
%macro RSHIFT_COPY 3
; %1 dst ; %2 src ; %3 shift
%if cpuflag(avx)
    psrldq  %1, %2, %3
%else
    mova    %1, %2
    RSHIFT  %1, %3
%endif
%endmacro
;------------------------------------------------------------------------------
; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------
...
...
@@ -84,3 +94,143 @@ SHUFFLE_BYTES 0, 3, 2, 1
SHUFFLE_BYTES
1
,
2
,
3
,
0
SHUFFLE_BYTES
3
,
0
,
1
,
2
SHUFFLE_BYTES
3
,
2
,
1
,
0
;-----------------------------------------------------------------------------------------------
; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
;              const uint8_t *src, int width, int height,
;              int lumStride, int chromStride, int srcStride)
;
; Splits packed UYVY (bytes U0 Y0 V0 Y1 per pixel pair) into three planes.
;
; Per line:
;   - the first (width mod (mmsize * 2)) pixels are handled two at a time by
;     .loop_scalar;
;   - the rest is handled mmsize * 2 pixels at a time by .loop_simd
;     (4 * mmsize source bytes -> 2 * mmsize Y, mmsize U, mmsize V).
;
; All pointers are pre-advanced to the end of the line and indexed with
; negative offsets that count up towards zero.
;
; Register roles:
;   w      = -width      .. 0   (luma index)
;   wtwo   = -width * 2  .. 0   (source byte index)
;   whalf  = -width / 2  .. 0   (chroma index)
;   x      = pixels left for the scalar loop
;   back_w = saved width, used to re-seed w/whalf for the next line
;   tmp    = scalar byte shuttle
;   m1     = 0x00FF in every word (low-byte mask)
;   m2-m7  = work registers
;
; NOTE(review): the scalar loop always stores a pair of luma bytes, so an odd
; width appears to store one luma byte beyond `width` on each line. Confirm
; whether callers can pass odd widths.
;-----------------------------------------------------------------------------------------------
%macro UYVY_TO_YUV422 0
cglobal uyvytoyuv422, 9, 14, 8, ydst, udst, vdst, src, w, h, lum_stride, chrom_stride, src_stride, wtwo, whalf, tmp, x, back_w
    pcmpeqw      m1, m1
    psrlw        m1, 8                          ; m1 = 0x00FF per word

    ; the int arguments are used as 64-bit offsets below
    movsxdifnidn            wq, wd
    movsxdifnidn   lum_strideq, lum_strided
    movsxdifnidn chrom_strideq, chrom_strided
    movsxdifnidn   src_strideq, src_strided

    mov     back_wq, wq
    mov      whalfq, wq
    shr      whalfq, 1                          ; whalf = width / 2

    ; point every buffer at the end of its first line
    lea        srcq, [srcq + wq * 2]
    add       ydstq, wq
    add       udstq, whalfq
    add       vdstq, whalfq

.loop_line:
    mov          xq, wq
    mov       wtwoq, wq
    add       wtwoq, wtwoq                      ; wtwo = width * 2

    neg          wq
    neg       wtwoq
    neg      whalfq

    ; calc scalar loop count
    and          xq, mmsize * 2 - 1
    je .loop_simd                               ; width is a multiple of the SIMD step

.loop_scalar:
    mov        tmpb, [srcq + wtwoq + 0]         ; U
    mov [udstq + whalfq], tmpb

    mov        tmpb, [srcq + wtwoq + 1]         ; Y0
    mov   [ydstq + wq], tmpb

    mov        tmpb, [srcq + wtwoq + 2]         ; V
    mov [vdstq + whalfq], tmpb

    mov        tmpb, [srcq + wtwoq + 3]         ; Y1
    mov [ydstq + wq + 1], tmpb

    add          wq, 2
    add       wtwoq, 4
    add      whalfq, 1
    sub          xq, 2
    jg .loop_scalar

    ; check if the simd loop is needed
    test         wq, wq
    jge .end_line

.loop_simd:
    movu         m2, [srcq + wtwoq             ]
    movu         m3, [srcq + wtwoq + mmsize    ]
    movu         m4, [srcq + wtwoq + mmsize * 2]
    movu         m5, [srcq + wtwoq + mmsize * 3]

    ; extract y part 1
    RSHIFT_COPY  m6, m2, 1                      ; UYVY UYVY -> YVYU YVY...
    pand         m6, m1                         ; YxYx YxYx...

    RSHIFT_COPY  m7, m3, 1                      ; UYVY UYVY -> YVYU YVY...
    pand         m7, m1                         ; YxYx YxYx...

    packuswb     m6, m7                         ; YYYY YYYY...
    movu [ydstq + wq], m6

    ; extract y part 2
    RSHIFT_COPY  m6, m4, 1                      ; UYVY UYVY -> YVYU YVY...
    pand         m6, m1                         ; YxYx YxYx...

    RSHIFT_COPY  m7, m5, 1                      ; UYVY UYVY -> YVYU YVY...
    pand         m7, m1                         ; YxYx YxYx...

    packuswb     m6, m7                         ; YYYY YYYY...
    movu [ydstq + wq + mmsize], m6

    ; extract uv
    pand         m2, m1                         ; UxVx...
    pand         m3, m1                         ; UxVx...
    pand         m4, m1                         ; UxVx...
    pand         m5, m1                         ; UxVx...

    packuswb     m2, m3                         ; UVUV...
    packuswb     m4, m5                         ; UVUV...

    ; U
    pand         m6, m2, m1                     ; UxUx...
    pand         m7, m4, m1                     ; UxUx...

    packuswb     m6, m7                         ; UUUU
    movu [udstq + whalfq], m6

    ; V
    psrlw        m2, 8                          ; VxVx...
    psrlw        m4, 8                          ; VxVx...
    packuswb     m2, m4                         ; VVVV
    movu [vdstq + whalfq], m2

    add      whalfq, mmsize
    add       wtwoq, mmsize * 4
    add          wq, mmsize * 2
    jl .loop_simd

.end_line:
    add        srcq, src_strideq
    add       ydstq, lum_strideq
    add       udstq, chrom_strideq
    add       vdstq, chrom_strideq

    ; restore the per-line counters (x and wtwo are recomputed at .loop_line)
    mov          wq, back_wq
    mov      whalfq, wq
    shr      whalfq, 1                          ; whalf = width / 2
    sub          hd, 1
    jg .loop_line

    RET
%endmacro
; Instantiate uyvytoyuv422 once per instruction set. Built for x86-64 only:
; the macro's cglobal line requests 14 general-purpose registers.
%if ARCH_X86_64
INIT_XMM sse2
UYVY_TO_YUV422

INIT_XMM avx
UYVY_TO_YUV422
%endif
tests/checkasm/sw_rgb.c
View file @
07a566e7
...
...
@@ -35,8 +35,12 @@
} while (0)
/* Table of widths; NOTE(review): presumably iterated by check_shuffle_bytes(),
 * whose body is not visible in this view - confirm. */
static const uint8_t width[] = {12, 16, 20, 32, 36, 128};
/*
 * Geometries fed to uyvytoyuv422 by check_uyvy_to_422p():
 * w is passed as width, h as height and s as srcStride.
 */
static const struct {uint8_t w, h, s;} planes[] = {
    {12,  16,  12},
    {16,  16,  16},
    {20,  23,  25},
    {32,  18,  48},
    {8,   128, 16},
    {128, 128, 128}
};
#define MAX_STRIDE 128
#define MAX_HEIGHT 128
static
void
check_shuffle_bytes
(
void
*
func
,
const
char
*
report
)
{
...
...
@@ -64,6 +68,49 @@ static void check_shuffle_bytes(void * func, const char * report)
}
}
static
void
check_uyvy_to_422p
()
{
int
i
;
LOCAL_ALIGNED_32
(
uint8_t
,
src0
,
[
MAX_STRIDE
*
MAX_HEIGHT
*
2
]);
LOCAL_ALIGNED_32
(
uint8_t
,
src1
,
[
MAX_STRIDE
*
MAX_HEIGHT
*
2
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_y_0
,
[
MAX_STRIDE
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_y_1
,
[
MAX_STRIDE
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_u_0
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_u_1
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_v_0
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_v_1
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
declare_func_emms
(
AV_CPU_FLAG_MMX
,
void
,
uint8_t
*
ydst
,
uint8_t
*
udst
,
uint8_t
*
vdst
,
const
uint8_t
*
src
,
int
width
,
int
height
,
int
lumStride
,
int
chromStride
,
int
srcStride
);
randomize_buffers
(
src0
,
MAX_STRIDE
*
MAX_HEIGHT
*
2
);
memcpy
(
src1
,
src0
,
MAX_STRIDE
*
MAX_HEIGHT
*
2
);
if
(
check_func
(
uyvytoyuv422
,
"uyvytoyuv422"
))
{
for
(
i
=
0
;
i
<
6
;
i
++
)
{
memset
(
dst_y_0
,
0
,
MAX_STRIDE
*
MAX_HEIGHT
);
memset
(
dst_y_1
,
0
,
MAX_STRIDE
*
MAX_HEIGHT
);
memset
(
dst_u_0
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
memset
(
dst_u_1
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
memset
(
dst_v_0
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
memset
(
dst_v_1
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
call_ref
(
dst_y_0
,
dst_u_0
,
dst_v_0
,
src0
,
planes
[
i
].
w
,
planes
[
i
].
h
,
MAX_STRIDE
,
MAX_STRIDE
/
2
,
planes
[
i
].
s
);
call_new
(
dst_y_1
,
dst_u_1
,
dst_v_1
,
src1
,
planes
[
i
].
w
,
planes
[
i
].
h
,
MAX_STRIDE
,
MAX_STRIDE
/
2
,
planes
[
i
].
s
);
if
(
memcmp
(
dst_y_0
,
dst_y_1
,
MAX_STRIDE
*
MAX_HEIGHT
)
||
memcmp
(
dst_u_0
,
dst_u_1
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
)
||
memcmp
(
dst_v_0
,
dst_v_1
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
))
fail
();
}
bench_new
(
dst_y_1
,
dst_u_1
,
dst_v_1
,
src1
,
planes
[
5
].
w
,
planes
[
5
].
h
,
MAX_STRIDE
,
MAX_STRIDE
/
2
,
planes
[
5
].
s
);
}
}
void
checkasm_check_sw_rgb
(
void
)
{
ff_sws_rgb2rgb_init
();
...
...
@@ -82,4 +129,7 @@ void checkasm_check_sw_rgb(void)
check_shuffle_bytes
(
shuffle_bytes_3210
,
"shuffle_bytes_3210"
);
report
(
"shuffle_bytes_3210"
);
check_uyvy_to_422p
();
report
(
"uyvytoyuv422"
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment