Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
942e22c6
Commit
942e22c6
authored
Jun 16, 2014
by
plepere
Committed by
Michael Niedermayer
Jun 25, 2014
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/x86/hevc: add avx2 dc idct
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
a30f1b15
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
72 additions
and
3 deletions
+72
-3
hevc_idct.asm
libavcodec/x86/hevc_idct.asm
+48
-3
hevcdsp.h
libavcodec/x86/hevcdsp.h
+6
-0
hevcdsp_init.c
libavcodec/x86/hevcdsp_init.c
+18
-0
No files found.
libavcodec/x86/hevc_idct.asm
View file @
942e22c6
...
...
@@ -20,12 +20,12 @@
; */
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA
max_pixels_10
:
times
8
dw
((
1
<<
10
)
-
1
)
SECTION_RODATA
32
max_pixels_10
:
times
16
dw
((
1
<<
10
)
-
1
)
dc_add_10
:
times
4
dd
((
1
<<
14
-
10
)
+
1
)
SECTION
.
text
SECTION
_TEXT
32
;the idct_dc_add macros and functions were largely inspired by x264 project's code in the h264_idct.asm file
...
...
@@ -41,6 +41,18 @@ SECTION .text
packuswb
m1
,
m1
%endmacro
; DC_ADD_INIT_AVX2 dc, stride
;   %1: general-purpose register holding the DC coefficient on entry;
;       overwritten with stride*3 on exit.
;   %2: general-purpose register holding the stride (not modified).
; Results: m0 = scaled DC broadcast to every word, then packed to unsigned
;               bytes with saturation (negative values become 0)
;          m1 = the negated DC, packed the same way (positive DC becomes 0)
; NOTE(review): m0/m1 are presumably consumed by DC_ADD_OP as the amounts to
; add and subtract with saturation - confirm against the DC_ADD_OP body.
%macro DC_ADD_INIT_AVX2 2
    ; NASM binds +/- tighter than <<, so the rounding constant is (1 << 6) + 1 = 65
    add              %1w, ((1 << 14-8) + 1)
    sar              %1w, (15-8)             ; arithmetic shift right by 7
    movd             xm0, %1d
    vpbroadcastw     m0, xm0     ;SPLATW
    lea              %1, [%2*3]              ; %1 is free now: reuse it for stride*3
    pxor             m1, m1
    psubw            m1, m0                  ; m1 = 0 - m0
    packuswb         m0, m0
    packuswb         m1, m1
%endmacro
%macro
DC_ADD_OP
4
%1
m2
,
[
%2
]
%1
m3
,
[
%2
+
%3
]
...
...
@@ -112,6 +124,19 @@ cglobal hevc_idct16_dc_add_8, 3, 4, 0
DC_ADD_OP
mova
,
r0
,
r2
,
r3
RET
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
;-----------------------------------------------------------------------------
; void ff_hevc_idct32_dc_add_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
;   r0 = dst, r1 = coeffs, r2 = stride, r3 = scratch (DC value, then stride*3)
; DC_ADD_OP is invoked 1 + 7 = 8 times, with dst advanced by 4*stride between
; invocations.
; NOTE(review): presumably DC_ADD_OP covers four rows per invocation, giving
; 32 rows in total - confirm against the macro body.
;-----------------------------------------------------------------------------
cglobal hevc_idct32_dc_add_8, 3, 4, 6
    movsx             r3, word [r1]         ; sign-extended 16-bit load of coeffs[0]
    DC_ADD_INIT_AVX2  r3, r2                ; sets up m0/m1; r3 becomes stride*3
    ; DC_ADD_OP is declared with exactly four parameters; no trailing comma.
    DC_ADD_OP         mova, r0, r2, r3
%rep 7
    lea               r0, [r0+r2*4]         ; dst += 4*stride
    DC_ADD_OP         mova, r0, r2, r3
%endrep
    RET
%endif ;HAVE_AVX2_EXTERNAL
;-----------------------------------------------------------------------------
; void ff_hevc_idct_dc_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
...
...
@@ -178,3 +203,23 @@ IDCT8_DC_ADD
INIT_XMM
avx
IDCT8_DC_ADD
%endif
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
;-----------------------------------------------------------------------------
; void ff_hevc_idct16_dc_add_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
;   r0 = dst, r1 = coeffs (then DC value, then stride*3), r2 = stride
;   m0 = scaled DC broadcast to every word, m6 = max_pixels_10
; IDCT_DC_ADD_OP_10 is invoked 1 + 3 = 4 times, with dst advanced by 4*stride
; between invocations.
; NOTE(review): presumably each invocation covers four rows, giving 16 rows in
; total - confirm against the IDCT_DC_ADD_OP_10 body.
;-----------------------------------------------------------------------------
cglobal hevc_idct16_dc_add_10, 3, 4, 7
    mov               r1w, [r1]             ; 16-bit load of coeffs[0]; pointer no longer needed
    add               r1w, ((1 << 4) + 1)   ; rounding constant 17
    sar               r1w, 5
    movd              xm0, r1d
    lea               r1, [r2*3]            ; r1 = stride*3
    vpbroadcastw      m0, xm0     ;SPLATW
    mova              m6, [max_pixels_10]
    IDCT_DC_ADD_OP_10 r0, r2, r1
%rep 3
    lea               r0, [r0+r2*4]         ; dst += 4*stride
    IDCT_DC_ADD_OP_10 r0, r2, r1
%endrep
    RET
%endif ;HAVE_AVX2_EXTERNAL
libavcodec/x86/hevcdsp.h
View file @
942e22c6
...
...
@@ -133,6 +133,8 @@ idct_dc_proto(8, 8,mmxext);
idct_dc_proto
(
16
,
8
,
sse2
);
idct_dc_proto
(
32
,
8
,
sse2
);
idct_dc_proto
(
32
,
8
,
avx2
);
idct_dc_proto
(
4
,
10
,
mmxext
);
idct_dc_proto
(
8
,
10
,
sse2
);
...
...
@@ -142,6 +144,10 @@ idct_dc_proto(8, 10, avx);
idct_dc_proto
(
16
,
10
,
avx
);
idct_dc_proto
(
32
,
10
,
avx
);
idct_dc_proto
(
16
,
10
,
avx2
);
idct_dc_proto
(
32
,
10
,
avx2
);
...
...
libavcodec/x86/hevcdsp_init.c
View file @
942e22c6
...
...
@@ -92,6 +92,17 @@ void ff_hevc_idct32_dc_add_10_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t strid
}
#endif //HAVE_AVX_EXTERNAL
#if HAVE_AVX2_EXTERNAL
/*
 * 10-bit 32x32 DC add built from four calls to the 16x16 AVX2 routine, one
 * per quadrant: top-left, top-right, bottom-left, bottom-right.
 * The right-hand quadrants start 32 bytes into the row (presumably 16
 * two-byte samples - confirm), the lower ones 16 rows further down.
 * The same coeffs pointer is passed to every call.
 */
void ff_hevc_idct32_dc_add_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
{
    uint8_t *const top    = dst;
    uint8_t *const bottom = dst + 16 * stride;

    ff_hevc_idct16_dc_add_10_avx2(top,         coeffs, stride);
    ff_hevc_idct16_dc_add_10_avx2(top    + 32, coeffs, stride);
    ff_hevc_idct16_dc_add_10_avx2(bottom,      coeffs, stride);
    ff_hevc_idct16_dc_add_10_avx2(bottom + 32, coeffs, stride);
}
#endif //HAVE_AVX2_EXTERNAL
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \
uint8_t *_src, ptrdiff_t _srcstride, int height, \
...
...
@@ -438,6 +449,9 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS
(
c
->
put_hevc_qpel
,
1
,
0
,
qpel_v
,
8
,
sse4
);
QPEL_LINKS
(
c
->
put_hevc_qpel
,
1
,
1
,
qpel_hv
,
8
,
sse4
);
}
if
(
EXTERNAL_AVX2
(
mm_flags
))
{
c
->
transform_dc_add
[
3
]
=
ff_hevc_idct32_dc_add_8_avx2
;
}
}
else
if
(
bit_depth
==
10
)
{
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
transform_dc_add
[
0
]
=
ff_hevc_idct4_dc_add_10_mmxext
;
...
...
@@ -473,6 +487,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c
->
transform_dc_add
[
2
]
=
ff_hevc_idct16_dc_add_10_avx
;
c
->
transform_dc_add
[
3
]
=
ff_hevc_idct32_dc_add_10_avx
;
}
if
(
EXTERNAL_AVX2
(
mm_flags
))
{
c
->
transform_dc_add
[
2
]
=
ff_hevc_idct16_dc_add_10_avx2
;
c
->
transform_dc_add
[
3
]
=
ff_hevc_idct32_dc_add_10_avx2
;
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment