Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
f5c05b9a
Commit
f5c05b9a
authored
Dec 05, 2011
by
Janne Grunau
Committed by
Mans Rullgard
Dec 06, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rv40: NEON optimised chroma MC
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
f054a827
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
118 additions
and
5 deletions
+118
-5
Makefile
libavcodec/arm/Makefile
+2
-0
h264cmc_neon.S
libavcodec/arm/h264cmc_neon.S
+75
-5
rv40dsp_init_neon.c
libavcodec/arm/rv40dsp_init_neon.c
+38
-0
rv34dsp.h
libavcodec/rv34dsp.h
+1
-0
rv40dsp.c
libavcodec/rv40dsp.c
+2
-0
No files found.
libavcodec/arm/Makefile
View file @
f5c05b9a
...
@@ -68,6 +68,8 @@ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_neon.o \
...
@@ -68,6 +68,8 @@ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_neon.o \
NEON-OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv34dsp_init_neon.o
\
NEON-OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv34dsp_init_neon.o
\
arm/rv34dsp_neon.o
\
arm/rv34dsp_neon.o
\
arm/rv40dsp_init_neon.o
\
arm/h264cmc_neon.o
\
NEON-OBJS-$(CONFIG_VP3_DECODER)
+=
arm/vp3dsp_neon.o
NEON-OBJS-$(CONFIG_VP3_DECODER)
+=
arm/vp3dsp_neon.o
...
...
libavcodec/arm/h264cmc_neon.S
View file @
f5c05b9a
...
@@ -21,8 +21,8 @@
...
@@ -21,8 +21,8 @@
#include "asm.S"
#include "asm.S"
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro h264_chroma_mc8 type
.macro h264_chroma_mc8 type
, codec=h264
function ff_\type\()_
h264
_chroma_mc8_neon, export=1
function ff_\type\()_
\codec\()
_chroma_mc8_neon, export=1
push {r4-r7, lr}
push {r4-r7, lr}
ldrd r4, [sp, #20]
ldrd r4, [sp, #20]
.ifc \type,avg
.ifc \type,avg
...
@@ -31,6 +31,15 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
...
@@ -31,6 +31,15 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
pld [r1]
pld [r1]
pld [r1, r2]
pld [r1, r2]
.ifc \codec,rv40
movrel r6, rv40bias
lsr r7, r5, #1
add r6, r6, r7, lsl #3
lsr r7, r4, #1
add r6, r6, r7, lsl #1
vld1.16 {d22[],d23[]}, [r6,:16]
.endif
A muls r7, r4, r5
A muls r7, r4, r5
T mul r7, r4, r5
T mul r7, r4, r5
T cmp r7, #0
T cmp r7, #0
...
@@ -67,10 +76,17 @@ T cmp r7, #0
...
@@ -67,10 +76,17 @@ T cmp r7, #0
vmlal.u8 q9, d7, d1
vmlal.u8 q9, d7, d1
vmlal.u8 q9, d4, d2
vmlal.u8 q9, d4, d2
vmlal.u8 q9, d5, d3
vmlal.u8 q9, d5, d3
vrshrn.u16 d16, q8, #6
vld1.8 {d6, d7}, [r5], r4
vld1.8 {d6, d7}, [r5], r4
pld [r1]
pld [r1]
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
vrshrn.u16 d17, q9, #6
.else
vadd.u16 q8, q8, q11
vadd.u16 q9, q9, q11
vshrn.u16 d16, q8, #6
vshrn.u16 d17, q9, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
...
@@ -102,8 +118,15 @@ T cmp r7, #0
...
@@ -102,8 +118,15 @@ T cmp r7, #0
vmull.u8 q9, d6, d0
vmull.u8 q9, d6, d0
vmlal.u8 q9, d4, d1
vmlal.u8 q9, d4, d1
vld1.8 {d6}, [r5], r4
vld1.8 {d6}, [r5], r4
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
vrshrn.u16 d17, q9, #6
.else
vadd.u16 q8, q8, q11
vadd.u16 q9, q9, q11
vshrn.u16 d16, q8, #6
vshrn.u16 d17, q9, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
...
@@ -131,8 +154,15 @@ T cmp r7, #0
...
@@ -131,8 +154,15 @@ T cmp r7, #0
vmlal.u8 q9, d7, d1
vmlal.u8 q9, d7, d1
pld [r1]
pld [r1]
vext.8 d5, d4, d5, #1
vext.8 d5, d4, d5, #1
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
vrshrn.u16 d17, q9, #6
.else
vadd.u16 q8, q8, q11
vadd.u16 q9, q9, q11
vshrn.u16 d16, q8, #6
vshrn.u16 d17, q9, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
...
@@ -149,8 +179,8 @@ endfunc
...
@@ -149,8 +179,8 @@ endfunc
.endm
.endm
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro h264_chroma_mc4 type
.macro h264_chroma_mc4 type
, codec=h264
function ff_\type\()_
h264
_chroma_mc4_neon, export=1
function ff_\type\()_
\codec\()
_chroma_mc4_neon, export=1
push {r4-r7, lr}
push {r4-r7, lr}
ldrd r4, [sp, #20]
ldrd r4, [sp, #20]
.ifc \type,avg
.ifc \type,avg
...
@@ -159,6 +189,15 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
...
@@ -159,6 +189,15 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
pld [r1]
pld [r1]
pld [r1, r2]
pld [r1, r2]
.ifc \codec,rv40
movrel r6, rv40bias
lsr r7, r5, #1
add r6, r6, r7, lsl #3
lsr r7, r4, #1
add r6, r6, r7, lsl #1
vld1.16 {d22[],d23[]}, [r6,:16]
.endif
A muls r7, r4, r5
A muls r7, r4, r5
T mul r7, r4, r5
T mul r7, r4, r5
T cmp r7, #0
T cmp r7, #0
...
@@ -199,7 +238,12 @@ T cmp r7, #0
...
@@ -199,7 +238,12 @@ T cmp r7, #0
vld1.8 {d6}, [r5], r4
vld1.8 {d6}, [r5], r4
vadd.i16 d16, d16, d17
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
vadd.i16 d17, d18, d19
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
.else
vadd.u16 q8, q8, q11
vshrn.u16 d16, q8, #6
.endif
subs r3, r3, #2
subs r3, r3, #2
pld [r1]
pld [r1]
.ifc \type,avg
.ifc \type,avg
...
@@ -236,7 +280,12 @@ T cmp r7, #0
...
@@ -236,7 +280,12 @@ T cmp r7, #0
vld1.32 {d4[1]}, [r5], r4
vld1.32 {d4[1]}, [r5], r4
vadd.i16 d16, d16, d17
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
vadd.i16 d17, d18, d19
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
.else
vadd.u16 q8, q8, q11
vshrn.u16 d16, q8, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
...
@@ -266,7 +315,12 @@ T cmp r7, #0
...
@@ -266,7 +315,12 @@ T cmp r7, #0
vadd.i16 d16, d16, d17
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
vadd.i16 d17, d18, d19
pld [r1]
pld [r1]
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
.else
vadd.u16 q8, q8, q11
vshrn.u16 d16, q8, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
...
@@ -352,9 +406,25 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
...
@@ -352,9 +406,25 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
endfunc
endfunc
.endm
.endm
#if CONFIG_H264_DECODER
h264_chroma_mc8 put
h264_chroma_mc8 put
h264_chroma_mc8 avg
h264_chroma_mc8 avg
h264_chroma_mc4 put
h264_chroma_mc4 put
h264_chroma_mc4 avg
h264_chroma_mc4 avg
h264_chroma_mc2 put
h264_chroma_mc2 put
h264_chroma_mc2 avg
h264_chroma_mc2 avg
#endif
#if CONFIG_RV40_DECODER
/*
 * RV40 chroma rounding bias: a 4x4 table of 16-bit values.
 *
 * The rv40 flavour of the chroma_mc8/mc4 macros computes the entry address
 * as rv40bias + (y >> 1) * 8 + (x >> 1) * 2, i.e. row = y >> 1 and
 * column = x >> 1, where x and y are the last two arguments of
 * chroma_mcN(dst, src, stride, h, x, y).  The selected value is replicated
 * into every 16-bit lane of q11 and added to the filter sums before a
 * truncating "vshrn #6"; the h264 flavour uses the rounding "vrshrn #6"
 * instead.
 */
const rv40bias
.short 0, 16, 32, 16
.short 32, 28, 32, 28
.short 0, 32, 16, 32
.short 32, 28, 32, 28
endconst
/*
 * Instantiate ff_{put,avg}_rv40_chroma_mc{8,4}_neon by passing codec=rv40
 * to the shared chroma MC macros.  Only the mc8 and mc4 macros are
 * instantiated for rv40 here.
 */
h264_chroma_mc8 put, rv40
h264_chroma_mc8 avg, rv40
h264_chroma_mc4 put, rv40
h264_chroma_mc4 avg, rv40
#endif
libavcodec/arm/rv40dsp_init_neon.c
0 → 100644
View file @
f5c05b9a
/*
* Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavcodec/avcodec.h"
#include "libavcodec/rv34dsp.h"
/*
 * NEON chroma motion-compensation entry points for RV40.
 * They are generated in libavcodec/arm/h264cmc_neon.S from the chroma MC
 * macros with codec=rv40; the argument order documented there is
 * (dst, src, stride, h, x, y).
 */
void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);

void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
/**
 * Install the NEON chroma motion-compensation routines into the RV40 DSP
 * function tables.
 *
 * @param c   DSP context whose put/avg chroma tables are overwritten
 * @param dsp not used by this function; the signature mirrors the other
 *            ff_rv34dsp/ff_rv40dsp init hooks
 */
void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext *dsp)
{
    /* Slot 0 receives the mc8 routines. */
    c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
    c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon;

    /* Slot 1 receives the mc4 routines. */
    c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
    c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_neon;
}
libavcodec/rv34dsp.h
View file @
f5c05b9a
...
@@ -59,5 +59,6 @@ void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp);
...
@@ -59,5 +59,6 @@ void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp);
void
ff_rv34dsp_init_neon
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv34dsp_init_neon
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv40dsp_init_x86
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv40dsp_init_x86
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv40dsp_init_neon
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
#endif
/* AVCODEC_RV34DSP_H */
#endif
/* AVCODEC_RV34DSP_H */
libavcodec/rv40dsp.c
View file @
f5c05b9a
...
@@ -534,4 +534,6 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
...
@@ -534,4 +534,6 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
if
(
HAVE_MMX
)
if
(
HAVE_MMX
)
ff_rv40dsp_init_x86
(
c
,
dsp
);
ff_rv40dsp_init_x86
(
c
,
dsp
);
if
(
HAVE_NEON
)
ff_rv40dsp_init_neon
(
c
,
dsp
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment