Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
b7d24fd4
Commit
b7d24fd4
authored
Jan 15, 2014
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ppc: dsputil: Merge some declarations and initializations
parent
b045283f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
38 additions
and
48 deletions
+38
-48
dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c
+0
-0
dsputil_ppc.c
libavcodec/ppc/dsputil_ppc.c
+3
-6
fdct_altivec.c
libavcodec/ppc/fdct_altivec.c
+1
-2
gmc_altivec.c
libavcodec/ppc/gmc_altivec.c
+15
-16
idct_altivec.c
libavcodec/ppc/idct_altivec.c
+17
-20
int_altivec.c
libavcodec/ppc/int_altivec.c
+2
-4
No files found.
libavcodec/ppc/dsputil_altivec.c
View file @
b7d24fd4
This diff is collapsed.
Click to expand it.
libavcodec/ppc/dsputil_ppc.c
View file @
b7d24fd4
...
...
@@ -51,8 +51,7 @@
*/
static
void
clear_blocks_dcbz32_ppc
(
int16_t
*
blocks
)
{
register
int
misal
=
(
unsigned
long
)
blocks
&
0x00000010
;
register
int
i
=
0
;
register
int
misal
=
(
unsigned
long
)
blocks
&
0x00000010
,
i
=
0
;
if
(
misal
)
{
((
unsigned
long
*
)
blocks
)[
0
]
=
0L
;
...
...
@@ -77,8 +76,7 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks)
static
void
clear_blocks_dcbz128_ppc
(
int16_t
*
blocks
)
{
#if HAVE_DCBZL
register
int
misal
=
(
unsigned
long
)
blocks
&
0x0000007f
;
register
int
i
=
0
;
register
int
misal
=
(
unsigned
long
)
blocks
&
0x0000007f
,
i
=
0
;
if
(
misal
)
{
/* We could probably also optimize this case,
...
...
@@ -104,8 +102,7 @@ static long check_dcbzl_effect(void)
#if HAVE_DCBZL
register
char
*
fakedata
=
av_malloc
(
1024
);
register
char
*
fakedata_middle
;
register
long
zero
=
0
;
register
long
i
=
0
;
register
long
zero
=
0
,
i
=
0
;
if
(
!
fakedata
)
return
0L
;
...
...
libavcodec/ppc/fdct_altivec.c
View file @
b7d24fd4
...
...
@@ -191,7 +191,7 @@ static vector float fdctconsts[3] = {
void
ff_fdct_altivec
(
int16_t
*
block
)
{
vector
signed
short
*
bp
;
vector
float
*
cp
;
vector
float
*
cp
=
fdctconsts
;
vector
float
b00
,
b10
,
b20
,
b30
,
b40
,
b50
,
b60
,
b70
;
vector
float
b01
,
b11
,
b21
,
b31
,
b41
,
b51
,
b61
,
b71
;
vector
float
mzero
,
cnst
,
cnsts0
,
cnsts1
,
cnsts2
;
...
...
@@ -201,7 +201,6 @@ void ff_fdct_altivec(int16_t *block)
/* mzero = -0.0 */
mzero
=
((
vector
float
)
vec_splat_u32
(
-
1
));
mzero
=
((
vector
float
)
vec_sl
(
vu32
(
mzero
),
vu32
(
mzero
)));
cp
=
fdctconsts
;
cnsts0
=
vec_ld
(
0
,
cp
);
cp
++
;
cnsts1
=
vec_ld
(
0
,
cp
);
...
...
libavcodec/ppc/gmc_altivec.c
View file @
b7d24fd4
...
...
@@ -30,6 +30,7 @@
void
ff_gmc1_altivec
(
uint8_t
*
dst
/* align 8 */
,
uint8_t
*
src
/* align1 */
,
int
stride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
{
int
i
;
const
DECLARE_ALIGNED
(
16
,
unsigned
short
,
rounder_a
)
=
rounder
;
const
DECLARE_ALIGNED
(
16
,
unsigned
short
,
ABCD
)[
8
]
=
{
(
16
-
x16
)
*
(
16
-
y16
),
/* A */
...
...
@@ -42,28 +43,26 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vcsr8
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
8
);
register
vector
unsigned
char
dstv
,
dstv2
,
src_0
,
src_1
,
srcvA
,
srcvB
,
srcvC
,
srcvD
;
register
vector
unsigned
short
Av
,
Bv
,
Cv
,
Dv
,
rounderV
,
tempA
,
tempB
,
tempC
,
tempD
;
int
i
;
register
vector
unsigned
char
dstv
,
dstv2
,
srcvB
,
srcvC
,
srcvD
;
register
vector
unsigned
short
tempB
,
tempC
,
tempD
;
unsigned
long
dst_odd
=
(
unsigned
long
)
dst
&
0x0000000F
;
unsigned
long
src_really_odd
=
(
unsigned
long
)
src
&
0x0000000F
;
tempA
=
vec_ld
(
0
,
(
const
unsigned
short
*
)
ABCD
);
Av
=
vec_splat
(
tempA
,
0
);
Bv
=
vec_splat
(
tempA
,
1
);
Cv
=
vec_splat
(
tempA
,
2
);
Dv
=
vec_splat
(
tempA
,
3
);
rounderV
=
vec_splat
((
vec_u16
)
vec_lde
(
0
,
&
rounder_a
),
0
);
register
vector
unsigned
short
tempA
=
vec_ld
(
0
,
(
const
unsigned
short
*
)
ABCD
);
register
vector
unsigned
short
Av
=
vec_splat
(
tempA
,
0
);
register
vector
unsigned
short
Bv
=
vec_splat
(
tempA
,
1
);
register
vector
unsigned
short
Cv
=
vec_splat
(
tempA
,
2
);
register
vector
unsigned
short
Dv
=
vec_splat
(
tempA
,
3
);
register
vector
unsigned
short
rounderV
=
vec_splat
((
vec_u16
)
vec_lde
(
0
,
&
rounder_a
),
0
);
/* We'll be able to pick up our 9 char elements at src from those
* 32 bytes. We load the first batch here, as inside the loop we can
* reuse 'src + stride' from one iteration as the 'src' of the next. */
src_0
=
vec_ld
(
0
,
src
);
src_1
=
vec_ld
(
16
,
src
);
srcvA
=
vec_perm
(
src_0
,
src_1
,
vec_lvsl
(
0
,
src
));
register
vector
unsigned
char
src_0
=
vec_ld
(
0
,
src
);
register
vector
unsigned
char
src_1
=
vec_ld
(
16
,
src
);
register
vector
unsigned
char
srcvA
=
vec_perm
(
src_0
,
src_1
,
vec_lvsl
(
0
,
src
));
if
(
src_really_odd
!=
0x0000000F
)
/* If src & 0xF == 0xF, then (src + 1) is properly aligned
...
...
libavcodec/ppc/idct_altivec.c
View file @
b7d24fd4
...
...
@@ -76,31 +76,28 @@
vy4 = vec_subs(t2, t6);
#define IDCT \
vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \
vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
vec_u16 shift; \
\
c4 = vec_splat(constants[0], 0);
\
a0 = vec_splat(constants[0], 1);
\
a1 = vec_splat(constants[0], 2);
\
a2 = vec_splat(constants[0], 3);
\
mc4 = vec_splat(constants[0], 4);
\
ma2 = vec_splat(constants[0], 5);
\
bias = (vec_s16) vec_splat((vec_s32) constants[0], 3);
\
vec_s16 c4 = vec_splat(constants[0], 0);
\
vec_s16 a0 = vec_splat(constants[0], 1);
\
vec_s16 a1 = vec_splat(constants[0], 2);
\
vec_s16 a2 = vec_splat(constants[0], 3);
\
vec_s16 mc4 = vec_splat(constants[0], 4);
\
vec_s16 ma2 = vec_splat(constants[0], 5);
\
vec_s16 bias = (vec_s16) vec_splat((vec_s32) constants[0], 3);
\
\
zero = vec_splat_s16(0);
\
shift = vec_splat_u16(4);
\
vec_s16 zero = vec_splat_s16(0);
\
vec_u16 shift = vec_splat_u16(4);
\
\
v
x0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero);
\
v
x1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero);
\
v
x2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero);
\
v
x3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero);
\
v
x4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero);
\
v
x5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero);
\
v
x6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero);
\
v
x7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero);
\
v
ec_s16 vx0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero);
\
v
ec_s16 vx1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero);
\
v
ec_s16 vx2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero);
\
v
ec_s16 vx3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero);
\
v
ec_s16 vx4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero);
\
v
ec_s16 vx5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero);
\
v
ec_s16 vx6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero);
\
v
ec_s16 vx7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero);
\
\
IDCT_HALF \
\
...
...
libavcodec/ppc/int_altivec.c
View file @
b7d24fd4
...
...
@@ -36,21 +36,19 @@
static
int
ssd_int8_vs_int16_altivec
(
const
int8_t
*
pix1
,
const
int16_t
*
pix2
,
int
size
)
{
int
i
,
size16
;
int
i
,
size16
=
size
>>
4
;
vector
signed
char
vpix1
;
vector
signed
short
vpix2
,
vdiff
,
vpix1l
,
vpix1h
;
union
{
vector
signed
int
vscore
;
int32_t
score
[
4
];
}
u
;
u
.
vscore
=
vec_splat_s32
(
0
);
}
u
=
{
.
vscore
=
vec_splat_s32
(
0
)
};
// XXX lazy way, fix it later
/* Expands to a single expression: vec_perm() applied to the two vectors
 * obtained from vec_ld() at offsets 0 and 15 relative to b, with
 * vec_lvsl(0, b) supplying the permute control.
 * The definition deliberately has no trailing semicolon so the macro can be
 * used inside larger expressions; callers terminate the statement themselves.
 * NOTE(review): b is expanded three times, so it must not have side effects. */
#define vec_unaligned_load(b) \
    vec_perm(vec_ld(0, b), vec_ld(15, b), vec_lvsl(0, b))
size16
=
size
>>
4
;
while
(
size16
)
{
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
// load pix1 and the first batch of pix2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment