Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
b7d24fd4
Commit
b7d24fd4
authored
Jan 15, 2014
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ppc: dsputil: Merge some declarations and initializations
parent
b045283f
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
219 additions
and
270 deletions
+219
-270
dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c
+181
-222
dsputil_ppc.c
libavcodec/ppc/dsputil_ppc.c
+3
-6
fdct_altivec.c
libavcodec/ppc/fdct_altivec.c
+1
-2
gmc_altivec.c
libavcodec/ppc/gmc_altivec.c
+15
-16
idct_altivec.c
libavcodec/ppc/idct_altivec.c
+17
-20
int_altivec.c
libavcodec/ppc/int_altivec.c
+2
-4
No files found.
libavcodec/ppc/dsputil_altivec.c
View file @
b7d24fd4
...
...
@@ -35,34 +35,30 @@
static
int
sad16_x2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
int
i
,
s
=
0
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
perm2
=
vec_add
(
perm1
,
vec_splat_u8
(
1
));
vector
unsigned
char
pix2l
,
pix2r
;
vector
unsigned
char
pix1v
,
pix2v
,
pix2iv
,
avgv
,
t5
;
vector
unsigned
int
sad
;
vector
unsigned
int
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumdiffs
;
s
=
0
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read unaligned pixels into our vectors. The vectors are as follows:
* pix1v: pix1[0] - pix1[15]
* pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */
pix1v
=
vec_ld
(
0
,
pix1
);
pix2l
=
vec_ld
(
0
,
pix2
);
pix2r
=
vec_ld
(
16
,
pix2
);
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm1
);
pix2iv
=
vec_perm
(
pix2l
,
pix2r
,
perm2
);
vector
unsigned
char
pix1v
=
vec_ld
(
0
,
pix1
);
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
16
,
pix2
);
vector
unsigned
char
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm1
);
vector
unsigned
char
pix2iv
=
vec_perm
(
pix2l
,
pix2r
,
perm2
);
/* Calculate the average vector. */
avgv
=
vec_avg
(
pix2v
,
pix2iv
);
vector
unsigned
char
avgv
=
vec_avg
(
pix2v
,
pix2iv
);
/* Calculate a sum of abs differences vector. */
t5
=
vec_sub
(
vec_max
(
pix1v
,
avgv
),
vec_min
(
pix1v
,
avgv
));
vector
unsigned
char
t5
=
vec_sub
(
vec_max
(
pix1v
,
avgv
),
vec_min
(
pix1v
,
avgv
));
/* Add each 4 pixel group together and put 4 results into sad. */
sad
=
vec_sum4s
(
t5
,
sad
);
...
...
@@ -81,20 +77,15 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
static
int
sad16_y2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
int
i
,
s
=
0
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
pix2l
,
pix2r
;
vector
unsigned
char
pix1v
,
pix2v
,
pix3v
,
avgv
,
t5
;
vector
unsigned
int
sad
;
vector
unsigned
char
pix1v
,
pix3v
,
avgv
,
t5
;
vector
unsigned
int
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumdiffs
;
uint8_t
*
pix3
=
pix2
+
line_size
;
s
=
0
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
/* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
* iteration becomes pix2 in the next iteration. We can use this
* fact to avoid a potentially expensive unaligned read, each
...
...
@@ -102,9 +93,9 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
* Read unaligned pixels into our vectors. The vectors are as follows:
* pix2v: pix2[0] - pix2[15]
* Split the pixel vectors into shorts. */
pix2l
=
vec_ld
(
0
,
pix2
);
pix2r
=
vec_ld
(
15
,
pix2
);
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
15
,
pix2
);
vector
unsigned
char
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read unaligned pixels into our vectors. The vectors are as follows:
...
...
@@ -140,8 +131,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
static
int
sad16_xy2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
int
i
,
s
=
0
;
uint8_t
*
pix3
=
pix2
+
line_size
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
...
...
@@ -150,19 +140,12 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
vector
unsigned
char
avgv
,
t5
;
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
perm2
=
vec_add
(
perm1
,
vec_splat_u8
(
1
));
vector
unsigned
char
pix2l
,
pix2r
;
vector
unsigned
char
pix1v
,
pix2v
,
pix3v
,
pix2iv
,
pix3iv
;
vector
unsigned
short
pix2lv
,
pix2hv
,
pix2ilv
,
pix2ihv
;
vector
unsigned
char
pix1v
,
pix3v
,
pix3iv
;
vector
unsigned
short
pix3lv
,
pix3hv
,
pix3ilv
,
pix3ihv
;
vector
unsigned
short
avghv
,
avglv
;
vector
unsigned
short
t1
,
t2
,
t3
,
t4
;
vector
unsigned
int
sad
;
vector
unsigned
int
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumdiffs
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
s
=
0
;
/* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
* iteration becomes pix2 in the next iteration. We can use this
* fact to avoid a potentially expensive unaligned read, as well
...
...
@@ -170,17 +153,22 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
* Read unaligned pixels into our vectors. The vectors are as follows:
* pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16]
* Split the pixel vectors into shorts. */
pix2l
=
vec_ld
(
0
,
pix2
);
pix2r
=
vec_ld
(
16
,
pix2
);
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm1
);
pix2iv
=
vec_perm
(
pix2l
,
pix2r
,
perm2
);
pix2hv
=
(
vector
unsigned
short
)
vec_mergeh
(
zero
,
pix2v
);
pix2lv
=
(
vector
unsigned
short
)
vec_mergel
(
zero
,
pix2v
);
pix2ihv
=
(
vector
unsigned
short
)
vec_mergeh
(
zero
,
pix2iv
);
pix2ilv
=
(
vector
unsigned
short
)
vec_mergel
(
zero
,
pix2iv
);
t1
=
vec_add
(
pix2hv
,
pix2ihv
);
t2
=
vec_add
(
pix2lv
,
pix2ilv
);
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
16
,
pix2
);
vector
unsigned
char
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm1
);
vector
unsigned
char
pix2iv
=
vec_perm
(
pix2l
,
pix2r
,
perm2
);
vector
unsigned
short
pix2hv
=
(
vector
unsigned
short
)
vec_mergeh
(
zero
,
pix2v
);
vector
unsigned
short
pix2lv
=
(
vector
unsigned
short
)
vec_mergel
(
zero
,
pix2v
);
vector
unsigned
short
pix2ihv
=
(
vector
unsigned
short
)
vec_mergeh
(
zero
,
pix2iv
);
vector
unsigned
short
pix2ilv
=
(
vector
unsigned
short
)
vec_mergel
(
zero
,
pix2iv
);
vector
unsigned
short
t1
=
vec_add
(
pix2hv
,
pix2ihv
);
vector
unsigned
short
t2
=
vec_add
(
pix2lv
,
pix2ilv
);
vector
unsigned
short
t3
,
t4
;
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read unaligned pixels into our vectors. The vectors are as follows:
...
...
@@ -238,28 +226,24 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
static
int
sad16_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
int
i
,
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sad
;
vector
unsigned
int
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumdiffs
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2. */
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
15
,
pix2
);
t1
=
vec_ld
(
0
,
pix1
);
t2
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
vector
unsigned
char
t1
=
vec_ld
(
0
,
pix1
);
vector
unsigned
char
t2
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
/* Calculate a sum of abs differences vector. */
t3
=
vec_max
(
t1
,
t2
);
t4
=
vec_min
(
t1
,
t2
);
t5
=
vec_sub
(
t3
,
t4
);
vector
unsigned
char
t3
=
vec_max
(
t1
,
t2
);
vector
unsigned
char
t4
=
vec_min
(
t1
,
t2
);
vector
unsigned
char
t5
=
vec_sub
(
t3
,
t4
);
/* Add each 4 pixel group together and put 4 results into sad. */
sad
=
vec_sum4s
(
t5
,
sad
);
...
...
@@ -279,8 +263,7 @@ static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
static
int
sad8_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
int
i
,
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
char
permclear
=
...
...
@@ -288,12 +271,9 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
{
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix1
);
vector
unsigned
char
perm2
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sad
;
vector
unsigned
int
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumdiffs
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2.
* Since we're reading 16 pixels, and actually only want 8,
...
...
@@ -302,13 +282,15 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
vector
unsigned
char
pix1r
=
vec_ld
(
7
,
pix1
);
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
7
,
pix2
);
t1
=
vec_and
(
vec_perm
(
pix1l
,
pix1r
,
perm1
),
permclear
);
t2
=
vec_and
(
vec_perm
(
pix2l
,
pix2r
,
perm2
),
permclear
);
vector
unsigned
char
t1
=
vec_and
(
vec_perm
(
pix1l
,
pix1r
,
perm1
),
permclear
);
vector
unsigned
char
t2
=
vec_and
(
vec_perm
(
pix2l
,
pix2r
,
perm2
),
permclear
);
/* Calculate a sum of abs differences vector. */
t3
=
vec_max
(
t1
,
t2
);
t4
=
vec_min
(
t1
,
t2
);
t5
=
vec_sub
(
t3
,
t4
);
vector
unsigned
char
t3
=
vec_max
(
t1
,
t2
);
vector
unsigned
char
t4
=
vec_min
(
t1
,
t2
);
vector
unsigned
char
t5
=
vec_sub
(
t3
,
t4
);
/* Add each 4 pixel group together and put 4 results into sad. */
sad
=
vec_sum4s
(
t5
,
sad
);
...
...
@@ -327,23 +309,18 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
static
int
pix_norm1_altivec
(
uint8_t
*
pix
,
int
line_size
)
{
int
i
;
int
s
;
int
i
,
s
=
0
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix
);
vector
unsigned
char
pixv
;
vector
unsigned
int
sv
;
vector
unsigned
int
sv
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sum
;
sv
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
s
=
0
;
for
(
i
=
0
;
i
<
16
;
i
++
)
{
/* Read the potentially unaligned pixels. */
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pix
);
vector
unsigned
char
pixr
=
vec_ld
(
15
,
pix
);
pixv
=
vec_perm
(
pixl
,
pixr
,
perm
);
vector
unsigned
char
pixv
=
vec_perm
(
pixl
,
pixr
,
perm
);
/* Square the values, and add them to our sum. */
sv
=
vec_msum
(
pixv
,
pixv
,
sv
);
...
...
@@ -363,8 +340,7 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
static
int
sse8_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
int
i
,
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
char
permclear
=
...
...
@@ -372,12 +348,9 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
{
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix1
);
vector
unsigned
char
perm2
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sum
;
vector
unsigned
int
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumsqr
;
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2.
* Since we're reading 16 pixels, and actually only want 8,
...
...
@@ -386,16 +359,18 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
vector
unsigned
char
pix1r
=
vec_ld
(
7
,
pix1
);
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
7
,
pix2
);
t1
=
vec_and
(
vec_perm
(
pix1l
,
pix1r
,
perm1
),
permclear
);
t2
=
vec_and
(
vec_perm
(
pix2l
,
pix2r
,
perm2
),
permclear
);
vector
unsigned
char
t1
=
vec_and
(
vec_perm
(
pix1l
,
pix1r
,
perm1
),
permclear
);
vector
unsigned
char
t2
=
vec_and
(
vec_perm
(
pix2l
,
pix2r
,
perm2
),
permclear
);
/* Since we want to use unsigned chars, we can take advantage
* of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
/* Calculate abs differences vector. */
t3
=
vec_max
(
t1
,
t2
);
t4
=
vec_min
(
t1
,
t2
);
t5
=
vec_sub
(
t3
,
t4
);
vector
unsigned
char
t3
=
vec_max
(
t1
,
t2
);
vector
unsigned
char
t4
=
vec_min
(
t1
,
t2
);
vector
unsigned
char
t5
=
vec_sub
(
t3
,
t4
);
/* Square the values and add them to our sum. */
sum
=
vec_msum
(
t5
,
t5
,
sum
);
...
...
@@ -417,31 +392,27 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
static
int
sse16_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
int
i
,
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sum
;
vector
unsigned
int
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumsqr
;
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2. */
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
15
,
pix2
);
t1
=
vec_ld
(
0
,
pix1
);
t2
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
vector
unsigned
char
t1
=
vec_ld
(
0
,
pix1
);
vector
unsigned
char
t2
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
/* Since we want to use unsigned chars, we can take advantage
* of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
/* Calculate abs differences vector. */
t3
=
vec_max
(
t1
,
t2
);
t4
=
vec_min
(
t1
,
t2
);
t5
=
vec_sub
(
t3
,
t4
);
vector
unsigned
char
t3
=
vec_max
(
t1
,
t2
);
vector
unsigned
char
t4
=
vec_min
(
t1
,
t2
);
vector
unsigned
char
t5
=
vec_sub
(
t3
,
t4
);
/* Square the values and add them to our sum. */
sum
=
vec_msum
(
t5
,
t5
,
sum
);
...
...
@@ -460,23 +431,18 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
static
int
pix_sum_altivec
(
uint8_t
*
pix
,
int
line_size
)
{
int
i
,
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix
);
vector
unsigned
char
t1
;
vector
unsigned
int
sad
;
vector
unsigned
int
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
signed
int
sumdiffs
;
int
i
;
int
s
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
16
;
i
++
)
{
/* Read the potentially unaligned 16 pixels into t1. */
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pix
);
vector
unsigned
char
pixr
=
vec_ld
(
15
,
pix
);
t1
=
vec_perm
(
pixl
,
pixr
,
perm
);
vector
unsigned
char
t1
=
vec_perm
(
pixl
,
pixr
,
perm
);
/* Add each 4 pixel group together and put 4 results into sad. */
sad
=
vec_sum4s
(
t1
,
sad
);
...
...
@@ -497,10 +463,8 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
{
int
i
;
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pixels
);
vector
unsigned
char
bytes
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
signed
short
shorts
;
for
(
i
=
0
;
i
<
8
;
i
++
)
{
/* Read potentially unaligned pixels.
...
...
@@ -508,10 +472,11 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
* but we simply ignore the extras. */
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pixels
);
vector
unsigned
char
pixr
=
vec_ld
(
7
,
pixels
);
bytes
=
vec_perm
(
pixl
,
pixr
,
perm
);
vector
unsigned
char
bytes
=
vec_perm
(
pixl
,
pixr
,
perm
);
// Convert the bytes into shorts.
shorts
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
vector
signed
short
shorts
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
// Save the data to the block, we assume the block is 16-byte aligned.
vec_st
(
shorts
,
i
*
16
,
(
vector
signed
short
*
)
block
);
...
...
@@ -526,7 +491,6 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
int
i
;
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
s1
);
vector
unsigned
char
perm2
=
vec_lvsl
(
0
,
s2
);
vector
unsigned
char
bytes
,
pixl
,
pixr
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
signed
short
shorts1
,
shorts2
;
...
...
@@ -535,9 +499,9 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
/* Read potentially unaligned pixels.
* We're reading 16 pixels, and actually only want 8,
* but we simply ignore the extras. */
pixl
=
vec_ld
(
0
,
s1
);
pixr
=
vec_ld
(
15
,
s1
);
bytes
=
vec_perm
(
pixl
,
pixr
,
perm1
);
vector
unsigned
char
pixl
=
vec_ld
(
0
,
s1
);
vector
unsigned
char
pixr
=
vec_ld
(
15
,
s1
);
vector
unsigned
char
bytes
=
vec_perm
(
pixl
,
pixr
,
perm1
);
// Convert the bytes into shorts.
shorts1
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
...
...
@@ -653,29 +617,31 @@ static int hadamard8_diff8x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst,
#define ONEITERBUTTERFLY(i, res) \
{ \
register vector unsigned char src1, src2, srcO; \
register vector unsigned char dst1, dst2, dstO; \
register vector signed short srcV, dstV; \
register vector signed short but0, but1, but2, op1, op2, op3; \
src1 = vec_ld(stride * i, src); \
src2 = vec_ld(stride * i + 15, src); \
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
dst1 = vec_ld(stride * i, dst); \
dst2 = vec_ld(stride * i + 15, dst); \
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
register vector unsigned char src1 = vec_ld(stride * i, src); \
register vector unsigned char src2 = vec_ld(stride * i + 15, src); \
register vector unsigned char srcO = \
vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
register vector unsigned char dst1 = vec_ld(stride * i, dst); \
register vector unsigned char dst2 = vec_ld(stride * i + 15, dst); \
register vector unsigned char dstO = \
vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
\
/* Promote the unsigned chars to signed shorts. */
\
/* We're in the 8x8 function, we only care for the first 8. */
\
srcV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) srcO); \
dstV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) dstO); \
register vector signed short srcV = \
(vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) srcO); \
register vector signed short dstV = \
(vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) dstO); \
\
/* subtractions inside the first butterfly */
\
but0 = vec_sub(srcV, dstV);
\
op1 = vec_perm(but0, but0, perm1);
\
but1 = vec_mladd(but0, vprod1, op1);
\
op2 = vec_perm(but1, but1, perm2);
\
but2 = vec_mladd(but1, vprod2, op2);
\
op3 = vec_perm(but2, but2, perm3);
\
register vector signed short but0 = vec_sub(srcV, dstV);
\
register vector signed short op1 = vec_perm(but0, but0, perm1);
\
register vector signed short but1 = vec_mladd(but0, vprod1, op1);
\
register vector signed short op2 = vec_perm(but1, but1, perm2);
\
register vector signed short but2 = vec_mladd(but1, vprod2, op2);
\
register vector signed short op3 = vec_perm(but2, but2, perm3);
\
res = vec_mladd(but2, vprod3, op3); \
}
ONEITERBUTTERFLY
(
0
,
temp0
);
...
...
@@ -801,62 +767,60 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
#define ONEITERBUTTERFLY(i, res1, res2) \
{ \
register vector unsigned char \
src1 __asm__ ("v22"), \
src2 __asm__ ("v23"), \
dst1 __asm__ ("v24"), \
dst2 __asm__ ("v25"), \
srcO __asm__ ("v22"), \
dstO __asm__ ("v23"); \
register vector unsigned char src1 __asm__ ("v22") = \
vec_ld(stride * i, src); \
register vector unsigned char src2 __asm__ ("v23") = \
vec_ld(stride * i + 16, src); \
register vector unsigned char srcO __asm__ ("v22") = \
vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
register vector unsigned char dst1 __asm__ ("v24") = \
vec_ld(stride * i, dst); \
register vector unsigned char dst2 __asm__ ("v25") = \
vec_ld(stride * i + 16, dst); \
register vector unsigned char dstO __asm__ ("v23") = \
vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
\
register vector signed short \
srcV __asm__ ("v24"), \
dstV __asm__ ("v25"), \
srcW __asm__ ("v26"), \
dstW __asm__ ("v27"), \
but0 __asm__ ("v28"), \
but0S __asm__ ("v29"), \
op1 __asm__ ("v30"), \
but1 __asm__ ("v22"), \
op1S __asm__ ("v23"), \
but1S __asm__ ("v24"), \
op2 __asm__ ("v25"), \
but2 __asm__ ("v26"), \
op2S __asm__ ("v27"), \
but2S __asm__ ("v28"), \
op3 __asm__ ("v29"), \
op3S __asm__ ("v30"); \
\
src1 = vec_ld(stride * i, src); \
src2 = vec_ld(stride * i + 16, src); \
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
dst1 = vec_ld(stride * i, dst); \
dst2 = vec_ld(stride * i + 16, dst); \
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* Promote the unsigned chars to signed shorts. */
\
srcV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) srcO); \
dstV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) dstO); \
srcW = (vector signed short) vec_mergel((vector signed char) vzero, \
(vector signed char) srcO); \
dstW = (vector signed short) vec_mergel((vector signed char) vzero, \
(vector signed char) dstO); \
register vector signed short srcV __asm__ ("v24") = \
(vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) srcO); \
register vector signed short dstV __asm__ ("v25") = \
(vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char) dstO); \
register vector signed short srcW __asm__ ("v26") = \
(vector signed short) vec_mergel((vector signed char) vzero, \
(vector signed char) srcO); \
register vector signed short dstW __asm__ ("v27") = \
(vector signed short) vec_mergel((vector signed char) vzero, \
(vector signed char) dstO); \
\
/* subtractions inside the first butterfly */
\
but0 = vec_sub(srcV, dstV); \
but0S = vec_sub(srcW, dstW); \
op1 = vec_perm(but0, but0, perm1); \
but1 = vec_mladd(but0, vprod1, op1); \
op1S = vec_perm(but0S, but0S, perm1); \
but1S = vec_mladd(but0S, vprod1, op1S); \
op2 = vec_perm(but1, but1, perm2); \
but2 = vec_mladd(but1, vprod2, op2); \
op2S = vec_perm(but1S, but1S, perm2); \
but2S = vec_mladd(but1S, vprod2, op2S); \
op3 = vec_perm(but2, but2, perm3); \
res1 = vec_mladd(but2, vprod3, op3); \
op3S = vec_perm(but2S, but2S, perm3); \
res2 = vec_mladd(but2S, vprod3, op3S); \
register vector signed short but0 __asm__ ("v28") = \
vec_sub(srcV, dstV); \
register vector signed short but0S __asm__ ("v29") = \
vec_sub(srcW, dstW); \
register vector signed short op1 __asm__ ("v30") = \
vec_perm(but0, but0, perm1); \
register vector signed short but1 __asm__ ("v22") = \
vec_mladd(but0, vprod1, op1); \
register vector signed short op1S __asm__ ("v23") = \
vec_perm(but0S, but0S, perm1); \
register vector signed short but1S __asm__ ("v24") = \
vec_mladd(but0S, vprod1, op1S); \
register vector signed short op2 __asm__ ("v25") = \
vec_perm(but1, but1, perm2); \
register vector signed short but2 __asm__ ("v26") = \
vec_mladd(but1, vprod2, op2); \
register vector signed short op2S __asm__ ("v27") = \
vec_perm(but1S, but1S, perm2); \
register vector signed short but2S __asm__ ("v28") = \
vec_mladd(but1S, vprod2, op2S); \
register vector signed short op3 __asm__ ("v29") = \
vec_perm(but2, but2, perm3); \
register vector signed short op3S __asm__ ("v30") = \
vec_perm(but2S, but2S, perm3); \
res1 = vec_mladd(but2, vprod3, op3); \
res2 = vec_mladd(but2S, vprod3, op3S); \
}
ONEITERBUTTERFLY
(
0
,
temp0
,
temp0S
);
ONEITERBUTTERFLY
(
1
,
temp1
,
temp1S
);
...
...
@@ -870,11 +834,6 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
#undef ONEITERBUTTERFLY
{
register
vector
signed
int
vsum
;
register
vector
signed
short
line0S
,
line1S
,
line2S
,
line3S
,
line4S
,
line5S
,
line6S
,
line7S
,
line0BS
,
line2BS
,
line1BS
,
line3BS
,
line4BS
,
line6BS
,
line5BS
,
line7BS
,
line0CS
,
line4CS
,
line1CS
,
line5CS
,
line2CS
,
line6CS
,
line3CS
,
line7CS
;
register
vector
signed
short
line0
=
vec_add
(
temp0
,
temp1
);
register
vector
signed
short
line1
=
vec_sub
(
temp0
,
temp1
);
...
...
@@ -903,6 +862,33 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
register
vector
signed
short
line3C
=
vec_add
(
line3B
,
line7B
);
register
vector
signed
short
line7C
=
vec_sub
(
line3B
,
line7B
);
register
vector
signed
short
line0S
=
vec_add
(
temp0S
,
temp1S
);
register
vector
signed
short
line1S
=
vec_sub
(
temp0S
,
temp1S
);
register
vector
signed
short
line2S
=
vec_add
(
temp2S
,
temp3S
);
register
vector
signed
short
line3S
=
vec_sub
(
temp2S
,
temp3S
);
register
vector
signed
short
line4S
=
vec_add
(
temp4S
,
temp5S
);
register
vector
signed
short
line5S
=
vec_sub
(
temp4S
,
temp5S
);
register
vector
signed
short
line6S
=
vec_add
(
temp6S
,
temp7S
);
register
vector
signed
short
line7S
=
vec_sub
(
temp6S
,
temp7S
);
register
vector
signed
short
line0BS
=
vec_add
(
line0S
,
line2S
);
register
vector
signed
short
line2BS
=
vec_sub
(
line0S
,
line2S
);
register
vector
signed
short
line1BS
=
vec_add
(
line1S
,
line3S
);
register
vector
signed
short
line3BS
=
vec_sub
(
line1S
,
line3S
);
register
vector
signed
short
line4BS
=
vec_add
(
line4S
,
line6S
);
register
vector
signed
short
line6BS
=
vec_sub
(
line4S
,
line6S
);
register
vector
signed
short
line5BS
=
vec_add
(
line5S
,
line7S
);
register
vector
signed
short
line7BS
=
vec_sub
(
line5S
,
line7S
);
register
vector
signed
short
line0CS
=
vec_add
(
line0BS
,
line4BS
);
register
vector
signed
short
line4CS
=
vec_sub
(
line0BS
,
line4BS
);
register
vector
signed
short
line1CS
=
vec_add
(
line1BS
,
line5BS
);
register
vector
signed
short
line5CS
=
vec_sub
(
line1BS
,
line5BS
);
register
vector
signed
short
line2CS
=
vec_add
(
line2BS
,
line6BS
);
register
vector
signed
short
line6CS
=
vec_sub
(
line2BS
,
line6BS
);
register
vector
signed
short
line3CS
=
vec_add
(
line3BS
,
line7BS
);
register
vector
signed
short
line7CS
=
vec_sub
(
line3BS
,
line7BS
);
vsum
=
vec_sum4s
(
vec_abs
(
line0C
),
vec_splat_s32
(
0
));
vsum
=
vec_sum4s
(
vec_abs
(
line1C
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line2C
),
vsum
);
...
...
@@ -912,33 +898,6 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
vsum
=
vec_sum4s
(
vec_abs
(
line6C
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line7C
),
vsum
);
line0S
=
vec_add
(
temp0S
,
temp1S
);
line1S
=
vec_sub
(
temp0S
,
temp1S
);
line2S
=
vec_add
(
temp2S
,
temp3S
);
line3S
=
vec_sub
(
temp2S
,
temp3S
);
line4S
=
vec_add
(
temp4S
,
temp5S
);
line5S
=
vec_sub
(
temp4S
,
temp5S
);
line6S
=
vec_add
(
temp6S
,
temp7S
);
line7S
=
vec_sub
(
temp6S
,
temp7S
);
line0BS
=
vec_add
(
line0S
,
line2S
);
line2BS
=
vec_sub
(
line0S
,
line2S
);
line1BS
=
vec_add
(
line1S
,
line3S
);
line3BS
=
vec_sub
(
line1S
,
line3S
);
line4BS
=
vec_add
(
line4S
,
line6S
);
line6BS
=
vec_sub
(
line4S
,
line6S
);
line5BS
=
vec_add
(
line5S
,
line7S
);
line7BS
=
vec_sub
(
line5S
,
line7S
);
line0CS
=
vec_add
(
line0BS
,
line4BS
);
line4CS
=
vec_sub
(
line0BS
,
line4BS
);
line1CS
=
vec_add
(
line1BS
,
line5BS
);
line5CS
=
vec_sub
(
line1BS
,
line5BS
);
line2CS
=
vec_add
(
line2BS
,
line6BS
);
line6CS
=
vec_sub
(
line2BS
,
line6BS
);
line3CS
=
vec_add
(
line3BS
,
line7BS
);
line7CS
=
vec_sub
(
line3BS
,
line7BS
);
vsum
=
vec_sum4s
(
vec_abs
(
line0CS
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line1CS
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line2CS
),
vsum
);
...
...
@@ -957,8 +916,8 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
static
int
hadamard8_diff16_altivec
(
/* MpegEncContext */
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
)
{
int
score
;
score
=
hadamard8_diff16x8_altivec
(
s
,
dst
,
src
,
stride
,
8
);
int
score
=
hadamard8_diff16x8_altivec
(
s
,
dst
,
src
,
stride
,
8
)
;
if
(
h
==
16
)
{
dst
+=
8
*
stride
;
src
+=
8
*
stride
;
...
...
libavcodec/ppc/dsputil_ppc.c
View file @
b7d24fd4
...
...
@@ -51,8 +51,7 @@
*/
static
void
clear_blocks_dcbz32_ppc
(
int16_t
*
blocks
)
{
register
int
misal
=
(
unsigned
long
)
blocks
&
0x00000010
;
register
int
i
=
0
;
register
int
misal
=
(
unsigned
long
)
blocks
&
0x00000010
,
i
=
0
;
if
(
misal
)
{
((
unsigned
long
*
)
blocks
)[
0
]
=
0L
;
...
...
@@ -77,8 +76,7 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks)
static
void
clear_blocks_dcbz128_ppc
(
int16_t
*
blocks
)
{
#if HAVE_DCBZL
register
int
misal
=
(
unsigned
long
)
blocks
&
0x0000007f
;
register
int
i
=
0
;
register
int
misal
=
(
unsigned
long
)
blocks
&
0x0000007f
,
i
=
0
;
if
(
misal
)
{
/* We could probably also optimize this case,
...
...
@@ -104,8 +102,7 @@ static long check_dcbzl_effect(void)
#if HAVE_DCBZL
register
char
*
fakedata
=
av_malloc
(
1024
);
register
char
*
fakedata_middle
;
register
long
zero
=
0
;
register
long
i
=
0
;
register
long
zero
=
0
,
i
=
0
;
if
(
!
fakedata
)
return
0L
;
...
...
libavcodec/ppc/fdct_altivec.c
View file @
b7d24fd4
...
...
@@ -191,7 +191,7 @@ static vector float fdctconsts[3] = {
void
ff_fdct_altivec
(
int16_t
*
block
)
{
vector
signed
short
*
bp
;
vector
float
*
cp
;
vector
float
*
cp
=
fdctconsts
;
vector
float
b00
,
b10
,
b20
,
b30
,
b40
,
b50
,
b60
,
b70
;
vector
float
b01
,
b11
,
b21
,
b31
,
b41
,
b51
,
b61
,
b71
;
vector
float
mzero
,
cnst
,
cnsts0
,
cnsts1
,
cnsts2
;
...
...
@@ -201,7 +201,6 @@ void ff_fdct_altivec(int16_t *block)
/* mzero = -0.0 */
mzero
=
((
vector
float
)
vec_splat_u32
(
-
1
));
mzero
=
((
vector
float
)
vec_sl
(
vu32
(
mzero
),
vu32
(
mzero
)));
cp
=
fdctconsts
;
cnsts0
=
vec_ld
(
0
,
cp
);
cp
++
;
cnsts1
=
vec_ld
(
0
,
cp
);
...
...
libavcodec/ppc/gmc_altivec.c
View file @
b7d24fd4
...
...
@@ -30,6 +30,7 @@
void
ff_gmc1_altivec
(
uint8_t
*
dst
/* align 8 */
,
uint8_t
*
src
/* align1 */
,
int
stride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
{
int
i
;
const
DECLARE_ALIGNED
(
16
,
unsigned
short
,
rounder_a
)
=
rounder
;
const
DECLARE_ALIGNED
(
16
,
unsigned
short
,
ABCD
)[
8
]
=
{
(
16
-
x16
)
*
(
16
-
y16
),
/* A */
...
...
@@ -42,28 +43,26 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vcsr8
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
8
);
register
vector
unsigned
char
dstv
,
dstv2
,
src_0
,
src_1
,
srcvA
,
srcvB
,
srcvC
,
srcvD
;
register
vector
unsigned
short
Av
,
Bv
,
Cv
,
Dv
,
rounderV
,
tempA
,
tempB
,
tempC
,
tempD
;
int
i
;
register
vector
unsigned
char
dstv
,
dstv2
,
srcvB
,
srcvC
,
srcvD
;
register
vector
unsigned
short
tempB
,
tempC
,
tempD
;
unsigned
long
dst_odd
=
(
unsigned
long
)
dst
&
0x0000000F
;
unsigned
long
src_really_odd
=
(
unsigned
long
)
src
&
0x0000000F
;
tempA
=
vec_ld
(
0
,
(
const
unsigned
short
*
)
ABCD
);
Av
=
vec_splat
(
tempA
,
0
);
Bv
=
vec_splat
(
tempA
,
1
);
Cv
=
vec_splat
(
tempA
,
2
);
Dv
=
vec_splat
(
tempA
,
3
);
rounderV
=
vec_splat
((
vec_u16
)
vec_lde
(
0
,
&
rounder_a
),
0
);
register
vector
unsigned
short
tempA
=
vec_ld
(
0
,
(
const
unsigned
short
*
)
ABCD
);
register
vector
unsigned
short
Av
=
vec_splat
(
tempA
,
0
);
register
vector
unsigned
short
Bv
=
vec_splat
(
tempA
,
1
);
register
vector
unsigned
short
Cv
=
vec_splat
(
tempA
,
2
);
register
vector
unsigned
short
Dv
=
vec_splat
(
tempA
,
3
);
register
vector
unsigned
short
rounderV
=
vec_splat
((
vec_u16
)
vec_lde
(
0
,
&
rounder_a
),
0
);
/* we'll be able to pick-up our 9 char elements at src from those
* 32 bytes we load the first batch here, as inside the loop we can
* reuse 'src + stride' from one iteration as the 'src' of the next. */
src_0
=
vec_ld
(
0
,
src
);
src_1
=
vec_ld
(
16
,
src
);
srcvA
=
vec_perm
(
src_0
,
src_1
,
vec_lvsl
(
0
,
src
));
register
vector
unsigned
char
src_0
=
vec_ld
(
0
,
src
);
register
vector
unsigned
char
src_1
=
vec_ld
(
16
,
src
);
register
vector
unsigned
char
srcvA
=
vec_perm
(
src_0
,
src_1
,
vec_lvsl
(
0
,
src
));
if
(
src_really_odd
!=
0x0000000F
)
/* If src & 0xF == 0xF, then (src + 1) is properly aligned
...
...
libavcodec/ppc/idct_altivec.c
View file @
b7d24fd4
...
...
@@ -76,31 +76,28 @@
vy4 = vec_subs(t2, t6);
#define IDCT \
vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \
vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
vec_u16 shift; \
\
c4 = vec_splat(constants[0], 0);
\
a0 = vec_splat(constants[0], 1);
\
a1 = vec_splat(constants[0], 2);
\
a2 = vec_splat(constants[0], 3);
\
mc4 = vec_splat(constants[0], 4);
\
ma2 = vec_splat(constants[0], 5);
\
bias = (vec_s16) vec_splat((vec_s32) constants[0], 3);
\
vec_s16 c4 = vec_splat(constants[0], 0);
\
vec_s16 a0 = vec_splat(constants[0], 1);
\
vec_s16 a1 = vec_splat(constants[0], 2);
\
vec_s16 a2 = vec_splat(constants[0], 3);
\
vec_s16 mc4 = vec_splat(constants[0], 4);
\
vec_s16 ma2 = vec_splat(constants[0], 5);
\
vec_s16 bias = (vec_s16) vec_splat((vec_s32) constants[0], 3);
\
\
zero = vec_splat_s16(0);
\
shift = vec_splat_u16(4);
\
vec_s16 zero = vec_splat_s16(0);
\
vec_u16 shift = vec_splat_u16(4);
\
\
v
x0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero);
\
v
x1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero);
\
v
x2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero);
\
v
x3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero);
\
v
x4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero);
\
v
x5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero);
\
v
x6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero);
\
v
x7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero);
\
v
ec_s16 vx0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero);
\
v
ec_s16 vx1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero);
\
v
ec_s16 vx2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero);
\
v
ec_s16 vx3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero);
\
v
ec_s16 vx4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero);
\
v
ec_s16 vx5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero);
\
v
ec_s16 vx6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero);
\
v
ec_s16 vx7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero);
\
\
IDCT_HALF \
\
...
...
libavcodec/ppc/int_altivec.c
View file @
b7d24fd4
...
...
@@ -36,21 +36,19 @@
static
int
ssd_int8_vs_int16_altivec
(
const
int8_t
*
pix1
,
const
int16_t
*
pix2
,
int
size
)
{
int
i
,
size16
;
int
i
,
size16
=
size
>>
4
;
vector
signed
char
vpix1
;
vector
signed
short
vpix2
,
vdiff
,
vpix1l
,
vpix1h
;
union
{
vector
signed
int
vscore
;
int32_t
score
[
4
];
}
u
;
u
.
vscore
=
vec_splat_s32
(
0
);
}
u
=
{
.
vscore
=
vec_splat_s32
(
0
)
};
// XXX lazy way, fix it later
#define vec_unaligned_load(b) \
vec_perm(vec_ld(0, b), vec_ld(15, b), vec_lvsl(0, b));
size16
=
size
>>
4
;
while
(
size16
)
{
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
// load pix1 and the first batch of pix2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment