Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
53c20f17
Commit
53c20f17
authored
Mar 28, 2014
by
Vittorio Giovara
Committed by
Diego Biurrun
Mar 29, 2014
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp8: K&R formatting cosmetics
Signed-off-by:
Diego Biurrun
<
diego@biurrun.de
>
parent
6adf3bc4
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
934 additions
and
807 deletions
+934
-807
vp8.c
libavcodec/vp8.c
+516
-433
vp8.h
libavcodec/vp8.h
+10
-9
vp8_parser.c
libavcodec/vp8_parser.c
+8
-7
vp8data.h
libavcodec/vp8data.h
+96
-92
vp8dsp.c
libavcodec/vp8dsp.c
+302
-264
vp8dsp.h
libavcodec/vp8dsp.h
+2
-2
No files found.
libavcodec/vp8.c
View file @
53c20f17
...
...
@@ -24,12 +24,13 @@
*/
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"
#if ARCH_ARM
# include "arm/vp8.h"
...
...
@@ -91,7 +92,6 @@ static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
return
0
;
}
static
void
vp8_decode_flush_impl
(
AVCodecContext
*
avctx
,
int
free_mem
)
{
VP8Context
*
s
=
avctx
->
priv_data
;
...
...
@@ -124,22 +124,25 @@ static int update_dimensions(VP8Context *s, int width, int height)
return
ret
;
}
s
->
mb_width
=
(
s
->
avctx
->
coded_width
+
15
)
/
16
;
s
->
mb_height
=
(
s
->
avctx
->
coded_height
+
15
)
/
16
;
s
->
mb_width
=
(
s
->
avctx
->
coded_width
+
15
)
/
16
;
s
->
mb_height
=
(
s
->
avctx
->
coded_height
+
15
)
/
16
;
s
->
mb_layout
=
(
avctx
->
active_thread_type
==
FF_THREAD_SLICE
)
&&
(
FFMIN
(
s
->
num_coeff_partitions
,
avctx
->
thread_count
)
>
1
);
s
->
mb_layout
=
(
avctx
->
active_thread_type
==
FF_THREAD_SLICE
)
&&
(
FFMIN
(
s
->
num_coeff_partitions
,
avctx
->
thread_count
)
>
1
);
if
(
!
s
->
mb_layout
)
{
// Frame threading and one thread
s
->
macroblocks_base
=
av_mallocz
((
s
->
mb_width
+
s
->
mb_height
*
2
+
1
)
*
sizeof
(
*
s
->
macroblocks
));
s
->
intra4x4_pred_mode_top
=
av_mallocz
(
s
->
mb_width
*
4
);
}
else
// Sliced threading
s
->
macroblocks_base
=
av_mallocz
((
s
->
mb_width
+
2
)
*
(
s
->
mb_height
+
2
)
*
sizeof
(
*
s
->
macroblocks
));
s
->
top_nnz
=
av_mallocz
(
s
->
mb_width
*
sizeof
(
*
s
->
top_nnz
));
s
->
top_border
=
av_mallocz
((
s
->
mb_width
+
1
)
*
sizeof
(
*
s
->
top_border
));
s
->
thread_data
=
av_mallocz
(
MAX_THREADS
*
sizeof
(
VP8ThreadData
));
s
->
macroblocks_base
=
av_mallocz
((
s
->
mb_width
+
s
->
mb_height
*
2
+
1
)
*
sizeof
(
*
s
->
macroblocks
));
s
->
intra4x4_pred_mode_top
=
av_mallocz
(
s
->
mb_width
*
4
);
}
else
// Sliced threading
s
->
macroblocks_base
=
av_mallocz
((
s
->
mb_width
+
2
)
*
(
s
->
mb_height
+
2
)
*
sizeof
(
*
s
->
macroblocks
));
s
->
top_nnz
=
av_mallocz
(
s
->
mb_width
*
sizeof
(
*
s
->
top_nnz
));
s
->
top_border
=
av_mallocz
((
s
->
mb_width
+
1
)
*
sizeof
(
*
s
->
top_border
));
s
->
thread_data
=
av_mallocz
(
MAX_THREADS
*
sizeof
(
VP8ThreadData
));
for
(
i
=
0
;
i
<
MAX_THREADS
;
i
++
)
{
s
->
thread_data
[
i
].
filter_strength
=
av_mallocz
(
s
->
mb_width
*
sizeof
(
*
s
->
thread_data
[
0
].
filter_strength
));
s
->
thread_data
[
i
].
filter_strength
=
av_mallocz
(
s
->
mb_width
*
sizeof
(
*
s
->
thread_data
[
0
].
filter_strength
));
#if HAVE_THREADS
pthread_mutex_init
(
&
s
->
thread_data
[
i
].
lock
,
NULL
);
pthread_cond_init
(
&
s
->
thread_data
[
i
].
cond
,
NULL
);
...
...
@@ -150,7 +153,7 @@ static int update_dimensions(VP8Context *s, int width, int height)
(
!
s
->
intra4x4_pred_mode_top
&&
!
s
->
mb_layout
))
return
AVERROR
(
ENOMEM
);
s
->
macroblocks
=
s
->
macroblocks_base
+
1
;
s
->
macroblocks
=
s
->
macroblocks_base
+
1
;
return
0
;
}
...
...
@@ -207,13 +210,13 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
s
->
num_coeff_partitions
=
1
<<
vp8_rac_get_uint
(
&
s
->
c
,
2
);
buf
+=
3
*
(
s
->
num_coeff_partitions
-
1
);
buf_size
-=
3
*
(
s
->
num_coeff_partitions
-
1
);
buf
+=
3
*
(
s
->
num_coeff_partitions
-
1
);
buf_size
-=
3
*
(
s
->
num_coeff_partitions
-
1
);
if
(
buf_size
<
0
)
return
-
1
;
for
(
i
=
0
;
i
<
s
->
num_coeff_partitions
-
1
;
i
++
)
{
int
size
=
AV_RL24
(
sizes
+
3
*
i
);
for
(
i
=
0
;
i
<
s
->
num_coeff_partitions
-
1
;
i
++
)
{
int
size
=
AV_RL24
(
sizes
+
3
*
i
);
if
(
buf_size
-
size
<
0
)
return
-
1
;
...
...
@@ -246,13 +249,13 @@ static void get_quants(VP8Context *s)
}
else
base_qi
=
yac_qi
;
s
->
qmat
[
i
].
luma_qmul
[
0
]
=
vp8_dc_qlookup
[
av_clip_uintp2
(
base_qi
+
ydc_delta
,
7
)];
s
->
qmat
[
i
].
luma_qmul
[
1
]
=
vp8_ac_qlookup
[
av_clip_uintp2
(
base_qi
,
7
)];
s
->
qmat
[
i
].
luma_dc_qmul
[
0
]
=
2
*
vp8_dc_qlookup
[
av_clip_uintp2
(
base_qi
+
y2dc_delta
,
7
)]
;
s
->
qmat
[
i
].
luma_qmul
[
0
]
=
vp8_dc_qlookup
[
av_clip_uintp2
(
base_qi
+
ydc_delta
,
7
)];
s
->
qmat
[
i
].
luma_qmul
[
1
]
=
vp8_ac_qlookup
[
av_clip_uintp2
(
base_qi
,
7
)];
s
->
qmat
[
i
].
luma_dc_qmul
[
0
]
=
vp8_dc_qlookup
[
av_clip_uintp2
(
base_qi
+
y2dc_delta
,
7
)]
*
2
;
/* 101581>>16 is equivalent to 155/100 */
s
->
qmat
[
i
].
luma_dc_qmul
[
1
]
=
(
101581
*
vp8_ac_qlookup
[
av_clip_uintp2
(
base_qi
+
y2ac_delta
,
7
)])
>>
16
;
s
->
qmat
[
i
].
chroma_qmul
[
0
]
=
vp8_dc_qlookup
[
av_clip_uintp2
(
base_qi
+
uvdc_delta
,
7
)];
s
->
qmat
[
i
].
chroma_qmul
[
1
]
=
vp8_ac_qlookup
[
av_clip_uintp2
(
base_qi
+
uvac_delta
,
7
)];
s
->
qmat
[
i
].
luma_dc_qmul
[
1
]
=
vp8_ac_qlookup
[
av_clip_uintp2
(
base_qi
+
y2ac_delta
,
7
)]
*
101581
>>
16
;
s
->
qmat
[
i
].
chroma_qmul
[
0
]
=
vp8_dc_qlookup
[
av_clip_uintp2
(
base_qi
+
uvdc_delta
,
7
)];
s
->
qmat
[
i
].
chroma_qmul
[
1
]
=
vp8_ac_qlookup
[
av_clip_uintp2
(
base_qi
+
uvac_delta
,
7
)];
s
->
qmat
[
i
].
luma_dc_qmul
[
1
]
=
FFMAX
(
s
->
qmat
[
i
].
luma_dc_qmul
[
1
],
8
);
s
->
qmat
[
i
].
chroma_qmul
[
0
]
=
FFMIN
(
s
->
qmat
[
i
].
chroma_qmul
[
0
],
132
);
...
...
@@ -317,24 +320,27 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
av_log
(
s
->
avctx
,
AV_LOG_WARNING
,
"Unknown profile %d
\n
"
,
s
->
profile
);
if
(
!
s
->
profile
)
memcpy
(
s
->
put_pixels_tab
,
s
->
vp8dsp
.
put_vp8_epel_pixels_tab
,
sizeof
(
s
->
put_pixels_tab
));
memcpy
(
s
->
put_pixels_tab
,
s
->
vp8dsp
.
put_vp8_epel_pixels_tab
,
sizeof
(
s
->
put_pixels_tab
));
else
// profile 1-3 use bilinear, 4+ aren't defined so whatever
memcpy
(
s
->
put_pixels_tab
,
s
->
vp8dsp
.
put_vp8_bilinear_pixels_tab
,
sizeof
(
s
->
put_pixels_tab
));
memcpy
(
s
->
put_pixels_tab
,
s
->
vp8dsp
.
put_vp8_bilinear_pixels_tab
,
sizeof
(
s
->
put_pixels_tab
));
if
(
header_size
>
buf_size
-
7
*
s
->
keyframe
)
{
if
(
header_size
>
buf_size
-
7
*
s
->
keyframe
)
{
av_log
(
s
->
avctx
,
AV_LOG_ERROR
,
"Header size larger than data provided
\n
"
);
return
AVERROR_INVALIDDATA
;
}
if
(
s
->
keyframe
)
{
if
(
AV_RL24
(
buf
)
!=
0x2a019d
)
{
av_log
(
s
->
avctx
,
AV_LOG_ERROR
,
"Invalid start code 0x%x
\n
"
,
AV_RL24
(
buf
));
av_log
(
s
->
avctx
,
AV_LOG_ERROR
,
"Invalid start code 0x%x
\n
"
,
AV_RL24
(
buf
));
return
AVERROR_INVALIDDATA
;
}
width
=
AV_RL16
(
buf
+
3
)
&
0x3fff
;
height
=
AV_RL16
(
buf
+
5
)
&
0x3fff
;
hscale
=
buf
[
4
]
>>
6
;
vscale
=
buf
[
6
]
>>
6
;
width
=
AV_RL16
(
buf
+
3
)
&
0x3fff
;
height
=
AV_RL16
(
buf
+
5
)
&
0x3fff
;
hscale
=
buf
[
4
]
>>
6
;
vscale
=
buf
[
6
]
>>
6
;
buf
+=
7
;
buf_size
-=
7
;
...
...
@@ -344,11 +350,15 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
s
->
update_golden
=
s
->
update_altref
=
VP56_FRAME_CURRENT
;
for
(
i
=
0
;
i
<
4
;
i
++
)
for
(
j
=
0
;
j
<
16
;
j
++
)
memcpy
(
s
->
prob
->
token
[
i
][
j
],
vp8_token_default_probs
[
i
][
vp8_coeff_band
[
j
]],
memcpy
(
s
->
prob
->
token
[
i
][
j
],
vp8_token_default_probs
[
i
][
vp8_coeff_band
[
j
]],
sizeof
(
s
->
prob
->
token
[
i
][
j
]));
memcpy
(
s
->
prob
->
pred16x16
,
vp8_pred16x16_prob_inter
,
sizeof
(
s
->
prob
->
pred16x16
));
memcpy
(
s
->
prob
->
pred8x8c
,
vp8_pred8x8c_prob_inter
,
sizeof
(
s
->
prob
->
pred8x8c
));
memcpy
(
s
->
prob
->
mvc
,
vp8_mv_default_prob
,
sizeof
(
s
->
prob
->
mvc
));
memcpy
(
s
->
prob
->
pred16x16
,
vp8_pred16x16_prob_inter
,
sizeof
(
s
->
prob
->
pred16x16
));
memcpy
(
s
->
prob
->
pred8x8c
,
vp8_pred8x8c_prob_inter
,
sizeof
(
s
->
prob
->
pred8x8c
));
memcpy
(
s
->
prob
->
mvc
,
vp8_mv_default_prob
,
sizeof
(
s
->
prob
->
mvc
));
memset
(
&
s
->
segmentation
,
0
,
sizeof
(
s
->
segmentation
));
memset
(
&
s
->
lf_delta
,
0
,
sizeof
(
s
->
lf_delta
));
}
...
...
@@ -382,10 +392,9 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
}
if
(
!
s
->
macroblocks_base
||
/* first frame */
width
!=
s
->
avctx
->
width
||
height
!=
s
->
avctx
->
height
)
{
width
!=
s
->
avctx
->
width
||
height
!=
s
->
avctx
->
height
)
if
((
ret
=
update_dimensions
(
s
,
width
,
height
))
<
0
)
return
ret
;
}
get_quants
(
s
);
...
...
@@ -405,7 +414,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
for
(
i
=
0
;
i
<
4
;
i
++
)
for
(
j
=
0
;
j
<
8
;
j
++
)
for
(
k
=
0
;
k
<
3
;
k
++
)
for
(
l
=
0
;
l
<
NUM_DCT_TOKENS
-
1
;
l
++
)
for
(
l
=
0
;
l
<
NUM_DCT_TOKENS
-
1
;
l
++
)
if
(
vp56_rac_get_prob_branchy
(
c
,
vp8_token_update_probs
[
i
][
j
][
k
][
l
]))
{
int
prob
=
vp8_rac_get_uint
(
c
,
8
);
for
(
m
=
0
;
vp8_coeff_band_indexes
[
j
][
m
]
>=
0
;
m
++
)
...
...
@@ -437,7 +446,8 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
return
0
;
}
static
av_always_inline
void
clamp_mv
(
VP8Context
*
s
,
VP56mv
*
dst
,
const
VP56mv
*
src
)
static
av_always_inline
void
clamp_mv
(
VP8Context
*
s
,
VP56mv
*
dst
,
const
VP56mv
*
src
)
{
dst
->
x
=
av_clip
(
src
->
x
,
s
->
mv_min
.
x
,
s
->
mv_max
.
x
);
dst
->
y
=
av_clip
(
src
->
y
,
s
->
mv_min
.
y
,
s
->
mv_max
.
y
);
...
...
@@ -461,13 +471,13 @@ static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
x
+=
8
;
}
else
{
// small_mvtree
const
uint8_t
*
ps
=
p
+
2
;
const
uint8_t
*
ps
=
p
+
2
;
bit
=
vp56_rac_get_prob
(
c
,
*
ps
);
ps
+=
1
+
3
*
bit
;
x
+=
4
*
bit
;
ps
+=
1
+
3
*
bit
;
x
+=
4
*
bit
;
bit
=
vp56_rac_get_prob
(
c
,
*
ps
);
ps
+=
1
+
bit
;
x
+=
2
*
bit
;
x
+=
2
*
bit
;
x
+=
vp56_rac_get_prob
(
c
,
*
ps
);
}
...
...
@@ -478,10 +488,10 @@ static av_always_inline
const
uint8_t
*
get_submv_prob
(
uint32_t
left
,
uint32_t
top
)
{
if
(
left
==
top
)
return
vp8_submv_prob
[
4
-
!!
left
];
return
vp8_submv_prob
[
4
-
!!
left
];
if
(
!
top
)
return
vp8_submv_prob
[
2
];
return
vp8_submv_prob
[
1
-
!!
left
];
return
vp8_submv_prob
[
1
-
!!
left
];
}
/**
...
...
@@ -495,9 +505,8 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
int
n
,
num
;
VP8Macroblock
*
top_mb
;
VP8Macroblock
*
left_mb
=
&
mb
[
-
1
];
const
uint8_t
*
mbsplits_left
=
vp8_mbsplits
[
left_mb
->
partitioning
],
*
mbsplits_top
,
*
mbsplits_cur
,
*
firstidx
;
const
uint8_t
*
mbsplits_left
=
vp8_mbsplits
[
left_mb
->
partitioning
];
const
uint8_t
*
mbsplits_top
,
*
mbsplits_cur
,
*
firstidx
;
VP56mv
*
top_mv
;
VP56mv
*
left_mv
=
left_mb
->
bmv
;
VP56mv
*
cur_mv
=
mb
->
bmv
;
...
...
@@ -505,23 +514,22 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
if
(
!
layout
)
// layout is inlined, s->mb_layout is not
top_mb
=
&
mb
[
2
];
else
top_mb
=
&
mb
[
-
s
->
mb_width
-
1
];
top_mb
=
&
mb
[
-
s
->
mb_width
-
1
];
mbsplits_top
=
vp8_mbsplits
[
top_mb
->
partitioning
];
top_mv
=
top_mb
->
bmv
;
top_mv
=
top_mb
->
bmv
;
if
(
vp56_rac_get_prob_branchy
(
c
,
vp8_mbsplit_prob
[
0
]))
{
if
(
vp56_rac_get_prob_branchy
(
c
,
vp8_mbsplit_prob
[
1
]))
{
if
(
vp56_rac_get_prob_branchy
(
c
,
vp8_mbsplit_prob
[
1
]))
part_idx
=
VP8_SPLITMVMODE_16x8
+
vp56_rac_get_prob
(
c
,
vp8_mbsplit_prob
[
2
]);
}
else
{
else
part_idx
=
VP8_SPLITMVMODE_8x8
;
}
}
else
{
part_idx
=
VP8_SPLITMVMODE_4x4
;
}
num
=
vp8_mbsplit_count
[
part_idx
];
mbsplits_cur
=
vp8_mbsplits
[
part_idx
],
firstidx
=
vp8_mbfirstidx
[
part_idx
];
num
=
vp8_mbsplit_count
[
part_idx
];
mbsplits_cur
=
vp8_mbsplits
[
part_idx
],
firstidx
=
vp8_mbfirstidx
[
part_idx
];
mb
->
partitioning
=
part_idx
;
for
(
n
=
0
;
n
<
num
;
n
++
)
{
...
...
@@ -532,7 +540,7 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
if
(
!
(
k
&
3
))
left
=
AV_RN32A
(
&
left_mv
[
mbsplits_left
[
k
+
3
]]);
else
left
=
AV_RN32A
(
&
cur_mv
[
mbsplits_cur
[
k
-
1
]]);
left
=
AV_RN32A
(
&
cur_mv
[
mbsplits_cur
[
k
-
1
]]);
if
(
k
<=
3
)
above
=
AV_RN32A
(
&
top_mv
[
mbsplits_top
[
k
+
12
]]);
else
...
...
@@ -560,11 +568,12 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
}
static
av_always_inline
void
decode_mvs
(
VP8Context
*
s
,
VP8Macroblock
*
mb
,
int
mb_x
,
int
mb_y
,
int
layout
)
void
decode_mvs
(
VP8Context
*
s
,
VP8Macroblock
*
mb
,
int
mb_x
,
int
mb_y
,
int
layout
)
{
VP8Macroblock
*
mb_edge
[
3
]
=
{
0
/* top */
,
VP8Macroblock
*
mb_edge
[
3
]
=
{
0
/* top */
,
mb
-
1
/* left */
,
0
/* top-left */
};
0
/* top-left */
};
enum
{
CNT_ZERO
,
CNT_NEAREST
,
CNT_NEAR
,
CNT_SPLITMV
};
enum
{
VP8_EDGE_TOP
,
VP8_EDGE_LEFT
,
VP8_EDGE_TOPLEFT
};
int
idx
=
CNT_ZERO
;
...
...
@@ -577,10 +586,9 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
if
(
!
layout
)
{
// layout is inlined (s->mb_layout is not)
mb_edge
[
0
]
=
mb
+
2
;
mb_edge
[
2
]
=
mb
+
1
;
}
else
{
mb_edge
[
0
]
=
mb
-
s
->
mb_width
-
1
;
mb_edge
[
2
]
=
mb
-
s
->
mb_width
-
2
;
}
else
{
mb_edge
[
0
]
=
mb
-
s
->
mb_width
-
1
;
mb_edge
[
2
]
=
mb
-
s
->
mb_width
-
2
;
}
AV_ZERO32
(
&
near_mv
[
0
]);
...
...
@@ -588,24 +596,25 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
AV_ZERO32
(
&
near_mv
[
2
]);
/* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)\
{\
VP8Macroblock *edge = mb_edge[n];\
int edge_ref = edge->ref_frame;\
if (edge_ref != VP56_FRAME_CURRENT) {\
uint32_t mv = AV_RN32A(&edge->mv);\
if (mv) {\
if (cur_sign_bias != sign_bias[edge_ref]) {\
/* SWAR negate of the values in mv. */
\
mv = ~mv;\
mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
}\
if (!n || mv != AV_RN32A(&near_mv[idx]))\
AV_WN32A(&near_mv[++idx], mv);\
cnt[idx] += 1 + (n != 2);\
} else\
cnt[CNT_ZERO] += 1 + (n != 2);\
}\
#define MV_EDGE_CHECK(n) \
{ \
VP8Macroblock *edge = mb_edge[n]; \
int edge_ref = edge->ref_frame; \
if (edge_ref != VP56_FRAME_CURRENT) { \
uint32_t mv = AV_RN32A(&edge->mv); \
if (mv) { \
if (cur_sign_bias != sign_bias[edge_ref]) { \
/* SWAR negate of the values in mv. */
\
mv = ~mv; \
mv = ((mv & 0x7fff7fff) + \
0x00010001) ^ (mv & 0x80008000); \
} \
if (!n || mv != AV_RN32A(&near_mv[idx])) \
AV_WN32A(&near_mv[++idx], mv); \
cnt[idx] += 1 + (n != 2); \
} else \
cnt[CNT_ZERO] += 1 + (n != 2); \
} \
}
MV_EDGE_CHECK
(
0
)
...
...
@@ -617,7 +626,8 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
mb
->
mode
=
VP8_MVMODE_MV
;
/* If we have three distinct MVs, merge first and last if they're the same */
if
(
cnt
[
CNT_SPLITMV
]
&&
AV_RN32A
(
&
near_mv
[
1
+
VP8_EDGE_TOP
])
==
AV_RN32A
(
&
near_mv
[
1
+
VP8_EDGE_TOPLEFT
]))
if
(
cnt
[
CNT_SPLITMV
]
&&
AV_RN32A
(
&
near_mv
[
1
+
VP8_EDGE_TOP
])
==
AV_RN32A
(
&
near_mv
[
1
+
VP8_EDGE_TOPLEFT
]))
cnt
[
CNT_NEAREST
]
+=
1
;
/* Swap near and nearest if necessary */
...
...
@@ -628,7 +638,6 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
if
(
vp56_rac_get_prob_branchy
(
c
,
vp8_mode_contexts
[
cnt
[
CNT_NEAREST
]][
1
]))
{
if
(
vp56_rac_get_prob_branchy
(
c
,
vp8_mode_contexts
[
cnt
[
CNT_NEAR
]][
2
]))
{
/* Choose the best mv out of 0,0 and the nearest mv */
clamp_mv
(
s
,
&
mb
->
mv
,
&
near_mv
[
CNT_ZERO
+
(
cnt
[
CNT_NEAREST
]
>=
cnt
[
CNT_ZERO
])]);
cnt
[
CNT_SPLITMV
]
=
((
mb_edge
[
VP8_EDGE_LEFT
]
->
mode
==
VP8_MVMODE_SPLIT
)
+
...
...
@@ -637,10 +646,10 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
if
(
vp56_rac_get_prob_branchy
(
c
,
vp8_mode_contexts
[
cnt
[
CNT_SPLITMV
]][
3
]))
{
mb
->
mode
=
VP8_MVMODE_SPLIT
;
mb
->
mv
=
mb
->
bmv
[
decode_splitmvs
(
s
,
c
,
mb
,
layout
)
-
1
];
mb
->
mv
=
mb
->
bmv
[
decode_splitmvs
(
s
,
c
,
mb
,
layout
)
-
1
];
}
else
{
mb
->
mv
.
y
+=
read_mv_component
(
c
,
s
->
prob
->
mvc
[
0
]);
mb
->
mv
.
x
+=
read_mv_component
(
c
,
s
->
prob
->
mvc
[
1
]);
mb
->
mv
.
y
+=
read_mv_component
(
c
,
s
->
prob
->
mvc
[
0
]);
mb
->
mv
.
x
+=
read_mv_component
(
c
,
s
->
prob
->
mvc
[
1
]);
mb
->
bmv
[
0
]
=
mb
->
mv
;
}
}
else
{
...
...
@@ -670,8 +679,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
}
if
(
keyframe
)
{
int
x
,
y
;
uint8_t
*
top
;
uint8_t
*
const
left
=
s
->
intra4x4_pred_mode_left
;
uint8_t
*
top
;
uint8_t
*
const
left
=
s
->
intra4x4_pred_mode_left
;
if
(
layout
==
1
)
top
=
mb
->
intra4x4_pred_mode_top
;
else
...
...
@@ -679,16 +688,17 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
for
(
y
=
0
;
y
<
4
;
y
++
)
{
for
(
x
=
0
;
x
<
4
;
x
++
)
{
const
uint8_t
*
ctx
;
ctx
=
vp8_pred4x4_prob_intra
[
top
[
x
]][
left
[
y
]];
ctx
=
vp8_pred4x4_prob_intra
[
top
[
x
]][
left
[
y
]];
*
intra4x4
=
vp8_rac_get_tree
(
c
,
vp8_pred4x4_tree
,
ctx
);
left
[
y
]
=
top
[
x
]
=
*
intra4x4
;
left
[
y
]
=
top
[
x
]
=
*
intra4x4
;
intra4x4
++
;
}
}
}
else
{
int
i
;
for
(
i
=
0
;
i
<
16
;
i
++
)
intra4x4
[
i
]
=
vp8_rac_get_tree
(
c
,
vp8_pred4x4_tree
,
vp8_pred4x4_prob_inter
);
intra4x4
[
i
]
=
vp8_rac_get_tree
(
c
,
vp8_pred4x4_tree
,
vp8_pred4x4_prob_inter
);
}
}
...
...
@@ -707,7 +717,8 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
mb
->
skip
=
s
->
mbskip_enabled
?
vp56_rac_get_prob
(
c
,
s
->
prob
->
mbskip
)
:
0
;
if
(
s
->
keyframe
)
{
mb
->
mode
=
vp8_rac_get_tree
(
c
,
vp8_pred16x16_tree_intra
,
vp8_pred16x16_prob_intra
);
mb
->
mode
=
vp8_rac_get_tree
(
c
,
vp8_pred16x16_tree_intra
,
vp8_pred16x16_prob_intra
);
if
(
mb
->
mode
==
MODE_I4x4
)
{
decode_intra4x4_modes
(
s
,
c
,
mb
,
mb_x
,
1
,
layout
);
...
...
@@ -717,19 +728,21 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
AV_WN32A
(
mb
->
intra4x4_pred_mode_top
,
modes
);
else
AV_WN32A
(
s
->
intra4x4_pred_mode_top
+
4
*
mb_x
,
modes
);
AV_WN32A
(
s
->
intra4x4_pred_mode_left
,
modes
);
AV_WN32A
(
s
->
intra4x4_pred_mode_left
,
modes
);
}
mb
->
chroma_pred_mode
=
vp8_rac_get_tree
(
c
,
vp8_pred8x8c_tree
,
vp8_pred8x8c_prob_intra
);
mb
->
ref_frame
=
VP56_FRAME_CURRENT
;
mb
->
chroma_pred_mode
=
vp8_rac_get_tree
(
c
,
vp8_pred8x8c_tree
,
vp8_pred8x8c_prob_intra
);
mb
->
ref_frame
=
VP56_FRAME_CURRENT
;
}
else
if
(
vp56_rac_get_prob_branchy
(
c
,
s
->
prob
->
intra
))
{
// inter MB, 16.2
if
(
vp56_rac_get_prob_branchy
(
c
,
s
->
prob
->
last
))
mb
->
ref_frame
=
vp56_rac_get_prob
(
c
,
s
->
prob
->
golden
)
?
VP56_FRAME_GOLDEN2
/* altref */
:
VP56_FRAME_GOLDEN
;
mb
->
ref_frame
=
vp56_rac_get_prob
(
c
,
s
->
prob
->
golden
)
?
VP56_FRAME_GOLDEN2
/* altref */
:
VP56_FRAME_GOLDEN
;
else
mb
->
ref_frame
=
VP56_FRAME_PREVIOUS
;
s
->
ref_count
[
mb
->
ref_frame
-
1
]
++
;
s
->
ref_count
[
mb
->
ref_frame
-
1
]
++
;
// motion vectors, 16.3
decode_mvs
(
s
,
mb
,
mb_x
,
mb_y
,
layout
);
...
...
@@ -740,26 +753,29 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
if
(
mb
->
mode
==
MODE_I4x4
)
decode_intra4x4_modes
(
s
,
c
,
mb
,
mb_x
,
0
,
layout
);
mb
->
chroma_pred_mode
=
vp8_rac_get_tree
(
c
,
vp8_pred8x8c_tree
,
s
->
prob
->
pred8x8c
);
mb
->
ref_frame
=
VP56_FRAME_CURRENT
;
mb
->
partitioning
=
VP8_SPLITMVMODE_NONE
;
mb
->
chroma_pred_mode
=
vp8_rac_get_tree
(
c
,
vp8_pred8x8c_tree
,
s
->
prob
->
pred8x8c
);
mb
->
ref_frame
=
VP56_FRAME_CURRENT
;
mb
->
partitioning
=
VP8_SPLITMVMODE_NONE
;
AV_ZERO32
(
&
mb
->
bmv
[
0
]);
}
}
#ifndef decode_block_coeffs_internal
/**
* @param r arithmetic bitstream reader context
* @param r
arithmetic bitstream reader context
* @param block destination for block coefficients
* @param probs probabilities to use when reading trees from the bitstream
* @param i initial coeff index, 0 unless a separate DC block is coded
* @param qmul array holding the dc/ac dequant factor at position 0/1
* @param i initial coeff index, 0 unless a separate DC block is coded
* @param qmul array holding the dc/ac dequant factor at position 0/1
*
* @return 0 if no coeffs were decoded
* otherwise, the index of the last coeff decoded plus one
*/
static
int
decode_block_coeffs_internal
(
VP56RangeCoder
*
r
,
int16_t
block
[
16
],
uint8_t
probs
[
16
][
3
][
NUM_DCT_TOKENS
-
1
],
int
i
,
uint8_t
*
token_prob
,
int16_t
qmul
[
2
])
uint8_t
probs
[
16
][
3
][
NUM_DCT_TOKENS
-
1
],
int
i
,
uint8_t
*
token_prob
,
int16_t
qmul
[
2
])
{
VP56RangeCoder
c
=
*
r
;
goto
skip_eob
;
...
...
@@ -778,7 +794,7 @@ skip_eob:
if
(
!
vp56_rac_get_prob_branchy
(
&
c
,
token_prob
[
2
]))
{
// DCT_1
coeff
=
1
;
token_prob
=
probs
[
i
+
1
][
1
];
token_prob
=
probs
[
i
+
1
][
1
];
}
else
{
if
(
!
vp56_rac_get_prob_branchy
(
&
c
,
token_prob
[
3
]))
{
// DCT 2,3,4
coeff
=
vp56_rac_get_prob_branchy
(
&
c
,
token_prob
[
4
]);
...
...
@@ -789,21 +805,21 @@ skip_eob:
// DCT_CAT*
if
(
!
vp56_rac_get_prob_branchy
(
&
c
,
token_prob
[
6
]))
{
if
(
!
vp56_rac_get_prob_branchy
(
&
c
,
token_prob
[
7
]))
{
// DCT_CAT1
coeff
=
5
+
vp56_rac_get_prob
(
&
c
,
vp8_dct_cat1_prob
[
0
]);
coeff
=
5
+
vp56_rac_get_prob
(
&
c
,
vp8_dct_cat1_prob
[
0
]);
}
else
{
// DCT_CAT2
coeff
=
7
;
coeff
+=
vp56_rac_get_prob
(
&
c
,
vp8_dct_cat2_prob
[
0
])
<<
1
;
coeff
+=
vp56_rac_get_prob
(
&
c
,
vp8_dct_cat2_prob
[
1
]);
}
}
else
{
// DCT_CAT3 and up
int
a
=
vp56_rac_get_prob
(
&
c
,
token_prob
[
8
]);
int
b
=
vp56_rac_get_prob
(
&
c
,
token_prob
[
9
+
a
]);
int
cat
=
(
a
<<
1
)
+
b
;
coeff
=
3
+
(
8
<<
cat
);
int
a
=
vp56_rac_get_prob
(
&
c
,
token_prob
[
8
]);
int
b
=
vp56_rac_get_prob
(
&
c
,
token_prob
[
9
+
a
]);
int
cat
=
(
a
<<
1
)
+
b
;
coeff
=
3
+
(
8
<<
cat
);
coeff
+=
vp8_rac_get_coeff
(
&
c
,
ff_vp8_dct_cat_prob
[
cat
]);
}
}
token_prob
=
probs
[
i
+
1
][
2
];
token_prob
=
probs
[
i
+
1
][
2
];
}
block
[
zigzag_scan
[
i
]]
=
(
vp8_rac_get
(
&
c
)
?
-
coeff
:
coeff
)
*
qmul
[
!!
i
];
}
while
(
++
i
<
16
);
...
...
@@ -814,19 +830,20 @@ skip_eob:
#endif
/**
* @param c arithmetic bitstream reader context
* @param block destination for block coefficients
* @param probs probabilities to use when reading trees from the bitstream
* @param i initial coeff index, 0 unless a separate DC block is coded
* @param c
arithmetic bitstream reader context
* @param block
destination for block coefficients
* @param probs
probabilities to use when reading trees from the bitstream
* @param i
initial coeff index, 0 unless a separate DC block is coded
* @param zero_nhood the initial prediction context for number of surrounding
* all-zero blocks (only left/top, so 0-2)
* @param qmul array holding the dc/ac dequant factor at position 0/1
* @param qmul array holding the dc/ac dequant factor at position 0/1
*
* @return 0 if no coeffs were decoded
* otherwise, the index of the last coeff decoded plus one
*/
static
av_always_inline
int
decode_block_coeffs
(
VP56RangeCoder
*
c
,
int16_t
block
[
16
],
uint8_t
probs
[
16
][
3
][
NUM_DCT_TOKENS
-
1
],
uint8_t
probs
[
16
][
3
][
NUM_DCT_TOKENS
-
1
],
int
i
,
int
zero_nhood
,
int16_t
qmul
[
2
])
{
uint8_t
*
token_prob
=
probs
[
i
][
zero_nhood
];
...
...
@@ -836,8 +853,8 @@ int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
}
static
av_always_inline
void
decode_mb_coeffs
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
VP56RangeCoder
*
c
,
VP8Macroblock
*
mb
,
uint8_t
t_nnz
[
9
],
uint8_t
l_nnz
[
9
])
void
decode_mb_coeffs
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
VP56RangeCoder
*
c
,
VP8Macroblock
*
mb
,
uint8_t
t_nnz
[
9
],
uint8_t
l_nnz
[
9
])
{
int
i
,
x
,
y
,
luma_start
=
0
,
luma_ctx
=
3
;
int
nnz_pred
,
nnz
,
nnz_total
=
0
;
...
...
@@ -848,28 +865,31 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma
nnz_pred
=
t_nnz
[
8
]
+
l_nnz
[
8
];
// decode DC values and do hadamard
nnz
=
decode_block_coeffs
(
c
,
td
->
block_dc
,
s
->
prob
->
token
[
1
],
0
,
nnz_pred
,
s
->
qmat
[
segment
].
luma_dc_qmul
);
nnz
=
decode_block_coeffs
(
c
,
td
->
block_dc
,
s
->
prob
->
token
[
1
],
0
,
nnz_pred
,
s
->
qmat
[
segment
].
luma_dc_qmul
);
l_nnz
[
8
]
=
t_nnz
[
8
]
=
!!
nnz
;
if
(
nnz
)
{
nnz_total
+=
nnz
;
block_dc
=
1
;
block_dc
=
1
;
if
(
nnz
==
1
)
s
->
vp8dsp
.
vp8_luma_dc_wht_dc
(
td
->
block
,
td
->
block_dc
);
else
s
->
vp8dsp
.
vp8_luma_dc_wht
(
td
->
block
,
td
->
block_dc
);
}
luma_start
=
1
;
luma_ctx
=
0
;
luma_ctx
=
0
;
}
// luma blocks
for
(
y
=
0
;
y
<
4
;
y
++
)
for
(
x
=
0
;
x
<
4
;
x
++
)
{
nnz_pred
=
l_nnz
[
y
]
+
t_nnz
[
x
];
nnz
=
decode_block_coeffs
(
c
,
td
->
block
[
y
][
x
],
s
->
prob
->
token
[
luma_ctx
],
luma_start
,
nnz_pred
,
s
->
qmat
[
segment
].
luma_qmul
);
// nnz+block_dc may be one more than the actual last index, but we don't care
nnz
=
decode_block_coeffs
(
c
,
td
->
block
[
y
][
x
],
s
->
prob
->
token
[
luma_ctx
],
luma_start
,
nnz_pred
,
s
->
qmat
[
segment
].
luma_qmul
);
/* nnz+block_dc may be one more than the actual last index,
* but we don't care */
td
->
non_zero_count_cache
[
y
][
x
]
=
nnz
+
block_dc
;
t_nnz
[
x
]
=
l_nnz
[
y
]
=
!!
nnz
;
nnz_total
+=
nnz
;
...
...
@@ -881,12 +901,14 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma
for
(
i
=
4
;
i
<
6
;
i
++
)
for
(
y
=
0
;
y
<
2
;
y
++
)
for
(
x
=
0
;
x
<
2
;
x
++
)
{
nnz_pred
=
l_nnz
[
i
+
2
*
y
]
+
t_nnz
[
i
+
2
*
x
];
nnz
=
decode_block_coeffs
(
c
,
td
->
block
[
i
][(
y
<<
1
)
+
x
],
s
->
prob
->
token
[
2
],
0
,
nnz_pred
,
s
->
qmat
[
segment
].
chroma_qmul
);
td
->
non_zero_count_cache
[
i
][(
y
<<
1
)
+
x
]
=
nnz
;
t_nnz
[
i
+
2
*
x
]
=
l_nnz
[
i
+
2
*
y
]
=
!!
nnz
;
nnz_total
+=
nnz
;
nnz_pred
=
l_nnz
[
i
+
2
*
y
]
+
t_nnz
[
i
+
2
*
x
];
nnz
=
decode_block_coeffs
(
c
,
td
->
block
[
i
][(
y
<<
1
)
+
x
],
s
->
prob
->
token
[
2
],
0
,
nnz_pred
,
s
->
qmat
[
segment
].
chroma_qmul
);
td
->
non_zero_count_cache
[
i
][(
y
<<
1
)
+
x
]
=
nnz
;
t_nnz
[
i
+
2
*
x
]
=
l_nnz
[
i
+
2
*
y
]
=
!!
nnz
;
nnz_total
+=
nnz
;
}
// if there were no coded coeffs despite the macroblock not being marked skip,
...
...
@@ -897,65 +919,67 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma
}
static
av_always_inline
void
backup_mb_border
(
uint8_t
*
top_border
,
uint8_t
*
src_y
,
uint8_t
*
src_cb
,
uint8_t
*
src_cr
,
void
backup_mb_border
(
uint8_t
*
top_border
,
uint8_t
*
src_y
,
uint8_t
*
src_cb
,
uint8_t
*
src_cr
,
int
linesize
,
int
uvlinesize
,
int
simple
)
{
AV_COPY128
(
top_border
,
src_y
+
15
*
linesize
);
AV_COPY128
(
top_border
,
src_y
+
15
*
linesize
);
if
(
!
simple
)
{
AV_COPY64
(
top_border
+
16
,
src_cb
+
7
*
uvlinesize
);
AV_COPY64
(
top_border
+
24
,
src_cr
+
7
*
uvlinesize
);
AV_COPY64
(
top_border
+
16
,
src_cb
+
7
*
uvlinesize
);
AV_COPY64
(
top_border
+
24
,
src_cr
+
7
*
uvlinesize
);
}
}
static
av_always_inline
void
xchg_mb_border
(
uint8_t
*
top_border
,
uint8_t
*
src_y
,
uint8_t
*
src_cb
,
uint8_t
*
src_cr
,
int
linesize
,
int
uvlinesize
,
int
mb_x
,
int
mb_y
,
int
mb_width
,
int
simple
,
int
xchg
)
void
xchg_mb_border
(
uint8_t
*
top_border
,
uint8_t
*
src_y
,
uint8_t
*
src_cb
,
uint8_t
*
src_cr
,
int
linesize
,
int
uvlinesize
,
int
mb_x
,
int
mb_y
,
int
mb_width
,
int
simple
,
int
xchg
)
{
uint8_t
*
top_border_m1
=
top_border
-
32
;
// for TL prediction
src_y
-=
linesize
;
uint8_t
*
top_border_m1
=
top_border
-
32
;
// for TL prediction
src_y
-=
linesize
;
src_cb
-=
uvlinesize
;
src_cr
-=
uvlinesize
;
#define XCHG(a,b,xchg) do { \
if (xchg) AV_SWAP64(b,a); \
else AV_COPY64(b,a); \
#define XCHG(a, b, xchg) \
do { \
if (xchg) \
AV_SWAP64(b, a); \
else \
AV_COPY64(b, a); \
} while (0)
XCHG
(
top_border_m1
+
8
,
src_y
-
8
,
xchg
);
XCHG
(
top_border
,
src_y
,
xchg
);
XCHG
(
top_border
+
8
,
src_y
+
8
,
1
);
if
(
mb_x
<
mb_width
-
1
)
XCHG
(
top_border
+
32
,
src_y
+
16
,
1
);
XCHG
(
top_border_m1
+
8
,
src_y
-
8
,
xchg
);
XCHG
(
top_border
,
src_y
,
xchg
);
XCHG
(
top_border
+
8
,
src_y
+
8
,
1
);
if
(
mb_x
<
mb_width
-
1
)
XCHG
(
top_border
+
32
,
src_y
+
16
,
1
);
// only copy chroma for normal loop filter
// or to initialize the top row to 127
if
(
!
simple
||
!
mb_y
)
{
XCHG
(
top_border_m1
+
16
,
src_cb
-
8
,
xchg
);
XCHG
(
top_border_m1
+
24
,
src_cr
-
8
,
xchg
);
XCHG
(
top_border
+
16
,
src_cb
,
1
);
XCHG
(
top_border
+
24
,
src_cr
,
1
);
XCHG
(
top_border_m1
+
16
,
src_cb
-
8
,
xchg
);
XCHG
(
top_border_m1
+
24
,
src_cr
-
8
,
xchg
);
XCHG
(
top_border
+
16
,
src_cb
,
1
);
XCHG
(
top_border
+
24
,
src_cr
,
1
);
}
}
static
av_always_inline
int
check_dc_pred8x8_mode
(
int
mode
,
int
mb_x
,
int
mb_y
)
{
if
(
!
mb_x
)
{
if
(
!
mb_x
)
return
mb_y
?
TOP_DC_PRED8x8
:
DC_128_PRED8x8
;
}
else
{
else
return
mb_y
?
mode
:
LEFT_DC_PRED8x8
;
}
}
static
av_always_inline
int
check_tm_pred8x8_mode
(
int
mode
,
int
mb_x
,
int
mb_y
)
{
if
(
!
mb_x
)
{
if
(
!
mb_x
)
return
mb_y
?
VERT_PRED8x8
:
DC_129_PRED8x8
;
}
else
{
else
return
mb_y
?
mode
:
HOR_PRED8x8
;
}
}
static
av_always_inline
...
...
@@ -968,7 +992,7 @@ int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
return
!
mb_y
?
DC_127_PRED8x8
:
mode
;
case
HOR_PRED8x8
:
return
!
mb_x
?
DC_129_PRED8x8
:
mode
;
case
PLANE_PRED8x8
/*TM*/
:
case
PLANE_PRED8x8
:
/* TM */
return
check_tm_pred8x8_mode
(
mode
,
mb_x
,
mb_y
);
}
return
mode
;
...
...
@@ -1007,7 +1031,8 @@ int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf
return
!
mb_x
?
DC_129_PRED
:
mode
;
case
TM_VP8_PRED
:
return
check_tm_pred4x4_mode
(
mode
,
mb_x
,
mb_y
);
case
DC_PRED
:
// 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
case
DC_PRED
:
/* 4x4 DC doesn't use the same "H.264-style" exceptions
* as 16x16/8x8 DC */
case
DIAG_DOWN_RIGHT_PRED
:
case
VERT_RIGHT_PRED
:
case
HOR_DOWN_PRED
:
...
...
@@ -1025,10 +1050,10 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
int
x
,
y
,
mode
,
nnz
;
uint32_t
tr
;
/
/ for the first row, we need to run xchg_mb_border to init the top edge to 127
// otherwise, skip it if we aren't going to deblock
/
* for the first row, we need to run xchg_mb_border to init the top edge
* to 127 otherwise, skip it if we aren't going to deblock */
if
(
mb_y
&&
(
s
->
deblock_filter
||
!
mb_y
)
&&
td
->
thread_nr
==
0
)
xchg_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
xchg_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
s
->
linesize
,
s
->
uvlinesize
,
mb_x
,
mb_y
,
s
->
mb_width
,
s
->
filter
.
simple
,
1
);
...
...
@@ -1046,10 +1071,9 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
// if we're on the right edge of the frame, said edge is extended
// from the top macroblock
if
(
mb_y
&&
mb_x
==
s
->
mb_width
-
1
)
{
tr
=
tr_right
[
-
1
]
*
0x01010101u
;
tr_right
=
(
uint8_t
*
)
&
tr
;
if
(
mb_y
&&
mb_x
==
s
->
mb_width
-
1
)
{
tr
=
tr_right
[
-
1
]
*
0x01010101u
;
tr_right
=
(
uint8_t
*
)
&
tr
;
}
if
(
mb
->
skip
)
...
...
@@ -1059,27 +1083,29 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
uint8_t
*
topright
=
ptr
+
4
-
s
->
linesize
;
for
(
x
=
0
;
x
<
4
;
x
++
)
{
int
copy
=
0
,
linesize
=
s
->
linesize
;
uint8_t
*
dst
=
ptr
+
4
*
x
;
DECLARE_ALIGNED
(
4
,
uint8_t
,
copy_dst
)[
5
*
8
];
uint8_t
*
dst
=
ptr
+
4
*
x
;
DECLARE_ALIGNED
(
4
,
uint8_t
,
copy_dst
)[
5
*
8
];
if
((
y
==
0
||
x
==
3
)
&&
mb_y
==
0
)
{
topright
=
tr_top
;
}
else
if
(
x
==
3
)
topright
=
tr_right
;
mode
=
check_intra_pred4x4_mode_emuedge
(
intra4x4
[
x
],
mb_x
+
x
,
mb_y
+
y
,
&
copy
);
mode
=
check_intra_pred4x4_mode_emuedge
(
intra4x4
[
x
],
mb_x
+
x
,
mb_y
+
y
,
&
copy
);
if
(
copy
)
{
dst
=
copy_dst
+
12
;
dst
=
copy_dst
+
12
;
linesize
=
8
;
if
(
!
(
mb_y
+
y
))
{
copy_dst
[
3
]
=
127U
;
AV_WN32A
(
copy_dst
+
4
,
127U
*
0x01010101U
);
AV_WN32A
(
copy_dst
+
4
,
127U
*
0x01010101U
);
}
else
{
AV_COPY32
(
copy_dst
+
4
,
ptr
+
4
*
x
-
s
->
linesize
);
AV_COPY32
(
copy_dst
+
4
,
ptr
+
4
*
x
-
s
->
linesize
);
if
(
!
(
mb_x
+
x
))
{
copy_dst
[
3
]
=
129U
;
}
else
{
copy_dst
[
3
]
=
ptr
[
4
*
x
-
s
->
linesize
-
1
];
copy_dst
[
3
]
=
ptr
[
4
*
x
-
s
->
linesize
-
1
];
}
}
if
(
!
(
mb_x
+
x
))
{
...
...
@@ -1088,31 +1114,33 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
copy_dst
[
27
]
=
copy_dst
[
35
]
=
129U
;
}
else
{
copy_dst
[
11
]
=
ptr
[
4
*
x
-
1
];
copy_dst
[
19
]
=
ptr
[
4
*
x
+
s
->
linesize
-
1
];
copy_dst
[
27
]
=
ptr
[
4
*
x
+
s
->
linesize
*
2
-
1
];
copy_dst
[
35
]
=
ptr
[
4
*
x
+
s
->
linesize
*
3
-
1
];
copy_dst
[
11
]
=
ptr
[
4
*
x
-
1
];
copy_dst
[
19
]
=
ptr
[
4
*
x
+
s
->
linesize
-
1
];
copy_dst
[
27
]
=
ptr
[
4
*
x
+
s
->
linesize
*
2
-
1
];
copy_dst
[
35
]
=
ptr
[
4
*
x
+
s
->
linesize
*
3
-
1
];
}
}
s
->
hpc
.
pred4x4
[
mode
](
dst
,
topright
,
linesize
);
if
(
copy
)
{
AV_COPY32
(
ptr
+
4
*
x
,
copy_dst
+
12
);
AV_COPY32
(
ptr
+
4
*
x
+
s
->
linesize
,
copy_dst
+
20
);
AV_COPY32
(
ptr
+
4
*
x
+
s
->
linesize
*
2
,
copy_dst
+
28
);
AV_COPY32
(
ptr
+
4
*
x
+
s
->
linesize
*
3
,
copy_dst
+
36
);
AV_COPY32
(
ptr
+
4
*
x
,
copy_dst
+
12
);
AV_COPY32
(
ptr
+
4
*
x
+
s
->
linesize
,
copy_dst
+
20
);
AV_COPY32
(
ptr
+
4
*
x
+
s
->
linesize
*
2
,
copy_dst
+
28
);
AV_COPY32
(
ptr
+
4
*
x
+
s
->
linesize
*
3
,
copy_dst
+
36
);
}
nnz
=
td
->
non_zero_count_cache
[
y
][
x
];
if
(
nnz
)
{
if
(
nnz
==
1
)
s
->
vp8dsp
.
vp8_idct_dc_add
(
ptr
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
s
->
vp8dsp
.
vp8_idct_dc_add
(
ptr
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
else
s
->
vp8dsp
.
vp8_idct_add
(
ptr
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
s
->
vp8dsp
.
vp8_idct_add
(
ptr
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
}
topright
+=
4
;
}
ptr
+=
4
*
s
->
linesize
;
ptr
+=
4
*
s
->
linesize
;
intra4x4
+=
4
;
}
}
...
...
@@ -1122,7 +1150,7 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
s
->
hpc
.
pred8x8
[
mode
](
dst
[
2
],
s
->
uvlinesize
);
if
(
mb_y
&&
(
s
->
deblock_filter
||
!
mb_y
)
&&
td
->
thread_nr
==
0
)
xchg_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
xchg_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
s
->
linesize
,
s
->
uvlinesize
,
mb_x
,
mb_y
,
s
->
mb_width
,
s
->
filter
.
simple
,
0
);
}
...
...
@@ -1137,18 +1165,18 @@ static const uint8_t subpel_idx[3][8] = {
/**
* luma MC function
*
* @param s VP8 decoding context
* @param dst target buffer for block data at block position
* @param ref reference picture buffer at origin (0, 0)
* @param mv motion vector (relative to block position) to get pixel data from
* @param x_off horizontal position of block from origin (0, 0)
* @param y_off vertical position of block from origin (0, 0)
* @param block_w width of block (16, 8 or 4)
* @param block_h height of block (always same as block_w)
* @param width width of src/dst plane data
* @param height height of src/dst plane data
* @param s
VP8 decoding context
* @param dst
target buffer for block data at block position
* @param ref
reference picture buffer at origin (0, 0)
* @param mv
motion vector (relative to block position) to get pixel data from
* @param x_off
horizontal position of block from origin (0, 0)
* @param y_off
vertical position of block from origin (0, 0)
* @param block_w
width of block (16, 8 or 4)
* @param block_h
height of block (always same as block_w)
* @param width
width of src/dst plane data
* @param height
height of src/dst plane data
* @param linesize size of a single line of plane data, including padding
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
* @param mc_func
motion compensation function pointers (bilinear or sixtap MC)
*/
static
av_always_inline
void
vp8_mc_luma
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
uint8_t
*
dst
,
...
...
@@ -1162,8 +1190,8 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
if
(
AV_RN32A
(
mv
))
{
int
src_linesize
=
linesize
;
int
mx
=
(
mv
->
x
<<
1
)
&
7
,
mx_idx
=
subpel_idx
[
0
][
mx
];
int
my
=
(
mv
->
y
<<
1
)
&
7
,
my_idx
=
subpel_idx
[
0
][
my
];
int
mx
=
(
mv
->
x
<<
1
)
&
7
,
mx_idx
=
subpel_idx
[
0
][
mx
];
int
my
=
(
mv
->
y
<<
1
)
&
7
,
my_idx
=
subpel_idx
[
0
][
my
];
x_off
+=
mv
->
x
>>
2
;
y_off
+=
mv
->
y
>>
2
;
...
...
@@ -1176,46 +1204,50 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
s
->
vdsp
.
emulated_edge_mc
(
td
->
edge_emu_buffer
,
src
-
my_idx
*
linesize
-
mx_idx
,
EDGE_EMU_LINESIZE
,
linesize
,
block_w
+
subpel_idx
[
1
][
mx
],
block_h
+
subpel_idx
[
1
][
my
],
x_off
-
mx_idx
,
y_off
-
my_idx
,
width
,
height
);
block_w
+
subpel_idx
[
1
][
mx
],
block_h
+
subpel_idx
[
1
][
my
],
x_off
-
mx_idx
,
y_off
-
my_idx
,
width
,
height
);
src
=
td
->
edge_emu_buffer
+
mx_idx
+
EDGE_EMU_LINESIZE
*
my_idx
;
src_linesize
=
EDGE_EMU_LINESIZE
;
}
mc_func
[
my_idx
][
mx_idx
](
dst
,
linesize
,
src
,
src_linesize
,
block_h
,
mx
,
my
);
}
else
{
ff_thread_await_progress
(
ref
,
(
3
+
y_off
+
block_h
)
>>
4
,
0
);
mc_func
[
0
][
0
](
dst
,
linesize
,
src
+
y_off
*
linesize
+
x_off
,
linesize
,
block_h
,
0
,
0
);
mc_func
[
0
][
0
](
dst
,
linesize
,
src
+
y_off
*
linesize
+
x_off
,
linesize
,
block_h
,
0
,
0
);
}
}
/**
* chroma MC function
*
* @param s VP8 decoding context
* @param dst1 target buffer for block data at block position (U plane)
* @param dst2 target buffer for block data at block position (V plane)
* @param ref reference picture buffer at origin (0, 0)
* @param mv motion vector (relative to block position) to get pixel data from
* @param x_off horizontal position of block from origin (0, 0)
* @param y_off vertical position of block from origin (0, 0)
* @param block_w width of block (16, 8 or 4)
* @param block_h height of block (always same as block_w)
* @param width width of src/dst plane data
* @param height height of src/dst plane data
* @param s
VP8 decoding context
* @param dst1
target buffer for block data at block position (U plane)
* @param dst2
target buffer for block data at block position (V plane)
* @param ref
reference picture buffer at origin (0, 0)
* @param mv
motion vector (relative to block position) to get pixel data from
* @param x_off
horizontal position of block from origin (0, 0)
* @param y_off
vertical position of block from origin (0, 0)
* @param block_w
width of block (16, 8 or 4)
* @param block_h
height of block (always same as block_w)
* @param width
width of src/dst plane data
* @param height
height of src/dst plane data
* @param linesize size of a single line of plane data, including padding
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
* @param mc_func
motion compensation function pointers (bilinear or sixtap MC)
*/
static
av_always_inline
void
vp8_mc_chroma
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
uint8_t
*
dst1
,
uint8_t
*
dst2
,
ThreadFrame
*
ref
,
const
VP56mv
*
mv
,
int
x_off
,
int
y_off
,
int
block_w
,
int
block_h
,
int
width
,
int
height
,
ptrdiff_t
linesize
,
void
vp8_mc_chroma
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
uint8_t
*
dst1
,
uint8_t
*
dst2
,
ThreadFrame
*
ref
,
const
VP56mv
*
mv
,
int
x_off
,
int
y_off
,
int
block_w
,
int
block_h
,
int
width
,
int
height
,
ptrdiff_t
linesize
,
vp8_mc_func
mc_func
[
3
][
3
])
{
uint8_t
*
src1
=
ref
->
f
->
data
[
1
],
*
src2
=
ref
->
f
->
data
[
2
];
if
(
AV_RN32A
(
mv
))
{
int
mx
=
mv
->
x
&
7
,
mx_idx
=
subpel_idx
[
0
][
mx
];
int
my
=
mv
->
y
&
7
,
my_idx
=
subpel_idx
[
0
][
my
];
int
mx
=
mv
->
x
&
7
,
mx_idx
=
subpel_idx
[
0
][
mx
];
int
my
=
mv
->
y
&
7
,
my_idx
=
subpel_idx
[
0
][
my
];
x_off
+=
mv
->
x
>>
3
;
y_off
+=
mv
->
y
>>
3
;
...
...
@@ -1239,7 +1271,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst
EDGE_EMU_LINESIZE
,
linesize
,
block_w
+
subpel_idx
[
1
][
mx
],
block_h
+
subpel_idx
[
1
][
my
],
x_off
-
mx_idx
,
y_off
-
my_idx
,
width
,
height
);
src2
=
td
->
edge_emu_buffer
+
mx_idx
+
EDGE_EMU_LINESIZE
*
my_idx
;
src2
=
td
->
edge_emu_buffer
+
mx_idx
+
EDGE_EMU_LINESIZE
*
my_idx
;
mc_func
[
my_idx
][
mx_idx
](
dst2
,
linesize
,
src2
,
EDGE_EMU_LINESIZE
,
block_h
,
mx
,
my
);
}
else
{
mc_func
[
my_idx
][
mx_idx
](
dst1
,
linesize
,
src1
,
linesize
,
block_h
,
mx
,
my
);
...
...
@@ -1255,8 +1287,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst
static
av_always_inline
void
vp8_mc_part
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
uint8_t
*
dst
[
3
],
ThreadFrame
*
ref_frame
,
int
x_off
,
int
y_off
,
int
bx_off
,
int
by_off
,
int
block_w
,
int
block_h
,
int
bx_off
,
int
by_off
,
int
block_w
,
int
block_h
,
int
width
,
int
height
,
VP56mv
*
mv
)
{
VP56mv
uvmv
=
*
mv
;
...
...
@@ -1272,10 +1303,14 @@ void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
uvmv
.
x
&=
~
7
;
uvmv
.
y
&=
~
7
;
}
x_off
>>=
1
;
y_off
>>=
1
;
bx_off
>>=
1
;
by_off
>>=
1
;
width
>>=
1
;
height
>>=
1
;
block_w
>>=
1
;
block_h
>>=
1
;
x_off
>>=
1
;
y_off
>>=
1
;
bx_off
>>=
1
;
by_off
>>=
1
;
width
>>=
1
;
height
>>=
1
;
block_w
>>=
1
;
block_h
>>=
1
;
vp8_mc_chroma
(
s
,
td
,
dst
[
1
]
+
by_off
*
s
->
uvlinesize
+
bx_off
,
dst
[
2
]
+
by_off
*
s
->
uvlinesize
+
bx_off
,
ref_frame
,
&
uvmv
,
x_off
+
bx_off
,
y_off
+
by_off
,
...
...
@@ -1284,22 +1319,24 @@ void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
* Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static
av_always_inline
void
prefetch_motion
(
VP8Context
*
s
,
VP8Macroblock
*
mb
,
int
mb_x
,
int
mb_y
,
int
mb_xy
,
int
ref
)
* Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static
av_always_inline
void
prefetch_motion
(
VP8Context
*
s
,
VP8Macroblock
*
mb
,
int
mb_x
,
int
mb_y
,
int
mb_xy
,
int
ref
)
{
/* Don't prefetch refs that haven't been used very often this frame. */
if
(
s
->
ref_count
[
ref
-
1
]
>
(
mb_xy
>>
5
))
{
if
(
s
->
ref_count
[
ref
-
1
]
>
(
mb_xy
>>
5
))
{
int
x_off
=
mb_x
<<
4
,
y_off
=
mb_y
<<
4
;
int
mx
=
(
mb
->
mv
.
x
>>
2
)
+
x_off
+
8
;
int
my
=
(
mb
->
mv
.
y
>>
2
)
+
y_off
;
uint8_t
**
src
=
s
->
framep
[
ref
]
->
tf
.
f
->
data
;
int
off
=
mx
+
(
my
+
(
mb_x
&
3
)
*
4
)
*
s
->
linesize
+
64
;
int
mx
=
(
mb
->
mv
.
x
>>
2
)
+
x_off
+
8
;
int
my
=
(
mb
->
mv
.
y
>>
2
)
+
y_off
;
uint8_t
**
src
=
s
->
framep
[
ref
]
->
tf
.
f
->
data
;
int
off
=
mx
+
(
my
+
(
mb_x
&
3
)
*
4
)
*
s
->
linesize
+
64
;
/* For threading, a ff_thread_await_progress here might be useful, but
* it actually slows down the decoder. Since a bad prefetch doesn't
* generate bad decoder output, we don't run it here. */
s
->
vdsp
.
prefetch
(
src
[
0
]
+
off
,
s
->
linesize
,
4
);
off
=
(
mx
>>
1
)
+
((
my
>>
1
)
+
(
mb_x
&
7
))
*
s
->
uvlinesize
+
64
;
s
->
vdsp
.
prefetch
(
src
[
1
]
+
off
,
src
[
2
]
-
src
[
1
],
2
);
s
->
vdsp
.
prefetch
(
src
[
0
]
+
off
,
s
->
linesize
,
4
);
off
=
(
mx
>>
1
)
+
((
my
>>
1
)
+
(
mb_x
&
7
))
*
s
->
uvlinesize
+
64
;
s
->
vdsp
.
prefetch
(
src
[
1
]
+
off
,
src
[
2
]
-
src
[
1
],
2
);
}
}
...
...
@@ -1311,7 +1348,7 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
VP8Macroblock
*
mb
,
int
mb_x
,
int
mb_y
)
{
int
x_off
=
mb_x
<<
4
,
y_off
=
mb_y
<<
4
;
int
width
=
16
*
s
->
mb_width
,
height
=
16
*
s
->
mb_height
;
int
width
=
16
*
s
->
mb_width
,
height
=
16
*
s
->
mb_height
;
ThreadFrame
*
ref
=
&
s
->
framep
[
mb
->
ref_frame
]
->
tf
;
VP56mv
*
bmv
=
mb
->
bmv
;
...
...
@@ -1327,35 +1364,38 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
/* Y */
for
(
y
=
0
;
y
<
4
;
y
++
)
{
for
(
x
=
0
;
x
<
4
;
x
++
)
{
vp8_mc_luma
(
s
,
td
,
dst
[
0
]
+
4
*
y
*
s
->
linesize
+
x
*
4
,
ref
,
&
bmv
[
4
*
y
+
x
],
4
*
x
+
x_off
,
4
*
y
+
y_off
,
4
,
4
,
vp8_mc_luma
(
s
,
td
,
dst
[
0
]
+
4
*
y
*
s
->
linesize
+
x
*
4
,
ref
,
&
bmv
[
4
*
y
+
x
],
4
*
x
+
x_off
,
4
*
y
+
y_off
,
4
,
4
,
width
,
height
,
s
->
linesize
,
s
->
put_pixels_tab
[
2
]);
}
}
/* U/V */
x_off
>>=
1
;
y_off
>>=
1
;
width
>>=
1
;
height
>>=
1
;
x_off
>>=
1
;
y_off
>>=
1
;
width
>>=
1
;
height
>>=
1
;
for
(
y
=
0
;
y
<
2
;
y
++
)
{
for
(
x
=
0
;
x
<
2
;
x
++
)
{
uvmv
.
x
=
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
].
x
+
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
+
1
].
x
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
].
x
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
+
1
].
x
;
uvmv
.
y
=
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
].
y
+
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
+
1
].
y
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
].
y
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
+
1
].
y
;
uvmv
.
x
=
(
uvmv
.
x
+
2
+
(
uvmv
.
x
>>
(
INT_BIT
-
1
)))
>>
2
;
uvmv
.
y
=
(
uvmv
.
y
+
2
+
(
uvmv
.
y
>>
(
INT_BIT
-
1
)))
>>
2
;
uvmv
.
x
=
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
].
x
+
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
+
1
].
x
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
].
x
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
+
1
].
x
;
uvmv
.
y
=
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
].
y
+
mb
->
bmv
[
2
*
y
*
4
+
2
*
x
+
1
].
y
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
].
y
+
mb
->
bmv
[(
2
*
y
+
1
)
*
4
+
2
*
x
+
1
].
y
;
uvmv
.
x
=
(
uvmv
.
x
+
2
+
(
uvmv
.
x
>>
(
INT_BIT
-
1
)))
>>
2
;
uvmv
.
y
=
(
uvmv
.
y
+
2
+
(
uvmv
.
y
>>
(
INT_BIT
-
1
)))
>>
2
;
if
(
s
->
profile
==
3
)
{
uvmv
.
x
&=
~
7
;
uvmv
.
y
&=
~
7
;
}
vp8_mc_chroma
(
s
,
td
,
dst
[
1
]
+
4
*
y
*
s
->
uvlinesize
+
x
*
4
,
dst
[
2
]
+
4
*
y
*
s
->
uvlinesize
+
x
*
4
,
ref
,
&
uvmv
,
4
*
x
+
x_off
,
4
*
y
+
y_off
,
4
,
4
,
vp8_mc_chroma
(
s
,
td
,
dst
[
1
]
+
4
*
y
*
s
->
uvlinesize
+
x
*
4
,
dst
[
2
]
+
4
*
y
*
s
->
uvlinesize
+
x
*
4
,
ref
,
&
uvmv
,
4
*
x
+
x_off
,
4
*
y
+
y_off
,
4
,
4
,
width
,
height
,
s
->
uvlinesize
,
s
->
put_pixels_tab
[
2
]);
}
...
...
@@ -1387,8 +1427,8 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
}
}
static
av_always_inline
void
idct_mb
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
uint8_t
*
dst
[
3
],
VP8Macroblock
*
mb
)
static
av_always_inline
void
idct_mb
(
VP8Context
*
s
,
VP8ThreadData
*
td
,
uint8_t
*
dst
[
3
],
VP8Macroblock
*
mb
)
{
int
x
,
y
,
ch
;
...
...
@@ -1397,12 +1437,16 @@ static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
for
(
y
=
0
;
y
<
4
;
y
++
)
{
uint32_t
nnz4
=
AV_RL32
(
td
->
non_zero_count_cache
[
y
]);
if
(
nnz4
)
{
if
(
nnz4
&
~
0x01010101
)
{
if
(
nnz4
&
~
0x01010101
)
{
for
(
x
=
0
;
x
<
4
;
x
++
)
{
if
((
uint8_t
)
nnz4
==
1
)
s
->
vp8dsp
.
vp8_idct_dc_add
(
y_dst
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
else
if
((
uint8_t
)
nnz4
>
1
)
s
->
vp8dsp
.
vp8_idct_add
(
y_dst
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
if
((
uint8_t
)
nnz4
==
1
)
s
->
vp8dsp
.
vp8_idct_dc_add
(
y_dst
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
else
if
((
uint8_t
)
nnz4
>
1
)
s
->
vp8dsp
.
vp8_idct_add
(
y_dst
+
4
*
x
,
td
->
block
[
y
][
x
],
s
->
linesize
);
nnz4
>>=
8
;
if
(
!
nnz4
)
break
;
...
...
@@ -1411,36 +1455,42 @@ static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
s
->
vp8dsp
.
vp8_idct_dc_add4y
(
y_dst
,
td
->
block
[
y
],
s
->
linesize
);
}
}
y_dst
+=
4
*
s
->
linesize
;
y_dst
+=
4
*
s
->
linesize
;
}
}
for
(
ch
=
0
;
ch
<
2
;
ch
++
)
{
uint32_t
nnz4
=
AV_RL32
(
td
->
non_zero_count_cache
[
4
+
ch
]);
uint32_t
nnz4
=
AV_RL32
(
td
->
non_zero_count_cache
[
4
+
ch
]);
if
(
nnz4
)
{
uint8_t
*
ch_dst
=
dst
[
1
+
ch
];
if
(
nnz4
&
~
0x01010101
)
{
uint8_t
*
ch_dst
=
dst
[
1
+
ch
];
if
(
nnz4
&
~
0x01010101
)
{
for
(
y
=
0
;
y
<
2
;
y
++
)
{
for
(
x
=
0
;
x
<
2
;
x
++
)
{
if
((
uint8_t
)
nnz4
==
1
)
s
->
vp8dsp
.
vp8_idct_dc_add
(
ch_dst
+
4
*
x
,
td
->
block
[
4
+
ch
][(
y
<<
1
)
+
x
],
s
->
uvlinesize
);
else
if
((
uint8_t
)
nnz4
>
1
)
s
->
vp8dsp
.
vp8_idct_add
(
ch_dst
+
4
*
x
,
td
->
block
[
4
+
ch
][(
y
<<
1
)
+
x
],
s
->
uvlinesize
);
if
((
uint8_t
)
nnz4
==
1
)
s
->
vp8dsp
.
vp8_idct_dc_add
(
ch_dst
+
4
*
x
,
td
->
block
[
4
+
ch
][(
y
<<
1
)
+
x
],
s
->
uvlinesize
);
else
if
((
uint8_t
)
nnz4
>
1
)
s
->
vp8dsp
.
vp8_idct_add
(
ch_dst
+
4
*
x
,
td
->
block
[
4
+
ch
][(
y
<<
1
)
+
x
],
s
->
uvlinesize
);
nnz4
>>=
8
;
if
(
!
nnz4
)
goto
chroma_idct_end
;
}
ch_dst
+=
4
*
s
->
uvlinesize
;
ch_dst
+=
4
*
s
->
uvlinesize
;
}
}
else
{
s
->
vp8dsp
.
vp8_idct_dc_add4uv
(
ch_dst
,
td
->
block
[
4
+
ch
],
s
->
uvlinesize
);
s
->
vp8dsp
.
vp8_idct_dc_add4uv
(
ch_dst
,
td
->
block
[
4
+
ch
],
s
->
uvlinesize
);
}
}
chroma_idct_end:
;
chroma_idct_end:
;
}
}
static
av_always_inline
void
filter_level_for_mb
(
VP8Context
*
s
,
VP8Macroblock
*
mb
,
VP8FilterStrength
*
f
)
static
av_always_inline
void
filter_level_for_mb
(
VP8Context
*
s
,
VP8Macroblock
*
mb
,
VP8FilterStrength
*
f
)
{
int
interior_limit
,
filter_level
;
...
...
@@ -1467,10 +1517,13 @@ static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *m
f
->
filter_level
=
filter_level
;
f
->
inner_limit
=
interior_limit
;
f
->
inner_filter
=
!
mb
->
skip
||
mb
->
mode
==
MODE_I4x4
||
mb
->
mode
==
VP8_MVMODE_SPLIT
;
f
->
inner_filter
=
!
mb
->
skip
||
mb
->
mode
==
MODE_I4x4
||
mb
->
mode
==
VP8_MVMODE_SPLIT
;
}
static
av_always_inline
void
filter_mb
(
VP8Context
*
s
,
uint8_t
*
dst
[
3
],
VP8FilterStrength
*
f
,
int
mb_x
,
int
mb_y
)
static
av_always_inline
void
filter_mb
(
VP8Context
*
s
,
uint8_t
*
dst
[
3
],
VP8FilterStrength
*
f
,
int
mb_x
,
int
mb_y
)
{
int
mbedge_lim
,
bedge_lim
,
hev_thresh
;
int
filter_level
=
f
->
filter_level
;
...
...
@@ -1492,82 +1545,84 @@ static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Filter
if
(
!
filter_level
)
return
;
bedge_lim
=
2
*
filter_level
+
inner_limit
;
bedge_lim
=
2
*
filter_level
+
inner_limit
;
mbedge_lim
=
bedge_lim
+
4
;
hev_thresh
=
hev_thresh_lut
[
s
->
keyframe
][
filter_level
];
if
(
mb_x
)
{
s
->
vp8dsp
.
vp8_h_loop_filter16y
(
dst
[
0
],
linesize
,
s
->
vp8dsp
.
vp8_h_loop_filter16y
(
dst
[
0
],
linesize
,
mbedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_h_loop_filter8uv
(
dst
[
1
],
dst
[
2
],
uvlinesize
,
s
->
vp8dsp
.
vp8_h_loop_filter8uv
(
dst
[
1
],
dst
[
2
],
uvlinesize
,
mbedge_lim
,
inner_limit
,
hev_thresh
);
}
if
(
inner_filter
)
{
s
->
vp8dsp
.
vp8_h_loop_filter16y_inner
(
dst
[
0
]
+
4
,
linesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_h_loop_filter16y_inner
(
dst
[
0
]
+
4
,
linesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_h_loop_filter16y_inner
(
dst
[
0
]
+
8
,
linesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_h_loop_filter16y_inner
(
dst
[
0
]
+
8
,
linesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_h_loop_filter16y_inner
(
dst
[
0
]
+
12
,
linesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_h_loop_filter16y_inner
(
dst
[
0
]
+
12
,
linesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_h_loop_filter8uv_inner
(
dst
[
1
]
+
4
,
dst
[
2
]
+
4
,
uvlinesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_h_loop_filter8uv_inner
(
dst
[
1
]
+
4
,
dst
[
2
]
+
4
,
uvlinesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
}
if
(
mb_y
)
{
s
->
vp8dsp
.
vp8_v_loop_filter16y
(
dst
[
0
],
linesize
,
s
->
vp8dsp
.
vp8_v_loop_filter16y
(
dst
[
0
],
linesize
,
mbedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_v_loop_filter8uv
(
dst
[
1
],
dst
[
2
],
uvlinesize
,
s
->
vp8dsp
.
vp8_v_loop_filter8uv
(
dst
[
1
],
dst
[
2
],
uvlinesize
,
mbedge_lim
,
inner_limit
,
hev_thresh
);
}
if
(
inner_filter
)
{
s
->
vp8dsp
.
vp8_v_loop_filter16y_inner
(
dst
[
0
]
+
4
*
linesize
,
linesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_v_loop_filter16y_inner
(
dst
[
0
]
+
4
*
linesize
,
linesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_v_loop_filter16y_inner
(
dst
[
0
]
+
8
*
linesize
,
linesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_v_loop_filter16y_inner
(
dst
[
0
]
+
8
*
linesize
,
linesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_v_loop_filter16y_inner
(
dst
[
0
]
+
12
*
linesize
,
linesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_v_loop_filter16y_inner
(
dst
[
0
]
+
12
*
linesize
,
linesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
s
->
vp8dsp
.
vp8_v_loop_filter8uv_inner
(
dst
[
1
]
+
4
*
uvlinesize
,
dst
[
2
]
+
4
*
uvlinesize
,
uvlinesize
,
bedge_lim
,
s
->
vp8dsp
.
vp8_v_loop_filter8uv_inner
(
dst
[
1
]
+
4
*
uvlinesize
,
dst
[
2
]
+
4
*
uvlinesize
,
uvlinesize
,
bedge_lim
,
inner_limit
,
hev_thresh
);
}
}
static
av_always_inline
void
filter_mb_simple
(
VP8Context
*
s
,
uint8_t
*
dst
,
VP8FilterStrength
*
f
,
int
mb_x
,
int
mb_y
)
static
av_always_inline
void
filter_mb_simple
(
VP8Context
*
s
,
uint8_t
*
dst
,
VP8FilterStrength
*
f
,
int
mb_x
,
int
mb_y
)
{
int
mbedge_lim
,
bedge_lim
;
int
filter_level
=
f
->
filter_level
;
int
inner_limit
=
f
->
inner_limit
;
int
inner_limit
=
f
->
inner_limit
;
int
inner_filter
=
f
->
inner_filter
;
int
linesize
=
s
->
linesize
;
int
linesize
=
s
->
linesize
;
if
(
!
filter_level
)
return
;
bedge_lim
=
2
*
filter_level
+
inner_limit
;
bedge_lim
=
2
*
filter_level
+
inner_limit
;
mbedge_lim
=
bedge_lim
+
4
;
if
(
mb_x
)
s
->
vp8dsp
.
vp8_h_loop_filter_simple
(
dst
,
linesize
,
mbedge_lim
);
if
(
inner_filter
)
{
s
->
vp8dsp
.
vp8_h_loop_filter_simple
(
dst
+
4
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_h_loop_filter_simple
(
dst
+
8
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_h_loop_filter_simple
(
dst
+
12
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_h_loop_filter_simple
(
dst
+
4
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_h_loop_filter_simple
(
dst
+
8
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_h_loop_filter_simple
(
dst
+
12
,
linesize
,
bedge_lim
);
}
if
(
mb_y
)
s
->
vp8dsp
.
vp8_v_loop_filter_simple
(
dst
,
linesize
,
mbedge_lim
);
if
(
inner_filter
)
{
s
->
vp8dsp
.
vp8_v_loop_filter_simple
(
dst
+
4
*
linesize
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_v_loop_filter_simple
(
dst
+
8
*
linesize
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_v_loop_filter_simple
(
dst
+
12
*
linesize
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_v_loop_filter_simple
(
dst
+
4
*
linesize
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_v_loop_filter_simple
(
dst
+
8
*
linesize
,
linesize
,
bedge_lim
);
s
->
vp8dsp
.
vp8_v_loop_filter_simple
(
dst
+
12
*
linesize
,
linesize
,
bedge_lim
);
}
}
...
...
@@ -1581,16 +1636,18 @@ static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
s
->
mv_min
.
y
=
-
MARGIN
;
s
->
mv_max
.
y
=
((
s
->
mb_height
-
1
)
<<
6
)
+
MARGIN
;
for
(
mb_y
=
0
;
mb_y
<
s
->
mb_height
;
mb_y
++
)
{
VP8Macroblock
*
mb
=
s
->
macroblocks_base
+
((
s
->
mb_width
+
1
)
*
(
mb_y
+
1
)
+
1
);
int
mb_xy
=
mb_y
*
s
->
mb_width
;
VP8Macroblock
*
mb
=
s
->
macroblocks_base
+
((
s
->
mb_width
+
1
)
*
(
mb_y
+
1
)
+
1
);
int
mb_xy
=
mb_y
*
s
->
mb_width
;
AV_WN32A
(
s
->
intra4x4_pred_mode_left
,
DC_PRED
*
0x01010101
);
AV_WN32A
(
s
->
intra4x4_pred_mode_left
,
DC_PRED
*
0x01010101
);
s
->
mv_min
.
x
=
-
MARGIN
;
s
->
mv_max
.
x
=
((
s
->
mb_width
-
1
)
<<
6
)
+
MARGIN
;
for
(
mb_x
=
0
;
mb_x
<
s
->
mb_width
;
mb_x
++
,
mb_xy
++
,
mb
++
)
{
if
(
mb_y
==
0
)
AV_WN32A
((
mb
-
s
->
mb_width
-
1
)
->
intra4x4_pred_mode_top
,
DC_PRED
*
0x01010101
);
AV_WN32A
((
mb
-
s
->
mb_width
-
1
)
->
intra4x4_pred_mode_top
,
DC_PRED
*
0x01010101
);
decode_mb_mode
(
s
,
mb
,
mb_x
,
mb_y
,
curframe
->
seg_map
->
data
+
mb_xy
,
prev_frame
&&
prev_frame
->
seg_map
?
prev_frame
->
seg_map
->
data
+
mb_xy
:
NULL
,
1
);
...
...
@@ -1603,37 +1660,40 @@ static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
}
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
do {\
int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
if (otd->thread_mb_pos < tmp) {\
pthread_mutex_lock(&otd->lock);\
td->wait_mb_pos = tmp;\
do {\
if (otd->thread_mb_pos >= tmp)\
break;\
pthread_cond_wait(&otd->cond, &otd->lock);\
} while (1);\
td->wait_mb_pos = INT_MAX;\
pthread_mutex_unlock(&otd->lock);\
}\
} while(0);
#define update_pos(td, mb_y, mb_x)\
do {\
int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
int is_null = (next_td == NULL) || (prev_td == NULL);\
int pos_check = (is_null) ? 1 :\
(next_td != td && pos >= next_td->wait_mb_pos) ||\
(prev_td != td && pos >= prev_td->wait_mb_pos);\
td->thread_mb_pos = pos;\
if (sliced_threading && pos_check) {\
pthread_mutex_lock(&td->lock);\
pthread_cond_broadcast(&td->cond);\
pthread_mutex_unlock(&td->lock);\
}\
} while(0);
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
do { \
int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
if (otd->thread_mb_pos < tmp) { \
pthread_mutex_lock(&otd->lock); \
td->wait_mb_pos = tmp; \
do { \
if (otd->thread_mb_pos >= tmp) \
break; \
pthread_cond_wait(&otd->cond, &otd->lock); \
} while (1); \
td->wait_mb_pos = INT_MAX; \
pthread_mutex_unlock(&otd->lock); \
} \
} while (0);
#define update_pos(td, mb_y, mb_x) \
do { \
int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
(num_jobs > 1); \
int is_null = (next_td == NULL) || (prev_td == NULL); \
int pos_check = (is_null) ? 1 \
: (next_td != td && \
pos >= next_td->wait_mb_pos) || \
(prev_td != td && \
pos >= prev_td->wait_mb_pos); \
td->thread_mb_pos = pos; \
if (sliced_threading && pos_check) { \
pthread_mutex_lock(&td->lock); \
pthread_cond_broadcast(&td->cond); \
pthread_mutex_unlock(&td->lock); \
} \
} while (0);
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
...
...
@@ -1644,51 +1704,58 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
{
VP8Context
*
s
=
avctx
->
priv_data
;
VP8ThreadData
*
prev_td
,
*
next_td
,
*
td
=
&
s
->
thread_data
[
threadnr
];
int
mb_y
=
td
->
thread_mb_pos
>>
16
;
int
mb_x
,
mb_xy
=
mb_y
*
s
->
mb_width
;
int
mb_y
=
td
->
thread_mb_pos
>>
16
;
int
mb_x
,
mb_xy
=
mb_y
*
s
->
mb_width
;
int
num_jobs
=
s
->
num_jobs
;
VP8Frame
*
curframe
=
s
->
curframe
,
*
prev_frame
=
s
->
prev_frame
;
VP56RangeCoder
*
c
=
&
s
->
coeff_partition
[
mb_y
&
(
s
->
num_coeff_partitions
-
1
)];
VP56RangeCoder
*
c
=
&
s
->
coeff_partition
[
mb_y
&
(
s
->
num_coeff_partitions
-
1
)];
VP8Macroblock
*
mb
;
uint8_t
*
dst
[
3
]
=
{
curframe
->
tf
.
f
->
data
[
0
]
+
16
*
mb_y
*
s
->
linesize
,
curframe
->
tf
.
f
->
data
[
1
]
+
8
*
mb_y
*
s
->
uvlinesize
,
curframe
->
tf
.
f
->
data
[
2
]
+
8
*
mb_y
*
s
->
uvlinesize
curframe
->
tf
.
f
->
data
[
0
]
+
16
*
mb_y
*
s
->
linesize
,
curframe
->
tf
.
f
->
data
[
1
]
+
8
*
mb_y
*
s
->
uvlinesize
,
curframe
->
tf
.
f
->
data
[
2
]
+
8
*
mb_y
*
s
->
uvlinesize
};
if
(
mb_y
==
0
)
prev_td
=
td
;
else
prev_td
=
&
s
->
thread_data
[(
jobnr
+
num_jobs
-
1
)
%
num_jobs
];
if
(
mb_y
==
s
->
mb_height
-
1
)
next_td
=
td
;
else
next_td
=
&
s
->
thread_data
[(
jobnr
+
1
)
%
num_jobs
];
if
(
mb_y
==
0
)
prev_td
=
td
;
else
prev_td
=
&
s
->
thread_data
[(
jobnr
+
num_jobs
-
1
)
%
num_jobs
];
if
(
mb_y
==
s
->
mb_height
-
1
)
next_td
=
td
;
else
next_td
=
&
s
->
thread_data
[(
jobnr
+
1
)
%
num_jobs
];
if
(
s
->
mb_layout
==
1
)
mb
=
s
->
macroblocks_base
+
((
s
->
mb_width
+
1
)
*
(
mb_y
+
1
)
+
1
);
mb
=
s
->
macroblocks_base
+
((
s
->
mb_width
+
1
)
*
(
mb_y
+
1
)
+
1
);
else
{
// Make sure the previous frame has read its segmentation map,
// if we re-use the same map.
if
(
prev_frame
&&
s
->
segmentation
.
enabled
&&
!
s
->
segmentation
.
update_map
)
ff_thread_await_progress
(
&
prev_frame
->
tf
,
mb_y
,
0
);
mb
=
s
->
macroblocks
+
(
s
->
mb_height
-
mb_y
-
1
)
*
2
;
mb
=
s
->
macroblocks
+
(
s
->
mb_height
-
mb_y
-
1
)
*
2
;
memset
(
mb
-
1
,
0
,
sizeof
(
*
mb
));
// zero left macroblock
AV_WN32A
(
s
->
intra4x4_pred_mode_left
,
DC_PRED
*
0x01010101
);
AV_WN32A
(
s
->
intra4x4_pred_mode_left
,
DC_PRED
*
0x01010101
);
}
memset
(
td
->
left_nnz
,
0
,
sizeof
(
td
->
left_nnz
));
s
->
mv_min
.
x
=
-
MARGIN
;
s
->
mv_max
.
x
=
((
s
->
mb_width
-
1
)
<<
6
)
+
MARGIN
;
s
->
mv_max
.
x
=
((
s
->
mb_width
-
1
)
<<
6
)
+
MARGIN
;
for
(
mb_x
=
0
;
mb_x
<
s
->
mb_width
;
mb_x
++
,
mb_xy
++
,
mb
++
)
{
// Wait for previous thread to read mb_x+2, and reach mb_y-1.
if
(
prev_td
!=
td
)
{
if
(
threadnr
!=
0
)
{
check_thread_pos
(
td
,
prev_td
,
mb_x
+
1
,
mb_y
-
1
);
check_thread_pos
(
td
,
prev_td
,
mb_x
+
1
,
mb_y
-
1
);
}
else
{
check_thread_pos
(
td
,
prev_td
,
(
s
->
mb_width
+
3
)
+
(
mb_x
+
1
),
mb_y
-
1
);
check_thread_pos
(
td
,
prev_td
,
(
s
->
mb_width
+
3
)
+
(
mb_x
+
1
),
mb_y
-
1
);
}
}
s
->
vdsp
.
prefetch
(
dst
[
0
]
+
(
mb_x
&
3
)
*
4
*
s
->
linesize
+
64
,
s
->
linesize
,
4
);
s
->
vdsp
.
prefetch
(
dst
[
1
]
+
(
mb_x
&
7
)
*
s
->
uvlinesize
+
64
,
dst
[
2
]
-
dst
[
1
],
2
);
s
->
vdsp
.
prefetch
(
dst
[
0
]
+
(
mb_x
&
3
)
*
4
*
s
->
linesize
+
64
,
s
->
linesize
,
4
);
s
->
vdsp
.
prefetch
(
dst
[
1
]
+
(
mb_x
&
7
)
*
s
->
uvlinesize
+
64
,
dst
[
2
]
-
dst
[
1
],
2
);
if
(
!
s
->
mb_layout
)
decode_mb_mode
(
s
,
mb
,
mb_x
,
mb_y
,
curframe
->
seg_map
->
data
+
mb_xy
,
...
...
@@ -1713,7 +1780,8 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
AV_ZERO64
(
td
->
left_nnz
);
AV_WN64
(
s
->
top_nnz
[
mb_x
],
0
);
// array of 9, so unaligned
// Reset DC block predictors if they would exist if the mb had coefficients
/* Reset DC block predictors if they would exist
* if the mb had coefficients */
if
(
mb
->
mode
!=
MODE_I4x4
&&
mb
->
mode
!=
VP8_MVMODE_SPLIT
)
{
td
->
left_nnz
[
8
]
=
0
;
s
->
top_nnz
[
mb_x
][
8
]
=
0
;
...
...
@@ -1723,23 +1791,25 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
if
(
s
->
deblock_filter
)
filter_level_for_mb
(
s
,
mb
,
&
td
->
filter_strength
[
mb_x
]);
if
(
s
->
deblock_filter
&&
num_jobs
!=
1
&&
threadnr
==
num_jobs
-
1
)
{
if
(
s
->
deblock_filter
&&
num_jobs
!=
1
&&
threadnr
==
num_jobs
-
1
)
{
if
(
s
->
filter
.
simple
)
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
NULL
,
NULL
,
s
->
linesize
,
0
,
1
);
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
NULL
,
NULL
,
s
->
linesize
,
0
,
1
);
else
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
s
->
linesize
,
s
->
uvlinesize
,
0
);
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
s
->
linesize
,
s
->
uvlinesize
,
0
);
}
prefetch_motion
(
s
,
mb
,
mb_x
,
mb_y
,
mb_xy
,
VP56_FRAME_GOLDEN2
);
dst
[
0
]
+=
16
;
dst
[
1
]
+=
8
;
dst
[
2
]
+=
8
;
dst
[
0
]
+=
16
;
dst
[
1
]
+=
8
;
dst
[
2
]
+=
8
;
s
->
mv_min
.
x
-=
64
;
s
->
mv_max
.
x
-=
64
;
if
(
mb_x
==
s
->
mb_width
+
1
)
{
update_pos
(
td
,
mb_y
,
s
->
mb_width
+
3
);
if
(
mb_x
==
s
->
mb_width
+
1
)
{
update_pos
(
td
,
mb_y
,
s
->
mb_width
+
3
);
}
else
{
update_pos
(
td
,
mb_y
,
mb_x
);
}
...
...
@@ -1751,41 +1821,46 @@ static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
{
VP8Context
*
s
=
avctx
->
priv_data
;
VP8ThreadData
*
td
=
&
s
->
thread_data
[
threadnr
];
int
mb_x
,
mb_y
=
td
->
thread_mb_pos
>>
16
,
num_jobs
=
s
->
num_jobs
;
int
mb_x
,
mb_y
=
td
->
thread_mb_pos
>>
16
,
num_jobs
=
s
->
num_jobs
;
AVFrame
*
curframe
=
s
->
curframe
->
tf
.
f
;
VP8Macroblock
*
mb
;
VP8ThreadData
*
prev_td
,
*
next_td
;
uint8_t
*
dst
[
3
]
=
{
curframe
->
data
[
0
]
+
16
*
mb_y
*
s
->
linesize
,
curframe
->
data
[
1
]
+
8
*
mb_y
*
s
->
uvlinesize
,
curframe
->
data
[
2
]
+
8
*
mb_y
*
s
->
uvlinesize
curframe
->
data
[
0
]
+
16
*
mb_y
*
s
->
linesize
,
curframe
->
data
[
1
]
+
8
*
mb_y
*
s
->
uvlinesize
,
curframe
->
data
[
2
]
+
8
*
mb_y
*
s
->
uvlinesize
};
if
(
s
->
mb_layout
==
1
)
mb
=
s
->
macroblocks_base
+
((
s
->
mb_width
+
1
)
*
(
mb_y
+
1
)
+
1
);
mb
=
s
->
macroblocks_base
+
((
s
->
mb_width
+
1
)
*
(
mb_y
+
1
)
+
1
);
else
mb
=
s
->
macroblocks
+
(
s
->
mb_height
-
mb_y
-
1
)
*
2
;
mb
=
s
->
macroblocks
+
(
s
->
mb_height
-
mb_y
-
1
)
*
2
;
if
(
mb_y
==
0
)
prev_td
=
td
;
else
prev_td
=
&
s
->
thread_data
[(
jobnr
+
num_jobs
-
1
)
%
num_jobs
];
if
(
mb_y
==
s
->
mb_height
-
1
)
next_td
=
td
;
else
next_td
=
&
s
->
thread_data
[(
jobnr
+
1
)
%
num_jobs
];
if
(
mb_y
==
0
)
prev_td
=
td
;
else
prev_td
=
&
s
->
thread_data
[(
jobnr
+
num_jobs
-
1
)
%
num_jobs
];
if
(
mb_y
==
s
->
mb_height
-
1
)
next_td
=
td
;
else
next_td
=
&
s
->
thread_data
[(
jobnr
+
1
)
%
num_jobs
];
for
(
mb_x
=
0
;
mb_x
<
s
->
mb_width
;
mb_x
++
,
mb
++
)
{
VP8FilterStrength
*
f
=
&
td
->
filter_strength
[
mb_x
];
if
(
prev_td
!=
td
)
{
check_thread_pos
(
td
,
prev_td
,
(
mb_x
+
1
)
+
(
s
->
mb_width
+
3
),
mb_y
-
1
);
}
if
(
prev_td
!=
td
)
check_thread_pos
(
td
,
prev_td
,
(
mb_x
+
1
)
+
(
s
->
mb_width
+
3
),
mb_y
-
1
);
if
(
next_td
!=
td
)
if
(
next_td
!=
&
s
->
thread_data
[
0
])
{
check_thread_pos
(
td
,
next_td
,
mb_x
+
1
,
mb_y
+
1
);
}
if
(
next_td
!=
&
s
->
thread_data
[
0
])
check_thread_pos
(
td
,
next_td
,
mb_x
+
1
,
mb_y
+
1
);
if
(
num_jobs
==
1
)
{
if
(
s
->
filter
.
simple
)
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
NULL
,
NULL
,
s
->
linesize
,
0
,
1
);
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
NULL
,
NULL
,
s
->
linesize
,
0
,
1
);
else
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
s
->
linesize
,
s
->
uvlinesize
,
0
);
backup_mb_border
(
s
->
top_border
[
mb_x
+
1
],
dst
[
0
],
dst
[
1
],
dst
[
2
],
s
->
linesize
,
s
->
uvlinesize
,
0
);
}
if
(
s
->
filter
.
simple
)
...
...
@@ -1796,7 +1871,7 @@ static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
dst
[
1
]
+=
8
;
dst
[
2
]
+=
8
;
update_pos
(
td
,
mb_y
,
(
s
->
mb_width
+
3
)
+
mb_x
);
update_pos
(
td
,
mb_y
,
(
s
->
mb_width
+
3
)
+
mb_x
);
}
}
...
...
@@ -1808,10 +1883,12 @@ static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
VP8ThreadData
*
next_td
=
NULL
,
*
prev_td
=
NULL
;
VP8Frame
*
curframe
=
s
->
curframe
;
int
mb_y
,
num_jobs
=
s
->
num_jobs
;
td
->
thread_nr
=
threadnr
;
for
(
mb_y
=
jobnr
;
mb_y
<
s
->
mb_height
;
mb_y
+=
num_jobs
)
{
if
(
mb_y
>=
s
->
mb_height
)
break
;
td
->
thread_mb_pos
=
mb_y
<<
16
;
if
(
mb_y
>=
s
->
mb_height
)
break
;
td
->
thread_mb_pos
=
mb_y
<<
16
;
vp8_decode_mb_row_no_filter
(
avctx
,
tdata
,
jobnr
,
threadnr
);
if
(
s
->
deblock_filter
)
vp8_filter_mb_row
(
avctx
,
tdata
,
jobnr
,
threadnr
);
...
...
@@ -1840,11 +1917,12 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
prev_frame
=
s
->
framep
[
VP56_FRAME_CURRENT
];
referenced
=
s
->
update_last
||
s
->
update_golden
==
VP56_FRAME_CURRENT
||
s
->
update_altref
==
VP56_FRAME_CURRENT
;
referenced
=
s
->
update_last
||
s
->
update_golden
==
VP56_FRAME_CURRENT
||
s
->
update_altref
==
VP56_FRAME_CURRENT
;
skip_thresh
=
!
referenced
?
AVDISCARD_NONREF
:
!
s
->
keyframe
?
AVDISCARD_NONKEY
:
AVDISCARD_ALL
;
skip_thresh
=
!
referenced
?
AVDISCARD_NONREF
:
!
s
->
keyframe
?
AVDISCARD_NONKEY
:
AVDISCARD_ALL
;
if
(
avctx
->
skip_frame
>=
skip_thresh
)
{
s
->
invisible
=
1
;
...
...
@@ -1858,7 +1936,7 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
if
(
s
->
frames
[
i
].
tf
.
f
->
data
[
0
]
&&
&
s
->
frames
[
i
]
!=
prev_frame
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_PREVIOUS
]
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_GOLDEN
]
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_GOLDEN
]
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_GOLDEN2
])
vp8_release_frame
(
s
,
&
s
->
frames
[
i
]);
...
...
@@ -1866,7 +1944,7 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
for
(
i
=
0
;
i
<
5
;
i
++
)
if
(
&
s
->
frames
[
i
]
!=
prev_frame
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_PREVIOUS
]
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_GOLDEN
]
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_GOLDEN
]
&&
&
s
->
frames
[
i
]
!=
s
->
framep
[
VP56_FRAME_GOLDEN2
])
{
curframe
=
s
->
framep
[
VP56_FRAME_CURRENT
]
=
&
s
->
frames
[
i
];
break
;
...
...
@@ -1878,57 +1956,61 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
if
(
curframe
->
tf
.
f
->
data
[
0
])
vp8_release_frame
(
s
,
curframe
);
// Given that arithmetic probabilities are updated every frame, it's quite likely
// that the values we have on a random interframe are complete junk if we didn't
// start decode on a keyframe. So just don't display anything rather than junk.
/* Given that arithmetic probabilities are updated every frame, it's quite
* likely that the values we have on a random interframe are complete
* junk if we didn't start decode on a keyframe. So just don't display
* anything rather than junk. */
if
(
!
s
->
keyframe
&&
(
!
s
->
framep
[
VP56_FRAME_PREVIOUS
]
||
!
s
->
framep
[
VP56_FRAME_GOLDEN
]
||
!
s
->
framep
[
VP56_FRAME_GOLDEN
]
||
!
s
->
framep
[
VP56_FRAME_GOLDEN2
]))
{
av_log
(
avctx
,
AV_LOG_WARNING
,
"Discarding interframe without a prior keyframe!
\n
"
);
av_log
(
avctx
,
AV_LOG_WARNING
,
"Discarding interframe without a prior keyframe!
\n
"
);
ret
=
AVERROR_INVALIDDATA
;
goto
err
;
}
curframe
->
tf
.
f
->
key_frame
=
s
->
keyframe
;
curframe
->
tf
.
f
->
pict_type
=
s
->
keyframe
?
AV_PICTURE_TYPE_I
:
AV_PICTURE_TYPE_P
;
curframe
->
tf
.
f
->
pict_type
=
s
->
keyframe
?
AV_PICTURE_TYPE_I
:
AV_PICTURE_TYPE_P
;
if
((
ret
=
vp8_alloc_frame
(
s
,
curframe
,
referenced
)))
{
av_log
(
avctx
,
AV_LOG_ERROR
,
"get_buffer() failed!
\n
"
);
goto
err
;
}
// check if golden and altref are swapped
if
(
s
->
update_altref
!=
VP56_FRAME_NONE
)
{
s
->
next_framep
[
VP56_FRAME_GOLDEN2
]
=
s
->
framep
[
s
->
update_altref
];
}
else
{
s
->
next_framep
[
VP56_FRAME_GOLDEN2
]
=
s
->
framep
[
VP56_FRAME_GOLDEN2
];
}
if
(
s
->
update_golden
!=
VP56_FRAME_NONE
)
{
s
->
next_framep
[
VP56_FRAME_GOLDEN
]
=
s
->
framep
[
s
->
update_golden
];
}
else
{
s
->
next_framep
[
VP56_FRAME_GOLDEN
]
=
s
->
framep
[
VP56_FRAME_GOLDEN
];
}
if
(
s
->
update_last
)
{
if
(
s
->
update_altref
!=
VP56_FRAME_NONE
)
s
->
next_framep
[
VP56_FRAME_GOLDEN2
]
=
s
->
framep
[
s
->
update_altref
];
else
s
->
next_framep
[
VP56_FRAME_GOLDEN2
]
=
s
->
framep
[
VP56_FRAME_GOLDEN2
];
if
(
s
->
update_golden
!=
VP56_FRAME_NONE
)
s
->
next_framep
[
VP56_FRAME_GOLDEN
]
=
s
->
framep
[
s
->
update_golden
];
else
s
->
next_framep
[
VP56_FRAME_GOLDEN
]
=
s
->
framep
[
VP56_FRAME_GOLDEN
];
if
(
s
->
update_last
)
s
->
next_framep
[
VP56_FRAME_PREVIOUS
]
=
curframe
;
}
else
{
else
s
->
next_framep
[
VP56_FRAME_PREVIOUS
]
=
s
->
framep
[
VP56_FRAME_PREVIOUS
];
}
s
->
next_framep
[
VP56_FRAME_CURRENT
]
=
curframe
;
s
->
next_framep
[
VP56_FRAME_CURRENT
]
=
curframe
;
ff_thread_finish_setup
(
avctx
);
s
->
linesize
=
curframe
->
tf
.
f
->
linesize
[
0
];
s
->
uvlinesize
=
curframe
->
tf
.
f
->
linesize
[
1
];
memset
(
s
->
top_nnz
,
0
,
s
->
mb_width
*
sizeof
(
*
s
->
top_nnz
));
/* Zero macroblock structures for top/top-left prediction from outside the frame. */
memset
(
s
->
top_nnz
,
0
,
s
->
mb_width
*
sizeof
(
*
s
->
top_nnz
));
/* Zero macroblock structures for top/top-left prediction
* from outside the frame. */
if
(
!
s
->
mb_layout
)
memset
(
s
->
macroblocks
+
s
->
mb_height
*
2
-
1
,
0
,
(
s
->
mb_width
+
1
)
*
sizeof
(
*
s
->
macroblocks
));
memset
(
s
->
macroblocks
+
s
->
mb_height
*
2
-
1
,
0
,
(
s
->
mb_width
+
1
)
*
sizeof
(
*
s
->
macroblocks
));
if
(
!
s
->
mb_layout
&&
s
->
keyframe
)
memset
(
s
->
intra4x4_pred_mode_top
,
DC_PRED
,
s
->
mb_width
*
4
);
memset
(
s
->
intra4x4_pred_mode_top
,
DC_PRED
,
s
->
mb_width
*
4
);
memset
(
s
->
ref_count
,
0
,
sizeof
(
s
->
ref_count
));
if
(
s
->
mb_layout
==
1
)
{
// Make sure the previous frame has read its segmentation map,
// if we re-use the same map.
...
...
@@ -1949,9 +2031,10 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
s
->
mv_max
.
y
=
((
s
->
mb_height
-
1
)
<<
6
)
+
MARGIN
;
for
(
i
=
0
;
i
<
MAX_THREADS
;
i
++
)
{
s
->
thread_data
[
i
].
thread_mb_pos
=
0
;
s
->
thread_data
[
i
].
wait_mb_pos
=
INT_MAX
;
s
->
thread_data
[
i
].
wait_mb_pos
=
INT_MAX
;
}
avctx
->
execute2
(
avctx
,
vp8_decode_mb_row_sliced
,
s
->
thread_data
,
NULL
,
num_jobs
);
avctx
->
execute2
(
avctx
,
vp8_decode_mb_row_sliced
,
s
->
thread_data
,
NULL
,
num_jobs
);
ff_thread_report_progress
(
&
curframe
->
tf
,
INT_MAX
,
0
);
memcpy
(
&
s
->
framep
[
0
],
&
s
->
next_framep
[
0
],
sizeof
(
s
->
framep
[
0
])
*
4
);
...
...
@@ -1965,7 +2048,7 @@ skip_decode:
if
(
!
s
->
invisible
)
{
if
((
ret
=
av_frame_ref
(
data
,
curframe
->
tf
.
f
))
<
0
)
return
ret
;
*
got_frame
=
1
;
*
got_frame
=
1
;
}
return
avpkt
->
size
;
...
...
@@ -2033,10 +2116,10 @@ static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
return
0
;
}
#define REBASE(pic) \
pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
#define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
static
int
vp8_decode_update_thread_context
(
AVCodecContext
*
dst
,
const
AVCodecContext
*
src
)
static
int
vp8_decode_update_thread_context
(
AVCodecContext
*
dst
,
const
AVCodecContext
*
src
)
{
VP8Context
*
s
=
dst
->
priv_data
,
*
s_src
=
src
->
priv_data
;
int
i
;
...
...
@@ -2048,9 +2131,9 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
s
->
mb_height
=
s_src
->
mb_height
;
}
s
->
prob
[
0
]
=
s_src
->
prob
[
!
s_src
->
update_probabilities
];
s
->
prob
[
0
]
=
s_src
->
prob
[
!
s_src
->
update_probabilities
];
s
->
segmentation
=
s_src
->
segmentation
;
s
->
lf_delta
=
s_src
->
lf_delta
;
s
->
lf_delta
=
s_src
->
lf_delta
;
memcpy
(
s
->
sign_bias
,
s_src
->
sign_bias
,
sizeof
(
s
->
sign_bias
));
for
(
i
=
0
;
i
<
FF_ARRAY_ELEMS
(
s_src
->
frames
);
i
++
)
{
...
...
libavcodec/vp8.h
View file @
53c20f17
...
...
@@ -28,14 +28,15 @@
#include "libavutil/buffer.h"
#include "vp56.h"
#include "vp8dsp.h"
#include "h264pred.h"
#include "thread.h"
#include "vp56.h"
#include "vp8dsp.h"
#if HAVE_PTHREADS
#include <pthread.h>
#
include <pthread.h>
#elif HAVE_W32THREADS
#include "compat/w32pthreads.h"
#
include "compat/w32pthreads.h"
#endif
#define VP8_MAX_QUANT 127
...
...
@@ -82,7 +83,7 @@ typedef struct VP8FilterStrength {
typedef
struct
VP8Macroblock
{
uint8_t
skip
;
//
todo
: make it possible to check for at least (i4x4 or split_mv)
//
TODO
: make it possible to check for at least (i4x4 or split_mv)
// in one op. are others needed?
uint8_t
mode
;
uint8_t
ref_frame
;
...
...
@@ -116,7 +117,7 @@ typedef struct VP8ThreadData {
int
thread_nr
;
#if HAVE_THREADS
pthread_mutex_t
lock
;
pthread_cond_t
cond
;
pthread_cond_t
cond
;
#endif
int
thread_mb_pos
;
// (mb_y << 16) | (mb_x & 0xFFFF)
int
wait_mb_pos
;
// What the current thread is waiting on.
...
...
@@ -203,7 +204,7 @@ typedef struct VP8Context {
* [7] - split mv
* i16x16 modes never have any adjustment
*/
int8_t
mode
[
VP8_MVMODE_SPLIT
+
1
];
int8_t
mode
[
VP8_MVMODE_SPLIT
+
1
];
/**
* filter strength adjustment for macroblocks that reference:
...
...
@@ -215,7 +216,7 @@ typedef struct VP8Context {
int8_t
ref
[
4
];
}
lf_delta
;
uint8_t
(
*
top_border
)[
16
+
8
+
8
];
uint8_t
(
*
top_border
)[
16
+
8
+
8
];
uint8_t
(
*
top_nnz
)[
9
];
VP56RangeCoder
c
;
///< header context, includes mb modes and motion vectors
...
...
@@ -234,7 +235,7 @@ typedef struct VP8Context {
uint8_t
golden
;
uint8_t
pred16x16
[
4
];
uint8_t
pred8x8c
[
3
];
uint8_t
token
[
4
][
16
][
3
][
NUM_DCT_TOKENS
-
1
];
uint8_t
token
[
4
][
16
][
3
][
NUM_DCT_TOKENS
-
1
];
uint8_t
mvc
[
2
][
19
];
}
prob
[
2
];
...
...
libavcodec/vp8_parser.c
View file @
53c20f17
...
...
@@ -21,18 +21,19 @@
#include "parser.h"
static
int
parse
(
AVCodecParserContext
*
s
,
AVCodecContext
*
avctx
,
const
uint8_t
**
poutbuf
,
int
*
poutbuf_size
,
const
uint8_t
*
buf
,
int
buf_size
)
AVCodecContext
*
avctx
,
const
uint8_t
**
poutbuf
,
int
*
poutbuf_size
,
const
uint8_t
*
buf
,
int
buf_size
)
{
s
->
pict_type
=
(
buf
[
0
]
&
0x01
)
?
AV_PICTURE_TYPE_P
:
AV_PICTURE_TYPE_I
;
s
->
pict_type
=
(
buf
[
0
]
&
0x01
)
?
AV_PICTURE_TYPE_P
:
AV_PICTURE_TYPE_I
;
*
poutbuf
=
buf
;
*
poutbuf
=
buf
;
*
poutbuf_size
=
buf_size
;
return
buf_size
;
}
AVCodecParser
ff_vp8_parser
=
{
.
codec_ids
=
{
AV_CODEC_ID_VP8
},
.
parser_parse
=
parse
,
.
codec_ids
=
{
AV_CODEC_ID_VP8
},
.
parser_parse
=
parse
,
};
libavcodec/vp8data.h
View file @
53c20f17
...
...
@@ -30,28 +30,25 @@
#include "vp8.h"
#include "h264pred.h"
static
const
uint8_t
vp8_pred4x4_mode
[]
=
{
static
const
uint8_t
vp8_pred4x4_mode
[]
=
{
[
DC_PRED8x8
]
=
DC_PRED
,
[
VERT_PRED8x8
]
=
VERT_PRED
,
[
HOR_PRED8x8
]
=
HOR_PRED
,
[
PLANE_PRED8x8
]
=
TM_VP8_PRED
,
};
static
const
int8_t
vp8_pred16x16_tree_intra
[
4
][
2
]
=
{
{
-
MODE_I4x4
,
1
},
// '0'
{
2
,
3
},
{
-
DC_PRED8x8
,
-
VERT_PRED8x8
},
// '100', '101'
{
-
HOR_PRED8x8
,
-
PLANE_PRED8x8
},
// '110', '111'
static
const
int8_t
vp8_pred16x16_tree_intra
[
4
][
2
]
=
{
{
-
MODE_I4x4
,
1
},
// '0'
{
2
,
3
},
{
-
DC_PRED8x8
,
-
VERT_PRED8x8
},
// '100', '101'
{
-
HOR_PRED8x8
,
-
PLANE_PRED8x8
},
// '110', '111'
};
static
const
int8_t
vp8_pred16x16_tree_inter
[
4
][
2
]
=
{
{
-
DC_PRED8x8
,
1
},
// '0'
{
2
,
3
},
{
-
VERT_PRED8x8
,
-
HOR_PRED8x8
},
// '100', '101'
{
-
PLANE_PRED8x8
,
-
MODE_I4x4
},
// '110', '111'
static
const
int8_t
vp8_pred16x16_tree_inter
[
4
][
2
]
=
{
{
-
DC_PRED8x8
,
1
},
// '0'
{
2
,
3
},
{
-
VERT_PRED8x8
,
-
HOR_PRED8x8
},
// '100', '101'
{
-
PLANE_PRED8x8
,
-
MODE_I4x4
},
// '110', '111'
};
static
const
int
vp8_mode_contexts
[
6
][
4
]
=
{
...
...
@@ -64,26 +61,26 @@ static const int vp8_mode_contexts[6][4] = {
};
static
const
uint8_t
vp8_mbsplits
[
5
][
16
]
=
{
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
},
{
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
,
2
,
2
,
3
,
3
,
2
,
2
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
},
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
}
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
},
{
0
,
0
,
1
,
1
,
0
,
0
,
1
,
1
,
2
,
2
,
3
,
3
,
2
,
2
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
},
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
}
};
static
const
uint8_t
vp8_mbfirstidx
[
4
][
16
]
=
{
{
0
,
8
},
{
0
,
2
},
{
0
,
2
,
8
,
10
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
}
{
0
,
8
},
{
0
,
2
},
{
0
,
2
,
8
,
10
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
}
};
static
const
uint8_t
vp8_mbsplit_count
[
4
]
=
{
2
,
2
,
4
,
16
};
static
const
uint8_t
vp8_mbsplit_prob
[
3
]
=
{
110
,
111
,
150
};
static
const
uint8_t
vp8_mbsplit_count
[
4
]
=
{
2
,
2
,
4
,
16
};
static
const
uint8_t
vp8_mbsplit_prob
[
3
]
=
{
110
,
111
,
150
};
static
const
uint8_t
vp8_submv_prob
[
5
][
3
]
=
{
{
147
,
136
,
18
},
...
...
@@ -93,39 +90,42 @@ static const uint8_t vp8_submv_prob[5][3] = {
{
208
,
1
,
1
}
};
static
const
uint8_t
vp8_pred16x16_prob_intra
[
4
]
=
{
145
,
156
,
163
,
128
};
static
const
uint8_t
vp8_pred16x16_prob_inter
[
4
]
=
{
112
,
86
,
140
,
37
};
static
const
int8_t
vp8_pred4x4_tree
[
9
][
2
]
=
{
{
-
DC_PRED
,
1
},
// '0'
{
-
TM_VP8_PRED
,
2
},
// '10'
{
-
VERT_PRED
,
3
},
// '110'
{
4
,
6
},
{
-
HOR_PRED
,
5
},
// '11100'
{
-
DIAG_DOWN_RIGHT_PRED
,
-
VERT_RIGHT_PRED
},
// '111010', '111011'
{
-
DIAG_DOWN_LEFT_PRED
,
7
},
// '11110'
{
-
VERT_LEFT_PRED
,
8
},
// '111110'
{
-
HOR_DOWN_PRED
,
-
HOR_UP_PRED
},
// '1111110', '1111111'
static
const
uint8_t
vp8_pred16x16_prob_intra
[
4
]
=
{
145
,
156
,
163
,
128
};
static
const
uint8_t
vp8_pred16x16_prob_inter
[
4
]
=
{
112
,
86
,
140
,
37
};
static
const
int8_t
vp8_pred8x8c_tree
[
3
][
2
]
=
{
{
-
DC_PRED8x8
,
1
},
// '0'
{
-
VERT_PRED8x8
,
2
},
// '10
{
-
HOR_PRED8x8
,
-
PLANE_PRED8x8
},
// '110', '111'
static
const
int8_t
vp8_pred4x4_tree
[
9
][
2
]
=
{
{
-
DC_PRED
,
1
},
// '0'
{
-
TM_VP8_PRED
,
2
},
// '10'
{
-
VERT_PRED
,
3
},
// '110'
{
4
,
6
},
{
-
HOR_PRED
,
5
},
// '11100'
{
-
DIAG_DOWN_RIGHT_PRED
,
-
VERT_RIGHT_PRED
},
// '111010', '111011'
{
-
DIAG_DOWN_LEFT_PRED
,
7
},
// '11110'
{
-
VERT_LEFT_PRED
,
8
},
// '111110'
{
-
HOR_DOWN_PRED
,
-
HOR_UP_PRED
},
// '1111110', '1111111'
};
static
const
uint8_t
vp8_pred8x8c_prob_intra
[
3
]
=
{
142
,
114
,
183
};
static
const
uint8_t
vp8_pred8x8c_prob_inter
[
3
]
=
{
162
,
101
,
204
};
static
const
int8_t
vp8_pred8x8c_tree
[
3
][
2
]
=
{
{
-
DC_PRED8x8
,
1
},
// '0'
{
-
VERT_PRED8x8
,
2
},
// '10
{
-
HOR_PRED8x8
,
-
PLANE_PRED8x8
},
// '110', '111'
};
static
const
uint8_t
vp8_pred4x4_prob_inter
[
9
]
=
{
static
const
uint8_t
vp8_pred8x8c_prob_intra
[
3
]
=
{
142
,
114
,
183
};
static
const
uint8_t
vp8_pred8x8c_prob_inter
[
3
]
=
{
162
,
101
,
204
};
static
const
uint8_t
vp8_pred4x4_prob_inter
[
9
]
=
{
120
,
90
,
79
,
133
,
87
,
85
,
80
,
111
,
151
};
static
const
uint8_t
vp8_pred4x4_prob_intra
[
10
][
10
][
9
]
=
{
static
const
uint8_t
vp8_pred4x4_prob_intra
[
10
][
10
][
9
]
=
{
{
{
39
,
53
,
200
,
87
,
26
,
21
,
43
,
232
,
171
},
{
56
,
34
,
51
,
104
,
114
,
102
,
29
,
93
,
77
},
...
...
@@ -248,50 +248,57 @@ static const uint8_t vp8_pred4x4_prob_intra[10][10][9] =
},
};
static
const
int8_t
vp8_segmentid_tree
[][
2
]
=
{
{
1
,
2
},
{
-
0
,
-
1
},
// '00', '01'
{
-
2
,
-
3
},
// '10', '11'
static
const
int8_t
vp8_segmentid_tree
[][
2
]
=
{
{
1
,
2
},
{
-
0
,
-
1
},
// '00', '01'
{
-
2
,
-
3
},
// '10', '11'
};
static
const
uint8_t
vp8_coeff_band
[
16
]
=
{
static
const
uint8_t
vp8_coeff_band
[
16
]
=
{
0
,
1
,
2
,
3
,
6
,
4
,
5
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
7
};
/* Inverse of vp8_coeff_band: mappings of bands to coefficient indexes.
* Each list is -1-terminated. */
static
const
int8_t
vp8_coeff_band_indexes
[
8
][
10
]
=
{
{
0
,
-
1
},
{
1
,
-
1
},
{
2
,
-
1
},
{
3
,
-
1
},
{
5
,
-
1
},
{
6
,
-
1
},
{
4
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
-
1
},
{
15
,
-
1
}
static
const
int8_t
vp8_coeff_band_indexes
[
8
][
10
]
=
{
{
0
,
-
1
},
{
1
,
-
1
},
{
2
,
-
1
},
{
3
,
-
1
},
{
5
,
-
1
},
{
6
,
-
1
},
{
4
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
-
1
},
{
15
,
-
1
}
};
static
const
uint8_t
vp8_dct_cat1_prob
[]
=
{
159
,
0
};
static
const
uint8_t
vp8_dct_cat2_prob
[]
=
{
165
,
145
,
0
};
static
const
uint8_t
vp8_dct_cat3_prob
[]
=
{
173
,
148
,
140
,
0
};
static
const
uint8_t
vp8_dct_cat4_prob
[]
=
{
176
,
155
,
140
,
135
,
0
};
static
const
uint8_t
vp8_dct_cat5_prob
[]
=
{
180
,
157
,
141
,
134
,
130
,
0
};
static
const
uint8_t
vp8_dct_cat6_prob
[]
=
{
254
,
254
,
243
,
230
,
196
,
177
,
153
,
140
,
133
,
130
,
129
,
0
};
static
const
uint8_t
vp8_dct_cat1_prob
[]
=
{
159
,
0
};
static
const
uint8_t
vp8_dct_cat2_prob
[]
=
{
165
,
145
,
0
};
static
const
uint8_t
vp8_dct_cat3_prob
[]
=
{
173
,
148
,
140
,
0
};
static
const
uint8_t
vp8_dct_cat4_prob
[]
=
{
176
,
155
,
140
,
135
,
0
};
static
const
uint8_t
vp8_dct_cat5_prob
[]
=
{
180
,
157
,
141
,
134
,
130
,
0
};
static
const
uint8_t
vp8_dct_cat6_prob
[]
=
{
254
,
254
,
243
,
230
,
196
,
177
,
153
,
140
,
133
,
130
,
129
,
0
};
// only used for cat3 and above; cat 1 and 2 are referenced directly
const
uint8_t
*
const
ff_vp8_dct_cat_prob
[]
=
{
const
uint8_t
*
const
ff_vp8_dct_cat_prob
[]
=
{
vp8_dct_cat3_prob
,
vp8_dct_cat4_prob
,
vp8_dct_cat5_prob
,
vp8_dct_cat6_prob
,
};
static
const
uint8_t
vp8_token_default_probs
[
4
][
8
][
3
][
NUM_DCT_TOKENS
-
1
]
=
{
static
const
uint8_t
vp8_token_default_probs
[
4
][
8
][
3
][
NUM_DCT_TOKENS
-
1
]
=
{
{
{
{
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
},
...
...
@@ -462,8 +469,7 @@ static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS-1] =
},
};
static
const
uint8_t
vp8_token_update_probs
[
4
][
8
][
3
][
NUM_DCT_TOKENS
-
1
]
=
{
static
const
uint8_t
vp8_token_update_probs
[
4
][
8
][
3
][
NUM_DCT_TOKENS
-
1
]
=
{
{
{
{
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
},
...
...
@@ -635,15 +641,14 @@ static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS-1] =
};
// fixme: copied from h264data.h
static
const
uint8_t
zigzag_scan
[
16
]
=
{
0
+
0
*
4
,
1
+
0
*
4
,
0
+
1
*
4
,
0
+
2
*
4
,
1
+
1
*
4
,
2
+
0
*
4
,
3
+
0
*
4
,
2
+
1
*
4
,
1
+
2
*
4
,
0
+
3
*
4
,
1
+
3
*
4
,
2
+
2
*
4
,
3
+
1
*
4
,
3
+
2
*
4
,
2
+
3
*
4
,
3
+
3
*
4
,
static
const
uint8_t
zigzag_scan
[
16
]
=
{
0
+
0
*
4
,
1
+
0
*
4
,
0
+
1
*
4
,
0
+
2
*
4
,
1
+
1
*
4
,
2
+
0
*
4
,
3
+
0
*
4
,
2
+
1
*
4
,
1
+
2
*
4
,
0
+
3
*
4
,
1
+
3
*
4
,
2
+
2
*
4
,
3
+
1
*
4
,
3
+
2
*
4
,
2
+
3
*
4
,
3
+
3
*
4
,
};
static
const
uint8_t
vp8_dc_qlookup
[
VP8_MAX_QUANT
+
1
]
=
{
static
const
uint8_t
vp8_dc_qlookup
[
VP8_MAX_QUANT
+
1
]
=
{
4
,
5
,
6
,
7
,
8
,
9
,
10
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
17
,
18
,
19
,
20
,
20
,
21
,
21
,
22
,
22
,
23
,
23
,
24
,
25
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
,
36
,
37
,
37
,
38
,
39
,
40
,
41
,
42
,
43
,
...
...
@@ -654,8 +659,7 @@ static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT+1] =
122
,
124
,
126
,
128
,
130
,
132
,
134
,
136
,
138
,
140
,
143
,
145
,
148
,
151
,
154
,
157
,
};
static
const
uint16_t
vp8_ac_qlookup
[
VP8_MAX_QUANT
+
1
]
=
{
static
const
uint16_t
vp8_ac_qlookup
[
VP8_MAX_QUANT
+
1
]
=
{
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
,
36
,
37
,
38
,
39
,
40
,
41
,
42
,
43
,
44
,
45
,
46
,
47
,
48
,
49
,
50
,
51
,
...
...
libavcodec/vp8dsp.c
View file @
53c20f17
...
...
@@ -24,9 +24,10 @@
* VP8 compatible video decoder
*/
#include "libavutil/common.h"
#include "mathops.h"
#include "vp8dsp.h"
#include "libavutil/common.h"
// TODO: Maybe add dequant
static
void
vp8_luma_dc_wht_c
(
int16_t
block
[
4
][
4
][
16
],
int16_t
dc
[
16
])
...
...
@@ -34,26 +35,26 @@ static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
int
i
,
t0
,
t1
,
t2
,
t3
;
for
(
i
=
0
;
i
<
4
;
i
++
)
{
t0
=
dc
[
0
*
4
+
i
]
+
dc
[
3
*
4
+
i
];
t1
=
dc
[
1
*
4
+
i
]
+
dc
[
2
*
4
+
i
];
t2
=
dc
[
1
*
4
+
i
]
-
dc
[
2
*
4
+
i
];
t3
=
dc
[
0
*
4
+
i
]
-
dc
[
3
*
4
+
i
];
dc
[
0
*
4
+
i
]
=
t0
+
t1
;
dc
[
1
*
4
+
i
]
=
t3
+
t2
;
dc
[
2
*
4
+
i
]
=
t0
-
t1
;
dc
[
3
*
4
+
i
]
=
t3
-
t2
;
t0
=
dc
[
0
*
4
+
i
]
+
dc
[
3
*
4
+
i
];
t1
=
dc
[
1
*
4
+
i
]
+
dc
[
2
*
4
+
i
];
t2
=
dc
[
1
*
4
+
i
]
-
dc
[
2
*
4
+
i
];
t3
=
dc
[
0
*
4
+
i
]
-
dc
[
3
*
4
+
i
];
dc
[
0
*
4
+
i
]
=
t0
+
t1
;
dc
[
1
*
4
+
i
]
=
t3
+
t2
;
dc
[
2
*
4
+
i
]
=
t0
-
t1
;
dc
[
3
*
4
+
i
]
=
t3
-
t2
;
}
for
(
i
=
0
;
i
<
4
;
i
++
)
{
t0
=
dc
[
i
*
4
+
0
]
+
dc
[
i
*
4
+
3
]
+
3
;
// rounding
t1
=
dc
[
i
*
4
+
1
]
+
dc
[
i
*
4
+
2
];
t2
=
dc
[
i
*
4
+
1
]
-
dc
[
i
*
4
+
2
];
t3
=
dc
[
i
*
4
+
0
]
-
dc
[
i
*
4
+
3
]
+
3
;
// rounding
dc
[
i
*
4
+
0
]
=
0
;
dc
[
i
*
4
+
1
]
=
0
;
dc
[
i
*
4
+
2
]
=
0
;
dc
[
i
*
4
+
3
]
=
0
;
t0
=
dc
[
i
*
4
+
0
]
+
dc
[
i
*
4
+
3
]
+
3
;
// rounding
t1
=
dc
[
i
*
4
+
1
]
+
dc
[
i
*
4
+
2
];
t2
=
dc
[
i
*
4
+
1
]
-
dc
[
i
*
4
+
2
];
t3
=
dc
[
i
*
4
+
0
]
-
dc
[
i
*
4
+
3
]
+
3
;
// rounding
dc
[
i
*
4
+
0
]
=
0
;
dc
[
i
*
4
+
1
]
=
0
;
dc
[
i
*
4
+
2
]
=
0
;
dc
[
i
*
4
+
3
]
=
0
;
block
[
i
][
0
][
0
]
=
(
t0
+
t1
)
>>
3
;
block
[
i
][
1
][
0
]
=
(
t3
+
t2
)
>>
3
;
...
...
@@ -75,8 +76,8 @@ static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
}
}
#define MUL_20091(a) ((((a)
*
20091) >> 16) + (a))
#define MUL_35468(a) (((a)
*
35468) >> 16)
#define MUL_20091(a) ((((a)
*
20091) >> 16) + (a))
#define MUL_35468(a) (((a)
*
35468) >> 16)
static
void
vp8_idct_add_c
(
uint8_t
*
dst
,
int16_t
block
[
16
],
ptrdiff_t
stride
)
{
...
...
@@ -84,32 +85,32 @@ static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
int16_t
tmp
[
16
];
for
(
i
=
0
;
i
<
4
;
i
++
)
{
t0
=
block
[
0
*
4
+
i
]
+
block
[
2
*
4
+
i
];
t1
=
block
[
0
*
4
+
i
]
-
block
[
2
*
4
+
i
];
t2
=
MUL_35468
(
block
[
1
*
4
+
i
])
-
MUL_20091
(
block
[
3
*
4
+
i
]);
t3
=
MUL_20091
(
block
[
1
*
4
+
i
])
+
MUL_35468
(
block
[
3
*
4
+
i
]);
block
[
0
*
4
+
i
]
=
0
;
block
[
1
*
4
+
i
]
=
0
;
block
[
2
*
4
+
i
]
=
0
;
block
[
3
*
4
+
i
]
=
0
;
tmp
[
i
*
4
+
0
]
=
t0
+
t3
;
tmp
[
i
*
4
+
1
]
=
t1
+
t2
;
tmp
[
i
*
4
+
2
]
=
t1
-
t2
;
tmp
[
i
*
4
+
3
]
=
t0
-
t3
;
t0
=
block
[
0
*
4
+
i
]
+
block
[
2
*
4
+
i
];
t1
=
block
[
0
*
4
+
i
]
-
block
[
2
*
4
+
i
];
t2
=
MUL_35468
(
block
[
1
*
4
+
i
])
-
MUL_20091
(
block
[
3
*
4
+
i
]);
t3
=
MUL_20091
(
block
[
1
*
4
+
i
])
+
MUL_35468
(
block
[
3
*
4
+
i
]);
block
[
0
*
4
+
i
]
=
0
;
block
[
1
*
4
+
i
]
=
0
;
block
[
2
*
4
+
i
]
=
0
;
block
[
3
*
4
+
i
]
=
0
;
tmp
[
i
*
4
+
0
]
=
t0
+
t3
;
tmp
[
i
*
4
+
1
]
=
t1
+
t2
;
tmp
[
i
*
4
+
2
]
=
t1
-
t2
;
tmp
[
i
*
4
+
3
]
=
t0
-
t3
;
}
for
(
i
=
0
;
i
<
4
;
i
++
)
{
t0
=
tmp
[
0
*
4
+
i
]
+
tmp
[
2
*
4
+
i
];
t1
=
tmp
[
0
*
4
+
i
]
-
tmp
[
2
*
4
+
i
];
t2
=
MUL_35468
(
tmp
[
1
*
4
+
i
])
-
MUL_20091
(
tmp
[
3
*
4
+
i
]);
t3
=
MUL_20091
(
tmp
[
1
*
4
+
i
])
+
MUL_35468
(
tmp
[
3
*
4
+
i
]);
t0
=
tmp
[
0
*
4
+
i
]
+
tmp
[
2
*
4
+
i
];
t1
=
tmp
[
0
*
4
+
i
]
-
tmp
[
2
*
4
+
i
];
t2
=
MUL_35468
(
tmp
[
1
*
4
+
i
])
-
MUL_20091
(
tmp
[
3
*
4
+
i
]);
t3
=
MUL_20091
(
tmp
[
1
*
4
+
i
])
+
MUL_35468
(
tmp
[
3
*
4
+
i
]);
dst
[
0
]
=
av_clip_uint8
(
dst
[
0
]
+
((
t0
+
t3
+
4
)
>>
3
));
dst
[
1
]
=
av_clip_uint8
(
dst
[
1
]
+
((
t1
+
t2
+
4
)
>>
3
));
dst
[
2
]
=
av_clip_uint8
(
dst
[
2
]
+
((
t1
-
t2
+
4
)
>>
3
));
dst
[
3
]
=
av_clip_uint8
(
dst
[
3
]
+
((
t0
-
t3
+
4
)
>>
3
));
dst
+=
stride
;
dst
+=
stride
;
}
}
...
...
@@ -123,46 +124,49 @@ static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
dst
[
1
]
=
av_clip_uint8
(
dst
[
1
]
+
dc
);
dst
[
2
]
=
av_clip_uint8
(
dst
[
2
]
+
dc
);
dst
[
3
]
=
av_clip_uint8
(
dst
[
3
]
+
dc
);
dst
+=
stride
;
dst
+=
stride
;
}
}
static
void
vp8_idct_dc_add4uv_c
(
uint8_t
*
dst
,
int16_t
block
[
4
][
16
],
ptrdiff_t
stride
)
static
void
vp8_idct_dc_add4uv_c
(
uint8_t
*
dst
,
int16_t
block
[
4
][
16
],
ptrdiff_t
stride
)
{
vp8_idct_dc_add_c
(
dst
+
stride
*
0
+
0
,
block
[
0
],
stride
);
vp8_idct_dc_add_c
(
dst
+
stride
*
0
+
4
,
block
[
1
],
stride
);
vp8_idct_dc_add_c
(
dst
+
stride
*
4
+
0
,
block
[
2
],
stride
);
vp8_idct_dc_add_c
(
dst
+
stride
*
4
+
4
,
block
[
3
],
stride
);
vp8_idct_dc_add_c
(
dst
+
stride
*
0
+
0
,
block
[
0
],
stride
);
vp8_idct_dc_add_c
(
dst
+
stride
*
0
+
4
,
block
[
1
],
stride
);
vp8_idct_dc_add_c
(
dst
+
stride
*
4
+
0
,
block
[
2
],
stride
);
vp8_idct_dc_add_c
(
dst
+
stride
*
4
+
4
,
block
[
3
],
stride
);
}
static
void
vp8_idct_dc_add4y_c
(
uint8_t
*
dst
,
int16_t
block
[
4
][
16
],
ptrdiff_t
stride
)
static
void
vp8_idct_dc_add4y_c
(
uint8_t
*
dst
,
int16_t
block
[
4
][
16
],
ptrdiff_t
stride
)
{
vp8_idct_dc_add_c
(
dst
+
0
,
block
[
0
],
stride
);
vp8_idct_dc_add_c
(
dst
+
4
,
block
[
1
],
stride
);
vp8_idct_dc_add_c
(
dst
+
8
,
block
[
2
],
stride
);
vp8_idct_dc_add_c
(
dst
+
12
,
block
[
3
],
stride
);
vp8_idct_dc_add_c
(
dst
+
0
,
block
[
0
],
stride
);
vp8_idct_dc_add_c
(
dst
+
4
,
block
[
1
],
stride
);
vp8_idct_dc_add_c
(
dst
+
8
,
block
[
2
],
stride
);
vp8_idct_dc_add_c
(
dst
+
12
,
block
[
3
],
stride
);
}
// because I like only having two parameters to pass functions...
#define LOAD_PIXELS\
int av_unused p3 = p[-4*stride];\
int av_unused p2 = p[-3*stride];\
int av_unused p1 = p[-2*stride];\
int av_unused p0 = p[-1*stride];\
int av_unused q0 = p[ 0*stride];\
int av_unused q1 = p[ 1*stride];\
int av_unused q2 = p[ 2*stride];\
int av_unused q3 = p[ 3*stride];
#define clip_int8(n) (cm[n+0x80]-0x80)
static
av_always_inline
void
filter_common
(
uint8_t
*
p
,
ptrdiff_t
stride
,
int
is4tap
)
#define LOAD_PIXELS \
int av_unused p3 = p[-4 * stride]; \
int av_unused p2 = p[-3 * stride]; \
int av_unused p1 = p[-2 * stride]; \
int av_unused p0 = p[-1 * stride]; \
int av_unused q0 = p[ 0 * stride]; \
int av_unused q1 = p[ 1 * stride]; \
int av_unused q2 = p[ 2 * stride]; \
int av_unused q3 = p[ 3 * stride];
#define clip_int8(n) (cm[n + 0x80] - 0x80)
static
av_always_inline
void
filter_common
(
uint8_t
*
p
,
ptrdiff_t
stride
,
int
is4tap
)
{
LOAD_PIXELS
int
a
,
f1
,
f2
;
const
uint8_t
*
cm
=
ff_crop_tab
+
MAX_NEG_CROP
;
a
=
3
*
(
q0
-
p0
);
a
=
3
*
(
q0
-
p0
);
if
(
is4tap
)
a
+=
clip_int8
(
p1
-
q1
);
...
...
@@ -171,45 +175,50 @@ static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4
// We deviate from the spec here with c(a+3) >> 3
// since that's what libvpx does.
f1
=
FFMIN
(
a
+
4
,
127
)
>>
3
;
f2
=
FFMIN
(
a
+
3
,
127
)
>>
3
;
f1
=
FFMIN
(
a
+
4
,
127
)
>>
3
;
f2
=
FFMIN
(
a
+
3
,
127
)
>>
3
;
// Despite what the spec says, we do need to clamp here to
// be bitexact with libvpx.
p
[
-
1
*
stride
]
=
cm
[
p0
+
f2
];
p
[
0
*
stride
]
=
cm
[
q0
-
f1
];
p
[
-
1
*
stride
]
=
cm
[
p0
+
f2
];
p
[
0
*
stride
]
=
cm
[
q0
-
f1
];
// only used for _inner on blocks without high edge variance
if
(
!
is4tap
)
{
a
=
(
f1
+
1
)
>>
1
;
p
[
-
2
*
stride
]
=
cm
[
p1
+
a
];
p
[
1
*
stride
]
=
cm
[
q1
-
a
];
a
=
(
f1
+
1
)
>>
1
;
p
[
-
2
*
stride
]
=
cm
[
p1
+
a
];
p
[
1
*
stride
]
=
cm
[
q1
-
a
];
}
}
static
av_always_inline
int
simple_limit
(
uint8_t
*
p
,
ptrdiff_t
stride
,
int
flim
)
{
LOAD_PIXELS
return
2
*
FFABS
(
p0
-
q0
)
+
(
FFABS
(
p1
-
q1
)
>>
1
)
<=
flim
;
return
2
*
FFABS
(
p0
-
q0
)
+
(
FFABS
(
p1
-
q1
)
>>
1
)
<=
flim
;
}
/**
* E - limit at the macroblock edge
* I - limit for interior difference
*/
static
av_always_inline
int
normal_limit
(
uint8_t
*
p
,
ptrdiff_t
stride
,
int
E
,
int
I
)
static
av_always_inline
int
normal_limit
(
uint8_t
*
p
,
ptrdiff_t
stride
,
int
E
,
int
I
)
{
LOAD_PIXELS
return
simple_limit
(
p
,
stride
,
E
)
&&
FFABS
(
p3
-
p2
)
<=
I
&&
FFABS
(
p2
-
p1
)
<=
I
&&
FFABS
(
p1
-
p0
)
<=
I
&&
FFABS
(
q3
-
q2
)
<=
I
&&
FFABS
(
q2
-
q1
)
<=
I
&&
FFABS
(
q1
-
q0
)
<=
I
;
return
simple_limit
(
p
,
stride
,
E
)
&&
FFABS
(
p3
-
p2
)
<=
I
&&
FFABS
(
p2
-
p1
)
<=
I
&&
FFABS
(
p1
-
p0
)
<=
I
&&
FFABS
(
q3
-
q2
)
<=
I
&&
FFABS
(
q2
-
q1
)
<=
I
&&
FFABS
(
q1
-
q0
)
<=
I
;
}
// high edge variance
static
av_always_inline
int
hev
(
uint8_t
*
p
,
ptrdiff_t
stride
,
int
thresh
)
{
LOAD_PIXELS
return
FFABS
(
p1
-
p0
)
>
thresh
||
FFABS
(
q1
-
q0
)
>
thresh
;
return
FFABS
(
p1
-
p0
)
>
thresh
||
FFABS
(
q1
-
q0
)
>
thresh
;
}
static
av_always_inline
void
filter_mbedge
(
uint8_t
*
p
,
ptrdiff_t
stride
)
...
...
@@ -219,67 +228,75 @@ static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
LOAD_PIXELS
w
=
clip_int8
(
p1
-
q1
);
w
=
clip_int8
(
w
+
3
*
(
q0
-
p0
));
w
=
clip_int8
(
p1
-
q1
);
w
=
clip_int8
(
w
+
3
*
(
q0
-
p0
));
a0
=
(
27
*
w
+
63
)
>>
7
;
a1
=
(
18
*
w
+
63
)
>>
7
;
a2
=
(
9
*
w
+
63
)
>>
7
;
a0
=
(
27
*
w
+
63
)
>>
7
;
a1
=
(
18
*
w
+
63
)
>>
7
;
a2
=
(
9
*
w
+
63
)
>>
7
;
p
[
-
3
*
stride
]
=
cm
[
p2
+
a2
];
p
[
-
2
*
stride
]
=
cm
[
p1
+
a1
];
p
[
-
1
*
stride
]
=
cm
[
p0
+
a0
];
p
[
0
*
stride
]
=
cm
[
q0
-
a0
];
p
[
1
*
stride
]
=
cm
[
q1
-
a1
];
p
[
2
*
stride
]
=
cm
[
q2
-
a2
];
p
[
-
3
*
stride
]
=
cm
[
p2
+
a2
];
p
[
-
2
*
stride
]
=
cm
[
p1
+
a1
];
p
[
-
1
*
stride
]
=
cm
[
p0
+
a0
];
p
[
0
*
stride
]
=
cm
[
q0
-
a0
];
p
[
1
*
stride
]
=
cm
[
q1
-
a1
];
p
[
2
*
stride
]
=
cm
[
q2
-
a2
];
}
/* Generates the normal and "inner" loop filters for one direction and
 * size; stridea/strideb select between horizontal and vertical walks. */
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline)                \
static maybe_inline                                                           \
void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst,                  \
                                               ptrdiff_t stride,              \
                                               int flim_E, int flim_I,        \
                                               int hev_thresh)                \
{                                                                             \
    int i;                                                                    \
    for (i = 0; i < size; i++)                                                \
        if (normal_limit(dst + i * stridea, strideb, flim_E, flim_I)) {       \
            if (hev(dst + i * stridea, strideb, hev_thresh))                  \
                filter_common(dst + i * stridea, strideb, 1);                 \
            else                                                              \
                filter_mbedge(dst + i * stridea, strideb);                    \
        }                                                                     \
}                                                                             \
                                                                              \
static maybe_inline                                                           \
void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst,            \
                                                     ptrdiff_t stride,        \
                                                     int flim_E, int flim_I,  \
                                                     int hev_thresh)          \
{                                                                             \
    int i;                                                                    \
    for (i = 0; i < size; i++)                                                \
        if (normal_limit(dst + i * stridea, strideb, flim_E, flim_I)) {       \
            int hv = hev(dst + i * stridea, strideb, hev_thresh);             \
            if (hv)                                                           \
                filter_common(dst + i * stridea, strideb, 1);                 \
            else                                                              \
                filter_common(dst + i * stridea, strideb, 0);                 \
        }                                                                     \
}
LOOP_FILTER(v, 16, 1, stride, )
LOOP_FILTER(h, 16, stride, 1, )

/* Chroma variants run the 8-pixel filter on the U and V planes in turn. */
#define UV_LOOP_FILTER(dir, stridea, strideb)                                 \
LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline)                       \
static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV,    \
                                             ptrdiff_t stride, int fE,        \
                                             int fI, int hev_thresh)          \
{                                                                             \
    vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);         \
    vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);         \
}                                                                             \
                                                                              \
static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU,             \
                                                   uint8_t *dstV,             \
                                                   ptrdiff_t stride, int fE,  \
                                                   int fI, int hev_thresh)    \
{                                                                             \
    vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);   \
    vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);   \
}

UV_LOOP_FILTER(v, 1, stride)
...
...
/* Simple vertical-edge loop filter across a 16-pixel row. */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int i;

    for (i = 0; i < 16; i++)
        if (simple_limit(dst + i, stride, flim))
            filter_common(dst + i, stride, 1);
}
/* Simple horizontal-edge loop filter across a 16-pixel column. */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int i;

    for (i = 0; i < 16; i++)
        if (simple_limit(dst + i * stride, 1, flim))
            filter_common(dst + i * stride, 1, 1);
}
/* Six-tap subpel interpolation coefficients, indexed by (mx or my) - 1;
 * rows 1/3/5 are the true 6-tap filters, the others degenerate to 4 taps. */
static const uint8_t subpel_filters[7][6] = {
    { 0,  6, 123,  12,  1, 0 },
    { 2, 11, 108,  36,  8, 1 },
    { 0,  9,  93,  50,  6, 0 },
    { 3, 16,  77,  77, 16, 3 },
    { 0,  6,  50,  93,  9, 0 },
    { 1,  8,  36, 108, 11, 2 },
    { 0,  1,  12, 123,  6, 0 },
};
/* Straight copy of a WIDTH-wide block, h rows; x/y are unused here but
 * keep the vp8_mc_func signature. */
#define PUT_PIXELS(WIDTH)                                                     \
static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride,  \
                                          uint8_t *src, ptrdiff_t srcstride,  \
                                          int h, int x, int y)                \
{                                                                             \
    int i;                                                                    \
    for (i = 0; i < h; i++, dst += dststride, src += srcstride)               \
        memcpy(dst, src, WIDTH);                                              \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
/* 6-tap and 4-tap FIR kernels; the 64 bias and >>7 round the 7-bit
 * fixed-point sum, and cm clamps the result to [0, 255]. */
#define FILTER_6TAP(src, F, stride)                                           \
    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
        F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] -             \
        F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]

#define FILTER_4TAP(src, F, stride)                                           \
    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
        F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]

/* Horizontal-only subpel interpolation for one block size / tap count. */
#define VP8_EPEL_H(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *filter = subpel_filters[mx - 1];                           \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    int x, y;                                                                 \
    for (y = 0; y < h; y++) {                                                 \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1);                  \
        dst += dststride;                                                     \
        src += srcstride;                                                     \
    }                                                                         \
}
/* Vertical-only subpel interpolation: same kernels as VP8_EPEL_H but
 * stepping by srcstride and selecting the filter from my. */
#define VP8_EPEL_V(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *filter = subpel_filters[my - 1];                           \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    int x, y;                                                                 \
    for (y = 0; y < h; y++) {                                                 \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride);          \
        dst += dststride;                                                     \
        src += srcstride;                                                     \
    }                                                                         \
}
/* Two-pass (horizontal then vertical) subpel interpolation through an
 * intermediate buffer; src is rewound by 2 rows for 6-tap, 1 for 4-tap. */
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)                                       \
static void                                                                   \
put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst,         \
                                                        ptrdiff_t dststride,  \
                                                        uint8_t *src,         \
                                                        ptrdiff_t srcstride,  \
                                                        int h, int mx,        \
                                                        int my)               \
{                                                                             \
    const uint8_t *filter = subpel_filters[mx - 1];                           \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    int x, y;                                                                 \
    uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE];                         \
    uint8_t *tmp = tmp_array;                                                 \
    src -= (2 - (VTAPS == 4)) * srcstride;                                    \
                                                                              \
    for (y = 0; y < h + VTAPS - 1; y++) {                                     \
        for (x = 0; x < SIZE; x++)                                            \
            tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1);                 \
        tmp += SIZE;                                                          \
        src += srcstride;                                                     \
    }                                                                         \
    tmp    = tmp_array + (2 - (VTAPS == 4)) * SIZE;                           \
    filter = subpel_filters[my - 1];                                          \
                                                                              \
    for (y = 0; y < h; y++) {                                                 \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE);              \
        dst += dststride;                                                     \
        tmp += SIZE;                                                          \
    }                                                                         \
}
/* Instantiate every size / tap combination used by the MC tables.
 * NOTE(review): the middle of this list is elided in the diff view and
 * reconstructed from the visible hunk context — verify against upstream. */
VP8_EPEL_H(16, 4)
VP8_EPEL_H(8,  4)
VP8_EPEL_H(4,  4)
VP8_EPEL_H(16, 6)
VP8_EPEL_H(8,  6)
VP8_EPEL_H(4,  6)
VP8_EPEL_V(16, 4)
VP8_EPEL_V(8,  4)
VP8_EPEL_V(4,  4)
VP8_EPEL_V(16, 6)
VP8_EPEL_V(8,  6)
VP8_EPEL_V(4,  6)

VP8_EPEL_HV(16, 4, 4)
VP8_EPEL_HV(8,  4, 4)
VP8_EPEL_HV(4,  4, 4)
VP8_EPEL_HV(16, 4, 6)
VP8_EPEL_HV(8,  4, 6)
VP8_EPEL_HV(4,  4, 6)
VP8_EPEL_HV(16, 6, 4)
VP8_EPEL_HV(8,  6, 4)
VP8_EPEL_HV(4,  6, 4)
VP8_EPEL_HV(16, 6, 6)
VP8_EPEL_HV(8,  6, 6)
VP8_EPEL_HV(4,  6, 6)
/* Bilinear MC: 3-bit fixed-point blends with round-to-nearest (+4 >> 3);
 * the _hv variant filters horizontally into a temp buffer, then vertically. */
#define VP8_BILINEAR(SIZE)                                                    \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
                                             uint8_t *src, ptrdiff_t sstride, \
                                             int h, int mx, int my)           \
{                                                                             \
    int a = 8 - mx, b = mx;                                                   \
    int x, y;                                                                 \
    for (y = 0; y < h; y++) {                                                 \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;                  \
        dst += dstride;                                                       \
        src += sstride;                                                       \
    }                                                                         \
}                                                                             \
                                                                              \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
                                             uint8_t *src, ptrdiff_t sstride, \
                                             int h, int mx, int my)           \
{                                                                             \
    int c = 8 - my, d = my;                                                   \
    int x, y;                                                                 \
    for (y = 0; y < h; y++) {                                                 \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;            \
        dst += dstride;                                                       \
        src += sstride;                                                       \
    }                                                                         \
}                                                                             \
                                                                              \
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst,                   \
                                              ptrdiff_t dstride,              \
                                              uint8_t *src,                   \
                                              ptrdiff_t sstride,              \
                                              int h, int mx, int my)          \
{                                                                             \
    int a = 8 - mx, b = mx;                                                   \
    int c = 8 - my, d = my;                                                   \
    int x, y;                                                                 \
    uint8_t tmp_array[(2 * SIZE + 1) * SIZE];                                 \
    uint8_t *tmp = tmp_array;                                                 \
    for (y = 0; y < h + 1; y++) {                                             \
        for (x = 0; x < SIZE; x++)                                            \
            tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;                  \
        tmp += SIZE;                                                          \
        src += sstride;                                                       \
    }                                                                         \
    tmp = tmp_array;                                                          \
    for (y = 0; y < h; y++) {                                                 \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3;               \
        dst += dstride;                                                       \
        tmp += SIZE;                                                          \
    }                                                                         \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
/* Fills one row of the epel function-pointer table for block size SIZE.
 * Fix: the page rendering had split the '\' line continuations onto their
 * own lines, which terminates the #define after its first line; the macro
 * is reassembled here as one continuous definition. */
#define VP8_MC_FUNC(IDX, SIZE)                                                  \
    dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c;     \
    dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c;    \
    dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c;    \
    dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c;    \
    dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c;  \
    dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c;  \
    dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c;    \
    dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c;  \
    dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE)
\
dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
...
...
libavcodec/vp8dsp.h
View file @
53c20f17
...
...
@@ -30,8 +30,8 @@
#include <stddef.h>
#include <stdint.h>
/* Motion-compensation function pointer: writes an h-row block to dst,
 * reading from src; x/y carry the subpel offsets (mx/my) where used. */
typedef void (*vp8_mc_func)(uint8_t *dst /* align 8 */, ptrdiff_t dstStride,
                            uint8_t *src /* align 1 */, ptrdiff_t srcStride,
                            int h, int x, int y);
typedef
struct
VP8DSPContext
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment