Replace deprecated FF_*_TYPE symbols with AV_PICTURE_TYPE_*.
[libav.git] / libavcodec / vp8.c
1/**
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
13a1304b 6 * Copyright (C) 2010 Jason Garrett-Glaser
3b636f21 7 *
2912e87a 8 * This file is part of Libav.
3b636f21 9 *
2912e87a 10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
2912e87a 15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
2912e87a 21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
737eb597 25#include "libavutil/imgutils.h"
3b636f21 26#include "avcodec.h"
bcf4568f 27#include "vp8.h"
3b636f21 28#include "vp8data.h"
29#include "rectangle.h"
30
31#if ARCH_ARM
32# include "arm/vp8.h"
33#endif
34
35static void vp8_decode_flush(AVCodecContext *avctx)
36{
37 VP8Context *s = avctx->priv_data;
38 int i;
39
40 for (i = 0; i < 4; i++)
41 if (s->frames[i].data[0])
42 avctx->release_buffer(avctx, &s->frames[i]);
43 memset(s->framep, 0, sizeof(s->framep));
44
45 av_freep(&s->macroblocks_base);
fca05ea8 46 av_freep(&s->filter_strength);
d2840fa4 47 av_freep(&s->intra4x4_pred_mode_top);
48 av_freep(&s->top_nnz);
49 av_freep(&s->edge_emu_buffer);
9ac831c2 50 av_freep(&s->top_border);
c55e0d34 51 av_freep(&s->segmentation_map);
52
53 s->macroblocks = NULL;
54}
55
56static int update_dimensions(VP8Context *s, int width, int height)
57{
e16f217c 58 if (av_image_check_size(width, height, 0, s->avctx))
59 return AVERROR_INVALIDDATA;
60
61 vp8_decode_flush(s->avctx);
62
63 avcodec_set_dimensions(s->avctx, width, height);
64
65 s->mb_width = (s->avctx->coded_width +15) / 16;
66 s->mb_height = (s->avctx->coded_height+15) / 16;
67
68 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
69 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
ccf13f9e 70 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
3b636f21 71 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
9ac831c2 72 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
aa93c52c 73 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
3b636f21 74
d2840fa4 75 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
c55e0d34 76 !s->top_nnz || !s->top_border || !s->segmentation_map)
77 return AVERROR(ENOMEM);
78
c55e0d34 79 s->macroblocks = s->macroblocks_base + 1;
80
81 return 0;
82}
83
84static void parse_segment_info(VP8Context *s)
85{
86 VP56RangeCoder *c = &s->c;
87 int i;
88
89 s->segmentation.update_map = vp8_rac_get(c);
90
91 if (vp8_rac_get(c)) { // update segment feature data
92 s->segmentation.absolute_vals = vp8_rac_get(c);
93
94 for (i = 0; i < 4; i++)
95 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
96
97 for (i = 0; i < 4; i++)
98 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
99 }
100 if (s->segmentation.update_map)
101 for (i = 0; i < 3; i++)
102 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
103}
104
105static void update_lf_deltas(VP8Context *s)
106{
107 VP56RangeCoder *c = &s->c;
108 int i;
109
110 for (i = 0; i < 4; i++)
111 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
112
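    /* The four loop-filter mode deltas (intra 4x4, zero-MV, inter-MV, split-MV)
     * are stored at their mode values, so filter_level_for_mb() can index
     * lf_delta.mode[mb->mode] directly without a remap. */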
dd18c9a0 113 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
114 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
115}
116
117static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
118{
119 const uint8_t *sizes = buf;
120 int i;
121
122 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
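    /* 1, 2, 4 or 8 coefficient partitions; the sizes of all but the last one are
     * stored in front of the partition data as 3-byte little-endian values, and
     * the last partition simply takes whatever is left of the buffer. */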
123
124 buf += 3*(s->num_coeff_partitions-1);
125 buf_size -= 3*(s->num_coeff_partitions-1);
126 if (buf_size < 0)
127 return -1;
128
129 for (i = 0; i < s->num_coeff_partitions-1; i++) {
06d50ca8 130 int size = AV_RL24(sizes + 3*i);
131 if (buf_size - size < 0)
132 return -1;
133
905ef0d0 134 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
135 buf += size;
136 buf_size -= size;
137 }
905ef0d0 138 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
139
140 return 0;
141}
142
143static void get_quants(VP8Context *s)
144{
145 VP56RangeCoder *c = &s->c;
146 int i, base_qi;
147
148 int yac_qi = vp8_rac_get_uint(c, 7);
149 int ydc_delta = vp8_rac_get_sint(c, 4);
150 int y2dc_delta = vp8_rac_get_sint(c, 4);
151 int y2ac_delta = vp8_rac_get_sint(c, 4);
152 int uvdc_delta = vp8_rac_get_sint(c, 4);
153 int uvac_delta = vp8_rac_get_sint(c, 4);
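    /* The per-segment quantizer index is looked up in the dc/ac dequant tables
     * per plane: the deltas apply to luma DC, the second-order (Y2) DC/AC and
     * chroma DC/AC, while luma AC uses the base index unchanged. */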
154
155 for (i = 0; i < 4; i++) {
156 if (s->segmentation.enabled) {
157 base_qi = s->segmentation.base_quant[i];
158 if (!s->segmentation.absolute_vals)
159 base_qi += yac_qi;
160 } else
161 base_qi = yac_qi;
162
163 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
164 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)];
165 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
166 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
167 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
3b636f21 168 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];
169
170 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
171 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
172 }
173}
174
175/**
176 * Determine which buffers golden and altref should be updated with after this frame.
 177 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
178 *
179 * Intra frames update all 3 references
180 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
181 * If the update (golden|altref) flag is set, it's updated with the current frame
182 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
183 * If the flag is not set, the number read means:
184 * 0: no update
185 * 1: VP56_FRAME_PREVIOUS
186 * 2: update golden with altref, or update altref with golden
187 */
188static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
189{
190 VP56RangeCoder *c = &s->c;
191
192 if (update)
193 return VP56_FRAME_CURRENT;
194
195 switch (vp8_rac_get_uint(c, 2)) {
196 case 1:
197 return VP56_FRAME_PREVIOUS;
198 case 2:
199 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
200 }
201 return VP56_FRAME_NONE;
202}
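/* Example: on an inter frame with the update flag unset, a coded value of 2
 * refreshes golden from altref (or altref from golden), 1 copies from the
 * previous frame, and 0 leaves the reference untouched. */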
203
204static void update_refs(VP8Context *s)
205{
206 VP56RangeCoder *c = &s->c;
207
208 int update_golden = vp8_rac_get(c);
209 int update_altref = vp8_rac_get(c);
210
211 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
212 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
213}
214
215static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
216{
217 VP56RangeCoder *c = &s->c;
370b622a 218 int header_size, hscale, vscale, i, j, k, l, m, ret;
219 int width = s->avctx->width;
220 int height = s->avctx->height;
221
222 s->keyframe = !(buf[0] & 1);
223 s->profile = (buf[0]>>1) & 7;
224 s->invisible = !(buf[0] & 0x10);
06d50ca8 225 header_size = AV_RL24(buf) >> 5;
226 buf += 3;
227 buf_size -= 3;
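    /* 3-byte frame tag: bit 0 = inverse keyframe flag, bits 1-3 = profile,
     * bit 4 = show_frame, bits 5-23 = size of the first (header) partition. */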
228
229 if (s->profile > 3)
230 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
231
232 if (!s->profile)
233 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
234 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
235 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
236
237 if (header_size > buf_size - 7*s->keyframe) {
238 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
239 return AVERROR_INVALIDDATA;
240 }
241
242 if (s->keyframe) {
243 if (AV_RL24(buf) != 0x2a019d) {
244 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
245 return AVERROR_INVALIDDATA;
246 }
247 width = AV_RL16(buf+3) & 0x3fff;
248 height = AV_RL16(buf+5) & 0x3fff;
249 hscale = buf[4] >> 6;
250 vscale = buf[6] >> 6;
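        /* Keyframes carry a 7-byte startcode/size block: the 0x9d012a sync code
         * (read little-endian here as 0x2a019d), then 14-bit width/height with
         * 2-bit upscaling factors that this decoder does not implement. */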
251 buf += 7;
252 buf_size -= 7;
253
254 if (hscale || vscale)
255 av_log_missing_feature(s->avctx, "Upscaling", 1);
256
3b636f21 257 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
258 for (i = 0; i < 4; i++)
259 for (j = 0; j < 16; j++)
260 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
261 sizeof(s->prob->token[i][j]));
262 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
263 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
264 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
265 memset(&s->segmentation, 0, sizeof(s->segmentation));
266 }
267
268 if (!s->macroblocks_base || /* first frame */
269 width != s->avctx->width || height != s->avctx->height) {
 270 if ((ret = update_dimensions(s, width, height)) < 0)
271 return ret;
272 }
273
905ef0d0 274 ff_vp56_init_range_decoder(c, buf, header_size);
275 buf += header_size;
276 buf_size -= header_size;
277
278 if (s->keyframe) {
279 if (vp8_rac_get(c))
280 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
281 vp8_rac_get(c); // whether we can skip clamping in dsp functions
282 }
283
284 if ((s->segmentation.enabled = vp8_rac_get(c)))
285 parse_segment_info(s);
286 else
287 s->segmentation.update_map = 0; // FIXME: move this to some init function?
288
289 s->filter.simple = vp8_rac_get(c);
290 s->filter.level = vp8_rac_get_uint(c, 6);
291 s->filter.sharpness = vp8_rac_get_uint(c, 3);
292
293 if ((s->lf_delta.enabled = vp8_rac_get(c)))
294 if (vp8_rac_get(c))
295 update_lf_deltas(s);
296
297 if (setup_partitions(s, buf, buf_size)) {
298 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
299 return AVERROR_INVALIDDATA;
300 }
301
302 get_quants(s);
303
304 if (!s->keyframe) {
305 update_refs(s);
306 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
307 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
308 }
309
310 // if we aren't saving this frame's probabilities for future frames,
311 // make a copy of the current probabilities
312 if (!(s->update_probabilities = vp8_rac_get(c)))
313 s->prob[1] = s->prob[0];
314
315 s->update_last = s->keyframe || vp8_rac_get(c);
316
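    /* Coefficient token probabilities: each of the 4*8*3*11 entries carries its
     * own update flag; an updated value is copied to every coefficient position
     * that vp8_coeff_band_indexes maps to the same band. */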
317 for (i = 0; i < 4; i++)
318 for (j = 0; j < 8; j++)
319 for (k = 0; k < 3; k++)
320 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
321 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
322 int prob = vp8_rac_get_uint(c, 8);
323 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
324 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
370b622a 325 }
326
327 if ((s->mbskip_enabled = vp8_rac_get(c)))
a8ab0ccc 328 s->prob->mbskip = vp8_rac_get_uint(c, 8);
329
330 if (!s->keyframe) {
331 s->prob->intra = vp8_rac_get_uint(c, 8);
332 s->prob->last = vp8_rac_get_uint(c, 8);
333 s->prob->golden = vp8_rac_get_uint(c, 8);
334
335 if (vp8_rac_get(c))
336 for (i = 0; i < 4; i++)
337 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
338 if (vp8_rac_get(c))
339 for (i = 0; i < 3; i++)
340 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
341
342 // 17.2 MV probability update
343 for (i = 0; i < 2; i++)
344 for (j = 0; j < 19; j++)
7697cdcf 345 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
346 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
347 }
348
349 return 0;
350}
351
7634771e 352static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
3b636f21 353{
354 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
355 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
356}
357
358/**
359 * Motion vector coding, 17.1.
360 */
361static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
362{
ca18a478 363 int bit, x = 0;
3b636f21 364
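    /* p[0] selects bit-by-bit coding of large magnitudes; small values (0-7) are
     * coded with the small_mvtree below, and p[1] codes the sign at the end. */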
7697cdcf 365 if (vp56_rac_get_prob_branchy(c, p[0])) {
366 int i;
367
368 for (i = 0; i < 3; i++)
369 x += vp56_rac_get_prob(c, p[9 + i]) << i;
370 for (i = 9; i > 3; i--)
371 x += vp56_rac_get_prob(c, p[9 + i]) << i;
372 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
373 x += 8;
374 } else {
375 // small_mvtree
376 const uint8_t *ps = p+2;
377 bit = vp56_rac_get_prob(c, *ps);
378 ps += 1 + 3*bit;
379 x += 4*bit;
380 bit = vp56_rac_get_prob(c, *ps);
381 ps += 1 + bit;
382 x += 2*bit;
383 x += vp56_rac_get_prob(c, *ps);
384 }
385
386 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
387}
388
389static av_always_inline
390const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
3b636f21 391{
392 if (left == top)
393 return vp8_submv_prob[4-!!left];
394 if (!top)
3b636f21 395 return vp8_submv_prob[2];
7bf254c4 396 return vp8_submv_prob[1-!!left];
397}
398
399/**
400 * Split motion vector prediction, 16.4.
7ed06b2b 401 * @returns the number of motion vectors parsed (2, 4 or 16)
3b636f21 402 */
403static av_always_inline
404int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
3b636f21 405{
406 int part_idx;
407 int n, num;
c55e0d34 408 VP8Macroblock *top_mb = &mb[2];
409 VP8Macroblock *left_mb = &mb[-1];
410 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
411 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
0908f1b9 412 *mbsplits_cur, *firstidx;
413 VP56mv *top_mv = top_mb->bmv;
414 VP56mv *left_mv = left_mb->bmv;
415 VP56mv *cur_mv = mb->bmv;
3b636f21 416
417 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
418 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
419 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
420 } else {
421 part_idx = VP8_SPLITMVMODE_8x8;
422 }
423 } else {
424 part_idx = VP8_SPLITMVMODE_4x4;
425 }
426
427 num = vp8_mbsplit_count[part_idx];
428 mbsplits_cur = vp8_mbsplits[part_idx],
429 firstidx = vp8_mbfirstidx[part_idx];
430 mb->partitioning = part_idx;
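    /* Only the first 4x4 block of each sub-partition carries a coded MV:
     * vp8_mbfirstidx lists those blocks, while vp8_mbsplits maps every 4x4
     * block to its partition for neighbour-MV prediction. */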
431
3b636f21 432 for (n = 0; n < num; n++) {
7ed06b2b 433 int k = firstidx[n];
7bf254c4 434 uint32_t left, above;
435 const uint8_t *submv_prob;
436
437 if (!(k & 3))
438 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
439 else
440 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
441 if (k <= 3)
442 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
443 else
444 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
445
446 submv_prob = get_submv_prob(left, above);
3b636f21 447
448 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
449 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
450 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
451 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
452 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
453 } else {
454 AV_ZERO32(&mb->bmv[n]);
455 }
456 } else {
457 AV_WN32A(&mb->bmv[n], above);
458 }
459 } else {
7bf254c4 460 AV_WN32A(&mb->bmv[n], left);
3b636f21 461 }
3b636f21 462 }
463
464 return num;
465}
466
414ac27d 467static av_always_inline
468void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
469{
470 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
471 mb - 1 /* left */,
472 mb + 1 /* top-left */ };
473 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
66f608a6 474 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
475 int idx = CNT_ZERO;
476 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1eeca886 477 int8_t *sign_bias = s->sign_bias;
478 VP56mv near_mv[4];
479 uint8_t cnt[4] = { 0 };
480 VP56RangeCoder *c = &s->c;
481
482 AV_ZERO32(&near_mv[0]);
483 AV_ZERO32(&near_mv[1]);
484
485 /* Process MB on top, left and top-left */
486 #define MV_EDGE_CHECK(n)\
487 {\
488 VP8Macroblock *edge = mb_edge[n];\
489 int edge_ref = edge->ref_frame;\
490 if (edge_ref != VP56_FRAME_CURRENT) {\
491 uint32_t mv = AV_RN32A(&edge->mv);\
492 if (mv) {\
493 if (cur_sign_bias != sign_bias[edge_ref]) {\
494 /* SWAR negate of the values in mv. */\
495 mv = ~mv;\
496 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
497 }\
498 if (!n || mv != AV_RN32A(&near_mv[idx]))\
499 AV_WN32A(&near_mv[++idx], mv);\
500 cnt[idx] += 1 + (n != 2);\
501 } else\
502 cnt[CNT_ZERO] += 1 + (n != 2);\
503 }\
504 }
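    /* Survey the top, left and top-left neighbours: distinct non-zero MVs are
     * collected into near_mv[1..3] and weighted 2/2/1 (top/left/top-left) in
     * cnt[], which then drives the mode probability contexts below. */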
505
506 MV_EDGE_CHECK(0)
507 MV_EDGE_CHECK(1)
508 MV_EDGE_CHECK(2)
509
510 mb->partitioning = VP8_SPLITMVMODE_NONE;
511 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
512 mb->mode = VP8_MVMODE_MV;
513
514 /* If we have three distinct MVs, merge first and last if they're the same */
66f608a6 515 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
516 cnt[CNT_NEAREST] += 1;
517
518 /* Swap near and nearest if necessary */
519 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
520 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
521 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
522 }
523
524 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
525 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
526
527 /* Choose the best mv out of 0,0 and the nearest mv */
7634771e 528 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
529 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
530 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
531 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
532
533 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
534 mb->mode = VP8_MVMODE_SPLIT;
535 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
536 } else {
537 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
538 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
539 mb->bmv[0] = mb->mv;
540 }
541 } else {
7634771e 542 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
543 mb->bmv[0] = mb->mv;
544 }
545 } else {
7634771e 546 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
547 mb->bmv[0] = mb->mv;
548 }
549 } else {
550 mb->mode = VP8_MVMODE_ZERO;
551 AV_ZERO32(&mb->mv);
552 mb->bmv[0] = mb->mv;
553 }
554}
555
556static av_always_inline
557void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
558 int mb_x, int keyframe)
3b636f21 559{
d2840fa4 560 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
d1c58fce 561 if (keyframe) {
562 int x, y;
563 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
564 uint8_t* const left = s->intra4x4_pred_mode_left;
565 for (y = 0; y < 4; y++) {
566 for (x = 0; x < 4; x++) {
567 const uint8_t *ctx;
568 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
569 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
570 left[y] = top[x] = *intra4x4;
571 intra4x4++;
3b636f21 572 }
3b636f21 573 }
d1c58fce 574 } else {
d2840fa4 575 int i;
576 for (i = 0; i < 16; i++)
577 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
578 }
579}
580
414ac27d 581static av_always_inline
d2840fa4 582void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
583{
584 VP56RangeCoder *c = &s->c;
585
586 if (s->segmentation.update_map)
c55e0d34 587 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
b9a7186b 588 s->segment = *segment;
3b636f21 589
a8ab0ccc 590 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
591
592 if (s->keyframe) {
593 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
594
595 if (mb->mode == MODE_I4x4) {
596 decode_intra4x4_modes(s, c, mb_x, 1);
597 } else {
598 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
599 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
600 AV_WN32A(s->intra4x4_pred_mode_left, modes);
601 }
602
603 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
604 mb->ref_frame = VP56_FRAME_CURRENT;
a8ab0ccc 605 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
3b636f21 606 // inter MB, 16.2
607 if (vp56_rac_get_prob_branchy(c, s->prob->last))
608 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
609 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
610 else
611 mb->ref_frame = VP56_FRAME_PREVIOUS;
c4211046 612 s->ref_count[mb->ref_frame-1]++;
613
614 // motion vectors, 16.3
f3d09d44 615 decode_mvs(s, mb, mb_x, mb_y);
616 } else {
617 // intra MB, 16.1
618 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
619
158e062c 620 if (mb->mode == MODE_I4x4)
d2840fa4 621 decode_intra4x4_modes(s, c, mb_x, 0);
622
623 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
624 mb->ref_frame = VP56_FRAME_CURRENT;
b946111f 625 mb->partitioning = VP8_SPLITMVMODE_NONE;
14767f35 626 AV_ZERO32(&mb->bmv[0]);
627 }
628}
629
a7878c9f 630#ifndef decode_block_coeffs_internal
3b636f21 631/**
632 * @param c arithmetic bitstream reader context
633 * @param block destination for block coefficients
634 * @param probs probabilities to use when reading trees from the bitstream
635 * @param i initial coeff index, 0 unless a separate DC block is coded
636 * @param zero_nhood the initial prediction context for number of surrounding
637 * all-zero blocks (only left/top, so 0-2)
3fa76268 638 * @param qmul array holding the dc/ac dequant factor at position 0/1
639 * @return 0 if no coeffs were decoded
640 * otherwise, the index of the last coeff decoded plus one
641 */
1e739679 642static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
3efbe137 643 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1e739679 644 int i, uint8_t *token_prob, int16_t qmul[2])
3b636f21 645{
afb54a85 646 goto skip_eob;
fe1b5d97 647 do {
1e739679 648 int coeff;
fe1b5d97 649 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
afb54a85 650 return i;
3b636f21 651
652skip_eob:
653 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
c22b4468 654 if (++i == 16)
afb54a85 655 return i; // invalid input; blocks should end with EOB
370b622a 656 token_prob = probs[i][0];
c22b4468 657 goto skip_eob;
658 }
659
660 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
661 coeff = 1;
370b622a 662 token_prob = probs[i+1][1];
fe1b5d97 663 } else {
fe1b5d97 664 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
476be414 665 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
666 if (coeff)
667 coeff += vp56_rac_get_prob(c, token_prob[5]);
668 coeff += 2;
669 } else {
670 // DCT_CAT*
671 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
672 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
673 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
674 } else { // DCT_CAT2
675 coeff = 7;
676 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
677 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
678 }
679 } else { // DCT_CAT3 and up
680 int a = vp56_rac_get_prob(c, token_prob[8]);
681 int b = vp56_rac_get_prob(c, token_prob[9+a]);
682 int cat = (a<<1) + b;
683 coeff = 3 + (8<<cat);
a7878c9f 684 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
685 }
686 }
370b622a 687 token_prob = probs[i+1][2];
fe1b5d97 688 }
fe1b5d97 689 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
afb54a85 690 } while (++i < 16);
fe1b5d97 691
afb54a85 692 return i;
3b636f21 693}
a7878c9f 694#endif
3b636f21 695
414ac27d 696static av_always_inline
1e739679 697int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
81a13131 698 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
699 int i, int zero_nhood, int16_t qmul[2])
700{
701 uint8_t *token_prob = probs[i][zero_nhood];
702 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
703 return 0;
704 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
705}
706
707static av_always_inline
708void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
709 uint8_t t_nnz[9], uint8_t l_nnz[9])
3b636f21 710{
711 int i, x, y, luma_start = 0, luma_ctx = 3;
712 int nnz_pred, nnz, nnz_total = 0;
b9a7186b 713 int segment = s->segment;
f311208c 714 int block_dc = 0;
3b636f21 715
3b636f21 716 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
717 nnz_pred = t_nnz[8] + l_nnz[8];
718
719 // decode DC values and do hadamard
827d43bb 720 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
721 s->qmat[segment].luma_dc_qmul);
722 l_nnz[8] = t_nnz[8] = !!nnz;
723 if (nnz) {
724 nnz_total += nnz;
725 block_dc = 1;
726 if (nnz == 1)
727 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
728 else
729 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
730 }
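        /* The inverse WHT scatters the 16 second-order DC values into coefficient 0
         * of each luma block, which is why the per-block token decode below starts
         * at index 1 (luma_start) and uses the DC-less context. */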
731 luma_start = 1;
732 luma_ctx = 0;
733 }
734
735 // luma blocks
736 for (y = 0; y < 4; y++)
737 for (x = 0; x < 4; x++) {
ffbf0794 738 nnz_pred = l_nnz[y] + t_nnz[x];
3b636f21 739 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
ffbf0794 740 nnz_pred, s->qmat[segment].luma_qmul);
741 // nnz+block_dc may be one more than the actual last index, but we don't care
742 s->non_zero_count_cache[y][x] = nnz + block_dc;
743 t_nnz[x] = l_nnz[y] = !!nnz;
744 nnz_total += nnz;
745 }
746
747 // chroma blocks
748 // TODO: what to do about dimensions? 2nd dim for luma is x,
749 // but for chroma it's (y<<1)|x
750 for (i = 4; i < 6; i++)
751 for (y = 0; y < 2; y++)
752 for (x = 0; x < 2; x++) {
753 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
754 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
755 nnz_pred, s->qmat[segment].chroma_qmul);
756 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
757 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
758 nnz_total += nnz;
759 }
760
761 // if there were no coded coeffs despite the macroblock not being marked skip,
762 // we MUST not do the inner loop filter and should not do IDCT
763 // Since skip isn't used for bitstream prediction, just manually set it.
764 if (!nnz_total)
765 mb->skip = 1;
766}
767
768static av_always_inline
769void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
770 int linesize, int uvlinesize, int simple)
771{
772 AV_COPY128(top_border, src_y + 15*linesize);
773 if (!simple) {
774 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
775 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
776 }
777}
778
779static av_always_inline
780void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
781 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
782 int simple, int xchg)
783{
784 uint8_t *top_border_m1 = top_border-32; // for TL prediction
785 src_y -= linesize;
786 src_cb -= uvlinesize;
787 src_cr -= uvlinesize;
788
789#define XCHG(a,b,xchg) do { \
790 if (xchg) AV_SWAP64(b,a); \
791 else AV_COPY64(b,a); \
792 } while (0)
793
794 XCHG(top_border_m1+8, src_y-8, xchg);
795 XCHG(top_border, src_y, xchg);
796 XCHG(top_border+8, src_y+8, 1);
070ce7ef 797 if (mb_x < mb_width-1)
9ac831c2 798 XCHG(top_border+32, src_y+16, 1);
070ce7ef 799
800 // only copy chroma for normal loop filter
801 // or to initialize the top row to 127
802 if (!simple || !mb_y) {
803 XCHG(top_border_m1+16, src_cb-8, xchg);
804 XCHG(top_border_m1+24, src_cr-8, xchg);
805 XCHG(top_border+16, src_cb, 1);
806 XCHG(top_border+24, src_cr, 1);
807 }
808}
809
414ac27d 810static av_always_inline
811int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
812{
813 if (!mb_x) {
814 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
815 } else {
816 return mb_y ? mode : LEFT_DC_PRED8x8;
817 }
818}
819
820static av_always_inline
821int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
822{
823 if (!mb_x) {
824 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
825 } else {
826 return mb_y ? mode : HOR_PRED8x8;
827 }
828}
829
830static av_always_inline
831int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
832{
833 if (mode == DC_PRED8x8) {
834 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
835 } else {
836 return mode;
837 }
838}
839
840static av_always_inline
841int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
842{
843 switch (mode) {
844 case DC_PRED8x8:
845 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
846 case VERT_PRED8x8:
847 return !mb_y ? DC_127_PRED8x8 : mode;
848 case HOR_PRED8x8:
849 return !mb_x ? DC_129_PRED8x8 : mode;
850 case PLANE_PRED8x8 /*TM*/:
851 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
852 }
853 return mode;
854}
855
856static av_always_inline
857int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
858{
859 if (!mb_x) {
860 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
861 } else {
862 return mb_y ? mode : HOR_VP8_PRED;
863 }
864}
865
866static av_always_inline
867int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
868{
869 switch (mode) {
870 case VERT_PRED:
871 if (!mb_x && mb_y) {
872 *copy_buf = 1;
873 return mode;
874 }
875 /* fall-through */
876 case DIAG_DOWN_LEFT_PRED:
877 case VERT_LEFT_PRED:
878 return !mb_y ? DC_127_PRED : mode;
879 case HOR_PRED:
880 if (!mb_y) {
881 *copy_buf = 1;
882 return mode;
a71abb71 883 }
884 /* fall-through */
885 case HOR_UP_PRED:
886 return !mb_x ? DC_129_PRED : mode;
887 case TM_VP8_PRED:
888 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
889 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
890 case DIAG_DOWN_RIGHT_PRED:
891 case VERT_RIGHT_PRED:
892 case HOR_DOWN_PRED:
893 if (!mb_y || !mb_x)
894 *copy_buf = 1;
895 return mode;
896 }
897 return mode;
898}
899
900static av_always_inline
901void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
d2840fa4 902 int mb_x, int mb_y)
3b636f21 903{
ee555de7 904 AVCodecContext *avctx = s->avctx;
905 int x, y, mode, nnz, tr;
906
907 // for the first row, we need to run xchg_mb_border to init the top edge to 127
908 // otherwise, skip it if we aren't going to deblock
ee555de7 909 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
910 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
911 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
912 s->filter.simple, 1);
913
3b636f21 914 if (mb->mode < MODE_I4x4) {
915 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
916 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
917 } else {
918 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
919 }
920 s->hpc.pred16x16[mode](dst[0], s->linesize);
921 } else {
922 uint8_t *ptr = dst[0];
d2840fa4 923 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
ee555de7 924 uint8_t tr_top[4] = { 127, 127, 127, 127 };
925
926 // all blocks on the right edge of the macroblock use bottom edge
927 // the top macroblock for their topright edge
928 uint8_t *tr_right = ptr - s->linesize + 16;
929
930 // if we're on the right edge of the frame, said edge is extended
931 // from the top macroblock
932 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
933 mb_x == s->mb_width-1) {
934 tr = tr_right[-1]*0x01010101;
935 tr_right = (uint8_t *)&tr;
936 }
937
938 if (mb->skip)
939 AV_ZERO128(s->non_zero_count_cache);
940
941 for (y = 0; y < 4; y++) {
942 uint8_t *topright = ptr + 4 - s->linesize;
943 for (x = 0; x < 4; x++) {
944 int copy = 0, linesize = s->linesize;
945 uint8_t *dst = ptr+4*x;
946 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
947
948 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
949 topright = tr_top;
950 } else if (x == 3)
951 topright = tr_right;
952
953 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
954 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
955 if (copy) {
956 dst = copy_dst + 12;
957 linesize = 8;
958 if (!(mb_y + y)) {
959 copy_dst[3] = 127U;
9d4bdcb7 960 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
ee555de7 961 } else {
9d4bdcb7 962 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
963 if (!(mb_x + x)) {
964 copy_dst[3] = 129U;
965 } else {
966 copy_dst[3] = ptr[4*x-s->linesize-1];
967 }
968 }
969 if (!(mb_x + x)) {
970 copy_dst[11] =
971 copy_dst[19] =
972 copy_dst[27] =
973 copy_dst[35] = 129U;
974 } else {
975 copy_dst[11] = ptr[4*x -1];
976 copy_dst[19] = ptr[4*x+s->linesize -1];
977 copy_dst[27] = ptr[4*x+s->linesize*2-1];
978 copy_dst[35] = ptr[4*x+s->linesize*3-1];
979 }
980 }
981 } else {
982 mode = intra4x4[x];
983 }
984 s->hpc.pred4x4[mode](dst, topright, linesize);
985 if (copy) {
986 AV_COPY32(ptr+4*x , copy_dst+12);
987 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
988 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
989 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
ee555de7 990 }
991
992 nnz = s->non_zero_count_cache[y][x];
993 if (nnz) {
994 if (nnz == 1)
995 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
996 else
997 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
998 }
999 topright += 4;
1000 }
1001
1002 ptr += 4*s->linesize;
d2840fa4 1003 intra4x4 += 4;
1004 }
1005 }
1006
1007 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1008 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1009 } else {
1010 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1011 }
1012 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1013 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
9ac831c2 1014
ee555de7 1015 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1016 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1017 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1018 s->filter.simple, 0);
1019}
1020
1021static const uint8_t subpel_idx[3][8] = {
1022 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1023 // also function pointer index
1024 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1025 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1026};
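/* Example: mx == 3 (a six-tap position) needs subpel_idx[0][3] = 1 extra pixel
 * to the left and subpel_idx[2][3] = 2 to the right, i.e. subpel_idx[1][3] = 3
 * extra source pixels in total, which is what the edge-emulation checks use. */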
1027
1028/**
1029 * Generic MC function.
1030 *
1031 * @param s VP8 decoding context
1032 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
1033 * @param dst target buffer for block data at block position
1034 * @param src reference picture buffer at origin (0, 0)
1035 * @param mv motion vector (relative to block position) to get pixel data from
1036 * @param x_off horizontal position of block from origin (0, 0)
1037 * @param y_off vertical position of block from origin (0, 0)
1038 * @param block_w width of block (16, 8 or 4)
1039 * @param block_h height of block (always same as block_w)
1040 * @param width width of src/dst plane data
1041 * @param height height of src/dst plane data
1042 * @param linesize size of a single line of plane data, including padding
e394953e 1043 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
3b636f21 1044 */
414ac27d 1045static av_always_inline
1046void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv,
1047 int x_off, int y_off, int block_w, int block_h,
1048 int width, int height, int linesize,
1049 vp8_mc_func mc_func[3][3])
3b636f21 1050{
c0498b30 1051 if (AV_RN32A(mv)) {
1052
1053 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1054 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1055
1056 x_off += mv->x >> 2;
1057 y_off += mv->y >> 2;
1058
1059 // edge emulation
1060 src += y_off * linesize + x_off;
1061 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1062 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
2e279598 1063 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1064 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1065 x_off - mx_idx, y_off - my_idx, width, height);
44002d83 1066 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1067 }
1068 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1069 } else
1070 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1071}
1072
414ac27d 1073static av_always_inline
1074void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1,
1075 uint8_t *src2, const VP56mv *mv, int x_off, int y_off,
1076 int block_w, int block_h, int width, int height, int linesize,
1077 vp8_mc_func mc_func[3][3])
1078{
1079 if (AV_RN32A(mv)) {
1080 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1081 int my = mv->y&7, my_idx = subpel_idx[0][my];
1082
1083 x_off += mv->x >> 3;
1084 y_off += mv->y >> 3;
1085
1086 // edge emulation
1087 src1 += y_off * linesize + x_off;
1088 src2 += y_off * linesize + x_off;
1089 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1090 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1091 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1092 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1093 x_off - mx_idx, y_off - my_idx, width, height);
1094 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1095 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1096
1097 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1098 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1099 x_off - mx_idx, y_off - my_idx, width, height);
1100 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1101 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1102 } else {
1103 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1104 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1105 }
1106 } else {
1107 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1108 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1109 }
1110}
1111
1112static av_always_inline
1113void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1114 AVFrame *ref_frame, int x_off, int y_off,
1115 int bx_off, int by_off,
1116 int block_w, int block_h,
1117 int width, int height, VP56mv *mv)
1118{
1119 VP56mv uvmv = *mv;
1120
1121 /* Y */
1122 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1123 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
1124 block_w, block_h, width, height, s->linesize,
1125 s->put_pixels_tab[block_w == 8]);
1126
1127 /* U/V */
1128 if (s->profile == 3) {
1129 uvmv.x &= ~7;
1130 uvmv.y &= ~7;
1131 }
1132 x_off >>= 1; y_off >>= 1;
1133 bx_off >>= 1; by_off >>= 1;
1134 width >>= 1; height >>= 1;
1135 block_w >>= 1; block_h >>= 1;
1136 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1137 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1],
1138 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
1139 block_w, block_h, width, height, s->uvlinesize,
1140 s->put_pixels_tab[1 + (block_w == 4)]);
1141}
1142
1143/* Fetch pixels for estimated mv 4 macroblocks ahead.
1144 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
414ac27d 1145static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
d864dee8 1146{
1147 /* Don't prefetch refs that haven't been used very often this frame. */
1148 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
c4211046 1149 int x_off = mb_x << 4, y_off = mb_y << 4;
1150 int mx = (mb->mv.x>>2) + x_off + 8;
1151 int my = (mb->mv.y>>2) + y_off;
1152 uint8_t **src= s->framep[ref]->data;
1153 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1154 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1155 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1156 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1157 }
1158}
1159
1160/**
1161 * Apply motion vectors to prediction buffer, chapter 18.
1162 */
1163static av_always_inline
1164void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1165 int mb_x, int mb_y)
1166{
1167 int x_off = mb_x << 4, y_off = mb_y << 4;
1168 int width = 16*s->mb_width, height = 16*s->mb_height;
1169 AVFrame *ref = s->framep[mb->ref_frame];
1170 VP56mv *bmv = mb->bmv;
3b636f21 1171
1172 switch (mb->partitioning) {
1173 case VP8_SPLITMVMODE_NONE:
d292c345 1174 vp8_mc_part(s, dst, ref, x_off, y_off,
7c4dcf81 1175 0, 0, 16, 16, width, height, &mb->mv);
73be29b0 1176 break;
7c4dcf81 1177 case VP8_SPLITMVMODE_4x4: {
3b636f21 1178 int x, y;
7c4dcf81 1179 VP56mv uvmv;
1180
1181 /* Y */
1182 for (y = 0; y < 4; y++) {
1183 for (x = 0; x < 4; x++) {
1184 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1185 ref->data[0], &bmv[4*y + x],
1186 4*x + x_off, 4*y + y_off, 4, 4,
1187 width, height, s->linesize,
1188 s->put_pixels_tab[2]);
1189 }
1190 }
1191
1192 /* U/V */
1193 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1194 for (y = 0; y < 2; y++) {
1195 for (x = 0; x < 2; x++) {
1196 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1197 mb->bmv[ 2*y * 4 + 2*x+1].x +
1198 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1199 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1200 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1201 mb->bmv[ 2*y * 4 + 2*x+1].y +
1202 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1203 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1204 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1205 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
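                /* Each chroma MV is the average of the four corresponding luma MVs;
                 * the (x >> (INT_BIT-1)) term reduces the +2 bias for negative sums
                 * so the >>2 rounds towards zero. */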
1206 if (s->profile == 3) {
1207 uvmv.x &= ~7;
1208 uvmv.y &= ~7;
1209 }
1210 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1211 dst[2] + 4*y*s->uvlinesize + x*4,
1212 ref->data[1], ref->data[2], &uvmv,
1213 4*x + x_off, 4*y + y_off, 4, 4,
1214 width, height, s->uvlinesize,
1215 s->put_pixels_tab[2]);
1216 }
1217 }
1218 break;
1219 }
1220 case VP8_SPLITMVMODE_16x8:
1221 vp8_mc_part(s, dst, ref, x_off, y_off,
1222 0, 0, 16, 8, width, height, &bmv[0]);
1223 vp8_mc_part(s, dst, ref, x_off, y_off,
1224 0, 8, 16, 8, width, height, &bmv[1]);
1225 break;
1226 case VP8_SPLITMVMODE_8x16:
1227 vp8_mc_part(s, dst, ref, x_off, y_off,
1228 0, 0, 8, 16, width, height, &bmv[0]);
1229 vp8_mc_part(s, dst, ref, x_off, y_off,
1230 8, 0, 8, 16, width, height, &bmv[1]);
1231 break;
1232 case VP8_SPLITMVMODE_8x8:
1233 vp8_mc_part(s, dst, ref, x_off, y_off,
1234 0, 0, 8, 8, width, height, &bmv[0]);
1235 vp8_mc_part(s, dst, ref, x_off, y_off,
1236 8, 0, 8, 8, width, height, &bmv[1]);
1237 vp8_mc_part(s, dst, ref, x_off, y_off,
1238 0, 8, 8, 8, width, height, &bmv[2]);
1239 vp8_mc_part(s, dst, ref, x_off, y_off,
1240 8, 8, 8, 8, width, height, &bmv[3]);
7c4dcf81 1241 break;
1242 }
1243}
1244
414ac27d 1245static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
3b636f21 1246{
3df56f41 1247 int x, y, ch;
3b636f21 1248
1249 if (mb->mode != MODE_I4x4) {
1250 uint8_t *y_dst = dst[0];
3b636f21 1251 for (y = 0; y < 4; y++) {
62457f90 1252 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
1253 if (nnz4) {
1254 if (nnz4&~0x01010101) {
8a467b2d 1255 for (x = 0; x < 4; x++) {
1256 if ((uint8_t)nnz4 == 1)
1257 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1258 else if((uint8_t)nnz4 > 1)
1259 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1260 nnz4 >>= 8;
1261 if (!nnz4)
1262 break;
1263 }
1264 } else {
3ae079a3 1265 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1266 }
1267 }
1268 y_dst += 4*s->linesize;
1269 }
8a467b2d 1270 }
3b636f21 1271
8a467b2d 1272 for (ch = 0; ch < 2; ch++) {
62457f90 1273 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
3ae079a3 1274 if (nnz4) {
8a467b2d 1275 uint8_t *ch_dst = dst[1+ch];
1276 if (nnz4&~0x01010101) {
1277 for (y = 0; y < 2; y++) {
1278 for (x = 0; x < 2; x++) {
1279 if ((uint8_t)nnz4 == 1)
1280 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1281 else if((uint8_t)nnz4 > 1)
1282 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1283 nnz4 >>= 8;
1284 if (!nnz4)
628b48db 1285 goto chroma_idct_end;
8a467b2d 1286 }
3ae079a3 1287 ch_dst += 4*s->uvlinesize;
8a467b2d 1288 }
1289 } else {
1290 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
1291 }
1292 }
628b48db 1293chroma_idct_end: ;
1294 }
1295}
1296
414ac27d 1297static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1298{
1299 int interior_limit, filter_level;
1300
1301 if (s->segmentation.enabled) {
b9a7186b 1302 filter_level = s->segmentation.filter_level[s->segment];
1303 if (!s->segmentation.absolute_vals)
1304 filter_level += s->filter.level;
1305 } else
1306 filter_level = s->filter.level;
1307
1308 if (s->lf_delta.enabled) {
1309 filter_level += s->lf_delta.ref[mb->ref_frame];
dd18c9a0 1310 filter_level += s->lf_delta.mode[mb->mode];
3b636f21 1311 }
1312
1313/* Like av_clip for inputs 0 and max, where max is equal to (2^n-1) */
1314#define POW2CLIP(x,max) (((x) & ~max) ? (-(x))>>31 & max : (x));
1315 filter_level = POW2CLIP(filter_level, 63);
1316
1317 interior_limit = filter_level;
1318 if (s->filter.sharpness) {
8a2c99b4 1319 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1320 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1321 }
1322 interior_limit = FFMAX(interior_limit, 1);
1323
1324 f->filter_level = filter_level;
1325 f->inner_limit = interior_limit;
c55e0d34 1326 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1327}
1328
414ac27d 1329static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1330{
1331 int mbedge_lim, bedge_lim, hev_thresh;
1332 int filter_level = f->filter_level;
1333 int inner_limit = f->inner_limit;
c55e0d34 1334 int inner_filter = f->inner_filter;
1335 int linesize = s->linesize;
1336 int uvlinesize = s->uvlinesize;
1337 static const uint8_t hev_thresh_lut[2][64] = {
1338 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1339 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1340 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1341 3, 3, 3, 3 },
1342 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1344 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1345 2, 2, 2, 2 }
1346 };
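    /* High-edge-variance threshold as a function of the loop-filter level;
     * keyframes (first row) use slightly larger thresholds than inter frames. */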
3b636f21 1347
1348 if (!filter_level)
1349 return;
1350
1351 bedge_lim = 2*filter_level + inner_limit;
1352 mbedge_lim = bedge_lim + 4;
968570d6 1353
79dec154 1354 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
5245c04d 1355
3b636f21 1356 if (mb_x) {
145d3186 1357 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
3facfc99 1358 mbedge_lim, inner_limit, hev_thresh);
145d3186 1359 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1360 mbedge_lim, inner_limit, hev_thresh);
1361 }
1362
c55e0d34 1363 if (inner_filter) {
1364 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1365 inner_limit, hev_thresh);
1366 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1367 inner_limit, hev_thresh);
1368 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1369 inner_limit, hev_thresh);
1370 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1371 uvlinesize, bedge_lim,
1372 inner_limit, hev_thresh);
1373 }
1374
1375 if (mb_y) {
145d3186 1376 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
3facfc99 1377 mbedge_lim, inner_limit, hev_thresh);
145d3186 1378 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1379 mbedge_lim, inner_limit, hev_thresh);
1380 }
1381
c55e0d34 1382 if (inner_filter) {
1383 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1384 linesize, bedge_lim,
1385 inner_limit, hev_thresh);
1386 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1387 linesize, bedge_lim,
1388 inner_limit, hev_thresh);
1389 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1390 linesize, bedge_lim,
1391 inner_limit, hev_thresh);
1392 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1393 dst[2] + 4 * uvlinesize,
1394 uvlinesize, bedge_lim,
3facfc99 1395 inner_limit, hev_thresh);
1396 }
1397}
1398
414ac27d 1399static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1400{
1401 int mbedge_lim, bedge_lim;
1402 int filter_level = f->filter_level;
1403 int inner_limit = f->inner_limit;
c55e0d34 1404 int inner_filter = f->inner_filter;
145d3186 1405 int linesize = s->linesize;
3b636f21 1406
1407 if (!filter_level)
1408 return;
1409
1410 bedge_lim = 2*filter_level + inner_limit;
1411 mbedge_lim = bedge_lim + 4;
1412
1413 if (mb_x)
145d3186 1414 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1415 if (inner_filter) {
1416 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1417 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1418 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1419 }
1420
1421 if (mb_y)
145d3186 1422 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1423 if (inner_filter) {
1424 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1425 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1426 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1427 }
1428}
1429
1430static void filter_mb_row(VP8Context *s, int mb_y)
1431{
968570d6 1432 VP8FilterStrength *f = s->filter_strength;
1433 uint8_t *dst[3] = {
1434 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
1435 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize,
1436 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize
1437 };
1438 int mb_x;
1439
1440 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1441 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
c55e0d34 1442 filter_mb(s, dst, f++, mb_x, mb_y);
1443 dst[0] += 16;
1444 dst[1] += 8;
1445 dst[2] += 8;
1446 }
1447}
1448
1449static void filter_mb_row_simple(VP8Context *s, int mb_y)
1450{
968570d6 1451 VP8FilterStrength *f = s->filter_strength;
968570d6 1452 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
1453 int mb_x;
1454
1455 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1456 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
c55e0d34 1457 filter_mb_simple(s, dst, f++, mb_x, mb_y);
1458 dst += 16;
1459 }
1460}
1461
1462static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1463 AVPacket *avpkt)
1464{
1465 VP8Context *s = avctx->priv_data;
1466 int ret, mb_x, mb_y, i, y, referenced;
1467 enum AVDiscard skip_thresh;
28e241de 1468 AVFrame *av_uninit(curframe);
1469
1470 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1471 return ret;
1472
1473 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1474 || s->update_altref == VP56_FRAME_CURRENT;
1475
1476 skip_thresh = !referenced ? AVDISCARD_NONREF :
1477 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1478
1479 if (avctx->skip_frame >= skip_thresh) {
1480 s->invisible = 1;
1481 goto skip_decode;
1482 }
9ac831c2 1483 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1484
1485 for (i = 0; i < 4; i++)
1486 if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1487 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1488 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1489 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1490 break;
1491 }
1492 if (curframe->data[0])
1493 avctx->release_buffer(avctx, curframe);
1494
1495 curframe->key_frame = s->keyframe;
975a1447 1496 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1497 curframe->reference = referenced ? 3 : 0;
1498 if ((ret = avctx->get_buffer(avctx, curframe))) {
1499 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1500 return ret;
1501 }
1502
1503 // Given that arithmetic probabilities are updated every frame, it's quite likely
1504 // that the values we have on a random interframe are complete junk if we didn't
1505 // start decode on a keyframe. So just don't display anything rather than junk.
1506 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1507 !s->framep[VP56_FRAME_GOLDEN] ||
1508 !s->framep[VP56_FRAME_GOLDEN2])) {
1509 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1510 return AVERROR_INVALIDDATA;
1511 }
1512
1513 s->linesize = curframe->linesize[0];
1514 s->uvlinesize = curframe->linesize[1];
1515
1516 if (!s->edge_emu_buffer)
1517 s->edge_emu_buffer = av_malloc(21*s->linesize);
1518
1519 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1520
1521 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1522 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
c55e0d34 1523
3b636f21 1524 // top edge of 127 for intra prediction
1525 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1526 s->top_border[0][15] = s->top_border[0][23] = 127;
1527 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1528 }
c4211046 1529 memset(s->ref_count, 0, sizeof(s->ref_count));
d2840fa4 1530 if (s->keyframe)
ccf13f9e 1531 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
3b636f21 1532
1533 #define MARGIN (16 << 2)
1534 s->mv_min.y = -MARGIN;
1535 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1536
1537 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1538 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
c55e0d34 1539 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
aa93c52c 1540 int mb_xy = mb_y*s->mb_width;
1541 uint8_t *dst[3] = {
1542 curframe->data[0] + 16*mb_y*s->linesize,
1543 curframe->data[1] + 8*mb_y*s->uvlinesize,
1544 curframe->data[2] + 8*mb_y*s->uvlinesize
1545 };
1546
aa93c52c 1547 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
3b636f21 1548 memset(s->left_nnz, 0, sizeof(s->left_nnz));
d2840fa4 1549 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1550
1551 // left edge of 129 for intra prediction
ee555de7 1552 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1553 for (i = 0; i < 3; i++)
1554 for (y = 0; y < 16>>!!i; y++)
1555 dst[i][y*curframe->linesize[i]-1] = 129;
1556 if (mb_y == 1) // top left edge is also 129
1557 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1558 }
3b636f21 1559
1560 s->mv_min.x = -MARGIN;
1561 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1562
ef38842f 1563 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1564 /* Prefetch the current frame, 4 MBs ahead */
1565 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1566 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1567
aa93c52c 1568 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);
3b636f21 1569
ef38842f 1570 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
c4211046 1571
1572 if (!mb->skip)
1573 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
3b636f21 1574
b946111f 1575 if (mb->mode <= MODE_I4x4)
d2840fa4 1576 intra_predict(s, dst, mb, mb_x, mb_y);
b946111f 1577 else
3b636f21 1578 inter_predict(s, dst, mb, mb_x, mb_y);
3b636f21 1579
ef38842f 1580 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
c4211046 1581
3b636f21 1582 if (!mb->skip) {
8a467b2d 1583 idct_mb(s, dst, mb);
1584 } else {
1585 AV_ZERO64(s->left_nnz);
1586 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1587
1588 // Reset DC block predictors if they would exist if the mb had coefficients
1589 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1590 s->left_nnz[8] = 0;
1591 s->top_nnz[mb_x][8] = 0;
1592 }
1593 }
1594
1595 if (s->deblock_filter)
1596 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1597
ef38842f 1598 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
c4211046 1599
1600 dst[0] += 16;
1601 dst[1] += 8;
1602 dst[2] += 8;
1603 s->mv_min.x -= 64;
1604 s->mv_max.x -= 64;
3b636f21 1605 }
9ac831c2 1606 if (s->deblock_filter) {
3b636f21 1607 if (s->filter.simple)
9ac831c2 1608 filter_mb_row_simple(s, mb_y);
3b636f21 1609 else
9ac831c2 1610 filter_mb_row(s, mb_y);
3b636f21 1611 }
1612 s->mv_min.y -= 64;
1613 s->mv_max.y -= 64;
3b636f21 1614 }
1615
1616skip_decode:
1617 // if future frames don't use the updated probabilities,
1618 // reset them to the values we saved
1619 if (!s->update_probabilities)
1620 s->prob[0] = s->prob[1];
1621
1622 // check if golden and altref are swapped
1623 if (s->update_altref == VP56_FRAME_GOLDEN &&
1624 s->update_golden == VP56_FRAME_GOLDEN2)
1625 FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
1626 else {
1627 if (s->update_altref != VP56_FRAME_NONE)
1628 s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1629
1630 if (s->update_golden != VP56_FRAME_NONE)
1631 s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1632 }
1633
1634 if (s->update_last) // move cur->prev
1635 s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];
1636
1637 // release no longer referenced frames
1638 for (i = 0; i < 4; i++)
1639 if (s->frames[i].data[0] &&
1640 &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
1641 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1642 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1643 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1644 avctx->release_buffer(avctx, &s->frames[i]);
1645
1646 if (!s->invisible) {
1647 *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
1648 *data_size = sizeof(AVFrame);
1649 }
1650
1651 return avpkt->size;
1652}
1653
1654static av_cold int vp8_decode_init(AVCodecContext *avctx)
1655{
1656 VP8Context *s = avctx->priv_data;
1657
1658 s->avctx = avctx;
1659 avctx->pix_fmt = PIX_FMT_YUV420P;
1660
1661 dsputil_init(&s->dsp, avctx);
1662 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
1663 ff_vp8dsp_init(&s->vp8dsp);
1664
1665 return 0;
1666}
1667
1668static av_cold int vp8_decode_free(AVCodecContext *avctx)
1669{
1670 vp8_decode_flush(avctx);
1671 return 0;
1672}
1673
d36beb3f 1674AVCodec ff_vp8_decoder = {
1675 "vp8",
1676 AVMEDIA_TYPE_VIDEO,
1677 CODEC_ID_VP8,
1678 sizeof(VP8Context),
1679 vp8_decode_init,
1680 NULL,
1681 vp8_decode_free,
1682 vp8_decode_frame,
1683 CODEC_CAP_DR1,
1684 .flush = vp8_decode_flush,
1685 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1686};