vp8: refactor decoding a single mb_row
libavcodec/vp8.c
/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

static void free_buffers(VP8Context *s)
{
    av_freep(&s->macroblocks_base);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
        return ret;
    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
        f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
    } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
{
    if (f->ref_index[0]) {
        if (prefer_delayed_free) {
            /* Upon a size change, we want to free the maps but other threads may still
             * be using them, so queue them. Upon a seek, all threads are inactive so
             * we want to cache one to prevent re-allocation in the next decoding
             * iteration, but the rest we can free directly. */
            int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
            if (s->num_maps_to_be_freed < max_queued_maps) {
                s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
            } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
                av_free(f->ref_index[0]);
            } /* else: MEMLEAK (should never happen, but better that than crash) */
            f->ref_index[0] = NULL;
        } else /* vp8_decode_free() */ {
            av_free(f->ref_index[0]);
        }
    }
    ff_thread_release_buffer(s->avctx, f);
}

static void vp8_decode_flush_impl(AVCodecContext *avctx,
                                  int prefer_delayed_free, int can_direct_free, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!avctx->internal->is_copy) {
        for (i = 0; i < 5; i++)
            if (s->frames[i].data[0])
                vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem) {
        free_buffers(s);
        s->maps_are_invalid = 1;
    }
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 1, 1, 0);
}

static int update_dimensions(VP8Context *s, int width, int height)
{
    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1, 0, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height*2 + 1) * sizeof(*s->macroblocks));
    s->filter_strength        = av_mallocz(s->mb_width * sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    s->top_nnz                = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border             = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));

    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
        !s->top_nnz || !s->top_border)
        return AVERROR(ENOMEM);

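    /* The macroblock array is laid out so that a macroblock's neighbours can
     * be found with plain pointer arithmetic: row mb_y starts at
     * macroblocks + (mb_height - 1 - mb_y) * 2, so for any macroblock,
     * mb[-1] is its left neighbour, mb[+2] its top neighbour and mb[+1] its
     * top-left neighbour (see decode_mvs() and vp8_decode_mb_row()). The +1
     * offset below reserves one zeroed entry in front of the array so that
     * mb[-1] stays valid for the leftmost macroblock of the lowest-offset row. */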
    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

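/* The token data is split into 1, 2, 4 or 8 coefficient partitions (a 2-bit
 * log2 count is coded in the frame header). The sizes of all but the last
 * partition are stored as 3-byte little-endian values at the start of the
 * remaining buffer; the last partition simply occupies whatever is left.
 * For example, with 4 partitions the size table takes 3 * (4 - 1) = 9 bytes. */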
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

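/* Quantizer reconstruction (roughly section 9.6 of the spec): a 7-bit base AC
 * index is coded per frame, adjusted per segment either absolutely or as a
 * delta, plus 4-bit signed deltas for the luma DC, second-order (Y2) DC/AC
 * and chroma DC/AC indices. Each resulting index is clipped to [0, 127] and
 * mapped through the DC/AC lookup tables; e.g. with yac_qi = 10 and
 * ydc_delta = -2, the luma DC factor is vp8_dc_qlookup[8]. The Y2 factors
 * get extra scaling (x2 for DC, 155/100 with a floor of 8 for AC) and the
 * chroma DC factor is capped at 132. */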
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *   if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *   0: no update
 *   1: VP56_FRAME_PREVIOUS
 *   2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
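/* Long components (magnitude 8-1023) are coded as 10 individually
 * probability-coded bits, read in the order 0, 1, 2, 9, 8, 7, 6, 5, 4 and
 * finally 3. Bit 3 is only present in the bitstream when one of bits 4-9 is
 * set; otherwise it is implicitly 1, since a value below 8 would have been
 * coded with the short tree instead. Example: to code 25 (binary 11001),
 * bits 0-2 give 1, 0, 0, bits 9-4 give 0, 0, 0, 0, 0, 1, and because a high
 * bit is set, bit 3 (here 1) is coded explicitly. A sign bit follows iff
 * the magnitude is nonzero. */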
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1-!!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb  = &mb[2];
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv  = top_mb->bmv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
{
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)\
{\
    VP8Macroblock *edge = mb_edge[n];\
    int edge_ref = edge->ref_frame;\
    if (edge_ref != VP56_FRAME_CURRENT) {\
        uint32_t mv = AV_RN32A(&edge->mv);\
        if (mv) {\
            if (cur_sign_bias != sign_bias[edge_ref]) {\
                /* SWAR negate of the values in mv. */\
                mv = ~mv;\
                mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
            }\
            if (!n || mv != AV_RN32A(&near_mv[idx]))\
                AV_WN32A(&near_mv[++idx], mv);\
            cnt[idx] += 1 + (n != 2);\
        } else\
            cnt[CNT_ZERO] += 1 + (n != 2);\
    }\
}
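
    /* The SWAR negate above flips both packed int16 MV components at once:
     * for each 16-bit lane, ~v + 1 == -v. The low 15 bits are incremented
     * after masking (a carry out of bit 14 lands in the cleared bit 15 and
     * stops there, so it can never spill into the other lane), and the XOR
     * merges the inverted sign bit back in. E.g. lane 0x0001 -> ~ = 0xFFFE,
     * (0x7FFE + 1) ^ 0x8000 = 0xFFFF = -1. */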

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
                           int mb_x, int keyframe)
{
    uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
    if (keyframe) {
        int x, y;
        uint8_t* const top  = s->intra4x4_pred_mode_top + 4 * mb_x;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    s->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb_x, 1);
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb_x, 0);

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

#ifndef decode_block_coeffs_internal
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
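/* Note: the caller (decode_block_coeffs) has already consumed the first
 * end-of-block check, hence the goto skip_eob entry into the loop below.
 * The range coder is also copied into a local struct for the duration of
 * the loop, presumably so the compiler can keep its state in registers
 * across this hot path; token_prob then walks probs[i][ctx], where ctx is
 * 0 after a zero coefficient, 1 after a one, and 2 after anything larger. */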
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = s->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            s->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                s->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);
    if (!simple) {
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {             \
        if (xchg) AV_SWAP64(b,a);       \
        else      AV_COPY64(b,a);       \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
    } else {
        return mb_y ? mode : HOR_PRED8x8;
    }
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8) {
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? DC_127_PRED8x8 : mode;
    case HOR_PRED8x8:
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8 /*TM*/:
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(s->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                nnz = s->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
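/* Indexed by the 1/8-pel fractional MV component (0-7). Row 0 doubles as the
 * MC function index: 0 selects the plain copy, and the even/odd fractional
 * positions select filters needing 2 or 1 extra pixels to the left/top (the
 * odd-position six-tap filters have zero outer taps, so they effectively act
 * as 4-tap filters). E.g. mx = 2 needs 2 left + 3 right extra pixels, i.e.
 * block_w + 5 source columns, which is what rows 1 and 2 encode. */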

/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->data[0];

    if (AV_RN32A(mv)) {

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
                   const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src = s->framep[ref]->data;
        int off = mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off = (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}

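/* s->non_zero_count_cache rows are 4 adjacent uint8_t counts, so a single
 * 32-bit load below fetches the per-block coefficient counts for a whole row
 * of 4x4 blocks at once. nnz4 & ~0x01010101 is nonzero iff any of the four
 * blocks has more than one coefficient; when it is zero, every non-empty
 * block is DC-only and the vp8_idct_dc_add4y/4uv fast path applies. */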
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}

static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}

static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}

static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}

static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, f++, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
    }
}

static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, dst, f++, mb_x, mb_y);
        dst += 16;
    }
}

static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int leave_behind = is_close ? 0 : !s->maps_are_invalid;
    while (s->num_maps_to_be_freed > leave_behind)
        av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
    s->maps_are_invalid = 0;
}

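/* Motion vectors are stored in quarter-pel units, so 64 units = 16 pixels =
 * one macroblock. MARGIN lets clamped MVs reach up to 16 pixels outside the
 * frame; mv_min/mv_max start at the full frame range and are walked down by
 * 64 per macroblock as the row/column loops below advance. */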
#define MARGIN (16 << 2)
static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe,
                              AVFrame *prev_frame, int mb_y)
{
    VP8Context *s = avctx->priv_data;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
    memset(s->left_nnz, 0, sizeof(s->left_nnz));
    AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) // top left edge is also 129
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        /* Prefetch the current frame, 4 MBs ahead */
        s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                       prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, dst, mb);
        } else {
            AV_ZERO64(s->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                s->left_nnz[8]      = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;
    }
    if (s->deblock_filter) {
        if (s->filter.simple)
            filter_mb_row_simple(s, curframe, mb_y);
        else
            filter_mb_row(s, curframe, mb_y);
    }
    s->mv_min.y -= 64;
    s->mv_max.y -= 64;
}

static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, mb_y, i, referenced;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe), *prev_frame;

    release_queued_segmaps(s, 0);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i], 1, 0);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->data[0])
        vp8_release_frame(s, curframe, 1, 0);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decoding on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = vp8_alloc_frame(s, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    if (!s->edge_emu_buffer)
        s->edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));

    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));
    if (s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
            ff_thread_await_progress(prev_frame, mb_y, 0);

        vp8_decode_mb_row(avctx, curframe, prev_frame, mb_y);

        ff_thread_report_progress(curframe, mb_y, 0);
    }

    ff_thread_report_progress(curframe, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        *(AVFrame*)data = *curframe;
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    ff_dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    return 0;
}

static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0, 1, 1);
    release_queued_segmaps(avctx->priv_data, 1);
    return 0;
}

static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;

    return 0;
}

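/* Translate a frame pointer from the source thread's context into the
 * corresponding entry of this thread's frames[] array; both arrays have
 * identical layout, so plain pointer arithmetic is enough. */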
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL

static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->maps_are_invalid = 1;
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}

AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};