vp8: move data from VP8Context->VP8Macroblock
libavcodec/vp8.c
1 /*
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 *
8 * This file is part of Libav.
9 *
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include "libavutil/imgutils.h"
26 #include "avcodec.h"
27 #include "internal.h"
28 #include "vp8.h"
29 #include "vp8data.h"
30 #include "rectangle.h"
31 #include "thread.h"
32
33 #if ARCH_ARM
34 # include "arm/vp8.h"
35 #endif
36
37 static void free_buffers(VP8Context *s)
38 {
39 av_freep(&s->macroblocks_base);
40 av_freep(&s->filter_strength);
41 av_freep(&s->intra4x4_pred_mode_top);
42 av_freep(&s->top_nnz);
43 av_freep(&s->edge_emu_buffer);
44 av_freep(&s->top_border);
45
46 s->macroblocks = NULL;
47 }
48
49 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
50 {
51 int ret;
52 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
53 return ret;
54 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
55 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
56 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
57 ff_thread_release_buffer(s->avctx, f);
58 return AVERROR(ENOMEM);
59 }
60 return 0;
61 }
62
63 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
64 {
65 if (f->ref_index[0]) {
66 if (prefer_delayed_free) {
67 /* Upon a size change, we want to free the maps but other threads may still
68 * be using them, so queue them. Upon a seek, all threads are inactive so
69 * we want to cache one to prevent re-allocation in the next decoding
70 * iteration, but the rest we can free directly. */
71 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
72 if (s->num_maps_to_be_freed < max_queued_maps) {
73 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
74 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
75 av_free(f->ref_index[0]);
76 } /* else: MEMLEAK (should never happen, but better that than crash) */
77 f->ref_index[0] = NULL;
78 } else /* vp8_decode_free() */ {
79 av_free(f->ref_index[0]);
80 }
81 }
82 ff_thread_release_buffer(s->avctx, f);
83 }
84
85 static void vp8_decode_flush_impl(AVCodecContext *avctx,
86 int prefer_delayed_free, int can_direct_free, int free_mem)
87 {
88 VP8Context *s = avctx->priv_data;
89 int i;
90
91 if (!avctx->internal->is_copy) {
92 for (i = 0; i < 5; i++)
93 if (s->frames[i].data[0])
94 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
95 }
96 memset(s->framep, 0, sizeof(s->framep));
97
98 if (free_mem) {
99 free_buffers(s);
100 s->maps_are_invalid = 1;
101 }
102 }
103
104 static void vp8_decode_flush(AVCodecContext *avctx)
105 {
106 vp8_decode_flush_impl(avctx, 1, 1, 0);
107 }
108
109 static int update_dimensions(VP8Context *s, int width, int height)
110 {
111 if (width != s->avctx->width ||
112 height != s->avctx->height) {
113 if (av_image_check_size(width, height, 0, s->avctx))
114 return AVERROR_INVALIDDATA;
115
116 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
117
118 avcodec_set_dimensions(s->avctx, width, height);
119 }
120
121 s->mb_width = (s->avctx->coded_width +15) / 16;
122 s->mb_height = (s->avctx->coded_height+15) / 16;
123
124 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
125 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
126 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
127 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
128 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
129
130 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
131 !s->top_nnz || !s->top_border)
132 return AVERROR(ENOMEM);
133
134 s->macroblocks = s->macroblocks_base + 1;
135
136 return 0;
137 }
138
139 static void parse_segment_info(VP8Context *s)
140 {
141 VP56RangeCoder *c = &s->c;
142 int i;
143
144 s->segmentation.update_map = vp8_rac_get(c);
145
146 if (vp8_rac_get(c)) { // update segment feature data
147 s->segmentation.absolute_vals = vp8_rac_get(c);
148
149 for (i = 0; i < 4; i++)
150 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
151
152 for (i = 0; i < 4; i++)
153 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
154 }
155 if (s->segmentation.update_map)
156 for (i = 0; i < 3; i++)
157 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
158 }
159
160 static void update_lf_deltas(VP8Context *s)
161 {
162 VP56RangeCoder *c = &s->c;
163 int i;
164
165 for (i = 0; i < 4; i++) {
166 if (vp8_rac_get(c)) {
167 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
168
169 if (vp8_rac_get(c))
170 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
171 }
172 }
173
174 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
175 if (vp8_rac_get(c)) {
176 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
177
178 if (vp8_rac_get(c))
179 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
180 }
181 }
182 }
183
184 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
185 {
186 const uint8_t *sizes = buf;
187 int i;
188
189 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
190
191 buf += 3*(s->num_coeff_partitions-1);
192 buf_size -= 3*(s->num_coeff_partitions-1);
193 if (buf_size < 0)
194 return -1;
195
196 for (i = 0; i < s->num_coeff_partitions-1; i++) {
197 int size = AV_RL24(sizes + 3*i);
198 if (buf_size - size < 0)
199 return -1;
200
201 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
202 buf += size;
203 buf_size -= size;
204 }
205 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
206
207 return 0;
208 }
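
/* Layout sketch (informative, not from the source itself): with
 * num_coeff_partitions == 4 the data parsed above looks like
 *
 *   | size0 | size1 | size2 | partition0 | partition1 | partition2 | partition3 |
 *     3B LE   3B LE   3B LE   size0 B      size1 B      size2 B      remainder
 *
 * i.e. only the first N-1 partition sizes are coded; the last partition
 * simply takes whatever is left of buf_size. */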
209
210 static void get_quants(VP8Context *s)
211 {
212 VP56RangeCoder *c = &s->c;
213 int i, base_qi;
214
215 int yac_qi = vp8_rac_get_uint(c, 7);
216 int ydc_delta = vp8_rac_get_sint(c, 4);
217 int y2dc_delta = vp8_rac_get_sint(c, 4);
218 int y2ac_delta = vp8_rac_get_sint(c, 4);
219 int uvdc_delta = vp8_rac_get_sint(c, 4);
220 int uvac_delta = vp8_rac_get_sint(c, 4);
221
222 for (i = 0; i < 4; i++) {
223 if (s->segmentation.enabled) {
224 base_qi = s->segmentation.base_quant[i];
225 if (!s->segmentation.absolute_vals)
226 base_qi += yac_qi;
227 } else
228 base_qi = yac_qi;
229
230 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
231 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
232 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
233 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
234 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
235 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
236
237 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
238 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
239 }
240 }
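
/* Worked example with illustrative values: segmentation disabled,
 * yac_qi = 60, ydc_delta = -2 gives, for every segment i,
 *   luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(60 - 2, 7)]  // index 58
 *   luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(60,     7)]  // index 60
 * The deltas shift the table index, and av_clip_uintp2(x, 7) clamps it to
 * [0, 127] before the lookup. */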
241
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, that buffer is updated with
 * the current frame.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: update with VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
255 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
256 {
257 VP56RangeCoder *c = &s->c;
258
259 if (update)
260 return VP56_FRAME_CURRENT;
261
262 switch (vp8_rac_get_uint(c, 2)) {
263 case 1:
264 return VP56_FRAME_PREVIOUS;
265 case 2:
266 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
267 }
268 return VP56_FRAME_NONE;
269 }
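
/* Example of the mapping (informative): for golden with its update flag
 * unset, reading 2 yields VP56_FRAME_GOLDEN2, i.e. "refresh golden from the
 * current altref"; for altref the same value yields VP56_FRAME_GOLDEN. */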
270
271 static void update_refs(VP8Context *s)
272 {
273 VP56RangeCoder *c = &s->c;
274
275 int update_golden = vp8_rac_get(c);
276 int update_altref = vp8_rac_get(c);
277
278 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
279 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
280 }
281
282 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
283 {
284 VP56RangeCoder *c = &s->c;
285 int header_size, hscale, vscale, i, j, k, l, m, ret;
286 int width = s->avctx->width;
287 int height = s->avctx->height;
288
289 s->keyframe = !(buf[0] & 1);
290 s->profile = (buf[0]>>1) & 7;
291 s->invisible = !(buf[0] & 0x10);
292 header_size = AV_RL24(buf) >> 5;
293 buf += 3;
294 buf_size -= 3;
295
296 if (s->profile > 3)
297 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
298
299 if (!s->profile)
300 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
else // profiles 1-3 use bilinear; 4+ aren't defined, so use bilinear for them too
302 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
303
304 if (header_size > buf_size - 7*s->keyframe) {
305 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
306 return AVERROR_INVALIDDATA;
307 }
308
309 if (s->keyframe) {
310 if (AV_RL24(buf) != 0x2a019d) {
311 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
312 return AVERROR_INVALIDDATA;
313 }
314 width = AV_RL16(buf+3) & 0x3fff;
315 height = AV_RL16(buf+5) & 0x3fff;
316 hscale = buf[4] >> 6;
317 vscale = buf[6] >> 6;
318 buf += 7;
319 buf_size -= 7;
320
321 if (hscale || vscale)
322 av_log_missing_feature(s->avctx, "Upscaling", 1);
323
324 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
325 for (i = 0; i < 4; i++)
326 for (j = 0; j < 16; j++)
327 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
328 sizeof(s->prob->token[i][j]));
329 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
330 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
331 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
332 memset(&s->segmentation, 0, sizeof(s->segmentation));
333 }
334
335 if (!s->macroblocks_base || /* first frame */
336 width != s->avctx->width || height != s->avctx->height) {
337 if ((ret = update_dimensions(s, width, height)) < 0)
338 return ret;
339 }
340
341 ff_vp56_init_range_decoder(c, buf, header_size);
342 buf += header_size;
343 buf_size -= header_size;
344
345 if (s->keyframe) {
346 if (vp8_rac_get(c))
347 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
348 vp8_rac_get(c); // whether we can skip clamping in dsp functions
349 }
350
351 if ((s->segmentation.enabled = vp8_rac_get(c)))
352 parse_segment_info(s);
353 else
354 s->segmentation.update_map = 0; // FIXME: move this to some init function?
355
356 s->filter.simple = vp8_rac_get(c);
357 s->filter.level = vp8_rac_get_uint(c, 6);
358 s->filter.sharpness = vp8_rac_get_uint(c, 3);
359
360 if ((s->lf_delta.enabled = vp8_rac_get(c)))
361 if (vp8_rac_get(c))
362 update_lf_deltas(s);
363
364 if (setup_partitions(s, buf, buf_size)) {
365 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
366 return AVERROR_INVALIDDATA;
367 }
368
369 get_quants(s);
370
371 if (!s->keyframe) {
372 update_refs(s);
373 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
374 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
375 }
376
377 // if we aren't saving this frame's probabilities for future frames,
378 // make a copy of the current probabilities
379 if (!(s->update_probabilities = vp8_rac_get(c)))
380 s->prob[1] = s->prob[0];
381
382 s->update_last = s->keyframe || vp8_rac_get(c);
383
384 for (i = 0; i < 4; i++)
385 for (j = 0; j < 8; j++)
386 for (k = 0; k < 3; k++)
387 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
388 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
389 int prob = vp8_rac_get_uint(c, 8);
390 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
391 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
392 }
393
394 if ((s->mbskip_enabled = vp8_rac_get(c)))
395 s->prob->mbskip = vp8_rac_get_uint(c, 8);
396
397 if (!s->keyframe) {
398 s->prob->intra = vp8_rac_get_uint(c, 8);
399 s->prob->last = vp8_rac_get_uint(c, 8);
400 s->prob->golden = vp8_rac_get_uint(c, 8);
401
402 if (vp8_rac_get(c))
403 for (i = 0; i < 4; i++)
404 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
405 if (vp8_rac_get(c))
406 for (i = 0; i < 3; i++)
407 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
408
409 // 17.2 MV probability update
410 for (i = 0; i < 2; i++)
411 for (j = 0; j < 19; j++)
412 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
413 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
414 }
415
416 return 0;
417 }
418
419 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
420 {
421 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
422 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
423 }
424
425 /**
426 * Motion vector coding, 17.1.
427 */
428 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
429 {
430 int bit, x = 0;
431
432 if (vp56_rac_get_prob_branchy(c, p[0])) {
433 int i;
434
435 for (i = 0; i < 3; i++)
436 x += vp56_rac_get_prob(c, p[9 + i]) << i;
437 for (i = 9; i > 3; i--)
438 x += vp56_rac_get_prob(c, p[9 + i]) << i;
439 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
440 x += 8;
441 } else {
442 // small_mvtree
443 const uint8_t *ps = p+2;
444 bit = vp56_rac_get_prob(c, *ps);
445 ps += 1 + 3*bit;
446 x += 4*bit;
447 bit = vp56_rac_get_prob(c, *ps);
448 ps += 1 + bit;
449 x += 2*bit;
450 x += vp56_rac_get_prob(c, *ps);
451 }
452
453 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
454 }
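
/* Long-vector decoding order (informative): the magnitude's bits are read
 * as 0,1,2, then 9 down to 4, with bit 3 last; when bits 9..4 are all zero,
 * bit 3 is implied to be 1 (magnitudes below 8 use the small tree), which
 * is what the "x += 8" branch handles. E.g. magnitude 25 (0b0000011001) is
 * read as 1,0,0 (bits 0..2), then 0,0,0,0,0,1 (bits 9..4), then bit 3 = 1. */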
455
456 static av_always_inline
457 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
458 {
459 if (left == top)
460 return vp8_submv_prob[4-!!left];
461 if (!top)
462 return vp8_submv_prob[2];
463 return vp8_submv_prob[1-!!left];
464 }
465
466 /**
467 * Split motion vector prediction, 16.4.
468 * @returns the number of motion vectors parsed (2, 4 or 16)
469 */
470 static av_always_inline
471 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
472 {
473 int part_idx;
474 int n, num;
475 VP8Macroblock *top_mb = &mb[2];
476 VP8Macroblock *left_mb = &mb[-1];
477 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
478 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
479 *mbsplits_cur, *firstidx;
480 VP56mv *top_mv = top_mb->bmv;
481 VP56mv *left_mv = left_mb->bmv;
482 VP56mv *cur_mv = mb->bmv;
483
484 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
485 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
486 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
487 } else {
488 part_idx = VP8_SPLITMVMODE_8x8;
489 }
490 } else {
491 part_idx = VP8_SPLITMVMODE_4x4;
492 }
493
494 num = vp8_mbsplit_count[part_idx];
mbsplits_cur = vp8_mbsplits[part_idx];
496 firstidx = vp8_mbfirstidx[part_idx];
497 mb->partitioning = part_idx;
498
499 for (n = 0; n < num; n++) {
500 int k = firstidx[n];
501 uint32_t left, above;
502 const uint8_t *submv_prob;
503
504 if (!(k & 3))
505 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
506 else
507 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
508 if (k <= 3)
509 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
510 else
511 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
512
513 submv_prob = get_submv_prob(left, above);
514
515 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
516 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
517 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
518 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
519 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
520 } else {
521 AV_ZERO32(&mb->bmv[n]);
522 }
523 } else {
524 AV_WN32A(&mb->bmv[n], above);
525 }
526 } else {
527 AV_WN32A(&mb->bmv[n], left);
528 }
529 }
530
531 return num;
532 }
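
/* Indexing note (informative): k is a raster-scan 4x4 subblock index 0..15.
 * The left predictor is subblock k-1 of this MB, or the rightmost column
 * (k+3) of the left MB when k is on the left edge; the top predictor is
 * k-4, or the bottom row (k+12) of the top MB when k <= 3. */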
533
534 static av_always_inline
535 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
536 {
537 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
538 mb - 1 /* left */,
539 mb + 1 /* top-left */ };
540 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
541 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
542 int idx = CNT_ZERO;
543 int cur_sign_bias = s->sign_bias[mb->ref_frame];
544 int8_t *sign_bias = s->sign_bias;
545 VP56mv near_mv[4];
546 uint8_t cnt[4] = { 0 };
547 VP56RangeCoder *c = &s->c;
548
549 AV_ZERO32(&near_mv[0]);
550 AV_ZERO32(&near_mv[1]);
551 AV_ZERO32(&near_mv[2]);
552
553 /* Process MB on top, left and top-left */
554 #define MV_EDGE_CHECK(n)\
555 {\
556 VP8Macroblock *edge = mb_edge[n];\
557 int edge_ref = edge->ref_frame;\
558 if (edge_ref != VP56_FRAME_CURRENT) {\
559 uint32_t mv = AV_RN32A(&edge->mv);\
560 if (mv) {\
561 if (cur_sign_bias != sign_bias[edge_ref]) {\
562 /* SWAR negate of the values in mv. */\
563 mv = ~mv;\
564 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
565 }\
566 if (!n || mv != AV_RN32A(&near_mv[idx]))\
567 AV_WN32A(&near_mv[++idx], mv);\
568 cnt[idx] += 1 + (n != 2);\
569 } else\
570 cnt[CNT_ZERO] += 1 + (n != 2);\
571 }\
572 }
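
/* The SWAR negate above flips both 16-bit MV components in one 32-bit
 * operation: per lane, -v == ~v + 1. E.g. packed mv 0x0001FFFF (y=1, x=-1):
 * ~mv = 0xFFFE0000; adding 0x00010001 with the inter-lane carry suppressed
 * by the sign-bit mask/XOR gives 0xFFFF0001, i.e. (y=-1, x=1). */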
573
574 MV_EDGE_CHECK(0)
575 MV_EDGE_CHECK(1)
576 MV_EDGE_CHECK(2)
577
578 mb->partitioning = VP8_SPLITMVMODE_NONE;
579 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
580 mb->mode = VP8_MVMODE_MV;
581
582 /* If we have three distinct MVs, merge first and last if they're the same */
583 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
584 cnt[CNT_NEAREST] += 1;
585
586 /* Swap near and nearest if necessary */
587 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
588 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
589 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
590 }
591
592 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
593 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
594
595 /* Choose the best mv out of 0,0 and the nearest mv */
596 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
597 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
598 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
599 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
600
601 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
602 mb->mode = VP8_MVMODE_SPLIT;
603 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
604 } else {
605 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
606 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
607 mb->bmv[0] = mb->mv;
608 }
609 } else {
610 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
611 mb->bmv[0] = mb->mv;
612 }
613 } else {
614 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
615 mb->bmv[0] = mb->mv;
616 }
617 } else {
618 mb->mode = VP8_MVMODE_ZERO;
619 AV_ZERO32(&mb->mv);
620 mb->bmv[0] = mb->mv;
621 }
622 }
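
/* Weighting note (informative): in MV_EDGE_CHECK each neighbour votes for
 * its MV with weight 2, except the top-left (n == 2) which gets weight 1;
 * the resulting cnt[] totals index vp8_mode_contexts[] to pick the
 * probabilities for the zero/nearest/near/split decisions above. */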
623
624 static av_always_inline
625 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
626 int mb_x, int keyframe)
627 {
628 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
629
630 if (keyframe) {
631 int x, y;
632 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
633 uint8_t* const left = s->intra4x4_pred_mode_left;
634 for (y = 0; y < 4; y++) {
635 for (x = 0; x < 4; x++) {
636 const uint8_t *ctx;
637 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
638 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
639 left[y] = top[x] = *intra4x4;
640 intra4x4++;
641 }
642 }
643 } else {
644 int i;
645 for (i = 0; i < 16; i++)
646 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
647 }
648 }
649
650 static av_always_inline
651 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
652 {
653 VP56RangeCoder *c = &s->c;
654
655 if (s->segmentation.update_map)
656 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
657 else if (s->segmentation.enabled)
658 *segment = ref ? *ref : *segment;
659 mb->segment = *segment;
660
661 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
662
663 if (s->keyframe) {
664 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
665
666 if (mb->mode == MODE_I4x4) {
667 decode_intra4x4_modes(s, c, mb, mb_x, 1);
668 } else {
669 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
670 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
671 AV_WN32A(s->intra4x4_pred_mode_left, modes);
672 }
673
674 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
675 mb->ref_frame = VP56_FRAME_CURRENT;
676 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
677 // inter MB, 16.2
678 if (vp56_rac_get_prob_branchy(c, s->prob->last))
679 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
680 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
681 else
682 mb->ref_frame = VP56_FRAME_PREVIOUS;
683 s->ref_count[mb->ref_frame-1]++;
684
685 // motion vectors, 16.3
686 decode_mvs(s, mb, mb_x, mb_y);
687 } else {
688 // intra MB, 16.1
689 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
690
691 if (mb->mode == MODE_I4x4)
692 decode_intra4x4_modes(s, c, mb, mb_x, 0);
693
694 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
695 mb->ref_frame = VP56_FRAME_CURRENT;
696 mb->partitioning = VP8_SPLITMVMODE_NONE;
697 AV_ZERO32(&mb->bmv[0]);
698 }
699 }
700
701 #ifndef decode_block_coeffs_internal
702 /**
703 * @param c arithmetic bitstream reader context
704 * @param block destination for block coefficients
705 * @param probs probabilities to use when reading trees from the bitstream
706 * @param i initial coeff index, 0 unless a separate DC block is coded
707 * @param qmul array holding the dc/ac dequant factor at position 0/1
708 * @return 0 if no coeffs were decoded
709 * otherwise, the index of the last coeff decoded plus one
710 */
711 static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
712 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
713 int i, uint8_t *token_prob, int16_t qmul[2])
714 {
715 VP56RangeCoder c = *r;
716 goto skip_eob;
717 do {
718 int coeff;
719 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
720 break;
721
722 skip_eob:
723 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
724 if (++i == 16)
725 break; // invalid input; blocks should end with EOB
726 token_prob = probs[i][0];
727 goto skip_eob;
728 }
729
730 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
731 coeff = 1;
732 token_prob = probs[i+1][1];
733 } else {
734 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
735 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
736 if (coeff)
737 coeff += vp56_rac_get_prob(&c, token_prob[5]);
738 coeff += 2;
739 } else {
740 // DCT_CAT*
741 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
742 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
743 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
744 } else { // DCT_CAT2
745 coeff = 7;
746 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
747 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
748 }
749 } else { // DCT_CAT3 and up
750 int a = vp56_rac_get_prob(&c, token_prob[8]);
751 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
752 int cat = (a<<1) + b;
753 coeff = 3 + (8<<cat);
754 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
755 }
756 }
757 token_prob = probs[i+1][2];
758 }
759 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
760 } while (++i < 16);
761
762 *r = c;
763 return i;
764 }
765 #endif
766
767 /**
768 * @param c arithmetic bitstream reader context
769 * @param block destination for block coefficients
770 * @param probs probabilities to use when reading trees from the bitstream
771 * @param i initial coeff index, 0 unless a separate DC block is coded
772 * @param zero_nhood the initial prediction context for number of surrounding
773 * all-zero blocks (only left/top, so 0-2)
774 * @param qmul array holding the dc/ac dequant factor at position 0/1
775 * @return 0 if no coeffs were decoded
776 * otherwise, the index of the last coeff decoded plus one
777 */
778 static av_always_inline
779 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
780 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
781 int i, int zero_nhood, int16_t qmul[2])
782 {
783 uint8_t *token_prob = probs[i][zero_nhood];
784 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
785 return 0;
786 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
787 }
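
/* The wrapper/_internal split is a speed trick: the common all-zero case is
 * answered by the single EOB check above, and only non-empty blocks pay for
 * copying the range coder into a local (which helps the compiler keep its
 * state in registers across the token loop). The #ifndef also lets platform
 * asm (e.g. ARM) replace the internal routine wholesale. */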
788
789 static av_always_inline
790 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
791 uint8_t t_nnz[9], uint8_t l_nnz[9])
792 {
793 int i, x, y, luma_start = 0, luma_ctx = 3;
794 int nnz_pred, nnz, nnz_total = 0;
795 int segment = mb->segment;
796 int block_dc = 0;
797
798 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
799 nnz_pred = t_nnz[8] + l_nnz[8];
800
801 // decode DC values and do hadamard
802 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
803 s->qmat[segment].luma_dc_qmul);
804 l_nnz[8] = t_nnz[8] = !!nnz;
805 if (nnz) {
806 nnz_total += nnz;
807 block_dc = 1;
808 if (nnz == 1)
809 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
810 else
811 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
812 }
813 luma_start = 1;
814 luma_ctx = 0;
815 }
816
817 // luma blocks
818 for (y = 0; y < 4; y++)
819 for (x = 0; x < 4; x++) {
820 nnz_pred = l_nnz[y] + t_nnz[x];
821 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
822 nnz_pred, s->qmat[segment].luma_qmul);
823 // nnz+block_dc may be one more than the actual last index, but we don't care
824 s->non_zero_count_cache[y][x] = nnz + block_dc;
825 t_nnz[x] = l_nnz[y] = !!nnz;
826 nnz_total += nnz;
827 }
828
829 // chroma blocks
830 // TODO: what to do about dimensions? 2nd dim for luma is x,
831 // but for chroma it's (y<<1)|x
832 for (i = 4; i < 6; i++)
833 for (y = 0; y < 2; y++)
834 for (x = 0; x < 2; x++) {
835 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
836 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
837 nnz_pred, s->qmat[segment].chroma_qmul);
838 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
839 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
840 nnz_total += nnz;
841 }
842
// if no coeffs were coded even though the macroblock was not marked skip,
// we MUST not apply the inner loop filter and should not run the IDCT.
// Since skip isn't used for bitstream prediction, just set it manually.
846 if (!nnz_total)
847 mb->skip = 1;
848 }
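
/* Context note (informative): t_nnz/l_nnz carry one flag per 4x4 column/row
 * plus index 8 for the Y2 (luma DC) block; nnz_pred = left + top is the
 * 0..2 "zero neighbourhood" that selects the second probability dimension
 * in decode_block_coeffs(). */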
849
850 static av_always_inline
851 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
852 int linesize, int uvlinesize, int simple)
853 {
854 AV_COPY128(top_border, src_y + 15*linesize);
855 if (!simple) {
856 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
857 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
858 }
859 }
860
861 static av_always_inline
862 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
863 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
864 int simple, int xchg)
865 {
866 uint8_t *top_border_m1 = top_border-32; // for TL prediction
867 src_y -= linesize;
868 src_cb -= uvlinesize;
869 src_cr -= uvlinesize;
870
871 #define XCHG(a,b,xchg) do { \
872 if (xchg) AV_SWAP64(b,a); \
873 else AV_COPY64(b,a); \
874 } while (0)
875
876 XCHG(top_border_m1+8, src_y-8, xchg);
877 XCHG(top_border, src_y, xchg);
878 XCHG(top_border+8, src_y+8, 1);
879 if (mb_x < mb_width-1)
880 XCHG(top_border+32, src_y+16, 1);
881
882 // only copy chroma for normal loop filter
883 // or to initialize the top row to 127
884 if (!simple || !mb_y) {
885 XCHG(top_border_m1+16, src_cb-8, xchg);
886 XCHG(top_border_m1+24, src_cr-8, xchg);
887 XCHG(top_border+16, src_cb, 1);
888 XCHG(top_border+24, src_cr, 1);
889 }
890 }
891
892 static av_always_inline
893 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
894 {
895 if (!mb_x) {
896 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
897 } else {
898 return mb_y ? mode : LEFT_DC_PRED8x8;
899 }
900 }
901
902 static av_always_inline
903 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
904 {
905 if (!mb_x) {
906 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
907 } else {
908 return mb_y ? mode : HOR_PRED8x8;
909 }
910 }
911
912 static av_always_inline
913 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
914 {
915 if (mode == DC_PRED8x8) {
916 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
917 } else {
918 return mode;
919 }
920 }
921
922 static av_always_inline
923 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
924 {
925 switch (mode) {
926 case DC_PRED8x8:
927 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
928 case VERT_PRED8x8:
929 return !mb_y ? DC_127_PRED8x8 : mode;
930 case HOR_PRED8x8:
931 return !mb_x ? DC_129_PRED8x8 : mode;
932 case PLANE_PRED8x8 /*TM*/:
933 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
934 }
935 return mode;
936 }
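
/* Remapping cheat sheet (informative): with CODEC_FLAG_EMU_EDGE there are
 * no initialized pixels outside the frame, so predictors that would read
 * them become constant fills -- DC_127 stands in for a missing top row and
 * DC_129 for a missing left column, matching the 127/129 border values VP8
 * defines. */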
937
938 static av_always_inline
939 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
940 {
941 if (!mb_x) {
942 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
943 } else {
944 return mb_y ? mode : HOR_VP8_PRED;
945 }
946 }
947
948 static av_always_inline
949 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
950 {
951 switch (mode) {
952 case VERT_PRED:
953 if (!mb_x && mb_y) {
954 *copy_buf = 1;
955 return mode;
956 }
957 /* fall-through */
958 case DIAG_DOWN_LEFT_PRED:
959 case VERT_LEFT_PRED:
960 return !mb_y ? DC_127_PRED : mode;
961 case HOR_PRED:
962 if (!mb_y) {
963 *copy_buf = 1;
964 return mode;
965 }
966 /* fall-through */
967 case HOR_UP_PRED:
968 return !mb_x ? DC_129_PRED : mode;
969 case TM_VP8_PRED:
970 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
971 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
972 case DIAG_DOWN_RIGHT_PRED:
973 case VERT_RIGHT_PRED:
974 case HOR_DOWN_PRED:
975 if (!mb_y || !mb_x)
976 *copy_buf = 1;
977 return mode;
978 }
979 return mode;
980 }
981
982 static av_always_inline
983 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
984 int mb_x, int mb_y)
985 {
986 AVCodecContext *avctx = s->avctx;
987 int x, y, mode, nnz;
988 uint32_t tr;
989
990 // for the first row, we need to run xchg_mb_border to init the top edge to 127
991 // otherwise, skip it if we aren't going to deblock
992 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
993 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
994 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
995 s->filter.simple, 1);
996
997 if (mb->mode < MODE_I4x4) {
998 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
999 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1000 } else {
1001 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1002 }
1003 s->hpc.pred16x16[mode](dst[0], s->linesize);
1004 } else {
1005 uint8_t *ptr = dst[0];
1006 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1007 uint8_t tr_top[4] = { 127, 127, 127, 127 };
1008
// all blocks on the right edge of the macroblock use the bottom edge of
// the top macroblock for their topright edge
1011 uint8_t *tr_right = ptr - s->linesize + 16;
1012
1013 // if we're on the right edge of the frame, said edge is extended
1014 // from the top macroblock
1015 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1016 mb_x == s->mb_width-1) {
1017 tr = tr_right[-1]*0x01010101u;
1018 tr_right = (uint8_t *)&tr;
1019 }
1020
1021 if (mb->skip)
1022 AV_ZERO128(s->non_zero_count_cache);
1023
1024 for (y = 0; y < 4; y++) {
1025 uint8_t *topright = ptr + 4 - s->linesize;
1026 for (x = 0; x < 4; x++) {
1027 int copy = 0, linesize = s->linesize;
1028 uint8_t *dst = ptr+4*x;
1029 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1030
1031 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1032 topright = tr_top;
1033 } else if (x == 3)
1034 topright = tr_right;
1035
1036 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1037 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1038 if (copy) {
1039 dst = copy_dst + 12;
1040 linesize = 8;
1041 if (!(mb_y + y)) {
1042 copy_dst[3] = 127U;
1043 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1044 } else {
1045 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1046 if (!(mb_x + x)) {
1047 copy_dst[3] = 129U;
1048 } else {
1049 copy_dst[3] = ptr[4*x-s->linesize-1];
1050 }
1051 }
1052 if (!(mb_x + x)) {
1053 copy_dst[11] =
1054 copy_dst[19] =
1055 copy_dst[27] =
1056 copy_dst[35] = 129U;
1057 } else {
1058 copy_dst[11] = ptr[4*x -1];
1059 copy_dst[19] = ptr[4*x+s->linesize -1];
1060 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1061 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1062 }
1063 }
1064 } else {
1065 mode = intra4x4[x];
1066 }
1067 s->hpc.pred4x4[mode](dst, topright, linesize);
1068 if (copy) {
1069 AV_COPY32(ptr+4*x , copy_dst+12);
1070 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1071 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1072 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1073 }
1074
1075 nnz = s->non_zero_count_cache[y][x];
1076 if (nnz) {
1077 if (nnz == 1)
1078 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1079 else
1080 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1081 }
1082 topright += 4;
1083 }
1084
1085 ptr += 4*s->linesize;
1086 intra4x4 += 4;
1087 }
1088 }
1089
1090 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1091 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1092 } else {
1093 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1094 }
1095 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1096 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1097
1098 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1099 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1100 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1101 s->filter.simple, 0);
1102 }
1103
1104 static const uint8_t subpel_idx[3][8] = {
1105 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1106 // also function pointer index
1107 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1108 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1109 };
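
/* Reading subpel_idx (informative): column mx/my selects the filter. Odd
 * fractional offsets use the 4-tap kernels -- 1 extra source pixel to the
 * left, 2 to the right, 3 per row total -- while even non-zero offsets use
 * the 6-tap kernels needing 2/3/5. Row [0] doubles as the index into
 * mc_func[][]. */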
1110
1111 /**
1112 * luma MC function
1113 *
1114 * @param s VP8 decoding context
1115 * @param dst target buffer for block data at block position
1116 * @param ref reference picture buffer at origin (0, 0)
1117 * @param mv motion vector (relative to block position) to get pixel data from
1118 * @param x_off horizontal position of block from origin (0, 0)
1119 * @param y_off vertical position of block from origin (0, 0)
1120 * @param block_w width of block (16, 8 or 4)
1121 * @param block_h height of block (always same as block_w)
1122 * @param width width of src/dst plane data
1123 * @param height height of src/dst plane data
1124 * @param linesize size of a single line of plane data, including padding
1125 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1126 */
1127 static av_always_inline
1128 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1129 int x_off, int y_off, int block_w, int block_h,
1130 int width, int height, int linesize,
1131 vp8_mc_func mc_func[3][3])
1132 {
1133 uint8_t *src = ref->data[0];
1134
1135 if (AV_RN32A(mv)) {
1136
1137 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1138 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1139
1140 x_off += mv->x >> 2;
1141 y_off += mv->y >> 2;
1142
1143 // edge emulation
1144 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1145 src += y_off * linesize + x_off;
1146 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1147 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1148 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1149 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1150 x_off - mx_idx, y_off - my_idx, width, height);
1151 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1152 }
1153 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1154 } else {
1155 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1156 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1157 }
1158 }
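
/* Luma MVs are quarter-pel: mv->x >> 2 above is the integer offset, and
 * (mv->x << 1) & 7 rescales the fraction onto the eighth-pel grid shared
 * with chroma, so luma only ever selects the even (0/2/4/6) filter
 * positions. */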
1159
1160 /**
1161 * chroma MC function
1162 *
1163 * @param s VP8 decoding context
1164 * @param dst1 target buffer for block data at block position (U plane)
1165 * @param dst2 target buffer for block data at block position (V plane)
1166 * @param ref reference picture buffer at origin (0, 0)
1167 * @param mv motion vector (relative to block position) to get pixel data from
1168 * @param x_off horizontal position of block from origin (0, 0)
1169 * @param y_off vertical position of block from origin (0, 0)
1170 * @param block_w width of block (16, 8 or 4)
1171 * @param block_h height of block (always same as block_w)
1172 * @param width width of src/dst plane data
1173 * @param height height of src/dst plane data
1174 * @param linesize size of a single line of plane data, including padding
1175 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1176 */
1177 static av_always_inline
1178 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1179 const VP56mv *mv, int x_off, int y_off,
1180 int block_w, int block_h, int width, int height, int linesize,
1181 vp8_mc_func mc_func[3][3])
1182 {
1183 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1184
1185 if (AV_RN32A(mv)) {
1186 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1187 int my = mv->y&7, my_idx = subpel_idx[0][my];
1188
1189 x_off += mv->x >> 3;
1190 y_off += mv->y >> 3;
1191
1192 // edge emulation
1193 src1 += y_off * linesize + x_off;
1194 src2 += y_off * linesize + x_off;
1195 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1196 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1197 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1198 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1199 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1200 x_off - mx_idx, y_off - my_idx, width, height);
1201 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1202 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1203
1204 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1205 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1206 x_off - mx_idx, y_off - my_idx, width, height);
1207 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1208 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1209 } else {
1210 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1211 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1212 }
1213 } else {
1214 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1215 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1216 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1217 }
1218 }
1219
1220 static av_always_inline
1221 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1222 AVFrame *ref_frame, int x_off, int y_off,
1223 int bx_off, int by_off,
1224 int block_w, int block_h,
1225 int width, int height, VP56mv *mv)
1226 {
1227 VP56mv uvmv = *mv;
1228
1229 /* Y */
1230 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1231 ref_frame, mv, x_off + bx_off, y_off + by_off,
1232 block_w, block_h, width, height, s->linesize,
1233 s->put_pixels_tab[block_w == 8]);
1234
1235 /* U/V */
1236 if (s->profile == 3) {
1237 uvmv.x &= ~7;
1238 uvmv.y &= ~7;
1239 }
1240 x_off >>= 1; y_off >>= 1;
1241 bx_off >>= 1; by_off >>= 1;
1242 width >>= 1; height >>= 1;
1243 block_w >>= 1; block_h >>= 1;
1244 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1245 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1246 &uvmv, x_off + bx_off, y_off + by_off,
1247 block_w, block_h, width, height, s->uvlinesize,
1248 s->put_pixels_tab[1 + (block_w == 4)]);
1249 }
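
/* The same luma MV serves chroma: on the half-resolution plane it is an
 * eighth-pel vector (mv->x >> 3 integer part, mv->x & 7 fraction in
 * vp8_mc_chroma()), which is why profile 3 masks off the low three bits
 * above to force full-pel chroma prediction. */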
1250
1251 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1252 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1253 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1254 {
1255 /* Don't prefetch refs that haven't been used very often this frame. */
1256 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1257 int x_off = mb_x << 4, y_off = mb_y << 4;
1258 int mx = (mb->mv.x>>2) + x_off + 8;
1259 int my = (mb->mv.y>>2) + y_off;
1260 uint8_t **src= s->framep[ref]->data;
1261 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1262 /* For threading, a ff_thread_await_progress here might be useful, but
1263 * it actually slows down the decoder. Since a bad prefetch doesn't
1264 * generate bad decoder output, we don't run it here. */
1265 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1266 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1267 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1268 }
1269 }
1270
1271 /**
1272 * Apply motion vectors to prediction buffer, chapter 18.
1273 */
1274 static av_always_inline
1275 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1276 int mb_x, int mb_y)
1277 {
1278 int x_off = mb_x << 4, y_off = mb_y << 4;
1279 int width = 16*s->mb_width, height = 16*s->mb_height;
1280 AVFrame *ref = s->framep[mb->ref_frame];
1281 VP56mv *bmv = mb->bmv;
1282
1283 switch (mb->partitioning) {
1284 case VP8_SPLITMVMODE_NONE:
1285 vp8_mc_part(s, dst, ref, x_off, y_off,
1286 0, 0, 16, 16, width, height, &mb->mv);
1287 break;
1288 case VP8_SPLITMVMODE_4x4: {
1289 int x, y;
1290 VP56mv uvmv;
1291
1292 /* Y */
1293 for (y = 0; y < 4; y++) {
1294 for (x = 0; x < 4; x++) {
1295 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1296 ref, &bmv[4*y + x],
1297 4*x + x_off, 4*y + y_off, 4, 4,
1298 width, height, s->linesize,
1299 s->put_pixels_tab[2]);
1300 }
1301 }
1302
1303 /* U/V */
1304 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1305 for (y = 0; y < 2; y++) {
1306 for (x = 0; x < 2; x++) {
1307 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1308 mb->bmv[ 2*y * 4 + 2*x+1].x +
1309 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1310 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1311 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1312 mb->bmv[ 2*y * 4 + 2*x+1].y +
1313 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1314 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1315 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1316 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1317 if (s->profile == 3) {
1318 uvmv.x &= ~7;
1319 uvmv.y &= ~7;
1320 }
1321 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1322 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1323 4*x + x_off, 4*y + y_off, 4, 4,
1324 width, height, s->uvlinesize,
1325 s->put_pixels_tab[2]);
1326 }
1327 }
1328 break;
1329 }
1330 case VP8_SPLITMVMODE_16x8:
1331 vp8_mc_part(s, dst, ref, x_off, y_off,
1332 0, 0, 16, 8, width, height, &bmv[0]);
1333 vp8_mc_part(s, dst, ref, x_off, y_off,
1334 0, 8, 16, 8, width, height, &bmv[1]);
1335 break;
1336 case VP8_SPLITMVMODE_8x16:
1337 vp8_mc_part(s, dst, ref, x_off, y_off,
1338 0, 0, 8, 16, width, height, &bmv[0]);
1339 vp8_mc_part(s, dst, ref, x_off, y_off,
1340 8, 0, 8, 16, width, height, &bmv[1]);
1341 break;
1342 case VP8_SPLITMVMODE_8x8:
1343 vp8_mc_part(s, dst, ref, x_off, y_off,
1344 0, 0, 8, 8, width, height, &bmv[0]);
1345 vp8_mc_part(s, dst, ref, x_off, y_off,
1346 8, 0, 8, 8, width, height, &bmv[1]);
1347 vp8_mc_part(s, dst, ref, x_off, y_off,
1348 0, 8, 8, 8, width, height, &bmv[2]);
1349 vp8_mc_part(s, dst, ref, x_off, y_off,
1350 8, 8, 8, 8, width, height, &bmv[3]);
1351 break;
1352 }
1353 }
1354
1355 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
1356 {
1357 int x, y, ch;
1358
1359 if (mb->mode != MODE_I4x4) {
1360 uint8_t *y_dst = dst[0];
1361 for (y = 0; y < 4; y++) {
1362 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
1363 if (nnz4) {
1364 if (nnz4&~0x01010101) {
1365 for (x = 0; x < 4; x++) {
1366 if ((uint8_t)nnz4 == 1)
1367 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1368 else if((uint8_t)nnz4 > 1)
1369 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1370 nnz4 >>= 8;
1371 if (!nnz4)
1372 break;
1373 }
1374 } else {
1375 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1376 }
1377 }
1378 y_dst += 4*s->linesize;
1379 }
1380 }
1381
1382 for (ch = 0; ch < 2; ch++) {
1383 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1384 if (nnz4) {
1385 uint8_t *ch_dst = dst[1+ch];
1386 if (nnz4&~0x01010101) {
1387 for (y = 0; y < 2; y++) {
1388 for (x = 0; x < 2; x++) {
1389 if ((uint8_t)nnz4 == 1)
1390 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1391 else if((uint8_t)nnz4 > 1)
1392 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1393 nnz4 >>= 8;
1394 if (!nnz4)
1395 goto chroma_idct_end;
1396 }
1397 ch_dst += 4*s->uvlinesize;
1398 }
1399 } else {
1400 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
1401 }
1402 }
1403 chroma_idct_end: ;
1404 }
1405 }
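
/* The nnz4 & ~0x01010101 test is a SWAR shortcut: non_zero_count_cache
 * packs one count byte per 4x4 block, so if every byte of the row is 0 or 1
 * all blocks are DC-only and the batched vp8_idct_dc_add4y/4uv paths apply;
 * otherwise each byte picks between dc_add and the full idct_add. */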
1406
1407 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1408 {
1409 int interior_limit, filter_level;
1410
1411 if (s->segmentation.enabled) {
1412 filter_level = s->segmentation.filter_level[mb->segment];
1413 if (!s->segmentation.absolute_vals)
1414 filter_level += s->filter.level;
1415 } else
1416 filter_level = s->filter.level;
1417
1418 if (s->lf_delta.enabled) {
1419 filter_level += s->lf_delta.ref[mb->ref_frame];
1420 filter_level += s->lf_delta.mode[mb->mode];
1421 }
1422
1423 filter_level = av_clip_uintp2(filter_level, 6);
1424
1425 interior_limit = filter_level;
1426 if (s->filter.sharpness) {
1427 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1428 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1429 }
1430 interior_limit = FFMAX(interior_limit, 1);
1431
1432 f->filter_level = filter_level;
1433 f->inner_limit = interior_limit;
1434 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1435 }
1436
1437 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1438 {
1439 int mbedge_lim, bedge_lim, hev_thresh;
1440 int filter_level = f->filter_level;
1441 int inner_limit = f->inner_limit;
1442 int inner_filter = f->inner_filter;
1443 int linesize = s->linesize;
1444 int uvlinesize = s->uvlinesize;
1445 static const uint8_t hev_thresh_lut[2][64] = {
1446 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1447 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1448 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1449 3, 3, 3, 3 },
1450 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1451 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1452 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1453 2, 2, 2, 2 }
1454 };
1455
1456 if (!filter_level)
1457 return;
1458
1459 bedge_lim = 2*filter_level + inner_limit;
1460 mbedge_lim = bedge_lim + 4;
1461
1462 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1463
1464 if (mb_x) {
1465 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1466 mbedge_lim, inner_limit, hev_thresh);
1467 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1468 mbedge_lim, inner_limit, hev_thresh);
1469 }
1470
1471 if (inner_filter) {
1472 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1473 inner_limit, hev_thresh);
1474 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1475 inner_limit, hev_thresh);
1476 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1477 inner_limit, hev_thresh);
1478 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1479 uvlinesize, bedge_lim,
1480 inner_limit, hev_thresh);
1481 }
1482
1483 if (mb_y) {
1484 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1485 mbedge_lim, inner_limit, hev_thresh);
1486 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1487 mbedge_lim, inner_limit, hev_thresh);
1488 }
1489
1490 if (inner_filter) {
1491 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1492 linesize, bedge_lim,
1493 inner_limit, hev_thresh);
1494 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1495 linesize, bedge_lim,
1496 inner_limit, hev_thresh);
1497 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1498 linesize, bedge_lim,
1499 inner_limit, hev_thresh);
1500 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1501 dst[2] + 4 * uvlinesize,
1502 uvlinesize, bedge_lim,
1503 inner_limit, hev_thresh);
1504 }
1505 }
1506
1507 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1508 {
1509 int mbedge_lim, bedge_lim;
1510 int filter_level = f->filter_level;
1511 int inner_limit = f->inner_limit;
1512 int inner_filter = f->inner_filter;
1513 int linesize = s->linesize;
1514
1515 if (!filter_level)
1516 return;
1517
1518 bedge_lim = 2*filter_level + inner_limit;
1519 mbedge_lim = bedge_lim + 4;
1520
1521 if (mb_x)
1522 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1523 if (inner_filter) {
1524 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1525 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1526 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1527 }
1528
1529 if (mb_y)
1530 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1531 if (inner_filter) {
1532 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1533 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1534 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1535 }
1536 }
1537
1538 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1539 {
1540 VP8FilterStrength *f = s->filter_strength;
1541 uint8_t *dst[3] = {
1542 curframe->data[0] + 16*mb_y*s->linesize,
1543 curframe->data[1] + 8*mb_y*s->uvlinesize,
1544 curframe->data[2] + 8*mb_y*s->uvlinesize
1545 };
1546 int mb_x;
1547
1548 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1549 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1550 filter_mb(s, dst, f++, mb_x, mb_y);
1551 dst[0] += 16;
1552 dst[1] += 8;
1553 dst[2] += 8;
1554 }
1555 }
1556
1557 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1558 {
1559 VP8FilterStrength *f = s->filter_strength;
1560 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1561 int mb_x;
1562
1563 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1564 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1565 filter_mb_simple(s, dst, f++, mb_x, mb_y);
1566 dst += 16;
1567 }
1568 }
1569
1570 static void release_queued_segmaps(VP8Context *s, int is_close)
1571 {
1572 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1573 while (s->num_maps_to_be_freed > leave_behind)
1574 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1575 s->maps_are_invalid = 0;
1576 }
1577
1578 #define MARGIN (16 << 2)
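/* MVs are clamped to the frame plus a 16-pixel margin; mv_min/mv_max are
 * kept in quarter-pel units, hence 16 << 2, and the "<< 6" / "-= 64" steps
 * below are one 16-pixel macroblock expressed in the same units. */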
1579 static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe,
1580 AVFrame *prev_frame, int mb_y)
1581 {
1582 VP8Context *s = avctx->priv_data;
1583 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1584 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1585 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1586 uint8_t *dst[3] = {
1587 curframe->data[0] + 16*mb_y*s->linesize,
1588 curframe->data[1] + 8*mb_y*s->uvlinesize,
1589 curframe->data[2] + 8*mb_y*s->uvlinesize
1590 };
1591
1592 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1593 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1594 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1595
1596 // left edge of 129 for intra prediction
1597 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1598 for (i = 0; i < 3; i++)
1599 for (y = 0; y < 16>>!!i; y++)
1600 dst[i][y*curframe->linesize[i]-1] = 129;
1601 if (mb_y == 1) // top left edge is also 129
1602 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1603 }
1604
1605 s->mv_min.x = -MARGIN;
1606 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1607
1608 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1609 /* Prefetch the current frame, 4 MBs ahead */
1610 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1611 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1612
1613 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1614 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
1615
1616 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1617
1618 if (!mb->skip)
1619 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1620
1621 if (mb->mode <= MODE_I4x4)
1622 intra_predict(s, dst, mb, mb_x, mb_y);
1623 else
1624 inter_predict(s, dst, mb, mb_x, mb_y);
1625
1626 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1627
1628 if (!mb->skip) {
1629 idct_mb(s, dst, mb);
1630 } else {
1631 AV_ZERO64(s->left_nnz);
1632 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1633
// Reset the DC block predictors that would exist if the mb had coefficients
1635 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1636 s->left_nnz[8] = 0;
1637 s->top_nnz[mb_x][8] = 0;
1638 }
1639 }
1640
1641 if (s->deblock_filter)
1642 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1643
1644 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1645
1646 dst[0] += 16;
1647 dst[1] += 8;
1648 dst[2] += 8;
1649 s->mv_min.x -= 64;
1650 s->mv_max.x -= 64;
1651 }
1652 if (s->deblock_filter) {
1653 if (s->filter.simple)
1654 filter_mb_row_simple(s, curframe, mb_y);
1655 else
1656 filter_mb_row(s, curframe, mb_y);
1657 }
1658 s->mv_min.y -= 64;
1659 s->mv_max.y -= 64;
1660 }
1661
1662 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1663 AVPacket *avpkt)
1664 {
1665 VP8Context *s = avctx->priv_data;
1666 int ret, mb_y, i, referenced;
1667 enum AVDiscard skip_thresh;
1668 AVFrame *av_uninit(curframe), *prev_frame;
1669
1670 release_queued_segmaps(s, 0);
1671
1672 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1673 goto err;
1674
1675 prev_frame = s->framep[VP56_FRAME_CURRENT];
1676
1677 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1678 || s->update_altref == VP56_FRAME_CURRENT;
1679
1680 skip_thresh = !referenced ? AVDISCARD_NONREF :
1681 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1682
1683 if (avctx->skip_frame >= skip_thresh) {
1684 s->invisible = 1;
1685 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1686 goto skip_decode;
1687 }
1688 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1689
1690 // release no longer referenced frames
1691 for (i = 0; i < 5; i++)
1692 if (s->frames[i].data[0] &&
1693 &s->frames[i] != prev_frame &&
1694 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1695 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1696 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1697 vp8_release_frame(s, &s->frames[i], 1, 0);
1698
1699 // find a free buffer
1700 for (i = 0; i < 5; i++)
1701 if (&s->frames[i] != prev_frame &&
1702 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1703 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1704 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1705 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1706 break;
1707 }
1708 if (i == 5) {
1709 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1710 abort();
1711 }
1712 if (curframe->data[0])
1713 vp8_release_frame(s, curframe, 1, 0);
1714
// Given that arithmetic probabilities are updated every frame, it's quite likely
// that the values we have on a random interframe are complete junk if we didn't
// start decoding on a keyframe. So just don't display anything rather than junk.
1718 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1719 !s->framep[VP56_FRAME_GOLDEN] ||
1720 !s->framep[VP56_FRAME_GOLDEN2])) {
1721 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1722 ret = AVERROR_INVALIDDATA;
1723 goto err;
1724 }
1725
1726 curframe->key_frame = s->keyframe;
1727 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1728 curframe->reference = referenced ? 3 : 0;
1729 if ((ret = vp8_alloc_frame(s, curframe))) {
1730 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1731 goto err;
1732 }
1733
1734 // check if golden and altref are swapped
1735 if (s->update_altref != VP56_FRAME_NONE) {
1736 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1737 } else {
1738 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1739 }
1740 if (s->update_golden != VP56_FRAME_NONE) {
1741 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1742 } else {
1743 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1744 }
1745 if (s->update_last) {
1746 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1747 } else {
1748 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1749 }
1750 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1751
1752 ff_thread_finish_setup(avctx);
1753
1754 s->linesize = curframe->linesize[0];
1755 s->uvlinesize = curframe->linesize[1];
1756
1757 if (!s->edge_emu_buffer)
1758 s->edge_emu_buffer = av_malloc(21*s->linesize);
1759
1760 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1761
1762 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1763 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1764
1765 // top edge of 127 for intra prediction
1766 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1767 s->top_border[0][15] = s->top_border[0][23] = 127;
1768 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1769 }
1770 memset(s->ref_count, 0, sizeof(s->ref_count));
1771 if (s->keyframe)
1772 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1773
1774 s->mv_min.y = -MARGIN;
1775 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1776
1777 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1778 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1779 ff_thread_await_progress(prev_frame, mb_y, 0);
1780
1781 vp8_decode_mb_row(avctx, curframe, prev_frame, mb_y);
1782
1783 ff_thread_report_progress(curframe, mb_y, 0);
1784 }
1785
1786 ff_thread_report_progress(curframe, INT_MAX, 0);
1787 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1788
1789 skip_decode:
1790 // if future frames don't use the updated probabilities,
1791 // reset them to the values we saved
1792 if (!s->update_probabilities)
1793 s->prob[0] = s->prob[1];
1794
1795 if (!s->invisible) {
1796 *(AVFrame*)data = *curframe;
1797 *data_size = sizeof(AVFrame);
1798 }
1799
1800 return avpkt->size;
1801 err:
1802 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1803 return ret;
1804 }
1805
1806 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1807 {
1808 VP8Context *s = avctx->priv_data;
1809
1810 s->avctx = avctx;
1811 avctx->pix_fmt = PIX_FMT_YUV420P;
1812
1813 ff_dsputil_init(&s->dsp, avctx);
1814 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
1815 ff_vp8dsp_init(&s->vp8dsp);
1816
1817 return 0;
1818 }
1819
1820 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1821 {
1822 vp8_decode_flush_impl(avctx, 0, 1, 1);
1823 release_queued_segmaps(avctx->priv_data, 1);
1824 return 0;
1825 }
1826
1827 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1828 {
1829 VP8Context *s = avctx->priv_data;
1830
1831 s->avctx = avctx;
1832
1833 return 0;
1834 }
1835
1836 #define REBASE(pic) \
1837 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
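
/* REBASE translates an AVFrame pointer from the source thread's frames[]
 * array to the corresponding slot in the destination context, so the
 * frame-threading update below can mirror framep[] without sharing pointers
 * across contexts. */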
1838
1839 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1840 {
1841 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
1842
1843 if (s->macroblocks_base &&
1844 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
1845 free_buffers(s);
1846 s->maps_are_invalid = 1;
1847 s->mb_width = s_src->mb_width;
1848 s->mb_height = s_src->mb_height;
1849 }
1850
1851 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1852 s->segmentation = s_src->segmentation;
1853 s->lf_delta = s_src->lf_delta;
1854 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1855
1856 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
1857 s->framep[0] = REBASE(s_src->next_framep[0]);
1858 s->framep[1] = REBASE(s_src->next_framep[1]);
1859 s->framep[2] = REBASE(s_src->next_framep[2]);
1860 s->framep[3] = REBASE(s_src->next_framep[3]);
1861
1862 return 0;
1863 }
1864
1865 AVCodec ff_vp8_decoder = {
1866 .name = "vp8",
1867 .type = AVMEDIA_TYPE_VIDEO,
1868 .id = CODEC_ID_VP8,
1869 .priv_data_size = sizeof(VP8Context),
1870 .init = vp8_decode_init,
1871 .close = vp8_decode_free,
1872 .decode = vp8_decode_frame,
1873 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1874 .flush = vp8_decode_flush,
1875 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1876 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1877 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
1878 };