e39fc0bf93a46e2c794607ed4c1d11543939585c
[libav.git] / libavcodec / vp8.c
1 /*
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
8 *
9 * This file is part of Libav.
10 *
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 #include "libavutil/imgutils.h"
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "vp8.h"
30 #include "vp8data.h"
31 #include "rectangle.h"
32 #include "thread.h"
33
34 #if ARCH_ARM
35 # include "arm/vp8.h"
36 #endif
37
38 static void free_buffers(VP8Context *s)
39 {
40 int i;
41 if (s->thread_data)
42 for (i = 0; i < MAX_THREADS; i++) {
43 av_freep(&s->thread_data[i].filter_strength);
44 av_freep(&s->thread_data[i].edge_emu_buffer);
45 }
46 av_freep(&s->thread_data);
47 av_freep(&s->macroblocks_base);
48 av_freep(&s->intra4x4_pred_mode_top);
49 av_freep(&s->top_nnz);
50 av_freep(&s->top_border);
51
52 s->macroblocks = NULL;
53 }
54
55 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
56 {
57 int ret;
58 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
59 return ret;
60 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
61 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
62 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
63 ff_thread_release_buffer(s->avctx, f);
64 return AVERROR(ENOMEM);
65 }
66 return 0;
67 }
68
/**
 * Release a frame's buffer and dispose of its segmentation map.
 *
 * @param prefer_delayed_free if set, queue the map for later reuse/free
 *                            instead of freeing it immediately
 * @param can_direct_free     if set, the caller guarantees no other thread
 *                            still reads the map, so it may be freed now
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
{
    if (f->ref_index[0]) {
        if (prefer_delayed_free) {
            /* Upon a size change, we want to free the maps but other threads may still
             * be using them, so queue them. Upon a seek, all threads are inactive so
             * we want to cache one to prevent re-allocation in the next decoding
             * iteration, but the rest we can free directly. */
            int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
            if (s->num_maps_to_be_freed < max_queued_maps) {
                s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
            } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
                av_free(f->ref_index[0]);
            } /* else: MEMLEAK (should never happen, but better that than crash) */
            f->ref_index[0] = NULL;
        } else /* vp8_decode_free() */ {
            av_free(f->ref_index[0]);
        }
    }
    ff_thread_release_buffer(s->avctx, f);
}
90
91 static void vp8_decode_flush_impl(AVCodecContext *avctx,
92 int prefer_delayed_free, int can_direct_free, int free_mem)
93 {
94 VP8Context *s = avctx->priv_data;
95 int i;
96
97 if (!avctx->internal->is_copy) {
98 for (i = 0; i < 5; i++)
99 if (s->frames[i].data[0])
100 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
101 }
102 memset(s->framep, 0, sizeof(s->framep));
103
104 if (free_mem) {
105 free_buffers(s);
106 s->maps_are_invalid = 1;
107 }
108 }
109
/* Flush callback: release all reference frames but keep the context's
 * scratch buffers allocated for the next decode iteration. */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 1, 1, 0);
}
114
115 static int update_dimensions(VP8Context *s, int width, int height)
116 {
117 AVCodecContext *avctx = s->avctx;
118 int i;
119
120 if (width != s->avctx->width ||
121 height != s->avctx->height) {
122 if (av_image_check_size(width, height, 0, s->avctx))
123 return AVERROR_INVALIDDATA;
124
125 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
126
127 avcodec_set_dimensions(s->avctx, width, height);
128 }
129
130 s->mb_width = (s->avctx->coded_width +15) / 16;
131 s->mb_height = (s->avctx->coded_height+15) / 16;
132
133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
134 if (!s->mb_layout) { // Frame threading and one thread
135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
137 }
138 else // Sliced threading
139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
143
144 for (i = 0; i < MAX_THREADS; i++) {
145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
146 pthread_mutex_init(&s->thread_data[i].lock, NULL);
147 pthread_cond_init(&s->thread_data[i].cond, NULL);
148 }
149
150 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
151 (!s->intra4x4_pred_mode_top && !s->mb_layout))
152 return AVERROR(ENOMEM);
153
154 s->macroblocks = s->macroblocks_base + 1;
155
156 return 0;
157 }
158
159 static void parse_segment_info(VP8Context *s)
160 {
161 VP56RangeCoder *c = &s->c;
162 int i;
163
164 s->segmentation.update_map = vp8_rac_get(c);
165
166 if (vp8_rac_get(c)) { // update segment feature data
167 s->segmentation.absolute_vals = vp8_rac_get(c);
168
169 for (i = 0; i < 4; i++)
170 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
171
172 for (i = 0; i < 4; i++)
173 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
174 }
175 if (s->segmentation.update_map)
176 for (i = 0; i < 3; i++)
177 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
178 }
179
180 static void update_lf_deltas(VP8Context *s)
181 {
182 VP56RangeCoder *c = &s->c;
183 int i;
184
185 for (i = 0; i < 4; i++) {
186 if (vp8_rac_get(c)) {
187 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
188
189 if (vp8_rac_get(c))
190 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
191 }
192 }
193
194 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
195 if (vp8_rac_get(c)) {
196 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
197
198 if (vp8_rac_get(c))
199 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
200 }
201 }
202 }
203
204 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
205 {
206 const uint8_t *sizes = buf;
207 int i;
208
209 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
210
211 buf += 3*(s->num_coeff_partitions-1);
212 buf_size -= 3*(s->num_coeff_partitions-1);
213 if (buf_size < 0)
214 return -1;
215
216 for (i = 0; i < s->num_coeff_partitions-1; i++) {
217 int size = AV_RL24(sizes + 3*i);
218 if (buf_size - size < 0)
219 return -1;
220
221 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
222 buf += size;
223 buf_size -= size;
224 }
225 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
226
227 return 0;
228 }
229
230 static void get_quants(VP8Context *s)
231 {
232 VP56RangeCoder *c = &s->c;
233 int i, base_qi;
234
235 int yac_qi = vp8_rac_get_uint(c, 7);
236 int ydc_delta = vp8_rac_get_sint(c, 4);
237 int y2dc_delta = vp8_rac_get_sint(c, 4);
238 int y2ac_delta = vp8_rac_get_sint(c, 4);
239 int uvdc_delta = vp8_rac_get_sint(c, 4);
240 int uvac_delta = vp8_rac_get_sint(c, 4);
241
242 for (i = 0; i < 4; i++) {
243 if (s->segmentation.enabled) {
244 base_qi = s->segmentation.base_quant[i];
245 if (!s->segmentation.absolute_vals)
246 base_qi += yac_qi;
247 } else
248 base_qi = yac_qi;
249
250 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
251 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
252 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
253 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
254 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
255 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
256
257 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
258 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
259 }
260 }
261
262 /**
263 * Determine which buffers golden and altref should be updated with after this frame.
264 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
265 *
266 * Intra frames update all 3 references
267 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
268 * If the update (golden|altref) flag is set, it's updated with the current frame
269 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
270 * If the flag is not set, the number read means:
271 * 0: no update
272 * 1: VP56_FRAME_PREVIOUS
273 * 2: update golden with altref, or update altref with golden
274 */
275 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
276 {
277 VP56RangeCoder *c = &s->c;
278
279 if (update)
280 return VP56_FRAME_CURRENT;
281
282 switch (vp8_rac_get_uint(c, 2)) {
283 case 1:
284 return VP56_FRAME_PREVIOUS;
285 case 2:
286 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
287 }
288 return VP56_FRAME_NONE;
289 }
290
291 static void update_refs(VP8Context *s)
292 {
293 VP56RangeCoder *c = &s->c;
294
295 int update_golden = vp8_rac_get(c);
296 int update_altref = vp8_rac_get(c);
297
298 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
299 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
300 }
301
/**
 * Parse the uncompressed frame tag and the first (header) partition.
 * Sets up segmentation, loop filter, coefficient partitions, quantizers,
 * reference-update state and all probability tables for this frame.
 *
 * @return 0 on success, a negative AVERROR on invalid data
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* 3-byte uncompressed frame tag: keyframe flag, profile, show-frame
     * flag, and the size of the compressed header partition. */
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* Keyframes carry an extra 7-byte block (start code + dimensions). */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        /* 14-bit width/height with 2-bit upscaling factors in the top bits. */
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        /* A keyframe resets the reference updates, all probability tables
         * and the segmentation state to their defaults. */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* Deltas are only re-read when the second flag is set; otherwise the
     * previous frame's values remain in effect. */
    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    /* (Re)allocate buffers on the first frame or on a size change. */
    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* Token probability updates: one flag plus an optional new 8-bit value
     * per (plane, band, context, token) combination; a new value applies
     * to every coefficient position mapped to that band. */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
438
/* Clamp a motion vector into the valid range for the current macroblock
 * row/column (s->mv_min/mv_max are maintained by the caller). */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}
444
/**
 * Motion vector coding, 17.1.
 *
 * @param p probability table for this component
 * @return the decoded, signed MV component
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        /* Long (10-bit) magnitude: low 3 bits first, then the high bits
         * from bit 9 down to bit 4. */
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* Bit 3 is only coded when some higher bit is set. */
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    /* Sign bit is only present for nonzero magnitudes. */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
475
476 static av_always_inline
477 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
478 {
479 if (left == top)
480 return vp8_submv_prob[4-!!left];
481 if (!top)
482 return vp8_submv_prob[2];
483 return vp8_submv_prob[1-!!left];
484 }
485
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

    /* Read the partitioning type from its small tree. */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    /* One sub-MV per partition; left/above context comes from the
     * neighboring macroblocks for edge sub-blocks, from this macroblock's
     * already-decoded sub-MVs otherwise. */
    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3)) // leftmost column: use the left macroblock
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)   // top row: use the macroblock above
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        /* Sub-MV tree: NEW (coded residual), ZERO, ABOVE, LEFT. */
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
560
/**
 * Decode the motion vector mode and vector(s) for an inter macroblock,
 * 16.3: survey the top/left/top-left neighbors to build the
 * zero/nearest/near candidate counts, then read the mode and any residual.
 */
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left: each inter neighbor's MV is
     * (sign-flipped if its reference has the opposite bias) either counted
     * as zero or appended/merged into the candidate list. Top and left
     * neighbors are weighted 2, top-left 1. */
#define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx] += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* Context for the split decision: how many neighbors used
                 * split mode (top/left weighted double). */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* mb->mv takes the last sub-MV (used for prediction by
                     * the next macroblock). */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
659
/**
 * Decode the 16 intra 4x4 prediction modes for a MODE_I4x4 macroblock.
 * On keyframes the mode of each sub-block is context-coded from its top
 * and left neighbor modes; on inter frames a single shared probability
 * table is used and no context is maintained.
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        /* Sliced threading: copy the top context from the macroblock above
         * instead of a shared per-column array. */
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t* top;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* The decoded mode becomes the context for the neighbors
                 * to the right and below. */
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
693
/**
 * Decode a macroblock's segment, skip flag, prediction mode and (for inter
 * macroblocks) reference frame and motion vectors.
 *
 * @param segment in/out: this column's segment id (carried between rows)
 * @param ref     previous frame's segment id at this position, or NULL
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        /* No map update: keep the previous frame's id if available. */
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            /* A whole-block mode fills the 4x4 context with the mapped
             * 4x4 equivalent of that mode. */
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
748
#ifndef decode_block_coeffs_internal /* may be overridden by an arch-specific version */
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    /* Work on a local copy of the range coder so the compiler can keep it
     * in registers; written back once at the end. */
    VP56RangeCoder c = *r;
    goto skip_eob;  /* the caller already consumed the first EOB decision */
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob; /* a zero coeff is not followed by an EOB decision */
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        /* Sign bit, dequantize (DC factor for index 0, AC otherwise),
         * and store in zigzag order. */
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif
814
815 /**
816 * @param c arithmetic bitstream reader context
817 * @param block destination for block coefficients
818 * @param probs probabilities to use when reading trees from the bitstream
819 * @param i initial coeff index, 0 unless a separate DC block is coded
820 * @param zero_nhood the initial prediction context for number of surrounding
821 * all-zero blocks (only left/top, so 0-2)
822 * @param qmul array holding the dc/ac dequant factor at position 0/1
823 * @return 0 if no coeffs were decoded
824 * otherwise, the index of the last coeff decoded plus one
825 */
826 static av_always_inline
827 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
828 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
829 int i, int zero_nhood, int16_t qmul[2])
830 {
831 uint8_t *token_prob = probs[i][zero_nhood];
832 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
833 return 0;
834 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
835 }
836
/**
 * Decode all DCT coefficients of one macroblock: the optional luma DC
 * (WHT) block, the 16 luma blocks and the 8 chroma blocks, updating the
 * top/left non-zero context arrays as it goes.
 *
 * @param t_nnz top non-zero context (per column), updated in place
 * @param l_nnz left non-zero context (per row), updated in place
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    /* Whole-block modes code luma DC in a separate WHT block; I4x4 and
     * split-MV macroblocks code DC within each 4x4 block instead. */
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        /* Luma AC blocks then start at coeff 1 and use the "no DC"
         * probability context. */
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
897
898 static av_always_inline
899 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
900 int linesize, int uvlinesize, int simple)
901 {
902 AV_COPY128(top_border, src_y + 15*linesize);
903 if (!simple) {
904 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
905 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
906 }
907 }
908
/**
 * Exchange (or copy) the pixels above the current macroblock with the
 * saved top-border buffer, so intra prediction sees the pre-loop-filter
 * values. With xchg=1 the rows are swapped (and swapped back by a later
 * call); spans that prediction only reads are always copied (xchg
 * hardcoded to 1 below means "restore direction handled elsewhere").
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    /* Top-right pixels come from the next macroblock's border slot. */
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}
939
940 static av_always_inline
941 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
942 {
943 if (!mb_x) {
944 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
945 } else {
946 return mb_y ? mode : LEFT_DC_PRED8x8;
947 }
948 }
949
950 static av_always_inline
951 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
952 {
953 if (!mb_x) {
954 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
955 } else {
956 return mb_y ? mode : HOR_PRED8x8;
957 }
958 }
959
960 static av_always_inline
961 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
962 {
963 if (mode == DC_PRED8x8) {
964 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
965 } else {
966 return mode;
967 }
968 }
969
970 static av_always_inline
971 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
972 {
973 switch (mode) {
974 case DC_PRED8x8:
975 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
976 case VERT_PRED8x8:
977 return !mb_y ? DC_127_PRED8x8 : mode;
978 case HOR_PRED8x8:
979 return !mb_x ? DC_129_PRED8x8 : mode;
980 case PLANE_PRED8x8 /*TM*/:
981 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
982 }
983 return mode;
984 }
985
986 static av_always_inline
987 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
988 {
989 if (!mb_x) {
990 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
991 } else {
992 return mb_y ? mode : HOR_VP8_PRED;
993 }
994 }
995
/**
 * Edge-adjust a 4x4 intra mode without edge emulation. Some modes can be
 * replaced by a constant predictor; others must keep their mode but have
 * the missing edge pixels synthesized into a copy buffer by the caller
 * (signalled via *copy_buf).
 */
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        /* On the left edge (but not the top row) the topleft pixel is
         * missing; predict from a patched copy instead of switching mode. */
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        /* Top row: the above-left pixel is missing; use a patched copy. */
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
1029
/**
 * Intra-predict one macroblock: 16x16 luma (whole-MB mode or sixteen 4x4
 * sub-block modes) plus 8x8 chroma, applying the inverse transform on
 * non-zero 4x4 luma blocks in the MODE_I4x4 path.
 */
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        // whole-macroblock 16x16 luma prediction; pick the border-safe mode first
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge of
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;   // replicate last top pixel 4x
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                // staging buffer: 1 edge row + 4 prediction rows, 8-byte stride;
                // column 3 of each row holds the left-edge pixel
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;       // no real top-right on the first row
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        // predict into copy_dst with hand-built top/left edges,
                        // then copy the 4x4 result back into the frame below
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;             // synthetic top-left
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);  // synthetic top row
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;         // synthetic top-left
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            // synthetic left column
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    // write the staged 4x4 prediction back into the frame
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                // add residual: nnz==1 means DC-only block, cheaper idct
                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    // 8x8 chroma prediction, same mode for both planes
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    // restore the swapped border pixels (second xchg_mb_border call, xchg=0 path)
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1151
/* Sub-pixel filter geometry, indexed by the 3-bit MV fraction (0..7). */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1158
1159 /**
1160 * luma MC function
1161 *
1162 * @param s VP8 decoding context
1163 * @param dst target buffer for block data at block position
1164 * @param ref reference picture buffer at origin (0, 0)
1165 * @param mv motion vector (relative to block position) to get pixel data from
1166 * @param x_off horizontal position of block from origin (0, 0)
1167 * @param y_off vertical position of block from origin (0, 0)
1168 * @param block_w width of block (16, 8 or 4)
1169 * @param block_h height of block (always same as block_w)
1170 * @param width width of src/dst plane data
1171 * @param height height of src/dst plane data
1172 * @param linesize size of a single line of plane data, including padding
1173 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1174 */
1175 static av_always_inline
1176 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1177 AVFrame *ref, const VP56mv *mv,
1178 int x_off, int y_off, int block_w, int block_h,
1179 int width, int height, int linesize,
1180 vp8_mc_func mc_func[3][3])
1181 {
1182 uint8_t *src = ref->data[0];
1183
1184 if (AV_RN32A(mv)) {
1185
1186 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1187 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1188
1189 x_off += mv->x >> 2;
1190 y_off += mv->y >> 2;
1191
1192 // edge emulation
1193 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1194 src += y_off * linesize + x_off;
1195 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1196 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1197 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1198 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1199 x_off - mx_idx, y_off - my_idx, width, height);
1200 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1201 }
1202 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1203 } else {
1204 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1205 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1206 }
1207 }
1208
1209 /**
1210 * chroma MC function
1211 *
1212 * @param s VP8 decoding context
1213 * @param dst1 target buffer for block data at block position (U plane)
1214 * @param dst2 target buffer for block data at block position (V plane)
1215 * @param ref reference picture buffer at origin (0, 0)
1216 * @param mv motion vector (relative to block position) to get pixel data from
1217 * @param x_off horizontal position of block from origin (0, 0)
1218 * @param y_off vertical position of block from origin (0, 0)
1219 * @param block_w width of block (16, 8 or 4)
1220 * @param block_h height of block (always same as block_w)
1221 * @param width width of src/dst plane data
1222 * @param height height of src/dst plane data
1223 * @param linesize size of a single line of plane data, including padding
1224 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1225 */
1226 static av_always_inline
1227 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1228 AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1229 int block_w, int block_h, int width, int height, int linesize,
1230 vp8_mc_func mc_func[3][3])
1231 {
1232 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1233
1234 if (AV_RN32A(mv)) {
1235 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1236 int my = mv->y&7, my_idx = subpel_idx[0][my];
1237
1238 x_off += mv->x >> 3;
1239 y_off += mv->y >> 3;
1240
1241 // edge emulation
1242 src1 += y_off * linesize + x_off;
1243 src2 += y_off * linesize + x_off;
1244 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1245 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1246 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1247 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1248 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1249 x_off - mx_idx, y_off - my_idx, width, height);
1250 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1251 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1252
1253 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1254 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1255 x_off - mx_idx, y_off - my_idx, width, height);
1256 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1257 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1258 } else {
1259 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1260 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1261 }
1262 } else {
1263 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1264 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1265 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1266 }
1267 }
1268
1269 static av_always_inline
1270 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1271 AVFrame *ref_frame, int x_off, int y_off,
1272 int bx_off, int by_off,
1273 int block_w, int block_h,
1274 int width, int height, VP56mv *mv)
1275 {
1276 VP56mv uvmv = *mv;
1277
1278 /* Y */
1279 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1280 ref_frame, mv, x_off + bx_off, y_off + by_off,
1281 block_w, block_h, width, height, s->linesize,
1282 s->put_pixels_tab[block_w == 8]);
1283
1284 /* U/V */
1285 if (s->profile == 3) {
1286 uvmv.x &= ~7;
1287 uvmv.y &= ~7;
1288 }
1289 x_off >>= 1; y_off >>= 1;
1290 bx_off >>= 1; by_off >>= 1;
1291 width >>= 1; height >>= 1;
1292 block_w >>= 1; block_h >>= 1;
1293 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1294 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1295 &uvmv, x_off + bx_off, y_off + by_off,
1296 block_w, block_h, width, height, s->uvlinesize,
1297 s->put_pixels_tab[1 + (block_w == 4)]);
1298 }
1299
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        // estimate: current MV applied at this MB position, +8 to land
        // mid-block; +64 below biases the address a cache line ahead
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        // chroma: halved coordinates; U and V planes are assumed adjacent
        // (src[2]-src[1] used as the stride between them)
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
1319
1320 /**
1321 * Apply motion vectors to prediction buffer, chapter 18.
1322 */
1323 static av_always_inline
1324 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1325 VP8Macroblock *mb, int mb_x, int mb_y)
1326 {
1327 int x_off = mb_x << 4, y_off = mb_y << 4;
1328 int width = 16*s->mb_width, height = 16*s->mb_height;
1329 AVFrame *ref = s->framep[mb->ref_frame];
1330 VP56mv *bmv = mb->bmv;
1331
1332 switch (mb->partitioning) {
1333 case VP8_SPLITMVMODE_NONE:
1334 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1335 0, 0, 16, 16, width, height, &mb->mv);
1336 break;
1337 case VP8_SPLITMVMODE_4x4: {
1338 int x, y;
1339 VP56mv uvmv;
1340
1341 /* Y */
1342 for (y = 0; y < 4; y++) {
1343 for (x = 0; x < 4; x++) {
1344 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1345 ref, &bmv[4*y + x],
1346 4*x + x_off, 4*y + y_off, 4, 4,
1347 width, height, s->linesize,
1348 s->put_pixels_tab[2]);
1349 }
1350 }
1351
1352 /* U/V */
1353 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1354 for (y = 0; y < 2; y++) {
1355 for (x = 0; x < 2; x++) {
1356 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1357 mb->bmv[ 2*y * 4 + 2*x+1].x +
1358 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1359 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1360 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1361 mb->bmv[ 2*y * 4 + 2*x+1].y +
1362 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1363 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1364 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1365 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1366 if (s->profile == 3) {
1367 uvmv.x &= ~7;
1368 uvmv.y &= ~7;
1369 }
1370 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1371 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1372 4*x + x_off, 4*y + y_off, 4, 4,
1373 width, height, s->uvlinesize,
1374 s->put_pixels_tab[2]);
1375 }
1376 }
1377 break;
1378 }
1379 case VP8_SPLITMVMODE_16x8:
1380 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1381 0, 0, 16, 8, width, height, &bmv[0]);
1382 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1383 0, 8, 16, 8, width, height, &bmv[1]);
1384 break;
1385 case VP8_SPLITMVMODE_8x16:
1386 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1387 0, 0, 8, 16, width, height, &bmv[0]);
1388 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1389 8, 0, 8, 16, width, height, &bmv[1]);
1390 break;
1391 case VP8_SPLITMVMODE_8x8:
1392 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1393 0, 0, 8, 8, width, height, &bmv[0]);
1394 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1395 8, 0, 8, 8, width, height, &bmv[1]);
1396 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397 0, 8, 8, 8, width, height, &bmv[2]);
1398 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399 8, 8, 8, 8, width, height, &bmv[3]);
1400 break;
1401 }
1402 }
1403
/**
 * Add the inverse-transformed residual of one inter macroblock to the
 * prediction in dst (luma + both chroma planes). Each byte of nnz4 is the
 * coefficient count of one 4x4 block: 0 = skip, 1 = DC-only (cheap idct),
 * >1 = full idct. The I4x4 luma case is handled in intra_predict instead.
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // at least one block needs a full idct: walk byte by byte
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;          // no more non-zero blocks in this row
                    }
                } else {
                    // every non-zero block in this row is DC-only: batched add
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                // mixed DC-only/full blocks: per-block dispatch over the 2x2 grid
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;   // rest of this plane is zero
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                // all four blocks DC-only: batched add
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
1456
1457 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1458 {
1459 int interior_limit, filter_level;
1460
1461 if (s->segmentation.enabled) {
1462 filter_level = s->segmentation.filter_level[mb->segment];
1463 if (!s->segmentation.absolute_vals)
1464 filter_level += s->filter.level;
1465 } else
1466 filter_level = s->filter.level;
1467
1468 if (s->lf_delta.enabled) {
1469 filter_level += s->lf_delta.ref[mb->ref_frame];
1470 filter_level += s->lf_delta.mode[mb->mode];
1471 }
1472
1473 filter_level = av_clip_uintp2(filter_level, 6);
1474
1475 interior_limit = filter_level;
1476 if (s->filter.sharpness) {
1477 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1478 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1479 }
1480 interior_limit = FFMAX(interior_limit, 1);
1481
1482 f->filter_level = filter_level;
1483 f->inner_limit = interior_limit;
1484 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1485 }
1486
/**
 * Run the normal (non-simple) loop filter on one macroblock: macroblock
 * edges (left if mb_x, top if mb_y) with the stronger mbedge limit, then
 * the three inner luma edges and one inner chroma edge per direction with
 * the weaker bedge limit.
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    // high-edge-variance threshold by filter level; separate tables for
    // keyframes (index 1) and interframes (index 0)
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    // left macroblock edge (skip in the first column)
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    // top macroblock edge (skip in the first row)
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1556
1557 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1558 {
1559 int mbedge_lim, bedge_lim;
1560 int filter_level = f->filter_level;
1561 int inner_limit = f->inner_limit;
1562 int inner_filter = f->inner_filter;
1563 int linesize = s->linesize;
1564
1565 if (!filter_level)
1566 return;
1567
1568 bedge_lim = 2*filter_level + inner_limit;
1569 mbedge_lim = bedge_lim + 4;
1570
1571 if (mb_x)
1572 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1573 if (inner_filter) {
1574 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1575 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1576 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1577 }
1578
1579 if (mb_y)
1580 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1581 if (inner_filter) {
1582 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1583 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1584 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1585 }
1586 }
1587
1588 static void release_queued_segmaps(VP8Context *s, int is_close)
1589 {
1590 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1591 while (s->num_maps_to_be_freed > leave_behind)
1592 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1593 s->maps_are_invalid = 0;
1594 }
1595
#define MARGIN (16 << 2)
/**
 * First pass over the frame for frame-threaded decoding: decode only the
 * per-macroblock modes and motion vectors (decode_mb_mode with layout=1),
 * shrinking the MV clamp window (mv_min/mv_max, 1/8-pel units) by 64 per
 * macroblock as the position advances.
 */
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
                                   AVFrame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        // +1 row/column of border macroblocks in the layout-1 arrangement
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        // reset the left-neighbour 4x4 mode cache for this row
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                // initialize top-neighbour modes for the first row
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}
1625
/* Sliced-threading synchronization: block until thread `otd` has reached at
 * least macroblock (mb_x_check, mb_y_check). Positions are compared as the
 * packed value (mb_y << 16) | mb_x; td->wait_mb_pos advertises what this
 * thread is waiting for so the other thread knows when to broadcast. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while(0);
1641
/* Sliced-threading progress update: publish this thread's packed position
 * ((mb_y << 16) | mb_x) and wake the neighbouring threads, but only when one
 * of them is actually waiting at or before this position (pos_check). Relies
 * on avctx, num_jobs, next_td and prev_td being in scope at the call site. */
#define update_pos(td, mb_y, mb_x)\
    do {\
    int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
    int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
    int is_null          = (next_td == NULL) || (prev_td == NULL);\
    int pos_check        = (is_null) ? 1 :\
                            (next_td != td && pos >= next_td->wait_mb_pos) ||\
                            (prev_td != td && pos >= prev_td->wait_mb_pos);\
    td->thread_mb_pos = pos;\
    if (sliced_threading && pos_check) {\
        pthread_mutex_lock(&td->lock);\
        pthread_cond_broadcast(&td->cond);\
        pthread_mutex_unlock(&td->lock);\
    }\
    } while(0);
1657
/**
 * Decode one macroblock row (mode/MV decode when not pre-decoded, coefficient
 * decode, intra/inter prediction and idct), without loop filtering. The row
 * to process comes from td->thread_mb_pos; neighbouring sliced-threading jobs
 * are synchronized with check_thread_pos/update_pos.
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
    // coefficient partitions are assigned to rows round-robin
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] + 8*mb_y*s->uvlinesize,
        curframe->data[2] + 8*mb_y*s->uvlinesize
    };
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        // layout 1: modes/MVs pre-decoded into macroblocks_base (frame threading)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                // thread 0's predecessor also runs the filter pass, whose
                // positions are offset by mb_width+3 (see vp8_filter_mb_row)
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        // layout 0: modes/MVs decoded inline with the row
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8] = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        // last sliced job: save the unfiltered border now, since the filter
        // pass for this row runs in a different job
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        // NOTE(review): mb_x never reaches s->mb_width+1 inside this loop
        // (condition is mb_x < s->mb_width), so the first branch looks dead;
        // confirm against upstream history before removing.
        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
1767
/**
 * Loop-filter one macroblock row using the strengths computed in the decode
 * pass (td->filter_strength). Filter-pass positions reported via update_pos
 * are offset by mb_width+3 to distinguish them from decode-pass positions.
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] + 8*mb_y*s->uvlinesize,
        curframe->data[2] + 8*mb_y*s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        // wait for the previous row's filter pass (offset positions)
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        // wait for the next row's decode pass, unless it is thread 0 which
        // waits on offset (filter-pass) positions itself
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
            }

        // single job: the border was not saved in the decode pass, do it here
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}
1821
1822 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1823 int jobnr, int threadnr)
1824 {
1825 VP8Context *s = avctx->priv_data;
1826 VP8ThreadData *td = &s->thread_data[jobnr];
1827 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1828 AVFrame *curframe = s->curframe;
1829 int mb_y, num_jobs = s->num_jobs;
1830 td->thread_nr = threadnr;
1831 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1832 if (mb_y >= s->mb_height) break;
1833 td->thread_mb_pos = mb_y<<16;
1834 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1835 if (s->deblock_filter)
1836 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1837 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1838
1839 s->mv_min.y -= 64;
1840 s->mv_max.y -= 64;
1841
1842 if (avctx->active_thread_type == FF_THREAD_FRAME)
1843 ff_thread_report_progress(curframe, mb_y, 0);
1844 }
1845
1846 return 0;
1847 }
1848
/**
 * Decode a single VP8 frame from a packet.
 *
 * On success the picture (if visible) is copied into *data and
 * *data_size is set to sizeof(AVFrame); invisible/skipped frames
 * produce no output. Returns the number of bytes consumed (the whole
 * packet) or a negative AVERROR code.
 */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe), *prev_frame;

    /* Free segmentation maps queued by previously released frames; safe
     * now because no earlier frame thread still needs them. */
    release_queued_segmaps(s, 0);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    /* The frame is "referenced" if it will be stored into any of the
     * last/golden/altref reference slots. */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    /* Map reference status onto the user's skip policy: unreferenced
     * frames are cheapest to drop, keyframes the most expensive. */
    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        /* Skipped: keep the reference slots unchanged and emit nothing. */
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i], 1, 0);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    /* Cannot happen while at most four of the five pool slots are
     * referenced; hitting this means the bookkeeping above is broken. */
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->data[0])
        vp8_release_frame(s, curframe, 1, 0);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = vp8_alloc_frame(s, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    /* Compute the reference slots as they will be after this frame;
     * they are committed to s->framep only after decoding succeeds. */
    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    /* Header parsing and reference setup are done; from here on other
     * frame threads may proceed with their own packets. */
    ff_thread_finish_setup(avctx);

    s->linesize = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    /* Per-thread scratch for motion compensation that reads outside the
     * picture. 21 rows presumably covers a 16x16 block plus the subpel
     * filter margin -- TODO confirm. NOTE(review): the av_malloc result
     * is not checked; a failed allocation would crash later. */
    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));


    // Make sure the previous frame has read its segmentation map,
    // if we re-use the same map.
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
        ff_thread_await_progress(prev_frame, 1, 0);

    /* With the alternate macroblock layout, MV/mode parsing runs as a
     * separate pass before the sliced row decode below. */
    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);

    /* Frame threading decodes the whole frame as one job; otherwise use
     * one job per coefficient partition, capped by the thread count. */
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs = num_jobs;
    s->curframe = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(curframe, INT_MAX, 0);
    /* Commit the new reference configuration computed above. */
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    /* curframe is only valid when the frame was actually decoded; the
     * skip path sets s->invisible, so this branch never reads it
     * uninitialized. */
    if (!s->invisible) {
        *(AVFrame*)data = *curframe;
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
2005
2006 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2007 {
2008 VP8Context *s = avctx->priv_data;
2009
2010 s->avctx = avctx;
2011 avctx->pix_fmt = PIX_FMT_YUV420P;
2012
2013 ff_dsputil_init(&s->dsp, avctx);
2014 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
2015 ff_vp8dsp_init(&s->vp8dsp);
2016
2017 return 0;
2018 }
2019
2020 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2021 {
2022 vp8_decode_flush_impl(avctx, 0, 1, 1);
2023 release_queued_segmaps(avctx->priv_data, 1);
2024 return 0;
2025 }
2026
2027 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2028 {
2029 VP8Context *s = avctx->priv_data;
2030
2031 s->avctx = avctx;
2032
2033 return 0;
2034 }
2035
/* Translate a frame pointer from the source thread's frame pool into
 * the corresponding slot of this thread's pool (the pools are copies of
 * each other, so only the base address differs). Relies on locals named
 * `s` and `s_src` in the expanding scope.
 * The argument and the whole expansion are parenthesized so the macro
 * stays well-formed inside larger expressions (CERT PRE01-C/PRE02-C). */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2038
2039 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2040 {
2041 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2042
2043 if (s->macroblocks_base &&
2044 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2045 free_buffers(s);
2046 s->maps_are_invalid = 1;
2047 s->mb_width = s_src->mb_width;
2048 s->mb_height = s_src->mb_height;
2049 }
2050
2051 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2052 s->segmentation = s_src->segmentation;
2053 s->lf_delta = s_src->lf_delta;
2054 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2055
2056 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2057 s->framep[0] = REBASE(s_src->next_framep[0]);
2058 s->framep[1] = REBASE(s_src->next_framep[1]);
2059 s->framep[2] = REBASE(s_src->next_framep[2]);
2060 s->framep[3] = REBASE(s_src->next_framep[3]);
2061
2062 return 0;
2063 }
2064
/* Public decoder descriptor registered with libavcodec. */
AVCodec ff_vp8_decoder = {
    .name = "vp8",
    .type = AVMEDIA_TYPE_VIDEO,
    .id = CODEC_ID_VP8,
    .priv_data_size = sizeof(VP8Context),
    .init = vp8_decode_init,
    .close = vp8_decode_free,
    .decode = vp8_decode_frame,
    /* Direct rendering plus both frame- and slice-level threading; the
     * thread entry points below keep per-thread contexts in sync. */
    .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush = vp8_decode_flush,
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};