lavc/vaapi: Add VP8 decode hwaccel
[libav.git] / libavcodec / vp8.c
1 /*
2 * VP7/VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
9 *
10 * This file is part of Libav.
11 *
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27 #include "libavutil/imgutils.h"
28
29 #include "avcodec.h"
30 #include "internal.h"
31 #include "mathops.h"
32 #include "rectangle.h"
33 #include "thread.h"
34 #include "vp8.h"
35 #include "vp8data.h"
36
37 #if ARCH_ARM
38 # include "arm/vp8.h"
39 #endif
40
41 static void free_buffers(VP8Context *s)
42 {
43 int i;
44 if (s->thread_data)
45 for (i = 0; i < MAX_THREADS; i++) {
46 #if HAVE_THREADS
47 pthread_cond_destroy(&s->thread_data[i].cond);
48 pthread_mutex_destroy(&s->thread_data[i].lock);
49 #endif
50 av_freep(&s->thread_data[i].filter_strength);
51 }
52 av_freep(&s->thread_data);
53 av_freep(&s->macroblocks_base);
54 av_freep(&s->intra4x4_pred_mode_top);
55 av_freep(&s->top_nnz);
56 av_freep(&s->top_border);
57
58 s->macroblocks = NULL;
59 }
60
61 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
62 {
63 int ret;
64 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
65 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
66 return ret;
67 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
68 goto fail;
69 if (s->avctx->hwaccel) {
70 const AVHWAccel *hwaccel = s->avctx->hwaccel;
71 if (hwaccel->frame_priv_data_size) {
72 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
73 if (!f->hwaccel_priv_buf)
74 goto fail;
75 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
76 }
77 }
78 return 0;
79
80 fail:
81 av_buffer_unref(&f->seg_map);
82 ff_thread_release_buffer(s->avctx, &f->tf);
83 return AVERROR(ENOMEM);
84 }
85
86 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
87 {
88 av_buffer_unref(&f->seg_map);
89 av_buffer_unref(&f->hwaccel_priv_buf);
90 f->hwaccel_picture_private = NULL;
91 ff_thread_release_buffer(s->avctx, &f->tf);
92 }
93
94 #if CONFIG_VP8_DECODER
95 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
96 {
97 int ret;
98
99 vp8_release_frame(s, dst);
100
101 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
102 return ret;
103 if (src->seg_map &&
104 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
105 vp8_release_frame(s, dst);
106 return AVERROR(ENOMEM);
107 }
108 if (src->hwaccel_picture_private) {
109 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
110 if (!dst->hwaccel_priv_buf)
111 return AVERROR(ENOMEM);
112 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
113 }
114
115 return 0;
116 }
117 #endif /* CONFIG_VP8_DECODER */
118
119 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
120 {
121 VP8Context *s = avctx->priv_data;
122 int i;
123
124 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
125 vp8_release_frame(s, &s->frames[i]);
126 memset(s->framep, 0, sizeof(s->framep));
127
128 if (free_mem)
129 free_buffers(s);
130 }
131
132 static void vp8_decode_flush(AVCodecContext *avctx)
133 {
134 vp8_decode_flush_impl(avctx, 0);
135 }
136
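/* Note: the pool below holds 5 frames because at most 4 can be live at
 * once (current, previous, golden and altref), so the linear scan always
 * finds a free slot unless reference bookkeeping is broken, which is
 * treated as a fatal bug rather than a recoverable error. */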
137 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
138 {
139 VP8Frame *frame = NULL;
140 int i;
141
142 // find a free buffer
143 for (i = 0; i < 5; i++)
144 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
145 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
146 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
147 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
148 frame = &s->frames[i];
149 break;
150 }
151 if (i == 5) {
152 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
153 abort();
154 }
155 if (frame->tf.f->buf[0])
156 vp8_release_frame(s, frame);
157
158 return frame;
159 }
160
161 static av_always_inline
162 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
163 {
164 AVCodecContext *avctx = s->avctx;
165 int i, ret;
166
167 if (width != s->avctx->width ||
168 height != s->avctx->height) {
169 vp8_decode_flush_impl(s->avctx, 1);
170
171 ret = ff_set_dimensions(s->avctx, width, height);
172 if (ret < 0)
173 return ret;
174 }
175
176 s->mb_width = (s->avctx->coded_width + 15) / 16;
177 s->mb_height = (s->avctx->coded_height + 15) / 16;
178
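/* mb_layout selects how macroblock state is stored: 0 keeps a compact
 * rolling array (frame threading or a single thread), while 1 keeps every
 * macroblock of the frame addressable, as needed by slice threading with
 * multiple coefficient partitions and by VP7 MV prediction, which can
 * reference macroblocks beyond the previous row. */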
179 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
180 FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
181 if (!s->mb_layout) { // Frame threading and one thread
182 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
183 sizeof(*s->macroblocks));
184 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
185 } else // Sliced threading
186 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
187 sizeof(*s->macroblocks));
188 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
189 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
190 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
191
192 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
193 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
194 free_buffers(s);
195 return AVERROR(ENOMEM);
196 }
197
198 for (i = 0; i < MAX_THREADS; i++) {
199 s->thread_data[i].filter_strength =
200 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
201 if (!s->thread_data[i].filter_strength) {
202 free_buffers(s);
203 return AVERROR(ENOMEM);
204 }
205 #if HAVE_THREADS
206 pthread_mutex_init(&s->thread_data[i].lock, NULL);
207 pthread_cond_init(&s->thread_data[i].cond, NULL);
208 #endif
209 }
210
211 s->macroblocks = s->macroblocks_base + 1;
212
213 return 0;
214 }
215
216 static int vp7_update_dimensions(VP8Context *s, int width, int height)
217 {
218 return update_dimensions(s, width, height, IS_VP7);
219 }
220
221 static int vp8_update_dimensions(VP8Context *s, int width, int height)
222 {
223 return update_dimensions(s, width, height, IS_VP8);
224 }
225
226 static void parse_segment_info(VP8Context *s)
227 {
228 VP56RangeCoder *c = &s->c;
229 int i;
230
231 s->segmentation.update_map = vp8_rac_get(c);
232 s->segmentation.update_feature_data = vp8_rac_get(c);
233
234 if (s->segmentation.update_feature_data) {
235 s->segmentation.absolute_vals = vp8_rac_get(c);
236
237 for (i = 0; i < 4; i++)
238 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
239
240 for (i = 0; i < 4; i++)
241 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
242 }
243 if (s->segmentation.update_map)
244 for (i = 0; i < 3; i++)
245 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
246 }
247
248 static void update_lf_deltas(VP8Context *s)
249 {
250 VP56RangeCoder *c = &s->c;
251 int i;
252
253 for (i = 0; i < 4; i++) {
254 if (vp8_rac_get(c)) {
255 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
256
257 if (vp8_rac_get(c))
258 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
259 }
260 }
261
262 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
263 if (vp8_rac_get(c)) {
264 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
265
266 if (vp8_rac_get(c))
267 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
268 }
269 }
270 }
271
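/* Token partition layout: a 2-bit log2 below gives 1, 2, 4 or 8
 * coefficient partitions. The sizes of all but the last are stored up
 * front as 3-byte little-endian values; the last partition takes whatever
 * remains. E.g. with 4 partitions, 9 size bytes precede the partition
 * data. */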
272 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
273 {
274 const uint8_t *sizes = buf;
275 int i;
276
277 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
278
279 buf += 3 * (s->num_coeff_partitions - 1);
280 buf_size -= 3 * (s->num_coeff_partitions - 1);
281 if (buf_size < 0)
282 return -1;
283
284 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
285 int size = AV_RL24(sizes + 3 * i);
286 if (buf_size - size < 0)
287 return -1;
288 s->coeff_partition_size[i] = size;
289
290 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
291 buf += size;
292 buf_size -= size;
293 }
294
295 s->coeff_partition_size[i] = buf_size;
296 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
297
298 return 0;
299 }
300
301 static void vp7_get_quants(VP8Context *s)
302 {
303 VP56RangeCoder *c = &s->c;
304
305 int yac_qi = vp8_rac_get_uint(c, 7);
306 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
307 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
308 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
309 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
310 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
311
312 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
313 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
314 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
315 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
316 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
317 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
318 }
319
320 static void get_quants(VP8Context *s)
321 {
322 VP56RangeCoder *c = &s->c;
323 int i, base_qi;
324
325 s->quant.yac_qi = vp8_rac_get_uint(c, 7);
326 s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
327 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
328 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
329 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
330 s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
331
332 for (i = 0; i < 4; i++) {
333 if (s->segmentation.enabled) {
334 base_qi = s->segmentation.base_quant[i];
335 if (!s->segmentation.absolute_vals)
336 base_qi += s->quant.yac_qi;
337 } else
338 base_qi = s->quant.yac_qi;
339
340 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
341 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
342 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
343 /* 101581>>16 is equivalent to 155/100 */
344 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
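/* 101581 / 2^16 = 1.55 to five decimal places, so the multiply-and-shift
 * above applies the 155/100 scaling of the Y2 AC quantizer without a
 * division; the FFMAX below then enforces the minimum value of 8. */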
345 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
346 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
347
348 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
349 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
350 }
351 }
352
353 /**
354 * Determine which buffers golden and altref should be updated with after this frame.
355 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
356 *
357 * Intra frames update all 3 references
358 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
359 * If the update (golden|altref) flag is set, it's updated with the current frame
360 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
361 * If the flag is not set, the number read means:
362 * 0: no update
363 * 1: VP56_FRAME_PREVIOUS
364 * 2: update golden with altref, or update altref with golden
365 */
366 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
367 {
368 VP56RangeCoder *c = &s->c;
369
370 if (update)
371 return VP56_FRAME_CURRENT;
372
373 switch (vp8_rac_get_uint(c, 2)) {
374 case 1:
375 return VP56_FRAME_PREVIOUS;
376 case 2:
377 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
378 }
379 return VP56_FRAME_NONE;
380 }
381
382 static void vp78_reset_probability_tables(VP8Context *s)
383 {
384 int i, j;
385 for (i = 0; i < 4; i++)
386 for (j = 0; j < 16; j++)
387 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
388 sizeof(s->prob->token[i][j]));
389 }
390
391 static void vp78_update_probability_tables(VP8Context *s)
392 {
393 VP56RangeCoder *c = &s->c;
394 int i, j, k, l, m;
395
396 for (i = 0; i < 4; i++)
397 for (j = 0; j < 8; j++)
398 for (k = 0; k < 3; k++)
399 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
400 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
401 int prob = vp8_rac_get_uint(c, 8);
402 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
403 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
404 }
405 }
406
407 #define VP7_MVC_SIZE 17
408 #define VP8_MVC_SIZE 19
409
410 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
411 int mvc_size)
412 {
413 VP56RangeCoder *c = &s->c;
414 int i, j;
415
416 if (vp8_rac_get(c))
417 for (i = 0; i < 4; i++)
418 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
419 if (vp8_rac_get(c))
420 for (i = 0; i < 3; i++)
421 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
422
423 // 17.2 MV probability update
424 for (i = 0; i < 2; i++)
425 for (j = 0; j < mvc_size; j++)
426 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
427 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
428 }
429
430 static void update_refs(VP8Context *s)
431 {
432 VP56RangeCoder *c = &s->c;
433
434 int update_golden = vp8_rac_get(c);
435 int update_altref = vp8_rac_get(c);
436
437 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
438 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
439 }
440
441 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
442 {
443 int i, j;
444
445 for (j = 1; j < 3; j++) {
446 for (i = 0; i < height / 2; i++)
447 memcpy(dst->data[j] + i * dst->linesize[j],
448 src->data[j] + i * src->linesize[j], width / 2);
449 }
450 }
451
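/* Per-pixel fade used by VP7: beta is a signed contrast term in 1/256
 * units and alpha a signed brightness offset, i.e.
 * dst = clip_uint8(y + y * beta / 256 + alpha). */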
452 static void fade(uint8_t *dst, uint8_t *src,
453 int width, int height, ptrdiff_t linesize,
454 int alpha, int beta)
455 {
456 int i, j;
457
458 for (j = 0; j < height; j++) {
459 for (i = 0; i < width; i++) {
460 uint8_t y = src[j * linesize + i];
461 dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
462 }
463 }
464 }
465
466 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
467 {
468 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
469 int beta = (int8_t) vp8_rac_get_uint(c, 8);
470 int ret;
471
472 if (!s->keyframe && (alpha || beta)) {
473 int width = s->mb_width * 16;
474 int height = s->mb_height * 16;
475 AVFrame *src, *dst;
476
477 if (!s->framep[VP56_FRAME_PREVIOUS])
478 return AVERROR_INVALIDDATA;
479
480 dst =
481 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
482
483 /* preserve the golden frame, write a new previous frame */
484 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
485 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
486 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
487 return ret;
488
489 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
490
491 copy_chroma(dst, src, width, height);
492 }
493
494 fade(dst->data[0], src->data[0],
495 width, height, dst->linesize[0], alpha, beta);
496 }
497
498 return 0;
499 }
500
501 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
502 {
503 VP56RangeCoder *c = &s->c;
504 int part1_size, hscale, vscale, i, j, ret;
505 int width = s->avctx->width;
506 int height = s->avctx->height;
507
508 if (buf_size < 4) {
509 return AVERROR_INVALIDDATA;
510 }
511
512 s->profile = (buf[0] >> 1) & 7;
513 if (s->profile > 1) {
514 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
515 return AVERROR_INVALIDDATA;
516 }
517
518 s->keyframe = !(buf[0] & 1);
519 s->invisible = 0;
520 part1_size = AV_RL24(buf) >> 4;
521
522 buf += 4 - s->profile;
523 buf_size -= 4 - s->profile;
524
525 if (buf_size < part1_size) {
526 return AVERROR_INVALIDDATA;
527 }
528
529 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
530
531 ff_vp56_init_range_decoder(c, buf, part1_size);
532 buf += part1_size;
533 buf_size -= part1_size;
534
535 /* A. Dimension information (keyframes only) */
536 if (s->keyframe) {
537 width = vp8_rac_get_uint(c, 12);
538 height = vp8_rac_get_uint(c, 12);
539 hscale = vp8_rac_get_uint(c, 2);
540 vscale = vp8_rac_get_uint(c, 2);
541 if (hscale || vscale)
542 avpriv_request_sample(s->avctx, "Upscaling");
543
544 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
545 vp78_reset_probability_tables(s);
546 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
547 sizeof(s->prob->pred16x16));
548 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
549 sizeof(s->prob->pred8x8c));
550 for (i = 0; i < 2; i++)
551 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
552 sizeof(vp7_mv_default_prob[i]));
553 memset(&s->segmentation, 0, sizeof(s->segmentation));
554 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
555 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
556 }
557
558 if (s->keyframe || s->profile > 0)
559 memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));
560
561 /* B. Decoding information for all four macroblock-level features */
562 for (i = 0; i < 4; i++) {
563 s->feature_enabled[i] = vp8_rac_get(c);
564 if (s->feature_enabled[i]) {
565 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
566
567 for (j = 0; j < 3; j++)
568 s->feature_index_prob[i][j] =
569 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
570
571 if (vp7_feature_value_size[s->profile][i])
572 for (j = 0; j < 4; j++)
573 s->feature_value[i][j] =
574 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
575 }
576 }
577
578 s->segmentation.enabled = 0;
579 s->segmentation.update_map = 0;
580 s->lf_delta.enabled = 0;
581
582 s->num_coeff_partitions = 1;
583 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
584
585 if (!s->macroblocks_base || /* first frame */
586 width != s->avctx->width || height != s->avctx->height ||
587 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
588 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
589 return ret;
590 }
591
592 /* C. Dequantization indices */
593 vp7_get_quants(s);
594
595 /* D. Golden frame update flag (a Flag) for interframes only */
596 if (!s->keyframe) {
597 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
598 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
599 }
600
601 s->update_last = 1;
602 s->update_probabilities = 1;
603 s->fade_present = 1;
604
605 if (s->profile > 0) {
606 s->update_probabilities = vp8_rac_get(c);
607 if (!s->update_probabilities)
608 s->prob[1] = s->prob[0];
609
610 if (!s->keyframe)
611 s->fade_present = vp8_rac_get(c);
612 }
613
614 /* E. Fading information for previous frame */
615 if (s->fade_present && vp8_rac_get(c)) {
616 if ((ret = vp7_fade_frame(s, c)) < 0)
617 return ret;
618 }
619
620 /* F. Loop filter type */
621 if (!s->profile)
622 s->filter.simple = vp8_rac_get(c);
623
624 /* G. DCT coefficient ordering specification */
625 if (vp8_rac_get(c))
626 for (i = 1; i < 16; i++)
627 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
628
629 /* H. Loop filter levels */
630 if (s->profile > 0)
631 s->filter.simple = vp8_rac_get(c);
632 s->filter.level = vp8_rac_get_uint(c, 6);
633 s->filter.sharpness = vp8_rac_get_uint(c, 3);
634
635 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
636 vp78_update_probability_tables(s);
637
638 s->mbskip_enabled = 0;
639
640 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
641 if (!s->keyframe) {
642 s->prob->intra = vp8_rac_get_uint(c, 8);
643 s->prob->last = vp8_rac_get_uint(c, 8);
644 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
645 }
646
647 return 0;
648 }
649
650 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
651 {
652 VP56RangeCoder *c = &s->c;
653 int header_size, hscale, vscale, ret;
654 int width = s->avctx->width;
655 int height = s->avctx->height;
656
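/* 3-byte VP8 frame tag: bit 0 = inter-frame flag, bits 1-3 = profile
 * (version), bit 4 = show_frame, bits 5-23 = size of the first
 * ("header") partition */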
657 s->keyframe = !(buf[0] & 1);
658 s->profile = (buf[0]>>1) & 7;
659 s->invisible = !(buf[0] & 0x10);
660 header_size = AV_RL24(buf) >> 5;
661 buf += 3;
662 buf_size -= 3;
663
664 s->header_partition_size = header_size;
665
666 if (s->profile > 3)
667 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
668
669 if (!s->profile)
670 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
671 sizeof(s->put_pixels_tab));
672 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
673 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
674 sizeof(s->put_pixels_tab));
675
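/* keyframes carry 7 extra bytes (3-byte start code plus two 2-byte
 * dimension fields) ahead of the header partition, hence the offset */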
676 if (header_size > buf_size - 7 * s->keyframe) {
677 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
678 return AVERROR_INVALIDDATA;
679 }
680
681 if (s->keyframe) {
682 if (AV_RL24(buf) != 0x2a019d) {
683 av_log(s->avctx, AV_LOG_ERROR,
684 "Invalid start code 0x%x\n", AV_RL24(buf));
685 return AVERROR_INVALIDDATA;
686 }
687 width = AV_RL16(buf + 3) & 0x3fff;
688 height = AV_RL16(buf + 5) & 0x3fff;
689 hscale = buf[4] >> 6;
690 vscale = buf[6] >> 6;
691 buf += 7;
692 buf_size -= 7;
693
694 if (hscale || vscale)
695 avpriv_request_sample(s->avctx, "Upscaling");
696
697 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
698 vp78_reset_probability_tables(s);
699 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
700 sizeof(s->prob->pred16x16));
701 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
702 sizeof(s->prob->pred8x8c));
703 memcpy(s->prob->mvc, vp8_mv_default_prob,
704 sizeof(s->prob->mvc));
705 memset(&s->segmentation, 0, sizeof(s->segmentation));
706 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
707 }
708
709 ff_vp56_init_range_decoder(c, buf, header_size);
710 buf += header_size;
711 buf_size -= header_size;
712
713 if (s->keyframe) {
714 s->colorspace = vp8_rac_get(c);
715 if (s->colorspace)
716 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
717 s->fullrange = vp8_rac_get(c);
718 }
719
720 if ((s->segmentation.enabled = vp8_rac_get(c)))
721 parse_segment_info(s);
722 else
723 s->segmentation.update_map = 0; // FIXME: move this to some init function?
724
725 s->filter.simple = vp8_rac_get(c);
726 s->filter.level = vp8_rac_get_uint(c, 6);
727 s->filter.sharpness = vp8_rac_get_uint(c, 3);
728
729 if ((s->lf_delta.enabled = vp8_rac_get(c))) {
730 s->lf_delta.update = vp8_rac_get(c);
731 if (s->lf_delta.update)
732 update_lf_deltas(s);
733 }
734
735 if (setup_partitions(s, buf, buf_size)) {
736 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
737 return AVERROR_INVALIDDATA;
738 }
739
740 if (!s->macroblocks_base || /* first frame */
741 width != s->avctx->width || height != s->avctx->height)
742 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
743 return ret;
744
745 get_quants(s);
746
747 if (!s->keyframe) {
748 update_refs(s);
749 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
750 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
751 }
752
753 // if we aren't saving this frame's probabilities for future frames,
754 // make a copy of the current probabilities
755 if (!(s->update_probabilities = vp8_rac_get(c)))
756 s->prob[1] = s->prob[0];
757
758 s->update_last = s->keyframe || vp8_rac_get(c);
759
760 vp78_update_probability_tables(s);
761
762 if ((s->mbskip_enabled = vp8_rac_get(c)))
763 s->prob->mbskip = vp8_rac_get_uint(c, 8);
764
765 if (!s->keyframe) {
766 s->prob->intra = vp8_rac_get_uint(c, 8);
767 s->prob->last = vp8_rac_get_uint(c, 8);
768 s->prob->golden = vp8_rac_get_uint(c, 8);
769 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
770 }
771
772 // Record the entropy coder state here so that hwaccels can use it.
773 s->c.code_word = vp56_rac_renorm(&s->c);
774 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
775 s->coder_state_at_header_end.range = s->c.high;
776 s->coder_state_at_header_end.value = s->c.code_word >> 16;
777 s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
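// After the renorm, -bits is the number of unconsumed bits buffered in
// code_word, so the snapshot rewinds the input pointer by the whole
// unconsumed bytes (-bits / 8), keeps the leftover bit offset in
// bit_count, and saves the 16-bit range/value pair, letting a hardware
// decoder resume the boolean coder exactly at the coefficient data.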
778
779 return 0;
780 }
781
782 static av_always_inline
783 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
784 {
785 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
786 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
787 }
788
789 /**
790 * Motion vector coding, 17.1.
791 */
792 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
793 {
794 int bit, x = 0;
795
796 if (vp56_rac_get_prob_branchy(c, p[0])) {
797 int i;
798
799 for (i = 0; i < 3; i++)
800 x += vp56_rac_get_prob(c, p[9 + i]) << i;
801 for (i = (vp7 ? 7 : 9); i > 3; i--)
802 x += vp56_rac_get_prob(c, p[9 + i]) << i;
803 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
804 x += 8;
805 } else {
806 // small_mvtree
807 const uint8_t *ps = p + 2;
808 bit = vp56_rac_get_prob(c, *ps);
809 ps += 1 + 3 * bit;
810 x += 4 * bit;
811 bit = vp56_rac_get_prob(c, *ps);
812 ps += 1 + bit;
813 x += 2 * bit;
814 x += vp56_rac_get_prob(c, *ps);
815 }
816
817 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
818 }
819
820 static av_always_inline
821 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
822 {
823 if (is_vp7)
824 return vp7_submv_prob;
825
826 if (left == top)
827 return vp8_submv_prob[4 - !!left];
828 if (!top)
829 return vp8_submv_prob[2];
830 return vp8_submv_prob[1 - !!left];
831 }
832
833 /**
834 * Split motion vector prediction, 16.4.
835 * @returns the number of motion vectors parsed (2, 4 or 16)
836 */
837 static av_always_inline
838 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
839 int layout, int is_vp7)
840 {
841 int part_idx;
842 int n, num;
843 VP8Macroblock *top_mb;
844 VP8Macroblock *left_mb = &mb[-1];
845 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
846 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
847 VP56mv *top_mv;
848 VP56mv *left_mv = left_mb->bmv;
849 VP56mv *cur_mv = mb->bmv;
850
851 if (!layout) // layout is inlined, s->mb_layout is not
852 top_mb = &mb[2];
853 else
854 top_mb = &mb[-s->mb_width - 1];
855 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
856 top_mv = top_mb->bmv;
857
858 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
859 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
860 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
861 else
862 part_idx = VP8_SPLITMVMODE_8x8;
863 } else {
864 part_idx = VP8_SPLITMVMODE_4x4;
865 }
866
867 num = vp8_mbsplit_count[part_idx];
868 mbsplits_cur = vp8_mbsplits[part_idx],
869 firstidx = vp8_mbfirstidx[part_idx];
870 mb->partitioning = part_idx;
871
872 for (n = 0; n < num; n++) {
873 int k = firstidx[n];
874 uint32_t left, above;
875 const uint8_t *submv_prob;
876
877 if (!(k & 3))
878 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
879 else
880 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
881 if (k <= 3)
882 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
883 else
884 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
885
886 submv_prob = get_submv_prob(left, above, is_vp7);
887
888 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
889 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
890 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
891 mb->bmv[n].y = mb->mv.y +
892 read_mv_component(c, s->prob->mvc[0], is_vp7);
893 mb->bmv[n].x = mb->mv.x +
894 read_mv_component(c, s->prob->mvc[1], is_vp7);
895 } else {
896 AV_ZERO32(&mb->bmv[n]);
897 }
898 } else {
899 AV_WN32A(&mb->bmv[n], above);
900 }
901 } else {
902 AV_WN32A(&mb->bmv[n], left);
903 }
904 }
905
906 return num;
907 }
908
909 /**
910 * The vp7 reference decoder uses a padding macroblock column (added to right
911 * edge of the frame) to guard against illegal macroblock offsets. The
912 * algorithm has bugs that permit offsets to straddle the padding column.
913 * This function replicates those bugs.
914 *
915 * @param[out] edge_x macroblock x address
916 * @param[out] edge_y macroblock y address
917 *
918 * @return macroblock offset legal (boolean)
919 */
920 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
921 int xoffset, int yoffset, int boundary,
922 int *edge_x, int *edge_y)
923 {
924 int vwidth = mb_width + 1;
925 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
926 if (new < boundary || new % vwidth == vwidth - 1)
927 return 0;
928 *edge_y = new / vwidth;
929 *edge_x = new % vwidth;
930 return 1;
931 }
932
933 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
934 {
935 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
936 }
937
938 static av_always_inline
939 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
940 int mb_x, int mb_y, int layout)
941 {
942 VP8Macroblock *mb_edge[12];
943 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
944 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
945 int idx = CNT_ZERO;
946 VP56mv near_mv[3];
947 uint8_t cnt[3] = { 0 };
948 VP56RangeCoder *c = &s->c;
949 int i;
950
951 AV_ZERO32(&near_mv[0]);
952 AV_ZERO32(&near_mv[1]);
953 AV_ZERO32(&near_mv[2]);
954
955 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
956 const VP7MVPred * pred = &vp7_mv_pred[i];
957 int edge_x, edge_y;
958
959 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
960 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
961 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
962 ? s->macroblocks_base + 1 + edge_x +
963 (s->mb_width + 1) * (edge_y + 1)
964 : s->macroblocks + edge_x +
965 (s->mb_height - edge_y - 1) * 2;
966 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
967 if (mv) {
968 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
969 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
970 idx = CNT_NEAREST;
971 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
972 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
973 continue;
974 idx = CNT_NEAR;
975 } else {
976 AV_WN32A(&near_mv[CNT_NEAR], mv);
977 idx = CNT_NEAR;
978 }
979 } else {
980 AV_WN32A(&near_mv[CNT_NEAREST], mv);
981 idx = CNT_NEAREST;
982 }
983 } else {
984 idx = CNT_ZERO;
985 }
986 } else {
987 idx = CNT_ZERO;
988 }
989 cnt[idx] += vp7_mv_pred[i].score;
990 }
991
992 mb->partitioning = VP8_SPLITMVMODE_NONE;
993
994 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
995 mb->mode = VP8_MVMODE_MV;
996
997 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
998
999 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1000
1001 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1002 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1003 else
1004 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1005
1006 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1007 mb->mode = VP8_MVMODE_SPLIT;
1008 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1009 } else {
1010 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
1011 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
1012 mb->bmv[0] = mb->mv;
1013 }
1014 } else {
1015 mb->mv = near_mv[CNT_NEAR];
1016 mb->bmv[0] = mb->mv;
1017 }
1018 } else {
1019 mb->mv = near_mv[CNT_NEAREST];
1020 mb->bmv[0] = mb->mv;
1021 }
1022 } else {
1023 mb->mode = VP8_MVMODE_ZERO;
1024 AV_ZERO32(&mb->mv);
1025 mb->bmv[0] = mb->mv;
1026 }
1027 }
1028
1029 static av_always_inline
1030 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1031 int mb_x, int mb_y, int layout)
1032 {
1033 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1034 mb - 1 /* left */,
1035 0 /* top-left */ };
1036 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1037 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1038 int idx = CNT_ZERO;
1039 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1040 int8_t *sign_bias = s->sign_bias;
1041 VP56mv near_mv[4];
1042 uint8_t cnt[4] = { 0 };
1043 VP56RangeCoder *c = &s->c;
1044
1045 if (!layout) { // layout is inlined (s->mb_layout is not)
1046 mb_edge[0] = mb + 2;
1047 mb_edge[2] = mb + 1;
1048 } else {
1049 mb_edge[0] = mb - s->mb_width - 1;
1050 mb_edge[2] = mb - s->mb_width - 2;
1051 }
1052
1053 AV_ZERO32(&near_mv[0]);
1054 AV_ZERO32(&near_mv[1]);
1055 AV_ZERO32(&near_mv[2]);
1056
1057 /* Process MB on top, left and top-left */
1058 #define MV_EDGE_CHECK(n) \
1059 { \
1060 VP8Macroblock *edge = mb_edge[n]; \
1061 int edge_ref = edge->ref_frame; \
1062 if (edge_ref != VP56_FRAME_CURRENT) { \
1063 uint32_t mv = AV_RN32A(&edge->mv); \
1064 if (mv) { \
1065 if (cur_sign_bias != sign_bias[edge_ref]) { \
1066 /* SWAR negate of the values in mv. */ \
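/* ~mv gives the per-lane one's complement; adding 1 to the */ \
/* low 15 bits of each lane cannot carry across lanes, and */ \
/* XORing the saved sign bits folds the carry into bit 15, */ \
/* negating both int16 components in a single 32-bit op. */ \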
1067 mv = ~mv; \
1068 mv = ((mv & 0x7fff7fff) + \
1069 0x00010001) ^ (mv & 0x80008000); \
1070 } \
1071 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1072 AV_WN32A(&near_mv[++idx], mv); \
1073 cnt[idx] += 1 + (n != 2); \
1074 } else \
1075 cnt[CNT_ZERO] += 1 + (n != 2); \
1076 } \
1077 }
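/* In the checks below, the top and left neighbours are weighted double
 * (1 + (n != 2)) while the top-left counts once, as in libvpx's
 * near-MV scoring. */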
1078
1079 MV_EDGE_CHECK(0)
1080 MV_EDGE_CHECK(1)
1081 MV_EDGE_CHECK(2)
1082
1083 mb->partitioning = VP8_SPLITMVMODE_NONE;
1084 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1085 mb->mode = VP8_MVMODE_MV;
1086
1087 /* If we have three distinct MVs, merge first and last if they're the same */
1088 if (cnt[CNT_SPLITMV] &&
1089 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1090 cnt[CNT_NEAREST] += 1;
1091
1092 /* Swap near and nearest if necessary */
1093 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1094 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1095 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1096 }
1097
1098 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1099 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1100 /* Choose the best mv out of 0,0 and the nearest mv */
1101 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1102 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1103 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1104 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1105
1106 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1107 mb->mode = VP8_MVMODE_SPLIT;
1108 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1109 } else {
1110 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP8);
1111 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP8);
1112 mb->bmv[0] = mb->mv;
1113 }
1114 } else {
1115 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1116 mb->bmv[0] = mb->mv;
1117 }
1118 } else {
1119 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1120 mb->bmv[0] = mb->mv;
1121 }
1122 } else {
1123 mb->mode = VP8_MVMODE_ZERO;
1124 AV_ZERO32(&mb->mv);
1125 mb->bmv[0] = mb->mv;
1126 }
1127 }
1128
1129 static av_always_inline
1130 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1131 int mb_x, int keyframe, int layout)
1132 {
1133 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1134
1135 if (layout == 1) {
1136 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1137 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1138 }
1139 if (keyframe) {
1140 int x, y;
1141 uint8_t *top;
1142 uint8_t *const left = s->intra4x4_pred_mode_left;
1143 if (layout == 1)
1144 top = mb->intra4x4_pred_mode_top;
1145 else
1146 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1147 for (y = 0; y < 4; y++) {
1148 for (x = 0; x < 4; x++) {
1149 const uint8_t *ctx;
1150 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1151 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1152 left[y] = top[x] = *intra4x4;
1153 intra4x4++;
1154 }
1155 }
1156 } else {
1157 int i;
1158 for (i = 0; i < 16; i++)
1159 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1160 vp8_pred4x4_prob_inter);
1161 }
1162 }
1163
1164 static av_always_inline
1165 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1166 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1167 {
1168 VP56RangeCoder *c = &s->c;
1169 static const char *vp7_feature_name[] = { "q-index",
1170 "lf-delta",
1171 "partial-golden-update",
1172 "blit-pitch" };
1173 if (is_vp7) {
1174 int i;
1175 *segment = 0;
1176 for (i = 0; i < 4; i++) {
1177 if (s->feature_enabled[i]) {
1178 if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
1179 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1180 s->feature_index_prob[i]);
1181 av_log(s->avctx, AV_LOG_WARNING,
1182 "Feature %s present in macroblock (value 0x%x)\n",
1183 vp7_feature_name[i], s->feature_value[i][index]);
1184 }
1185 }
1186 }
1187 } else if (s->segmentation.update_map)
1188 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
1189 else if (s->segmentation.enabled)
1190 *segment = ref ? *ref : *segment;
1191 mb->segment = *segment;
1192
1193 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1194
1195 if (s->keyframe) {
1196 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1197 vp8_pred16x16_prob_intra);
1198
1199 if (mb->mode == MODE_I4x4) {
1200 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1201 } else {
1202 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1203 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1204 if (s->mb_layout == 1)
1205 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1206 else
1207 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1208 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1209 }
1210
1211 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1212 vp8_pred8x8c_prob_intra);
1213 mb->ref_frame = VP56_FRAME_CURRENT;
1214 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1215 // inter MB, 16.2
1216 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1217 mb->ref_frame =
1218 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1219 : VP56_FRAME_GOLDEN;
1220 else
1221 mb->ref_frame = VP56_FRAME_PREVIOUS;
1222 s->ref_count[mb->ref_frame - 1]++;
1223
1224 // motion vectors, 16.3
1225 if (is_vp7)
1226 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1227 else
1228 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1229 } else {
1230 // intra MB, 16.1
1231 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1232
1233 if (mb->mode == MODE_I4x4)
1234 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1235
1236 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1237 s->prob->pred8x8c);
1238 mb->ref_frame = VP56_FRAME_CURRENT;
1239 mb->partitioning = VP8_SPLITMVMODE_NONE;
1240 AV_ZERO32(&mb->bmv[0]);
1241 }
1242 }
1243
1244 /**
1245 * @param r arithmetic bitstream reader context
1246 * @param block destination for block coefficients
1247 * @param probs probabilities to use when reading trees from the bitstream
1248 * @param i initial coeff index, 0 unless a separate DC block is coded
1249 * @param qmul array holding the dc/ac dequant factor at position 0/1
1250 *
1251 * @return 0 if no coeffs were decoded
1252 * otherwise, the index of the last coeff decoded plus one
1253 */
1254 static av_always_inline
1255 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1256 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1257 int i, uint8_t *token_prob, int16_t qmul[2],
1258 const uint8_t scan[16], int vp7)
1259 {
1260 VP56RangeCoder c = *r;
1261 goto skip_eob;
1262 do {
1263 int coeff;
1264 restart:
1265 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1266 break;
1267
1268 skip_eob:
1269 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1270 if (++i == 16)
1271 break; // invalid input; blocks should end with EOB
1272 token_prob = probs[i][0];
1273 if (vp7)
1274 goto restart;
1275 goto skip_eob;
1276 }
1277
1278 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1279 coeff = 1;
1280 token_prob = probs[i + 1][1];
1281 } else {
1282 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1283 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1284 if (coeff)
1285 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1286 coeff += 2;
1287 } else {
1288 // DCT_CAT*
1289 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1290 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1291 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1292 } else { // DCT_CAT2
1293 coeff = 7;
1294 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1295 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1296 }
1297 } else { // DCT_CAT3 and up
1298 int a = vp56_rac_get_prob(&c, token_prob[8]);
1299 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1300 int cat = (a << 1) + b;
1301 coeff = 3 + (8 << cat);
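/* cat 0-3 correspond to DCT_CAT3..DCT_CAT6 with base values 11, 19,
 * 35 and 67; vp8_rac_get_coeff() then reads the category's extra
 * magnitude bits. */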
1302 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1303 }
1304 }
1305 token_prob = probs[i + 1][2];
1306 }
1307 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1308 } while (++i < 16);
1309
1310 *r = c;
1311 return i;
1312 }
1313
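/* VP7 DC prediction for inter macroblocks: pred[0] holds the last DC
 * value and pred[1] the length of the run of equal values; once the run
 * exceeds 3, the predictor is added to each newly decoded DC. A zero
 * predictor, a zero DC or a sign change resets the run. The return value
 * tells the caller the block changed even when no coefficients were
 * coded. */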
1314 static av_always_inline
1315 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1316 {
1317 int16_t dc = block[0];
1318 int ret = 0;
1319
1320 if (pred[1] > 3) {
1321 dc += pred[0];
1322 ret = 1;
1323 }
1324
1325 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1326 block[0] = pred[0] = dc;
1327 pred[1] = 0;
1328 } else {
1329 if (pred[0] == dc)
1330 pred[1]++;
1331 block[0] = pred[0] = dc;
1332 }
1333
1334 return ret;
1335 }
1336
1337 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1338 int16_t block[16],
1339 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1340 int i, uint8_t *token_prob,
1341 int16_t qmul[2],
1342 const uint8_t scan[16])
1343 {
1344 return decode_block_coeffs_internal(r, block, probs, i,
1345 token_prob, qmul, scan, IS_VP7);
1346 }
1347
1348 #ifndef vp8_decode_block_coeffs_internal
1349 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1350 int16_t block[16],
1351 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1352 int i, uint8_t *token_prob,
1353 int16_t qmul[2])
1354 {
1355 return decode_block_coeffs_internal(r, block, probs, i,
1356 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1357 }
1358 #endif
1359
1360 /**
1361 * @param c arithmetic bitstream reader context
1362 * @param block destination for block coefficients
1363 * @param probs probabilities to use when reading trees from the bitstream
1364 * @param i initial coeff index, 0 unless a separate DC block is coded
1365 * @param zero_nhood the initial prediction context for number of surrounding
1366 * all-zero blocks (only left/top, so 0-2)
1367 * @param qmul array holding the dc/ac dequant factor at position 0/1
1368 *
1369 * @return 0 if no coeffs were decoded
1370 * otherwise, the index of the last coeff decoded plus one
1371 */
1372 static av_always_inline
1373 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1374 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1375 int i, int zero_nhood, int16_t qmul[2],
1376 const uint8_t scan[16], int vp7)
1377 {
1378 uint8_t *token_prob = probs[i][zero_nhood];
1379 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1380 return 0;
1381 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1382 token_prob, qmul, scan)
1383 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1384 token_prob, qmul);
1385 }
1386
1387 static av_always_inline
1388 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1389 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1390 int is_vp7)
1391 {
1392 int i, x, y, luma_start = 0, luma_ctx = 3;
1393 int nnz_pred, nnz, nnz_total = 0;
1394 int segment = mb->segment;
1395 int block_dc = 0;
1396
1397 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1398 nnz_pred = t_nnz[8] + l_nnz[8];
1399
1400 // decode DC values and do hadamard
1401 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1402 nnz_pred, s->qmat[segment].luma_dc_qmul,
1403 ff_zigzag_scan, is_vp7);
1404 l_nnz[8] = t_nnz[8] = !!nnz;
1405
1406 if (is_vp7 && mb->mode > MODE_I4x4) {
1407 nnz |= inter_predict_dc(td->block_dc,
1408 s->inter_dc_pred[mb->ref_frame - 1]);
1409 }
1410
1411 if (nnz) {
1412 nnz_total += nnz;
1413 block_dc = 1;
1414 if (nnz == 1)
1415 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1416 else
1417 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1418 }
1419 luma_start = 1;
1420 luma_ctx = 0;
1421 }
1422
1423 // luma blocks
1424 for (y = 0; y < 4; y++)
1425 for (x = 0; x < 4; x++) {
1426 nnz_pred = l_nnz[y] + t_nnz[x];
1427 nnz = decode_block_coeffs(c, td->block[y][x],
1428 s->prob->token[luma_ctx],
1429 luma_start, nnz_pred,
1430 s->qmat[segment].luma_qmul,
1431 s->prob[0].scan, is_vp7);
1432 /* nnz+block_dc may be one more than the actual last index,
1433 * but we don't care */
1434 td->non_zero_count_cache[y][x] = nnz + block_dc;
1435 t_nnz[x] = l_nnz[y] = !!nnz;
1436 nnz_total += nnz;
1437 }
1438
1439 // chroma blocks
1440 // TODO: what to do about dimensions? 2nd dim for luma is x,
1441 // but for chroma it's (y<<1)|x
1442 for (i = 4; i < 6; i++)
1443 for (y = 0; y < 2; y++)
1444 for (x = 0; x < 2; x++) {
1445 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1446 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1447 s->prob->token[2], 0, nnz_pred,
1448 s->qmat[segment].chroma_qmul,
1449 s->prob[0].scan, is_vp7);
1450 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1451 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1452 nnz_total += nnz;
1453 }
1454
1455 // if there were no coded coeffs despite the macroblock not being marked skip,
1456 // we MUST not do the inner loop filter and should not do IDCT
1457 // Since skip isn't used for bitstream prediction, just manually set it.
1458 if (!nnz_total)
1459 mb->skip = 1;
1460 }
1461
1462 static av_always_inline
1463 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1464 uint8_t *src_cb, uint8_t *src_cr,
1465 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1466 {
1467 AV_COPY128(top_border, src_y + 15 * linesize);
1468 if (!simple) {
1469 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1470 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1471 }
1472 }
1473
1474 static av_always_inline
1475 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1476 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1477 int mb_y, int mb_width, int simple, int xchg)
1478 {
1479 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1480 src_y -= linesize;
1481 src_cb -= uvlinesize;
1482 src_cr -= uvlinesize;
1483
1484 #define XCHG(a, b, xchg) \
1485 do { \
1486 if (xchg) \
1487 AV_SWAP64(b, a); \
1488 else \
1489 AV_COPY64(b, a); \
1490 } while (0)
1491
1492 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1493 XCHG(top_border, src_y, xchg);
1494 XCHG(top_border + 8, src_y + 8, 1);
1495 if (mb_x < mb_width - 1)
1496 XCHG(top_border + 32, src_y + 16, 1);
1497
1498 // only copy chroma for normal loop filter
1499 // or to initialize the top row to 127
1500 if (!simple || !mb_y) {
1501 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1502 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1503 XCHG(top_border + 16, src_cb, 1);
1504 XCHG(top_border + 24, src_cr, 1);
1505 }
1506 }
1507
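/* At the frame border the notional neighbouring samples are constants
 * (VP8: 127 above, 129 to the left; VP7: 128 for both), so the checks
 * below swap in DC prediction variants hard-wired to those values
 * instead of letting the normal predictors read missing pixels. */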
1508 static av_always_inline
1509 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1510 {
1511 if (!mb_x)
1512 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1513 else
1514 return mb_y ? mode : LEFT_DC_PRED8x8;
1515 }
1516
1517 static av_always_inline
1518 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1519 {
1520 if (!mb_x)
1521 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1522 else
1523 return mb_y ? mode : HOR_PRED8x8;
1524 }
1525
1526 static av_always_inline
1527 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1528 {
1529 switch (mode) {
1530 case DC_PRED8x8:
1531 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1532 case VERT_PRED8x8:
1533 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1534 case HOR_PRED8x8:
1535 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1536 case PLANE_PRED8x8: /* TM */
1537 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1538 }
1539 return mode;
1540 }
1541
1542 static av_always_inline
1543 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1544 {
1545 if (!mb_x) {
1546 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1547 } else {
1548 return mb_y ? mode : HOR_VP8_PRED;
1549 }
1550 }
1551
1552 static av_always_inline
1553 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1554 int *copy_buf, int vp7)
1555 {
1556 switch (mode) {
1557 case VERT_PRED:
1558 if (!mb_x && mb_y) {
1559 *copy_buf = 1;
1560 return mode;
1561 }
1562 /* fall-through */
1563 case DIAG_DOWN_LEFT_PRED:
1564 case VERT_LEFT_PRED:
1565 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1566 case HOR_PRED:
1567 if (!mb_y) {
1568 *copy_buf = 1;
1569 return mode;
1570 }
1571 /* fall-through */
1572 case HOR_UP_PRED:
1573 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1574 case TM_VP8_PRED:
1575 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1576 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1577 * as 16x16/8x8 DC */
1578 case DIAG_DOWN_RIGHT_PRED:
1579 case VERT_RIGHT_PRED:
1580 case HOR_DOWN_PRED:
1581 if (!mb_y || !mb_x)
1582 *copy_buf = 1;
1583 return mode;
1584 }
1585 return mode;
1586 }
1587
1588 static av_always_inline
1589 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1590 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1591 {
1592 int x, y, mode, nnz;
1593 uint32_t tr;
1594
1595 /* for the first row, we need to run xchg_mb_border to init the top edge
1596 * to 127; otherwise, skip it if we aren't going to deblock */
1597 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1598 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1599 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1600 s->filter.simple, 1);
1601
1602 if (mb->mode < MODE_I4x4) {
1603 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1604 s->hpc.pred16x16[mode](dst[0], s->linesize);
1605 } else {
1606 uint8_t *ptr = dst[0];
1607 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1608 const uint8_t lo = is_vp7 ? 128 : 127;
1609 const uint8_t hi = is_vp7 ? 128 : 129;
1610 uint8_t tr_top[4] = { lo, lo, lo, lo };
1611
1612 // all blocks on the right edge of the macroblock use the bottom edge of
1613 // the top macroblock for their topright edge
1614 uint8_t *tr_right = ptr - s->linesize + 16;
1615
1616 // if we're on the right edge of the frame, said edge is extended
1617 // from the top macroblock
1618 if (mb_y && mb_x == s->mb_width - 1) {
1619 tr = tr_right[-1] * 0x01010101u;
1620 tr_right = (uint8_t *) &tr;
1621 }
1622
1623 if (mb->skip)
1624 AV_ZERO128(td->non_zero_count_cache);
1625
1626 for (y = 0; y < 4; y++) {
1627 uint8_t *topright = ptr + 4 - s->linesize;
1628 for (x = 0; x < 4; x++) {
1629 int copy = 0;
1630 ptrdiff_t linesize = s->linesize;
1631 uint8_t *dst = ptr + 4 * x;
1632 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1633
1634 if ((y == 0 || x == 3) && mb_y == 0) {
1635 topright = tr_top;
1636 } else if (x == 3)
1637 topright = tr_right;
1638
1639 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1640 mb_y + y, &copy, is_vp7);
1641 if (copy) {
1642 dst = copy_dst + 12;
1643 linesize = 8;
1644 if (!(mb_y + y)) {
1645 copy_dst[3] = lo;
1646 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1647 } else {
1648 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1649 if (!(mb_x + x)) {
1650 copy_dst[3] = hi;
1651 } else {
1652 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1653 }
1654 }
1655 if (!(mb_x + x)) {
1656 copy_dst[11] =
1657 copy_dst[19] =
1658 copy_dst[27] =
1659 copy_dst[35] = hi;
1660 } else {
1661 copy_dst[11] = ptr[4 * x - 1];
1662 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1663 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1664 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1665 }
1666 }
1667 s->hpc.pred4x4[mode](dst, topright, linesize);
1668 if (copy) {
1669 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1670 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1671 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1672 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1673 }
1674
1675 nnz = td->non_zero_count_cache[y][x];
1676 if (nnz) {
1677 if (nnz == 1)
1678 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1679 td->block[y][x], s->linesize);
1680 else
1681 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1682 td->block[y][x], s->linesize);
1683 }
1684 topright += 4;
1685 }
1686
1687 ptr += 4 * s->linesize;
1688 intra4x4 += 4;
1689 }
1690 }
1691
1692 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1693 mb_x, mb_y, is_vp7);
1694 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1695 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1696
1697 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1698 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1699 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1700 s->filter.simple, 0);
1701 }
1702
1703 static const uint8_t subpel_idx[3][8] = {
1704 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1705 // also function pointer index
1706 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1707 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1708 };
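/* Example: mx == 2 selects the six-tap filter (subpel_idx[0][2] == 2),
 * which reads 2 extra pixels left and 3 right of the block, i.e. 5 extra
 * pixels per row (subpel_idx[1][2]) when emulating edges. */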
1709
1710 /**
1711 * luma MC function
1712 *
1713 * @param s VP8 decoding context
1714 * @param dst target buffer for block data at block position
1715 * @param ref reference picture buffer at origin (0, 0)
1716 * @param mv motion vector (relative to block position) to get pixel data from
1717 * @param x_off horizontal position of block from origin (0, 0)
1718 * @param y_off vertical position of block from origin (0, 0)
1719 * @param block_w width of block (16, 8 or 4)
1720 * @param block_h height of block (always same as block_w)
1721 * @param width width of src/dst plane data
1722 * @param height height of src/dst plane data
1723 * @param linesize size of a single line of plane data, including padding
1724 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1725 */
1726 static av_always_inline
1727 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1728 ThreadFrame *ref, const VP56mv *mv,
1729 int x_off, int y_off, int block_w, int block_h,
1730 int width, int height, ptrdiff_t linesize,
1731 vp8_mc_func mc_func[3][3])
1732 {
1733 uint8_t *src = ref->f->data[0];
1734
1735 if (AV_RN32A(mv)) {
1736 ptrdiff_t src_linesize = linesize;
1737
1738 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1739 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1740
1741 x_off += mv->x >> 2;
1742 y_off += mv->y >> 2;
1743
1744 // edge emulation
1745 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
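/* with frame threading, block until the reference thread has decoded
 * every row the sub-pel filter may read, converting the lowest source
 * row (block bottom plus filter margin, with a small rounding offset)
 * to a macroblock-row index */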
1746 src += y_off * linesize + x_off;
1747 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1748 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1749 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1750 src - my_idx * linesize - mx_idx,
1751 EDGE_EMU_LINESIZE, linesize,
1752 block_w + subpel_idx[1][mx],
1753 block_h + subpel_idx[1][my],
1754 x_off - mx_idx, y_off - my_idx,
1755 width, height);
1756 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1757 src_linesize = EDGE_EMU_LINESIZE;
1758 }
1759 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1760 } else {
1761 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1762 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1763 linesize, block_h, 0, 0);
1764 }
1765 }
1766
1767 /**
1768 * chroma MC function
1769 *
1770 * @param s VP8 decoding context
1771 * @param dst1 target buffer for block data at block position (U plane)
1772 * @param dst2 target buffer for block data at block position (V plane)
1773 * @param ref reference picture buffer at origin (0, 0)
1774 * @param mv motion vector (relative to block position) to get pixel data from
1775 * @param x_off horizontal position of block from origin (0, 0)
1776 * @param y_off vertical position of block from origin (0, 0)
1777 * @param block_w width of block (16, 8 or 4)
1778 * @param block_h height of block (always same as block_w)
1779 * @param width width of src/dst plane data
1780 * @param height height of src/dst plane data
1781 * @param linesize size of a single line of plane data, including padding
1782 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1783 */
1784 static av_always_inline
1785 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1786 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1787 int x_off, int y_off, int block_w, int block_h,
1788 int width, int height, ptrdiff_t linesize,
1789 vp8_mc_func mc_func[3][3])
1790 {
1791 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1792
1793 if (AV_RN32A(mv)) {
1794 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1795 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1796
1797 x_off += mv->x >> 3;
1798 y_off += mv->y >> 3;
1799
1800 // edge emulation
1801 src1 += y_off * linesize + x_off;
1802 src2 += y_off * linesize + x_off;
1803 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1804 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1805 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1806 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1807 src1 - my_idx * linesize - mx_idx,
1808 EDGE_EMU_LINESIZE, linesize,
1809 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1810 x_off - mx_idx, y_off - my_idx, width, height);
1811 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1812 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1813
1814 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1815 src2 - my_idx * linesize - mx_idx,
1816 EDGE_EMU_LINESIZE, linesize,
1817 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1818 x_off - mx_idx, y_off - my_idx, width, height);
1819 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1820 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1821 } else {
1822 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1823 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1824 }
1825 } else {
1826 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1827 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1828 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1829 }
1830 }
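/* Note the different scaling from vp8_mc_luma(): chroma MVs are already in
 * eighth-pel units, so mv->x & 7 is the filter phase directly and
 * mv->x >> 3 the integer part, without the << 1 doubling applied to the
 * quarter-pel luma MVs. */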
1831
1832 static av_always_inline
1833 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1834 ThreadFrame *ref_frame, int x_off, int y_off,
1835 int bx_off, int by_off, int block_w, int block_h,
1836 int width, int height, VP56mv *mv)
1837 {
1838 VP56mv uvmv = *mv;
1839
1840 /* Y */
1841 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1842 ref_frame, mv, x_off + bx_off, y_off + by_off,
1843 block_w, block_h, width, height, s->linesize,
1844 s->put_pixels_tab[block_w == 8]);
1845
1846 /* U/V */
1847 if (s->profile == 3) {
1848 /* this block only applies to VP8; it is safe to check
1849 * only the profile, as VP7 profile <= 1 */
1850 uvmv.x &= ~7;
1851 uvmv.y &= ~7;
1852 }
1853 x_off >>= 1;
1854 y_off >>= 1;
1855 bx_off >>= 1;
1856 by_off >>= 1;
1857 width >>= 1;
1858 height >>= 1;
1859 block_w >>= 1;
1860 block_h >>= 1;
1861 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1862 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1863 &uvmv, x_off + bx_off, y_off + by_off,
1864 block_w, block_h, width, height, s->uvlinesize,
1865 s->put_pixels_tab[1 + (block_w == 4)]);
1866 }
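/* The >>= 1 halvings above map luma geometry onto the half-resolution
 * 4:2:0 chroma planes. put_pixels_tab rows are ordered by block width
 * (16, 8, 4), hence the [block_w == 8] and [1 + (block_w == 4)] index
 * tricks for selecting the right function row. */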
1867
1868 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1869 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1870 static av_always_inline
1871 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1872 int mb_xy, int ref)
1873 {
1874 /* Don't prefetch refs that haven't been used very often this frame. */
1875 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
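/* i.e. only prefetch a reference once it has been used by more than
 * one in 32 of the macroblocks decoded so far in this frame. */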
1876 int x_off = mb_x << 4, y_off = mb_y << 4;
1877 int mx = (mb->mv.x >> 2) + x_off + 8;
1878 int my = (mb->mv.y >> 2) + y_off;
1879 uint8_t **src = s->framep[ref]->tf.f->data;
1880 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1881 /* For threading, an ff_thread_await_progress() call here might be useful, but
1882 * it actually slows down the decoder. Since a bad prefetch doesn't
1883 * generate bad decoder output, we don't run it here. */
1884 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1885 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1886 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1887 }
1888 }
1889
1890 /**
1891 * Apply motion vectors to prediction buffer, chapter 18.
1892 */
1893 static av_always_inline
1894 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1895 VP8Macroblock *mb, int mb_x, int mb_y)
1896 {
1897 int x_off = mb_x << 4, y_off = mb_y << 4;
1898 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1899 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1900 VP56mv *bmv = mb->bmv;
1901
1902 switch (mb->partitioning) {
1903 case VP8_SPLITMVMODE_NONE:
1904 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1905 0, 0, 16, 16, width, height, &mb->mv);
1906 break;
1907 case VP8_SPLITMVMODE_4x4: {
1908 int x, y;
1909 VP56mv uvmv;
1910
1911 /* Y */
1912 for (y = 0; y < 4; y++) {
1913 for (x = 0; x < 4; x++) {
1914 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1915 ref, &bmv[4 * y + x],
1916 4 * x + x_off, 4 * y + y_off, 4, 4,
1917 width, height, s->linesize,
1918 s->put_pixels_tab[2]);
1919 }
1920 }
1921
1922 /* U/V */
1923 x_off >>= 1;
1924 y_off >>= 1;
1925 width >>= 1;
1926 height >>= 1;
1927 for (y = 0; y < 2; y++) {
1928 for (x = 0; x < 2; x++) {
1929 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1930 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1931 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1932 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1933 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1934 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1935 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1936 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1937 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1938 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
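/* FF_SIGNBIT() evaluates to -1 for negative values and 0 otherwise, so
 * (sum + 2 + FF_SIGNBIT(sum)) >> 2 averages the four sub-MVs rounding
 * halves away from zero for either sign, e.g. a sum of -7 gives
 * (-7 + 2 - 1) >> 2 = -2. */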
1939 if (s->profile == 3) {
1940 uvmv.x &= ~7;
1941 uvmv.y &= ~7;
1942 }
1943 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1944 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1945 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1946 width, height, s->uvlinesize,
1947 s->put_pixels_tab[2]);
1948 }
1949 }
1950 break;
1951 }
1952 case VP8_SPLITMVMODE_16x8:
1953 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1954 0, 0, 16, 8, width, height, &bmv[0]);
1955 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1956 0, 8, 16, 8, width, height, &bmv[1]);
1957 break;
1958 case VP8_SPLITMVMODE_8x16:
1959 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1960 0, 0, 8, 16, width, height, &bmv[0]);
1961 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1962 8, 0, 8, 16, width, height, &bmv[1]);
1963 break;
1964 case VP8_SPLITMVMODE_8x8:
1965 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1966 0, 0, 8, 8, width, height, &bmv[0]);
1967 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1968 8, 0, 8, 8, width, height, &bmv[1]);
1969 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1970 0, 8, 8, 8, width, height, &bmv[2]);
1971 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1972 8, 8, 8, 8, width, height, &bmv[3]);
1973 break;
1974 }
1975 }
1976
1977 static av_always_inline
1978 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1979 {
1980 int x, y, ch;
1981
1982 if (mb->mode != MODE_I4x4) {
1983 uint8_t *y_dst = dst[0];
1984 for (y = 0; y < 4; y++) {
1985 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
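/* nnz4 packs the non-zero coefficient counts of the four subblocks in
 * this row into one little-endian word: a byte of 1 means only the DC
 * coefficient is present, larger bytes mean AC coefficients too, so
 * masking with ~0x01010101 detects whether any subblock needs a full
 * inverse transform. */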
1986 if (nnz4) {
1987 if (nnz4 & ~0x01010101) {
1988 for (x = 0; x < 4; x++) {
1989 if ((uint8_t) nnz4 == 1)
1990 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1991 td->block[y][x],
1992 s->linesize);
1993 else if ((uint8_t) nnz4 > 1)
1994 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1995 td->block[y][x],
1996 s->linesize);
1997 nnz4 >>= 8;
1998 if (!nnz4)
1999 break;
2000 }
2001 } else {
2002 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2003 }
2004 }
2005 y_dst += 4 * s->linesize;
2006 }
2007 }
2008
2009 for (ch = 0; ch < 2; ch++) {
2010 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2011 if (nnz4) {
2012 uint8_t *ch_dst = dst[1 + ch];
2013 if (nnz4 & ~0x01010101) {
2014 for (y = 0; y < 2; y++) {
2015 for (x = 0; x < 2; x++) {
2016 if ((uint8_t) nnz4 == 1)
2017 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2018 td->block[4 + ch][(y << 1) + x],
2019 s->uvlinesize);
2020 else if ((uint8_t) nnz4 > 1)
2021 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2022 td->block[4 + ch][(y << 1) + x],
2023 s->uvlinesize);
2024 nnz4 >>= 8;
2025 if (!nnz4)
2026 goto chroma_idct_end;
2027 }
2028 ch_dst += 4 * s->uvlinesize;
2029 }
2030 } else {
2031 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2032 }
2033 }
2034 chroma_idct_end:
2035 ;
2036 }
2037 }
2038
2039 static av_always_inline
2040 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2041 VP8FilterStrength *f, int is_vp7)
2042 {
2043 int interior_limit, filter_level;
2044
2045 if (s->segmentation.enabled) {
2046 filter_level = s->segmentation.filter_level[mb->segment];
2047 if (!s->segmentation.absolute_vals)
2048 filter_level += s->filter.level;
2049 } else
2050 filter_level = s->filter.level;
2051
2052 if (s->lf_delta.enabled) {
2053 filter_level += s->lf_delta.ref[mb->ref_frame];
2054 filter_level += s->lf_delta.mode[mb->mode];
2055 }
2056
2057 filter_level = av_clip_uintp2(filter_level, 6);
2058
2059 interior_limit = filter_level;
2060 if (s->filter.sharpness) {
2061 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2062 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2063 }
2064 interior_limit = FFMAX(interior_limit, 1);
2065
2066 f->filter_level = filter_level;
2067 f->inner_limit = interior_limit;
2068 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2069 mb->mode == VP8_MVMODE_SPLIT;
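/* Inner (subblock) edges are filtered only when the MB actually has
 * residual coefficients or uses a per-subblock prediction mode; VP7
 * filters them unconditionally. */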
2070 }
2071
2072 static av_always_inline
2073 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2074 int mb_x, int mb_y, int is_vp7)
2075 {
2076 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2077 int filter_level = f->filter_level;
2078 int inner_limit = f->inner_limit;
2079 int inner_filter = f->inner_filter;
2080 ptrdiff_t linesize = s->linesize;
2081 ptrdiff_t uvlinesize = s->uvlinesize;
2082 static const uint8_t hev_thresh_lut[2][64] = {
2083 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2084 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2085 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2086 3, 3, 3, 3 },
2087 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2088 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2089 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2090 2, 2, 2, 2 }
2091 };
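/* High-edge-variance thresholds from the VP8 spec, indexed by
 * [keyframe][filter_level]: key frames step up at filter levels 15 and
 * 40, inter frames at 15, 20 and 40. */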
2092
2093 if (!filter_level)
2094 return;
2095
2096 if (is_vp7) {
2097 bedge_lim_y = filter_level;
2098 bedge_lim_uv = filter_level * 2;
2099 mbedge_lim = filter_level + 2;
2100 } else {
2101 bedge_lim_y =
2102 bedge_lim_uv = filter_level * 2 + inner_limit;
2103 mbedge_lim = bedge_lim_y + 4;
2104 }
2105
2106 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2107
2108 if (mb_x) {
2109 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2110 mbedge_lim, inner_limit, hev_thresh);
2111 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2112 mbedge_lim, inner_limit, hev_thresh);
2113 }
2114
2115 #define H_LOOP_FILTER_16Y_INNER(cond) \
2116 if (cond && inner_filter) { \
2117 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2118 bedge_lim_y, inner_limit, \
2119 hev_thresh); \
2120 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2121 bedge_lim_y, inner_limit, \
2122 hev_thresh); \
2123 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2124 bedge_lim_y, inner_limit, \
2125 hev_thresh); \
2126 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2127 uvlinesize, bedge_lim_uv, \
2128 inner_limit, hev_thresh); \
2129 }
2130
2131 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2132
2133 if (mb_y) {
2134 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2135 mbedge_lim, inner_limit, hev_thresh);
2136 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2137 mbedge_lim, inner_limit, hev_thresh);
2138 }
2139
2140 if (inner_filter) {
2141 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2142 linesize, bedge_lim_y,
2143 inner_limit, hev_thresh);
2144 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2145 linesize, bedge_lim_y,
2146 inner_limit, hev_thresh);
2147 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2148 linesize, bedge_lim_y,
2149 inner_limit, hev_thresh);
2150 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2151 dst[2] + 4 * uvlinesize,
2152 uvlinesize, bedge_lim_uv,
2153 inner_limit, hev_thresh);
2154 }
2155
2156 H_LOOP_FILTER_16Y_INNER(is_vp7)
2157 }
2158
2159 static av_always_inline
2160 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2161 int mb_x, int mb_y)
2162 {
2163 int mbedge_lim, bedge_lim;
2164 int filter_level = f->filter_level;
2165 int inner_limit = f->inner_limit;
2166 int inner_filter = f->inner_filter;
2167 ptrdiff_t linesize = s->linesize;
2168
2169 if (!filter_level)
2170 return;
2171
2172 bedge_lim = 2 * filter_level + inner_limit;
2173 mbedge_lim = bedge_lim + 4;
2174
2175 if (mb_x)
2176 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2177 if (inner_filter) {
2178 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2179 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2180 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2181 }
2182
2183 if (mb_y)
2184 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2185 if (inner_filter) {
2186 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2187 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2188 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2189 }
2190 }
2191
2192 #define MARGIN (16 << 2)
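/* MV clamping margin: motion vectors may point at most 16 pixels
 * (expressed here in quarter-pel units) outside the visible frame. */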
2193 static av_always_inline
2194 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2195 VP8Frame *prev_frame, int is_vp7)
2196 {
2197 VP8Context *s = avctx->priv_data;
2198 int mb_x, mb_y;
2199
2200 s->mv_min.y = -MARGIN;
2201 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2202 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2203 VP8Macroblock *mb = s->macroblocks_base +
2204 ((s->mb_width + 1) * (mb_y + 1) + 1);
2205 int mb_xy = mb_y * s->mb_width;
2206
2207 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2208
2209 s->mv_min.x = -MARGIN;
2210 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2211 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2212 if (mb_y == 0)
2213 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2214 DC_PRED * 0x01010101);
2215 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2216 prev_frame && prev_frame->seg_map ?
2217 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2218 s->mv_min.x -= 64;
2219 s->mv_max.x -= 64;
2220 }
2221 s->mv_min.y -= 64;
2222 s->mv_max.y -= 64;
2223 }
2224 }
2225
2226 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2227 VP8Frame *prev_frame)
2228 {
2229 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2230 }
2231
2232 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2233 VP8Frame *prev_frame)
2234 {
2235 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2236 }
2237
2238 #if HAVE_THREADS
2239 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2240 do { \
2241 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2242 if (otd->thread_mb_pos < tmp) { \
2243 pthread_mutex_lock(&otd->lock); \
2244 td->wait_mb_pos = tmp; \
2245 do { \
2246 if (otd->thread_mb_pos >= tmp) \
2247 break; \
2248 pthread_cond_wait(&otd->cond, &otd->lock); \
2249 } while (1); \
2250 td->wait_mb_pos = INT_MAX; \
2251 pthread_mutex_unlock(&otd->lock); \
2252 } \
2253 } while (0)
2254
2255 #define update_pos(td, mb_y, mb_x) \
2256 do { \
2257 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2258 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2259 (num_jobs > 1); \
2260 int is_null = !next_td || !prev_td; \
2261 int pos_check = (is_null) ? 1 \
2262 : (next_td != td && \
2263 pos >= next_td->wait_mb_pos) || \
2264 (prev_td != td && \
2265 pos >= prev_td->wait_mb_pos); \
2266 td->thread_mb_pos = pos; \
2267 if (sliced_threading && pos_check) { \
2268 pthread_mutex_lock(&td->lock); \
2269 pthread_cond_broadcast(&td->cond); \
2270 pthread_mutex_unlock(&td->lock); \
2271 } \
2272 } while (0)
2273 #else
2274 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2275 #define update_pos(td, mb_y, mb_x)
2276 #endif
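/* Sliced-threading protocol: each thread publishes its progress as
 * (mb_y << 16) | mb_x in thread_mb_pos. check_thread_pos() blocks on the
 * other thread's condition variable until that thread has passed the
 * given checkpoint, and update_pos() advertises new progress, waking
 * waiters only when sliced threading is active and someone may actually
 * be blocked on this position. */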
2277
2278 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2279 int jobnr, int threadnr, int is_vp7)
2280 {
2281 VP8Context *s = avctx->priv_data;
2282 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2283 int mb_y = td->thread_mb_pos >> 16;
2284 int mb_x, mb_xy = mb_y * s->mb_width;
2285 int num_jobs = s->num_jobs;
2286 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2287 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2288 VP8Macroblock *mb;
2289 uint8_t *dst[3] = {
2290 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2291 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2292 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2293 };
2294 if (mb_y == 0)
2295 prev_td = td;
2296 else
2297 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2298 if (mb_y == s->mb_height - 1)
2299 next_td = td;
2300 else
2301 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2302 if (s->mb_layout == 1)
2303 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2304 else {
2305 // Make sure the previous frame has read its segmentation map,
2306 // if we re-use the same map.
2307 if (prev_frame && s->segmentation.enabled &&
2308 !s->segmentation.update_map)
2309 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2310 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2311 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2312 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2313 }
2314
2315 if (!is_vp7 || mb_y == 0)
2316 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2317
2318 s->mv_min.x = -MARGIN;
2319 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2320
2321 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2322 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2323 if (prev_td != td) {
2324 if (threadnr != 0) {
2325 check_thread_pos(td, prev_td,
2326 mb_x + (is_vp7 ? 2 : 1),
2327 mb_y - (is_vp7 ? 2 : 1));
2328 } else {
2329 check_thread_pos(td, prev_td,
2330 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2331 mb_y - (is_vp7 ? 2 : 1));
2332 }
2333 }
2334
2335 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2336 s->linesize, 4);
2337 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2338 dst[2] - dst[1], 2);
2339
2340 if (!s->mb_layout)
2341 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2342 prev_frame && prev_frame->seg_map ?
2343 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2344
2345 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2346
2347 if (!mb->skip)
2348 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2349
2350 if (mb->mode <= MODE_I4x4)
2351 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2352 else
2353 inter_predict(s, td, dst, mb, mb_x, mb_y);
2354
2355 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2356
2357 if (!mb->skip) {
2358 idct_mb(s, td, dst, mb);
2359 } else {
2360 AV_ZERO64(td->left_nnz);
2361 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2362
2363 /* Reset DC block predictors if they would exist
2364 * if the mb had coefficients */
2365 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2366 td->left_nnz[8] = 0;
2367 s->top_nnz[mb_x][8] = 0;
2368 }
2369 }
2370
2371 if (s->deblock_filter)
2372 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2373
2374 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2375 if (s->filter.simple)
2376 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2377 NULL, NULL, s->linesize, 0, 1);
2378 else
2379 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2380 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2381 }
2382
2383 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2384
2385 dst[0] += 16;
2386 dst[1] += 8;
2387 dst[2] += 8;
2388 s->mv_min.x -= 64;
2389 s->mv_max.x -= 64;
2390
2391 if (mb_x == s->mb_width + 1) {
2392 update_pos(td, mb_y, s->mb_width + 3);
2393 } else {
2394 update_pos(td, mb_y, mb_x);
2395 }
2396 }
2397 }
2398
2399 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2400 int jobnr, int threadnr, int is_vp7)
2401 {
2402 VP8Context *s = avctx->priv_data;
2403 VP8ThreadData *td = &s->thread_data[threadnr];
2404 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2405 AVFrame *curframe = s->curframe->tf.f;
2406 VP8Macroblock *mb;
2407 VP8ThreadData *prev_td, *next_td;
2408 uint8_t *dst[3] = {
2409 curframe->data[0] + 16 * mb_y * s->linesize,
2410 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2411 curframe->data[2] + 8 * mb_y * s->uvlinesize
2412 };
2413
2414 if (s->mb_layout == 1)
2415 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2416 else
2417 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2418
2419 if (mb_y == 0)
2420 prev_td = td;
2421 else
2422 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2423 if (mb_y == s->mb_height - 1)
2424 next_td = td;
2425 else
2426 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2427
2428 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2429 VP8FilterStrength *f = &td->filter_strength[mb_x];
2430 if (prev_td != td)
2431 check_thread_pos(td, prev_td,
2432 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2433 if (next_td != td)
2434 if (next_td != &s->thread_data[0])
2435 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2436
2437 if (num_jobs == 1) {
2438 if (s->filter.simple)
2439 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2440 NULL, NULL, s->linesize, 0, 1);
2441 else
2442 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2443 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2444 }
2445
2446 if (s->filter.simple)
2447 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2448 else
2449 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2450 dst[0] += 16;
2451 dst[1] += 8;
2452 dst[2] += 8;
2453
2454 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2455 }
2456 }
2457
2458 static av_always_inline
2459 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2460 int threadnr, int is_vp7)
2461 {
2462 VP8Context *s = avctx->priv_data;
2463 VP8ThreadData *td = &s->thread_data[jobnr];
2464 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2465 VP8Frame *curframe = s->curframe;
2466 int mb_y, num_jobs = s->num_jobs;
2467
2468 td->thread_nr = threadnr;
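/* Macroblock rows are interleaved across jobs: job N decodes rows
 * N, N + num_jobs, N + 2 * num_jobs, ... so neighbouring rows always
 * belong to different threads and the check_thread_pos()/update_pos()
 * handshake keeps their top/bottom dependencies ordered. */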
2469 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2470 if (mb_y >= s->mb_height)
2471 break;
2472 td->thread_mb_pos = mb_y << 16;
2473 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
2474 if (s->deblock_filter)
2475 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
2476 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2477
2478 s->mv_min.y -= 64;
2479 s->mv_max.y -= 64;
2480
2481 if (avctx->active_thread_type == FF_THREAD_FRAME)
2482 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2483 }
2484
2485 return 0;
2486 }
2487
2488 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2489 int jobnr, int threadnr)
2490 {
2491 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2492 }
2493
2494 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2495 int jobnr, int threadnr)
2496 {
2497 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2498 }
2499
2500 static av_always_inline
2501 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2502 AVPacket *avpkt, int is_vp7)
2503 {
2504 VP8Context *s = avctx->priv_data;
2505 int ret, i, referenced, num_jobs;
2506 enum AVDiscard skip_thresh;
2507 VP8Frame *av_uninit(curframe), *prev_frame;
2508
2509 if (is_vp7)
2510 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2511 else
2512 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2513
2514 if (ret < 0)
2515 goto err;
2516
2517 if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2518 enum AVPixelFormat pix_fmts[] = {
2519 #if CONFIG_VP8_VAAPI_HWACCEL
2520 AV_PIX_FMT_VAAPI,
2521 #endif
2522 AV_PIX_FMT_YUV420P,
2523 AV_PIX_FMT_NONE,
2524 };
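/* Let the caller negotiate the output format: with the VAAPI hwaccel
 * compiled in, AV_PIX_FMT_VAAPI is offered first, otherwise (or if the
 * caller declines) we fall back to software YUV420P decoding. */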
2525
2526 s->pix_fmt = ff_get_format(s->avctx, pix_fmts);
2527 if (s->pix_fmt < 0) {
2528 ret = AVERROR(EINVAL);
2529 goto err;
2530 }
2531 avctx->pix_fmt = s->pix_fmt;
2532 }
2533
2534 prev_frame = s->framep[VP56_FRAME_CURRENT];
2535
2536 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2537 s->update_altref == VP56_FRAME_CURRENT;
2538
2539 skip_thresh = !referenced ? AVDISCARD_NONREF
2540 : !s->keyframe ? AVDISCARD_NONKEY
2541 : AVDISCARD_ALL;
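/* The frame is dropped when avctx->skip_frame reaches the threshold:
 * unreferenced frames go first (AVDISCARD_NONREF), then inter frames
 * (AVDISCARD_NONKEY); keyframes are only dropped with AVDISCARD_ALL. */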
2542
2543 if (avctx->skip_frame >= skip_thresh) {
2544 s->invisible = 1;
2545 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2546 goto skip_decode;
2547 }
2548 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2549
2550 // release no longer referenced frames
2551 for (i = 0; i < 5; i++)
2552 if (s->frames[i].tf.f->data[0] &&
2553 &s->frames[i] != prev_frame &&
2554 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2555 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2556 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2557 vp8_release_frame(s, &s->frames[i]);
2558
2559 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2560
2561 if (!s->colorspace)
2562 avctx->colorspace = AVCOL_SPC_BT470BG;
2563 if (s->fullrange)
2564 avctx->color_range = AVCOL_RANGE_JPEG;
2565 else
2566 avctx->color_range = AVCOL_RANGE_MPEG;
2567
2568 /* Given that arithmetic probabilities are updated every frame, it's quite
2569 * likely that the values we have on a random interframe are complete
2570 * junk if we didn't start decoding from a keyframe. So just don't display
2571 * anything rather than junk. */
2572 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2573 !s->framep[VP56_FRAME_GOLDEN] ||
2574 !s->framep[VP56_FRAME_GOLDEN2])) {
2575 av_log(avctx, AV_LOG_WARNING,
2576 "Discarding interframe without a prior keyframe!\n");
2577 ret = AVERROR_INVALIDDATA;
2578 goto err;
2579 }
2580
2581 curframe->tf.f->key_frame = s->keyframe;
2582 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2583 : AV_PICTURE_TYPE_P;
2584 if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
2585 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
2586 goto err;
2587 }
2588
2589 // check if golden and altref are swapped
2590 if (s->update_altref != VP56_FRAME_NONE)
2591 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2592 else
2593 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2594
2595 if (s->update_golden != VP56_FRAME_NONE)
2596 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2597 else
2598 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2599
2600 if (s->update_last)
2601 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2602 else
2603 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2604
2605 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2606
2607 ff_thread_finish_setup(avctx);
2608
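/* Hardware decoding path: the whole packet is submitted to the hwaccel
 * as a single slice and the software reconstruction below is bypassed
 * entirely. */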
2609 if (avctx->hwaccel) {
2610 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2611 if (ret < 0)
2612 goto err;
2613
2614 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2615 if (ret < 0)
2616 goto err;
2617
2618 ret = avctx->hwaccel->end_frame(avctx);
2619 if (ret < 0)
2620 goto err;
2621
2622 } else {
2623 s->linesize = curframe->tf.f->linesize[0];
2624 s->uvlinesize = curframe->tf.f->linesize[1];
2625
2626 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2627 /* Zero macroblock structures for top/top-left prediction
2628 * from outside the frame. */
2629 if (!s->mb_layout)
2630 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2631 (s->mb_width + 1) * sizeof(*s->macroblocks));
2632 if (!s->mb_layout && s->keyframe)
2633 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2634
2635 memset(s->ref_count, 0, sizeof(s->ref_count));
2636
2637 if (s->mb_layout == 1) {
2638 // Make sure the previous frame has read its segmentation map,
2639 // if we re-use the same map.
2640 if (prev_frame && s->segmentation.enabled &&
2641 !s->segmentation.update_map)
2642 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2643 if (is_vp7)
2644 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2645 else
2646 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2647 }
2648
2649 if (avctx->active_thread_type == FF_THREAD_FRAME)
2650 num_jobs = 1;
2651 else
2652 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2653 s->num_jobs = num_jobs;
2654 s->curframe = curframe;
2655 s->prev_frame = prev_frame;
2656 s->mv_min.y = -MARGIN;
2657 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2658 for (i = 0; i < MAX_THREADS; i++) {
2659 s->thread_data[i].thread_mb_pos = 0;
2660 s->thread_data[i].wait_mb_pos = INT_MAX;
2661 }
2662
2663 if (is_vp7)
2664 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2665 num_jobs);
2666 else
2667 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2668 num_jobs);
2669 }
2670
2671 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2672 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2673
2674 skip_decode:
2675 // if future frames don't use the updated probabilities,
2676 // reset them to the values we saved
2677 if (!s->update_probabilities)
2678 s->prob[0] = s->prob[1];
2679
2680 if (!s->invisible) {
2681 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2682 return ret;
2683 *got_frame = 1;
2684 }
2685
2686 return avpkt->size;
2687 err:
2688 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2689 return ret;
2690 }
2691
2692 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2693 AVPacket *avpkt)
2694 {
2695 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2696 }
2697
2698 #if CONFIG_VP7_DECODER
2699 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2700 AVPacket *avpkt)
2701 {
2702 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2703 }
2704 #endif /* CONFIG_VP7_DECODER */
2705
2706 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2707 {
2708 VP8Context *s = avctx->priv_data;
2709 int i;
2710
2711 vp8_decode_flush_impl(avctx, 1);
2712 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2713 av_frame_free(&s->frames[i].tf.f);
2714
2715 return 0;
2716 }
2717
2718 static av_cold int vp8_init_frames(VP8Context *s)
2719 {
2720 int i;
2721 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2722 s->frames[i].tf.f = av_frame_alloc();
2723 if (!s->frames[i].tf.f)
2724 return AVERROR(ENOMEM);
2725 }
2726 return 0;
2727 }
2728
2729 static av_always_inline
2730 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2731 {
2732 VP8Context *s = avctx->priv_data;
2733 int ret;
2734
2735 s->avctx = avctx;
2736 s->pix_fmt = AV_PIX_FMT_NONE;
2737 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2738 avctx->internal->allocate_progress = 1;
2739
2740 ff_videodsp_init(&s->vdsp, 8);
2741
2742 ff_vp78dsp_init(&s->vp8dsp);
2743 if (CONFIG_VP7_DECODER && is_vp7) {
2744 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2745 ff_vp7dsp_init(&s->vp8dsp);
2746 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2747 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2748 ff_vp8dsp_init(&s->vp8dsp);
2749 }
2750
2751 /* does not change for VP8 */
2752 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2753
2754 if ((ret = vp8_init_frames(s)) < 0) {
2755 ff_vp8_decode_free(avctx);
2756 return ret;
2757 }
2758
2759 return 0;
2760 }
2761
2762 #if CONFIG_VP7_DECODER
2763 static int vp7_decode_init(AVCodecContext *avctx)
2764 {
2765 return vp78_decode_init(avctx, IS_VP7);
2766 }
2767 #endif /* CONFIG_VP7_DECODER */
2768
2769 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2770 {
2771 return vp78_decode_init(avctx, IS_VP8);
2772 }
2773
2774 #if CONFIG_VP8_DECODER
2775 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2776 {
2777 VP8Context *s = avctx->priv_data;
2778 int ret;
2779
2780 s->avctx = avctx;
2781
2782 if ((ret = vp8_init_frames(s)) < 0) {
2783 ff_vp8_decode_free(avctx);
2784 return ret;
2785 }
2786
2787 return 0;
2788 }
2789
2790 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
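/* Translate a frame pointer from the source decoder's frames[] array to
 * the element at the same index in this context's array, keeping NULL. */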
2791
2792 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2793 const AVCodecContext *src)
2794 {
2795 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2796 int i;
2797
2798 if (s->macroblocks_base &&
2799 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2800 free_buffers(s);
2801 s->mb_width = s_src->mb_width;
2802 s->mb_height = s_src->mb_height;
2803 }
2804
2805 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2806 s->segmentation = s_src->segmentation;
2807 s->lf_delta = s_src->lf_delta;
2808 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2809
2810 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2811 if (s_src->frames[i].tf.f->data[0]) {
2812 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2813 if (ret < 0)
2814 return ret;
2815 }
2816 }
2817
2818 s->framep[0] = REBASE(s_src->next_framep[0]);
2819 s->framep[1] = REBASE(s_src->next_framep[1]);
2820 s->framep[2] = REBASE(s_src->next_framep[2]);
2821 s->framep[3] = REBASE(s_src->next_framep[3]);
2822
2823 return 0;
2824 }
2825 #endif /* CONFIG_VP8_DECODER */
2826
2827 #if CONFIG_VP7_DECODER
2828 AVCodec ff_vp7_decoder = {
2829 .name = "vp7",
2830 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2831 .type = AVMEDIA_TYPE_VIDEO,
2832 .id = AV_CODEC_ID_VP7,
2833 .priv_data_size = sizeof(VP8Context),
2834 .init = vp7_decode_init,
2835 .close = ff_vp8_decode_free,
2836 .decode = vp7_decode_frame,
2837 .capabilities = AV_CODEC_CAP_DR1,
2838 .flush = vp8_decode_flush,
2839 };
2840 #endif /* CONFIG_VP7_DECODER */
2841
2842 #if CONFIG_VP8_DECODER
2843 AVCodec ff_vp8_decoder = {
2844 .name = "vp8",
2845 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2846 .type = AVMEDIA_TYPE_VIDEO,
2847 .id = AV_CODEC_ID_VP8,
2848 .priv_data_size = sizeof(VP8Context),
2849 .init = ff_vp8_decode_init,
2850 .close = ff_vp8_decode_free,
2851 .decode = ff_vp8_decode_frame,
2852 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2853 AV_CODEC_CAP_SLICE_THREADS,
2854 .flush = vp8_decode_flush,
2855 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2856 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2857 };
2858 #endif /* CONFIG_VP8_DECODER */