/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
            av_freep(&s->thread_data[i].edge_emu_buffer);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}


static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) &&
                   (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581 = round(1.55 * 2^16): multiplying by it and shifting right
         * by 16 scales by 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 * 0: no update
 * 1: VP56_FRAME_PREVIOUS
 * 2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c,  vp8_pred8x8c_prob_inter,  sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc,       vp8_mv_default_prob,      sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta,     0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
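        /* Magnitudes coded with this long form are always >= 8 (0-7 use the
         * small tree below), so if none of bits 9..4 is set, bit 3 is implied
         * and not transmitted; otherwise it is coded like the other bits. */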
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
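    /* The "SWAR negate" in the macro below flips the sign of both 16-bit MV
     * components packed in one 32-bit word. Per lane, -v == ~v + 1; the
     * increment is added on the low 15 bits only so no carry leaks into the
     * other lane, and the sign bits are restored with the XOR. Example for
     * one lane: v = 0x0001 (+1) -> ~v = 0xfffe -> (0x7ffe + 1) ^ 0x8000 =
     * 0xffff (-1). */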
#define MV_EDGE_CHECK(n)\
{\
    VP8Macroblock *edge = mb_edge[n];\
    int edge_ref = edge->ref_frame;\
    if (edge_ref != VP56_FRAME_CURRENT) {\
        uint32_t mv = AV_RN32A(&edge->mv);\
        if (mv) {\
            if (cur_sign_bias != sign_bias[edge_ref]) {\
                /* SWAR negate of the values in mv. */\
                mv = ~mv;\
                mv = ((mv & 0x7fff7fff) + 0x00010001) ^ (mv & 0x80008000);\
            }\
            if (!n || mv != AV_RN32A(&near_mv[idx]))\
                AV_WN32A(&near_mv[++idx], mv);\
            cnt[idx] += 1 + (n != 2);\
        } else\
            cnt[CNT_ZERO] += 1 + (n != 2);\
    }\
}

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP(VP56mv,  near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                            VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

#ifndef decode_block_coeffs_internal
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
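    /* Work on a local copy of the range coder, presumably so its state can
     * stay in registers through this hot loop; it is written back on exit. */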
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
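                    // cat 0..3 selects DCT_CAT3..DCT_CAT6, whose value ranges
                    // start at 11, 19, 35 and 67 respectively: 3 + (8 << cat)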
                    coeff  = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}
910
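/* Per-macroblock top_border layout: bytes 0-15 hold the bottom luma row of
 * the macroblock above, bytes 16-23 its bottom U row and 24-31 its bottom V
 * row; the offsets used below depend on this layout. */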
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg) do {          \
        if (xchg) AV_SWAP64(b, a);     \
        else      AV_COPY64(b, a);     \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
    } else {
        return mb_y ? mode : HOR_PRED8x8;
    }
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8) {
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? DC_127_PRED8x8 : mode;
    case HOR_PRED8x8:
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8 /* TM */:
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) &&
        (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst + 4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4 * x - s->linesize - 1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4 * x                   - 1];
                            copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                            copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                            copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) &&
        (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
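/* Indexed by the fractional MV position (0-7): even positions use all six
 * taps of the sixtap filter (2 extra pixels on the left, 3 on the right),
 * while the odd-position filters effectively reduce to four taps, needing
 * only 1 left / 2 right; full-pel (0) needs no border at all. */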

/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {

        int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     linesize, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     linesize, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     linesize, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder.  Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref, &uvmv,
                              4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}

static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
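                /* nnz4 packs the four per-block coefficient counts of this
                 * row of blocks, one per byte; if any count exceeds 1 we need
                 * a full IDCT per block, otherwise the DC-only helper handles
                 * all four blocks at once. */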
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x, td->block[y][x], s->linesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x, td->block[4 + ch][(y << 1) + x], s->uvlinesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x, td->block[4 + ch][(y << 1) + x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}

static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}

static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;
    int uvlinesize   = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}

static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

#define MARGIN (16 << 2)
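/* MVs are stored in quarter-pel units, so MARGIN is 16 pixels: vectors may
 * point at most 16 pixels outside the frame, and the << 6 below converts a
 * macroblock index to quarter-pel coordinates (16 pixels * 4). */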
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, DC_PRED * 0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

#if HAVE_THREADS
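/* Sliced-threading synchronization: each thread publishes its progress as a
 * single int packing the macroblock row in the high 16 bits and the column
 * in the low 16 bits, so one integer compare orders (row, column) pairs.
 * check_thread_pos blocks on the other thread's condition variable until its
 * published position reaches the requested (mb_x_check, mb_y_check). */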
1628 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1629 do {\
1630 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1631 if (otd->thread_mb_pos < tmp) {\
1632 pthread_mutex_lock(&otd->lock);\
1633 td->wait_mb_pos = tmp;\
1634 do {\
1635 if (otd->thread_mb_pos >= tmp)\
1636 break;\
1637 pthread_cond_wait(&otd->cond, &otd->lock);\
1638 } while (1);\
1639 td->wait_mb_pos = INT_MAX;\
1640 pthread_mutex_unlock(&otd->lock);\
1641 }\
1642 } while(0);
1643
1644 #define update_pos(td, mb_y, mb_x)\
1645 do {\
1646 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1647 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1648 int is_null = (next_td == NULL) || (prev_td == NULL);\
1649 int pos_check = (is_null) ? 1 :\
1650 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1651 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1652 td->thread_mb_pos = pos;\
1653 if (sliced_threading && pos_check) {\
1654 pthread_mutex_lock(&td->lock);\
1655 pthread_cond_broadcast(&td->cond);\
1656 pthread_mutex_unlock(&td->lock);\
1657 }\
1658 } while(0);
1659 #else
1660 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1661 #define update_pos(td, mb_y, mb_x)
1662 #endif
1663
1664 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1665 int jobnr, int threadnr)
1666 {
1667 VP8Context *s = avctx->priv_data;
1668 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1669 int mb_y = td->thread_mb_pos>>16;
1670 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1671 int num_jobs = s->num_jobs;
1672 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
1673 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1674 VP8Macroblock *mb;
1675 uint8_t *dst[3] = {
1676 curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1677 curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1678 curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
1679 };
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1) {
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    } else {
        // Make sure the previous frame has finished decoding (and hence
        // written) its segmentation map, if we reuse that map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // VP8 defines pixels left of the frame as 129 for intra prediction
    // (and pixels above as 127); prefill the left edge of this row.
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16 >> !!i; y++) // 16 luma rows, 8 chroma rows
                dst[i][y * curframe->tf.f->linesize[i] - 1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

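    /* MV clamping window for this row, in the decoder's quarter-pel MV units
     * (64 per 16-pixel macroblock); it slides left/up by one macroblock as
     * mb_x/mb_y advance below. */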
    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

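    /* Decode and filter progress share one 16-bit column space: a row
     * publishes columns [0, mb_width) while decoding and
     * [mb_width + 3, 2 * mb_width + 3) while filtering, so a waiter can
     * require "decoded past x" or "filtered past x" with the same packed
     * compare. Thread 0 below therefore asks its predecessor for a
     * filter-range position, not just a decode-range one. */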
    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait until the row above has decoded past our above-right
        // neighbour (mb_x+1, mb_y-1), which prediction for this MB reads.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x + 1, mb_y - 1);
            } else {
                check_thread_pos(td, prev_td,
                                 (s->mb_width + 3) + (mb_x + 1), mb_y - 1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            // Reset the DC block predictors that would have been set had
            // this macroblock been coded with coefficients.
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8] = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

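        /* With more than one sliced job, the last thread saves this row's
         * unfiltered bottom edge into top_border during decode: thread 0
         * decodes the row below only after this row has been deblocked
         * (see the filter-range check above), so it must take its "above"
         * prediction pixels from this saved copy rather than the frame. */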
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        // publish decode progress for this row
        update_pos(td, mb_y, mb_x);
    }
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        // Wait until the row above has been filtered past our right neighbour.
        if (prev_td != td)
            check_thread_pos(td, prev_td, (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        // Filtering modifies pixels that the row below still reads for
        // prediction, so don't run ahead of its decode (thread 0 is exempt:
        // it waits on our filter progress and predicts from top_border instead).
        if (next_td != td && next_td != &s->thread_data[0])
            check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

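/*
 * Top-level sliced-threading job: job n handles macroblock rows n,
 * n + num_jobs, n + 2 * num_jobs, ... Each row is first entropy-decoded and
 * reconstructed, then (optionally) loop-filtered, with progress published
 * after each pass so neighbouring rows can interleave safely.
 */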
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        td->thread_mb_pos = mb_y << 16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF); // mark the row fully complete

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

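    /* A frame that updates no reference can be dropped at AVDISCARD_NONREF,
     * a referenced inter frame at AVDISCARD_NONKEY, and a keyframe only at
     * AVDISCARD_ALL. */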
    skip_thresh = !referenced ? AVDISCARD_NONREF :
                  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);

    // Since the arithmetic coder's probabilities are updated every frame,
    // the values we inherit on a random inter frame are almost certainly
    // junk unless decoding started from a keyframe, so don't display
    // anything rather than display junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    // Pick the next golden/altref references: each may stay unchanged, be
    // replaced by the new frame, or be copied from another reference
    // (e.g. when golden and altref are swapped).
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT] = curframe;

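    /* With frame threading, the next frame's decode may start as soon as
     * this returns, so all shared state (the reference selection above)
     * must be finalized before this point. */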
    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

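    /* 21 lines per buffer: a 16-line macroblock plus the extra rows the
     * six-tap subpel MC filter reaches (2 above, 3 below). */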
    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++) {
            s->thread_data[i].edge_emu_buffer = av_malloc(21 * s->linesize);
            if (!s->thread_data[i].edge_emu_buffer) {
                ret = AVERROR(ENOMEM);
                goto err;
            }
        }

    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction
     * from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
               (s->mb_width + 1) * sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

    // VP8 defines pixels above the frame as 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        s->top_border[0][31] = 127;
        memset(s->top_border[1], 127, s->mb_width * sizeof(*s->top_border));
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has finished decoding (and hence
        // written) its segmentation map, if we reuse that map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

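    /* One sliced job per coefficient partition at most: row mb_y reads its
     * tokens from partition mb_y & (num_coeff_partitions - 1), so capping
     * num_jobs at the partition count keeps two concurrently decoded rows
     * from pulling on the same range coder. */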
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs = num_jobs;
    s->curframe = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

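/* Translate a frame pointer from the source context into the equivalent
 * slot of this context's frames[] array (same index, different base). */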
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}

AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS |
                             CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};