lavc/vaapi: Add VP8 decode hwaccel
libavcodec/vp8.c
/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "internal.h"
#include "mathops.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
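    /* When a hardware accelerator is in use, also allocate its per-frame
     * private data; hwaccels such as the VAAPI VP8 hwaccel keep their
     * per-picture state in hwaccel_picture_private. */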
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}
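/* (Re)allocate the per-frame buffers. mb_layout selects between the
 * full-frame macroblock array used for slice threading (and always for VP7)
 * and the smaller layout used for frame threading with a single thread. */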
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}

static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map          = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
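/* The DCT coefficient data can be split into up to 8 partitions, each read by
 * its own range decoder. The sizes of all but the last partition are stored
 * as 3-byte little-endian values in front of the coefficient data; the last
 * partition simply takes whatever remains of the buffer. */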
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 * 0: no update
 * 1: VP56_FRAME_PREVIOUS
 * 2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
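/* Despite its name this copies the two chroma planes;
 * the luma plane is faded in place by the caller. */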
static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}
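/* Per-pixel VP7 fade: dst = clip_uint8(src + src * beta / 256 + alpha),
 * with alpha and beta read from the bitstream as signed 8-bit values. */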
static void fade(uint8_t *dst, uint8_t *src,
                 int width, int height, ptrdiff_t linesize,
                 int alpha, int beta)
{
    int i, j;

    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * linesize + i];
            dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}

static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS])
            return AVERROR_INVALIDDATA;

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_luma(dst, src, width, height);
        }

        fade(dst->data[0], src->data[0],
             width, height, dst->linesize[0], alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    if (buf_size < part1_size) {
        return AVERROR_INVALIDDATA;
    }

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}

static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}
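/* Gather VP7 motion vector predictions: each candidate position in
 * vp7_mv_pred[] contributes its score to a counter for the zero, nearest or
 * second-nearest vector; the resulting counts index vp7_mode_contexts[] to
 * select the probabilities used to code this macroblock's MV mode. */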
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv   = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y  += read_mv_component(c, s->prob->mvc[0], IS_VP7);
                    mb->mv.x  += read_mv_component(c, s->prob->mvc[1], IS_VP7);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv     = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv     = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv   = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += read_mv_component(c, s->prob->mvc[0], IS_VP8);
                    mb->mv.x  += read_mv_component(c, s->prob->mvc[1], IS_VP8);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char *vp7_feature_name[] = { "q-index",
                                              "lf-delta",
                                              "partial-golden-update",
                                              "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r     arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i     initial coeff index, 0 unless a separate DC block is coded
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
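/* VP7 inter DC prediction: pred[0] holds a running DC predictor and pred[1]
 * counts how many times in a row the same DC has been seen for this reference
 * frame. Once the count exceeds 3, the coded DC is treated as a delta against
 * the predictor; the count is reset whenever either value is zero or their
 * signs differ. Returns 1 if block[0] was modified so the caller can fold the
 * change into the non-zero count. */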
1314static av_always_inline
1315int inter_predict_dc(int16_t block[16], int16_t pred[2])
1316{
1317 int16_t dc = block[0];
1318 int ret = 0;
1319
1320 if (pred[1] > 3) {
1321 dc += pred[0];
1322 ret = 1;
1323 }
1324
1325 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1326 block[0] = pred[0] = dc;
1327 pred[1] = 0;
1328 } else {
1329 if (pred[0] == dc)
1330 pred[1]++;
1331 block[0] = pred[0] = dc;
1332 }
1333
1334 return ret;
1335}
1336
1337static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1338 int16_t block[16],
1339 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1340 int i, uint8_t *token_prob,
1341 int16_t qmul[2],
1342 const uint8_t scan[16])
1343{
1344 return decode_block_coeffs_internal(r, block, probs, i,
1345 token_prob, qmul, scan, IS_VP7);
1346}
1347
1348#ifndef vp8_decode_block_coeffs_internal
1349static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1350 int16_t block[16],
1351 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1352 int i, uint8_t *token_prob,
1353 int16_t qmul[2])
1354{
1355 return decode_block_coeffs_internal(r, block, probs, i,
f4d581cd 1356 token_prob, qmul, ff_zigzag_scan, IS_VP8);
ac4b32df 1357}
a7878c9f 1358#endif
3b636f21 1359
3c432e11 1360/**
53c20f17
VG
1361 * @param c arithmetic bitstream reader context
1362 * @param block destination for block coefficients
1363 * @param probs probabilities to use when reading trees from the bitstream
1364 * @param i initial coeff index, 0 unless a separate DC block is coded
3c432e11
DB
1365 * @param zero_nhood the initial prediction context for number of surrounding
1366 * all-zero blocks (only left/top, so 0-2)
53c20f17
VG
1367 * @param qmul array holding the dc/ac dequant factor at position 0/1
1368 *
3c432e11
DB
1369 * @return 0 if no coeffs were decoded
1370 * otherwise, the index of the last coeff decoded plus one
1371 */
414ac27d 1372static av_always_inline
88bd7fdc 1373int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
53c20f17 1374 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
ac4b32df
PR
1375 int i, int zero_nhood, int16_t qmul[2],
1376 const uint8_t scan[16], int vp7)
1e739679
JGG
1377{
1378 uint8_t *token_prob = probs[i][zero_nhood];
1379 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1380 return 0;
ac4b32df
PR
1381 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1382 token_prob, qmul, scan)
1383 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1384 token_prob, qmul);
1e739679
JGG
1385}
1386
1387static av_always_inline
53c20f17 1388void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
ac4b32df
PR
1389 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1390 int is_vp7)
3b636f21 1391{
3b636f21
DC
1392 int i, x, y, luma_start = 0, luma_ctx = 3;
1393 int nnz_pred, nnz, nnz_total = 0;
17343e39 1394 int segment = mb->segment;
f311208c 1395 int block_dc = 0;
3b636f21 1396
ac4b32df 1397 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
3b636f21
DC
1398 nnz_pred = t_nnz[8] + l_nnz[8];
1399
1400 // decode DC values and do hadamard
53c20f17 1401 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
ac4b32df 1402 nnz_pred, s->qmat[segment].luma_dc_qmul,
f4d581cd 1403 ff_zigzag_scan, is_vp7);
3b636f21 1404 l_nnz[8] = t_nnz[8] = !!nnz;
ac4b32df
PR
1405
1406 if (is_vp7 && mb->mode > MODE_I4x4) {
1407 nnz |= inter_predict_dc(td->block_dc,
1408 s->inter_dc_pred[mb->ref_frame - 1]);
1409 }
1410
f311208c
JGG
1411 if (nnz) {
1412 nnz_total += nnz;
53c20f17 1413 block_dc = 1;
f311208c 1414 if (nnz == 1)
951455c1 1415 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
f311208c 1416 else
951455c1 1417 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
f311208c 1418 }
3b636f21 1419 luma_start = 1;
53c20f17 1420 luma_ctx = 0;
3b636f21
DC
1421 }
1422
1423 // luma blocks
1424 for (y = 0; y < 4; y++)
1425 for (x = 0; x < 4; x++) {
ffbf0794 1426 nnz_pred = l_nnz[y] + t_nnz[x];
53c20f17
VG
1427 nnz = decode_block_coeffs(c, td->block[y][x],
1428 s->prob->token[luma_ctx],
1429 luma_start, nnz_pred,
ac4b32df
PR
1430 s->qmat[segment].luma_qmul,
1431 s->prob[0].scan, is_vp7);
53c20f17
VG
1432 /* nnz+block_dc may be one more than the actual last index,
1433 * but we don't care */
951455c1 1434 td->non_zero_count_cache[y][x] = nnz + block_dc;
3b636f21
DC
1435 t_nnz[x] = l_nnz[y] = !!nnz;
1436 nnz_total += nnz;
1437 }
1438
1439 // chroma blocks
1440 // TODO: what to do about dimensions? 2nd dim for luma is x,
1441 // but for chroma it's (y<<1)|x
1442 for (i = 4; i < 6; i++)
1443 for (y = 0; y < 2; y++)
1444 for (x = 0; x < 2; x++) {
53c20f17 1445 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
ac4b32df
PR
1446 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1447 s->prob->token[2], 0, nnz_pred,
1448 s->qmat[segment].chroma_qmul,
1449 s->prob[0].scan, is_vp7);
53c20f17
VG
1450 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1451 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
ac4b32df 1452 nnz_total += nnz;
3b636f21
DC
1453 }
1454
1455 // if there were no coded coeffs despite the macroblock not being marked skip,
1456 // we MUST not do the inner loop filter and should not do IDCT
1457 // Since skip isn't used for bitstream prediction, just manually set it.
1458 if (!nnz_total)
1459 mb->skip = 1;
1460}
1461
9ac831c2 1462static av_always_inline
53c20f17
VG
1463void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1464 uint8_t *src_cb, uint8_t *src_cr,
87c6c786 1465 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
9ac831c2 1466{
53c20f17 1467 AV_COPY128(top_border, src_y + 15 * linesize);
9ac831c2 1468 if (!simple) {
53c20f17
VG
1469 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1470 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
9ac831c2
DC
1471 }
1472}
1473
1474static av_always_inline
53c20f17 1475void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
87c6c786 1476 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
53c20f17 1477 int mb_y, int mb_width, int simple, int xchg)
9ac831c2 1478{
53c20f17
VG
1479 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1480 src_y -= linesize;
9ac831c2
DC
1481 src_cb -= uvlinesize;
1482 src_cr -= uvlinesize;
1483
53c20f17
VG
1484#define XCHG(a, b, xchg) \
1485 do { \
1486 if (xchg) \
1487 AV_SWAP64(b, a); \
1488 else \
1489 AV_COPY64(b, a); \
096971e8 1490 } while (0)
9ac831c2 1491
53c20f17
VG
1492 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1493 XCHG(top_border, src_y, xchg);
1494 XCHG(top_border + 8, src_y + 8, 1);
1495 if (mb_x < mb_width - 1)
1496 XCHG(top_border + 32, src_y + 16, 1);
070ce7ef 1497
9ac831c2
DC
1498 // only copy chroma for normal loop filter
1499 // or to initialize the top row to 127
1500 if (!simple || !mb_y) {
53c20f17
VG
1501 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1502 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1503 XCHG(top_border + 16, src_cb, 1);
1504 XCHG(top_border + 24, src_cr, 1);
9ac831c2
DC
1505 }
1506}
1507
414ac27d 1508static av_always_inline
ee555de7
RB
1509int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1510{
53c20f17 1511 if (!mb_x)
ee555de7 1512 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
53c20f17 1513 else
ee555de7 1514 return mb_y ? mode : LEFT_DC_PRED8x8;
ee555de7
RB
1515}
1516
1517static av_always_inline
ac4b32df 1518int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
ee555de7 1519{
53c20f17 1520 if (!mb_x)
ac4b32df 1521 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
53c20f17 1522 else
ee555de7 1523 return mb_y ? mode : HOR_PRED8x8;
ee555de7
RB
1524}
1525
1526static av_always_inline
ac4b32df 1527int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
ee555de7
RB
1528{
1529 switch (mode) {
1530 case DC_PRED8x8:
1531 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1532 case VERT_PRED8x8:
ac4b32df 1533 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
ee555de7 1534 case HOR_PRED8x8:
ac4b32df 1535 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
53c20f17 1536 case PLANE_PRED8x8: /* TM */
ac4b32df 1537 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
ee555de7
RB
1538 }
1539 return mode;
1540}
1541
1542static av_always_inline
ac4b32df 1543int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
ee555de7
RB
1544{
1545 if (!mb_x) {
ac4b32df 1546 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
ee555de7
RB
1547 } else {
1548 return mb_y ? mode : HOR_VP8_PRED;
1549 }
1550}
1551
1552static av_always_inline
ac4b32df
PR
1553int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1554 int *copy_buf, int vp7)
ee555de7
RB
1555{
1556 switch (mode) {
1557 case VERT_PRED:
1558 if (!mb_x && mb_y) {
1559 *copy_buf = 1;
1560 return mode;
1561 }
1562 /* fall-through */
1563 case DIAG_DOWN_LEFT_PRED:
1564 case VERT_LEFT_PRED:
ac4b32df 1565 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
ee555de7
RB
1566 case HOR_PRED:
1567 if (!mb_y) {
1568 *copy_buf = 1;
1569 return mode;
a71abb71 1570 }
ee555de7
RB
1571 /* fall-through */
1572 case HOR_UP_PRED:
ac4b32df 1573 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
ee555de7 1574 case TM_VP8_PRED:
ac4b32df 1575 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
53c20f17
VG
1576 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1577 * as 16x16/8x8 DC */
ee555de7
RB
1578 case DIAG_DOWN_RIGHT_PRED:
1579 case VERT_RIGHT_PRED:
1580 case HOR_DOWN_PRED:
1581 if (!mb_y || !mb_x)
1582 *copy_buf = 1;
1583 return mode;
3b636f21
DC
1584 }
1585 return mode;
1586}
1587
414ac27d 1588static av_always_inline
951455c1 1589void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
ac4b32df 1590 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
3b636f21 1591{
bb591566
MR
1592 int x, y, mode, nnz;
1593 uint32_t tr;
3b636f21 1594
53c20f17
VG
1595 /* for the first row, we need to run xchg_mb_border to init the top edge
1596 * to 127 otherwise, skip it if we aren't going to deblock */
ef8c93e2 1597 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
53c20f17 1598 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
9ac831c2
DC
1599 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1600 s->filter.simple, 1);
1601
3b636f21 1602 if (mb->mode < MODE_I4x4) {
ac4b32df 1603 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
3b636f21
DC
1604 s->hpc.pred16x16[mode](dst[0], s->linesize);
1605 } else {
1606 uint8_t *ptr = dst[0];
17343e39 1607 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
ac4b32df
PR
1608 const uint8_t lo = is_vp7 ? 128 : 127;
1609 const uint8_t hi = is_vp7 ? 128 : 129;
1610 uint8_t tr_top[4] = { lo, lo, lo, lo };
3b636f21
DC
1611
1612 // all blocks on the right edge of the macroblock use bottom edge
1613 // the top macroblock for their topright edge
1614 uint8_t *tr_right = ptr - s->linesize + 16;
1615
1616 // if we're on the right edge of the frame, said edge is extended
1617 // from the top macroblock
53c20f17
VG
1618 if (mb_y && mb_x == s->mb_width - 1) {
1619 tr = tr_right[-1] * 0x01010101u;
1620 tr_right = (uint8_t *) &tr;
3b636f21
DC
1621 }
1622
b74f70d6 1623 if (mb->skip)
951455c1 1624 AV_ZERO128(td->non_zero_count_cache);
b74f70d6 1625
3b636f21
DC
1626 for (y = 0; y < 4; y++) {
1627 uint8_t *topright = ptr + 4 - s->linesize;
1628 for (x = 0; x < 4; x++) {
87c6c786
DB
1629 int copy = 0;
1630 ptrdiff_t linesize = s->linesize;
53c20f17
VG
1631 uint8_t *dst = ptr + 4 * x;
1632 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
ee555de7 1633
ef8c93e2 1634 if ((y == 0 || x == 3) && mb_y == 0) {
ee555de7
RB
1635 topright = tr_top;
1636 } else if (x == 3)
3b636f21
DC
1637 topright = tr_right;
1638
ac4b32df
PR
1639 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1640 mb_y + y, &copy, is_vp7);
ef8c93e2 1641 if (copy) {
53c20f17 1642 dst = copy_dst + 12;
ef8c93e2
AK
1643 linesize = 8;
1644 if (!(mb_y + y)) {
ac4b32df
PR
1645 copy_dst[3] = lo;
1646 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
ef8c93e2 1647 } else {
53c20f17 1648 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
ee555de7 1649 if (!(mb_x + x)) {
ac4b32df 1650 copy_dst[3] = hi;
ee555de7 1651 } else {
53c20f17 1652 copy_dst[3] = ptr[4 * x - s->linesize - 1];
ee555de7
RB
1653 }
1654 }
ef8c93e2
AK
1655 if (!(mb_x + x)) {
1656 copy_dst[11] =
1657 copy_dst[19] =
1658 copy_dst[27] =
ac4b32df 1659 copy_dst[35] = hi;
ef8c93e2 1660 } else {
53c20f17
VG
1661 copy_dst[11] = ptr[4 * x - 1];
1662 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1663 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1664 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
ef8c93e2 1665 }
ee555de7
RB
1666 }
1667 s->hpc.pred4x4[mode](dst, topright, linesize);
1668 if (copy) {
53c20f17
VG
1669 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1670 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1671 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1672 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
ee555de7 1673 }
3b636f21 1674
951455c1 1675 nnz = td->non_zero_count_cache[y][x];
3b636f21
DC
1676 if (nnz) {
1677 if (nnz == 1)
53c20f17
VG
1678 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1679 td->block[y][x], s->linesize);
3b636f21 1680 else
53c20f17
VG
1681 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1682 td->block[y][x], s->linesize);
3b636f21
DC
1683 }
1684 topright += 4;
1685 }
1686
53c20f17 1687 ptr += 4 * s->linesize;
d2840fa4 1688 intra4x4 += 4;
3b636f21
DC
1689 }
1690 }
1691
ac4b32df
PR
1692 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1693 mb_x, mb_y, is_vp7);
3b636f21
DC
1694 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1695 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
9ac831c2 1696
ef8c93e2 1697 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
53c20f17 1698 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
9ac831c2
DC
1699 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1700 s->filter.simple, 0);
3b636f21
DC
1701}
1702
64233e70
JGG
1703static const uint8_t subpel_idx[3][8] = {
1704 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1705 // also function pointer index
1706 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1707 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1708};
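/* Worked example (illustrative, derived from the table and the bounds checks
 * in vp8_mc_luma() below): a half-pel position mx = 4 gives mx_idx = 2 and
 * needs 2 extra columns on the left plus 3 on the right (5 in total), so a
 * 16-wide block reads 21 source columns per row; the emulated-edge path is
 * taken whenever the block plus that apron would cross the frame border.
 * Odd eighth-pel positions (1, 3, 5, 7) only need 1 left / 2 right. */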
1709
3b636f21 1710/**
3c432e11 1711 * luma MC function
3b636f21 1712 *
53c20f17
VG
1713 * @param s VP8 decoding context
1714 * @param dst target buffer for block data at block position
1715 * @param ref reference picture buffer at origin (0, 0)
1716 * @param mv motion vector (relative to block position) to get pixel data from
1717 * @param x_off horizontal position of block from origin (0, 0)
1718 * @param y_off vertical position of block from origin (0, 0)
1719 * @param block_w width of block (16, 8 or 4)
1720 * @param block_h height of block (always same as block_w)
1721 * @param width width of src/dst plane data
1722 * @param height height of src/dst plane data
3b636f21 1723 * @param linesize size of a single line of plane data, including padding
53c20f17 1724 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
3b636f21 1725 */
414ac27d 1726static av_always_inline
951455c1 1727void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
759001c5 1728 ThreadFrame *ref, const VP56mv *mv,
64233e70 1729 int x_off, int y_off, int block_w, int block_h,
93f30547 1730 int width, int height, ptrdiff_t linesize,
64233e70 1731 vp8_mc_func mc_func[3][3])
3b636f21 1732{
759001c5 1733 uint8_t *src = ref->f->data[0];
4773d904 1734
c0498b30 1735 if (AV_RN32A(mv)) {
87c6c786 1736 ptrdiff_t src_linesize = linesize;
64233e70 1737
53c20f17
VG
1738 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1739 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
64233e70
JGG
1740
1741 x_off += mv->x >> 2;
1742 y_off += mv->y >> 2;
c0498b30
JGG
1743
1744 // edge emulation
4773d904 1745 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
c0498b30 1746 src += y_off * linesize + x_off;
64233e70
JGG
1747 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1748 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
458446ac
RB
1749 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1750 src - my_idx * linesize - mx_idx,
e46ad30a 1751 EDGE_EMU_LINESIZE, linesize,
53c20f17
VG
1752 block_w + subpel_idx[1][mx],
1753 block_h + subpel_idx[1][my],
1754 x_off - mx_idx, y_off - my_idx,
1755 width, height);
e46ad30a
AK
1756 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1757 src_linesize = EDGE_EMU_LINESIZE;
c0498b30 1758 }
e46ad30a 1759 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
4773d904
RB
1760 } else {
1761 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
53c20f17
VG
1762 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1763 linesize, block_h, 0, 0);
4773d904 1764 }
3b636f21
DC
1765}
1766
3c432e11
DB
1767/**
1768 * chroma MC function
1769 *
53c20f17
VG
1770 * @param s VP8 decoding context
1771 * @param dst1 target buffer for block data at block position (U plane)
1772 * @param dst2 target buffer for block data at block position (V plane)
1773 * @param ref reference picture buffer at origin (0, 0)
1774 * @param mv motion vector (relative to block position) to get pixel data from
1775 * @param x_off horizontal position of block from origin (0, 0)
1776 * @param y_off vertical position of block from origin (0, 0)
1777 * @param block_w width of block (16, 8 or 4)
1778 * @param block_h height of block (always same as block_w)
1779 * @param width width of src/dst plane data
1780 * @param height height of src/dst plane data
3c432e11 1781 * @param linesize size of a single line of plane data, including padding
53c20f17 1782 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
3c432e11 1783 */
414ac27d 1784static av_always_inline
53c20f17
VG
1785void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1786 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1787 int x_off, int y_off, int block_w, int block_h,
1788 int width, int height, ptrdiff_t linesize,
64233e70
JGG
1789 vp8_mc_func mc_func[3][3])
1790{
759001c5 1791 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
4773d904 1792
64233e70 1793 if (AV_RN32A(mv)) {
53c20f17
VG
1794 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1795 int my = mv->y & 7, my_idx = subpel_idx[0][my];
64233e70
JGG
1796
1797 x_off += mv->x >> 3;
1798 y_off += mv->y >> 3;
1799
1800 // edge emulation
1801 src1 += y_off * linesize + x_off;
1802 src2 += y_off * linesize + x_off;
4773d904 1803 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
64233e70
JGG
1804 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1805 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
458446ac
RB
1806 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1807 src1 - my_idx * linesize - mx_idx,
e46ad30a 1808 EDGE_EMU_LINESIZE, linesize,
8c53d39e
RB
1809 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1810 x_off - mx_idx, y_off - my_idx, width, height);
e46ad30a
AK
1811 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1812 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
64233e70 1813
458446ac
RB
1814 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1815 src2 - my_idx * linesize - mx_idx,
e46ad30a 1816 EDGE_EMU_LINESIZE, linesize,
8c53d39e
RB
1817 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1818 x_off - mx_idx, y_off - my_idx, width, height);
53c20f17 1819 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
e46ad30a 1820 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
64233e70
JGG
1821 } else {
1822 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1823 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1824 }
1825 } else {
4773d904 1826 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
64233e70
JGG
1827 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1828 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1829 }
1830}
1831
1832static av_always_inline
951455c1 1833void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
759001c5 1834 ThreadFrame *ref_frame, int x_off, int y_off,
53c20f17 1835 int bx_off, int by_off, int block_w, int block_h,
414ac27d 1836 int width, int height, VP56mv *mv)
7c4dcf81
RB
1837{
1838 VP56mv uvmv = *mv;
1839
1840 /* Y */
951455c1 1841 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
4773d904 1842 ref_frame, mv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1843 block_w, block_h, width, height, s->linesize,
1844 s->put_pixels_tab[block_w == 8]);
7c4dcf81
RB
1845
1846 /* U/V */
1847 if (s->profile == 3) {
ac4b32df
PR
 1848 /* this block only applies to VP8; it is safe to check
1849 * only the profile, as VP7 profile <= 1 */
7c4dcf81
RB
1850 uvmv.x &= ~7;
1851 uvmv.y &= ~7;
1852 }
53c20f17
VG
1853 x_off >>= 1;
1854 y_off >>= 1;
1855 bx_off >>= 1;
1856 by_off >>= 1;
1857 width >>= 1;
1858 height >>= 1;
1859 block_w >>= 1;
1860 block_h >>= 1;
951455c1 1861 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
4773d904
RB
1862 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1863 &uvmv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1864 block_w, block_h, width, height, s->uvlinesize,
1865 s->put_pixels_tab[1 + (block_w == 4)]);
7c4dcf81
RB
1866}
1867
d864dee8 1868/* Fetch pixels for estimated mv 4 macroblocks ahead.
53c20f17
VG
1869 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1870static av_always_inline
1871void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1872 int mb_xy, int ref)
d864dee8 1873{
ef38842f 1874 /* Don't prefetch refs that haven't been used very often this frame. */
53c20f17 1875 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
c4211046 1876 int x_off = mb_x << 4, y_off = mb_y << 4;
53c20f17
VG
1877 int mx = (mb->mv.x >> 2) + x_off + 8;
1878 int my = (mb->mv.y >> 2) + y_off;
1879 uint8_t **src = s->framep[ref]->tf.f->data;
1880 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
4773d904
RB
 1881 /* For threading, an ff_thread_await_progress call here might be useful, but
1882 * it actually slows down the decoder. Since a bad prefetch doesn't
1883 * generate bad decoder output, we don't run it here. */
53c20f17
VG
1884 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1885 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1886 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
c4211046 1887 }
d864dee8
JGG
1888}
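/* Illustrative reading of the ref_count test above: a reference is only
 * prefetched once it has been chosen more than once per 32 macroblocks
 * decoded so far this frame. For example, at mb_xy = 160, mb_xy >> 5 = 5,
 * so at least 6 earlier macroblocks must have used that reference before
 * the prefetch is issued. */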
1889
3b636f21
DC
1890/**
1891 * Apply motion vectors to prediction buffer, chapter 18.
1892 */
414ac27d 1893static av_always_inline
951455c1
DK
1894void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1895 VP8Macroblock *mb, int mb_x, int mb_y)
3b636f21
DC
1896{
1897 int x_off = mb_x << 4, y_off = mb_y << 4;
53c20f17 1898 int width = 16 * s->mb_width, height = 16 * s->mb_height;
759001c5 1899 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
d292c345 1900 VP56mv *bmv = mb->bmv;
3b636f21 1901
73be29b0
JGG
1902 switch (mb->partitioning) {
1903 case VP8_SPLITMVMODE_NONE:
951455c1 1904 vp8_mc_part(s, td, dst, ref, x_off, y_off,
7c4dcf81 1905 0, 0, 16, 16, width, height, &mb->mv);
73be29b0 1906 break;
7c4dcf81 1907 case VP8_SPLITMVMODE_4x4: {
3b636f21 1908 int x, y;
7c4dcf81 1909 VP56mv uvmv;
3b636f21
DC
1910
1911 /* Y */
1912 for (y = 0; y < 4; y++) {
1913 for (x = 0; x < 4; x++) {
53c20f17
VG
1914 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1915 ref, &bmv[4 * y + x],
1916 4 * x + x_off, 4 * y + y_off, 4, 4,
64233e70
JGG
1917 width, height, s->linesize,
1918 s->put_pixels_tab[2]);
3b636f21
DC
1919 }
1920 }
1921
1922 /* U/V */
53c20f17
VG
1923 x_off >>= 1;
1924 y_off >>= 1;
1925 width >>= 1;
1926 height >>= 1;
3b636f21
DC
1927 for (y = 0; y < 2; y++) {
1928 for (x = 0; x < 2; x++) {
53c20f17
VG
1929 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1930 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1931 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1932 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1933 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1934 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1935 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1936 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
85dc006b
VG
1937 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1938 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
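                    /* The chroma MV is the sum of the four luma MVs of this
                     * 2x2 group divided by 4 with rounding: +2 is the rounding
                     * bias for the >> 2, and the FF_SIGNBIT() term corrects
                     * that bias for negative sums (rounding symmetrically away
                     * from zero, assuming FF_SIGNBIT() evaluates to -1 for
                     * negative values). Worked example with a positive sum:
                     * 6 -> (6 + 2) >> 2 = 2. */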
3b636f21
DC
1939 if (s->profile == 3) {
1940 uvmv.x &= ~7;
1941 uvmv.y &= ~7;
1942 }
53c20f17
VG
1943 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1944 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1945 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
64233e70
JGG
1946 width, height, s->uvlinesize,
1947 s->put_pixels_tab[2]);
3b636f21
DC
1948 }
1949 }
7c4dcf81
RB
1950 break;
1951 }
1952 case VP8_SPLITMVMODE_16x8:
951455c1 1953 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1954 0, 0, 16, 8, width, height, &bmv[0]);
951455c1 1955 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1956 0, 8, 16, 8, width, height, &bmv[1]);
7c4dcf81
RB
1957 break;
1958 case VP8_SPLITMVMODE_8x16:
951455c1 1959 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1960 0, 0, 8, 16, width, height, &bmv[0]);
951455c1 1961 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1962 8, 0, 8, 16, width, height, &bmv[1]);
7c4dcf81
RB
1963 break;
1964 case VP8_SPLITMVMODE_8x8:
951455c1 1965 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1966 0, 0, 8, 8, width, height, &bmv[0]);
951455c1 1967 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1968 8, 0, 8, 8, width, height, &bmv[1]);
951455c1 1969 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1970 0, 8, 8, 8, width, height, &bmv[2]);
951455c1 1971 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1972 8, 8, 8, 8, width, height, &bmv[3]);
7c4dcf81 1973 break;
3b636f21
DC
1974 }
1975}
1976
53c20f17
VG
1977static av_always_inline
1978void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
3b636f21 1979{
3df56f41 1980 int x, y, ch;
3b636f21 1981
8a467b2d
JGG
1982 if (mb->mode != MODE_I4x4) {
1983 uint8_t *y_dst = dst[0];
3b636f21 1984 for (y = 0; y < 4; y++) {
951455c1 1985 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
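            /* nnz4 packs the four per-block values of this row, lowest byte =
             * leftmost block (AV_RL32 is little-endian). A byte value of 1
             * selects the DC-only add, anything larger the full IDCT; if no
             * byte exceeds 1 (nnz4 & ~0x01010101 == 0), the whole row is
             * handled at once by vp8_idct_dc_add4y() below. */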
3df56f41 1986 if (nnz4) {
53c20f17 1987 if (nnz4 & ~0x01010101) {
8a467b2d 1988 for (x = 0; x < 4; x++) {
53c20f17
VG
1989 if ((uint8_t) nnz4 == 1)
1990 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1991 td->block[y][x],
1992 s->linesize);
1993 else if ((uint8_t) nnz4 > 1)
1994 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1995 td->block[y][x],
1996 s->linesize);
62457f90
JGG
1997 nnz4 >>= 8;
1998 if (!nnz4)
1999 break;
8a467b2d
JGG
2000 }
2001 } else {
951455c1 2002 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
3b636f21
DC
2003 }
2004 }
53c20f17 2005 y_dst += 4 * s->linesize;
3b636f21 2006 }
8a467b2d 2007 }
3b636f21 2008
8a467b2d 2009 for (ch = 0; ch < 2; ch++) {
53c20f17 2010 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
3ae079a3 2011 if (nnz4) {
53c20f17
VG
2012 uint8_t *ch_dst = dst[1 + ch];
2013 if (nnz4 & ~0x01010101) {
3ae079a3
JGG
2014 for (y = 0; y < 2; y++) {
2015 for (x = 0; x < 2; x++) {
53c20f17
VG
2016 if ((uint8_t) nnz4 == 1)
2017 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2018 td->block[4 + ch][(y << 1) + x],
2019 s->uvlinesize);
2020 else if ((uint8_t) nnz4 > 1)
2021 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2022 td->block[4 + ch][(y << 1) + x],
2023 s->uvlinesize);
62457f90
JGG
2024 nnz4 >>= 8;
2025 if (!nnz4)
628b48db 2026 goto chroma_idct_end;
8a467b2d 2027 }
53c20f17 2028 ch_dst += 4 * s->uvlinesize;
8a467b2d 2029 }
3ae079a3 2030 } else {
53c20f17 2031 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
3b636f21
DC
2032 }
2033 }
53c20f17
VG
2034chroma_idct_end:
2035 ;
3b636f21
DC
2036 }
2037}
2038
53c20f17 2039static av_always_inline
ac4b32df
PR
2040void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2041 VP8FilterStrength *f, int is_vp7)
3b636f21
DC
2042{
2043 int interior_limit, filter_level;
2044
2045 if (s->segmentation.enabled) {
17343e39 2046 filter_level = s->segmentation.filter_level[mb->segment];
3b636f21
DC
2047 if (!s->segmentation.absolute_vals)
2048 filter_level += s->filter.level;
2049 } else
2050 filter_level = s->filter.level;
2051
2052 if (s->lf_delta.enabled) {
2053 filter_level += s->lf_delta.ref[mb->ref_frame];
dd18c9a0 2054 filter_level += s->lf_delta.mode[mb->mode];
3b636f21 2055 }
a1b227bb 2056
1550f45a 2057 filter_level = av_clip_uintp2(filter_level, 6);
3b636f21
DC
2058
2059 interior_limit = filter_level;
2060 if (s->filter.sharpness) {
8a2c99b4 2061 interior_limit >>= (s->filter.sharpness + 3) >> 2;
3b636f21
DC
2062 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2063 }
2064 interior_limit = FFMAX(interior_limit, 1);
2065
968570d6
JGG
2066 f->filter_level = filter_level;
2067 f->inner_limit = interior_limit;
ac4b32df 2068 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
53c20f17 2069 mb->mode == VP8_MVMODE_SPLIT;
3b636f21
DC
2070}
2071
53c20f17
VG
2072static av_always_inline
2073void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
ac4b32df 2074 int mb_x, int mb_y, int is_vp7)
3b636f21 2075{
ac4b32df 2076 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
968570d6
JGG
2077 int filter_level = f->filter_level;
2078 int inner_limit = f->inner_limit;
c55e0d34 2079 int inner_filter = f->inner_filter;
87c6c786
DB
2080 ptrdiff_t linesize = s->linesize;
2081 ptrdiff_t uvlinesize = s->uvlinesize;
79dec154
JGG
2082 static const uint8_t hev_thresh_lut[2][64] = {
2083 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2084 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2085 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2086 3, 3, 3, 3 },
2087 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2088 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2089 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2090 2, 2, 2, 2 }
2091 };
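    /* Rows are indexed by s->keyframe below: row 0 is used on inter frames,
     * row 1 on keyframes, which get a slightly lower high-edge-variance
     * threshold. For example, filter_level 40 maps to hev_thresh 3 on an
     * inter frame but 2 on a keyframe. */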
3b636f21 2092
3b636f21
DC
2093 if (!filter_level)
2094 return;
2095
ac4b32df
PR
2096 if (is_vp7) {
2097 bedge_lim_y = filter_level;
2098 bedge_lim_uv = filter_level * 2;
2099 mbedge_lim = filter_level + 2;
2100 } else {
2101 bedge_lim_y =
2102 bedge_lim_uv = filter_level * 2 + inner_limit;
2103 mbedge_lim = bedge_lim_y + 4;
2104 }
968570d6 2105
79dec154 2106 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
5245c04d 2107
3b636f21 2108 if (mb_x) {
53c20f17 2109 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
3facfc99 2110 mbedge_lim, inner_limit, hev_thresh);
53c20f17 2111 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 2112 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
2113 }
2114
ac4b32df
PR
2115#define H_LOOP_FILTER_16Y_INNER(cond) \
2116 if (cond && inner_filter) { \
2117 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2118 bedge_lim_y, inner_limit, \
2119 hev_thresh); \
2120 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2121 bedge_lim_y, inner_limit, \
2122 hev_thresh); \
2123 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2124 bedge_lim_y, inner_limit, \
2125 hev_thresh); \
2126 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2127 uvlinesize, bedge_lim_uv, \
2128 inner_limit, hev_thresh); \
3b636f21
DC
2129 }
2130
ac4b32df
PR
2131 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2132
3b636f21 2133 if (mb_y) {
53c20f17 2134 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
3facfc99 2135 mbedge_lim, inner_limit, hev_thresh);
53c20f17 2136 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 2137 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
2138 }
2139
c55e0d34 2140 if (inner_filter) {
53c20f17 2141 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
ac4b32df 2142 linesize, bedge_lim_y,
145d3186 2143 inner_limit, hev_thresh);
53c20f17 2144 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
ac4b32df 2145 linesize, bedge_lim_y,
145d3186 2146 inner_limit, hev_thresh);
53c20f17 2147 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
ac4b32df 2148 linesize, bedge_lim_y,
145d3186 2149 inner_limit, hev_thresh);
53c20f17
VG
2150 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2151 dst[2] + 4 * uvlinesize,
ac4b32df 2152 uvlinesize, bedge_lim_uv,
3facfc99 2153 inner_limit, hev_thresh);
3b636f21 2154 }
ac4b32df
PR
2155
2156 H_LOOP_FILTER_16Y_INNER(is_vp7)
3b636f21
DC
2157}
2158
53c20f17
VG
2159static av_always_inline
2160void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2161 int mb_x, int mb_y)
3b636f21 2162{
968570d6
JGG
2163 int mbedge_lim, bedge_lim;
2164 int filter_level = f->filter_level;
53c20f17 2165 int inner_limit = f->inner_limit;
c55e0d34 2166 int inner_filter = f->inner_filter;
87c6c786 2167 ptrdiff_t linesize = s->linesize;
3b636f21 2168
3b636f21
DC
2169 if (!filter_level)
2170 return;
2171
53c20f17 2172 bedge_lim = 2 * filter_level + inner_limit;
79dec154 2173 mbedge_lim = bedge_lim + 4;
3b636f21
DC
2174
2175 if (mb_x)
145d3186 2176 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 2177 if (inner_filter) {
53c20f17
VG
2178 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2179 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2180 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
3b636f21
DC
2181 }
2182
2183 if (mb_y)
145d3186 2184 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 2185 if (inner_filter) {
53c20f17
VG
2186 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2187 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2188 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
3b636f21
DC
2189 }
2190}
2191
337ade52 2192#define MARGIN (16 << 2)
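/* MARGIN is expressed in the quarter-pel units used for s->mv_min/s->mv_max:
 * 16 << 2 = 64 quarter-pels, i.e. 16 luma pixels (one macroblock) of slack
 * around the frame for motion-vector clamping. mv_max.x starts at
 * ((mb_width - 1) << 6) + MARGIN and both bounds slide down by 64 per
 * macroblock as decoding advances. */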
ac4b32df
PR
2193static av_always_inline
2194void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2195 VP8Frame *prev_frame, int is_vp7)
337ade52
DK
2196{
2197 VP8Context *s = avctx->priv_data;
951455c1
DK
2198 int mb_x, mb_y;
2199
2200 s->mv_min.y = -MARGIN;
2201 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2202 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
53c20f17
VG
2203 VP8Macroblock *mb = s->macroblocks_base +
2204 ((s->mb_width + 1) * (mb_y + 1) + 1);
2205 int mb_xy = mb_y * s->mb_width;
951455c1 2206
53c20f17 2207 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
951455c1
DK
2208
2209 s->mv_min.x = -MARGIN;
2210 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2211 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2212 if (mb_y == 0)
53c20f17
VG
2213 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2214 DC_PRED * 0x01010101);
759001c5
AK
2215 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2216 prev_frame && prev_frame->seg_map ?
ac4b32df 2217 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
951455c1
DK
2218 s->mv_min.x -= 64;
2219 s->mv_max.x -= 64;
2220 }
2221 s->mv_min.y -= 64;
2222 s->mv_max.y -= 64;
2223 }
2224}
2225
ac4b32df
PR
2226static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2227 VP8Frame *prev_frame)
2228{
2229 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2230}
2231
2232static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2233 VP8Frame *prev_frame)
2234{
2235 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2236}
2237
25f056e6 2238#if HAVE_THREADS
53c20f17
VG
2239#define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2240 do { \
2241 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2242 if (otd->thread_mb_pos < tmp) { \
2243 pthread_mutex_lock(&otd->lock); \
2244 td->wait_mb_pos = tmp; \
2245 do { \
2246 if (otd->thread_mb_pos >= tmp) \
2247 break; \
2248 pthread_cond_wait(&otd->cond, &otd->lock); \
2249 } while (1); \
2250 td->wait_mb_pos = INT_MAX; \
2251 pthread_mutex_unlock(&otd->lock); \
2252 } \
2253 } while (0);
2254
2255#define update_pos(td, mb_y, mb_x) \
2256 do { \
2257 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2258 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2259 (num_jobs > 1); \
f929ab05 2260 int is_null = !next_td || !prev_td; \
53c20f17
VG
2261 int pos_check = (is_null) ? 1 \
2262 : (next_td != td && \
2263 pos >= next_td->wait_mb_pos) || \
2264 (prev_td != td && \
2265 pos >= prev_td->wait_mb_pos); \
2266 td->thread_mb_pos = pos; \
2267 if (sliced_threading && pos_check) { \
2268 pthread_mutex_lock(&td->lock); \
2269 pthread_cond_broadcast(&td->cond); \
2270 pthread_mutex_unlock(&td->lock); \
2271 } \
2272 } while (0);
25f056e6
MS
2273#else
2274#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2275#define update_pos(td, mb_y, mb_x)
2276#endif
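/* thread_mb_pos / wait_mb_pos pack a macroblock position as (mb_y << 16) | mb_x,
 * so a plain integer compare orders positions row-major: e.g. row 3, column 5
 * encodes as 0x00030005. check_thread_pos() blocks on the other thread's
 * condition variable until that thread has reached at least the requested
 * (mb_y_check, mb_x_check); update_pos() publishes the current position and
 * wakes threads that may be waiting for it. */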
951455c1
DK
2277
2278static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
ac4b32df 2279 int jobnr, int threadnr, int is_vp7)
951455c1
DK
2280{
2281 VP8Context *s = avctx->priv_data;
2282 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
53c20f17
VG
2283 int mb_y = td->thread_mb_pos >> 16;
2284 int mb_x, mb_xy = mb_y * s->mb_width;
951455c1 2285 int num_jobs = s->num_jobs;
759001c5 2286 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
53c20f17 2287 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
951455c1 2288 VP8Macroblock *mb;
337ade52 2289 uint8_t *dst[3] = {
53c20f17
VG
2290 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2291 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2292 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
337ade52 2293 };
53c20f17
VG
2294 if (mb_y == 0)
2295 prev_td = td;
2296 else
2297 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2298 if (mb_y == s->mb_height - 1)
2299 next_td = td;
2300 else
2301 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
951455c1 2302 if (s->mb_layout == 1)
53c20f17 2303 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
951455c1 2304 else {
46d208e1
RB
2305 // Make sure the previous frame has read its segmentation map,
2306 // if we re-use the same map.
2307 if (prev_frame && s->segmentation.enabled &&
2308 !s->segmentation.update_map)
2309 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
53c20f17 2310 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
951455c1 2311 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
53c20f17 2312 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
951455c1
DK
2313 }
2314
ac4b32df
PR
2315 if (!is_vp7 || mb_y == 0)
2316 memset(td->left_nnz, 0, sizeof(td->left_nnz));
337ade52
DK
2317
2318 s->mv_min.x = -MARGIN;
53c20f17 2319 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
337ade52
DK
2320
2321 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
951455c1
DK
2322 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2323 if (prev_td != td) {
2324 if (threadnr != 0) {
ac4b32df
PR
2325 check_thread_pos(td, prev_td,
2326 mb_x + (is_vp7 ? 2 : 1),
2327 mb_y - (is_vp7 ? 2 : 1));
951455c1 2328 } else {
53c20f17 2329 check_thread_pos(td, prev_td,
ac4b32df
PR
2330 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2331 mb_y - (is_vp7 ? 2 : 1));
951455c1
DK
2332 }
2333 }
2334
53c20f17
VG
2335 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2336 s->linesize, 4);
2337 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2338 dst[2] - dst[1], 2);
337ade52 2339
951455c1 2340 if (!s->mb_layout)
759001c5
AK
2341 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2342 prev_frame && prev_frame->seg_map ?
ac4b32df 2343 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
337ade52
DK
2344
2345 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2346
2347 if (!mb->skip)
ac4b32df 2348 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
337ade52
DK
2349
2350 if (mb->mode <= MODE_I4x4)
ac4b32df 2351 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
337ade52 2352 else
951455c1 2353 inter_predict(s, td, dst, mb, mb_x, mb_y);
337ade52
DK
2354
2355 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2356
2357 if (!mb->skip) {
951455c1 2358 idct_mb(s, td, dst, mb);
337ade52 2359 } else {
951455c1 2360 AV_ZERO64(td->left_nnz);
337ade52
DK
2361 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2362
53c20f17
VG
 2363 /* Reset the DC block predictors that would exist
 2364 * if the mb had coefficients */
337ade52 2365 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
951455c1 2366 td->left_nnz[8] = 0;
337ade52
DK
2367 s->top_nnz[mb_x][8] = 0;
2368 }
2369 }
2370
2371 if (s->deblock_filter)
ac4b32df 2372 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
951455c1 2373
53c20f17 2374 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
951455c1 2375 if (s->filter.simple)
53c20f17
VG
2376 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2377 NULL, NULL, s->linesize, 0, 1);
951455c1 2378 else
53c20f17
VG
2379 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2380 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
951455c1 2381 }
337ade52
DK
2382
2383 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2384
53c20f17
VG
2385 dst[0] += 16;
2386 dst[1] += 8;
2387 dst[2] += 8;
337ade52
DK
2388 s->mv_min.x -= 64;
2389 s->mv_max.x -= 64;
951455c1 2390
53c20f17
VG
2391 if (mb_x == s->mb_width + 1) {
2392 update_pos(td, mb_y, s->mb_width + 3);
951455c1
DK
2393 } else {
2394 update_pos(td, mb_y, mb_x);
2395 }
337ade52 2396 }
951455c1
DK
2397}
2398
2399static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
ac4b32df 2400 int jobnr, int threadnr, int is_vp7)
951455c1
DK
2401{
2402 VP8Context *s = avctx->priv_data;
2403 VP8ThreadData *td = &s->thread_data[threadnr];
53c20f17 2404 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
759001c5 2405 AVFrame *curframe = s->curframe->tf.f;
951455c1
DK
2406 VP8Macroblock *mb;
2407 VP8ThreadData *prev_td, *next_td;
2408 uint8_t *dst[3] = {
53c20f17
VG
2409 curframe->data[0] + 16 * mb_y * s->linesize,
2410 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2411 curframe->data[2] + 8 * mb_y * s->uvlinesize
951455c1
DK
2412 };
2413
2414 if (s->mb_layout == 1)
53c20f17 2415 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
951455c1 2416 else
53c20f17 2417 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
951455c1 2418
53c20f17
VG
2419 if (mb_y == 0)
2420 prev_td = td;
2421 else
2422 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2423 if (mb_y == s->mb_height - 1)
2424 next_td = td;
2425 else
2426 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
951455c1
DK
2427
2428 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2429 VP8FilterStrength *f = &td->filter_strength[mb_x];
53c20f17
VG
2430 if (prev_td != td)
2431 check_thread_pos(td, prev_td,
2432 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
951455c1 2433 if (next_td != td)
53c20f17
VG
2434 if (next_td != &s->thread_data[0])
2435 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
951455c1
DK
2436
2437 if (num_jobs == 1) {
2438 if (s->filter.simple)
53c20f17
VG
2439 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2440 NULL, NULL, s->linesize, 0, 1);
951455c1 2441 else
53c20f17
VG
2442 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2443 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
951455c1
DK
2444 }
2445
337ade52 2446 if (s->filter.simple)
951455c1 2447 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
337ade52 2448 else
ac4b32df 2449 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
951455c1
DK
2450 dst[0] += 16;
2451 dst[1] += 8;
2452 dst[2] += 8;
2453
53c20f17 2454 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
951455c1
DK
2455 }
2456}
2457
ac4b32df
PR
2458static av_always_inline
2459int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2460 int threadnr, int is_vp7)
951455c1
DK
2461{
2462 VP8Context *s = avctx->priv_data;
2463 VP8ThreadData *td = &s->thread_data[jobnr];
2464 VP8ThreadData *next_td = NULL, *prev_td = NULL;
759001c5 2465 VP8Frame *curframe = s->curframe;
951455c1 2466 int mb_y, num_jobs = s->num_jobs;
53c20f17 2467
951455c1
DK
2468 td->thread_nr = threadnr;
2469 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
53c20f17
VG
2470 if (mb_y >= s->mb_height)
2471 break;
2472 td->thread_mb_pos = mb_y << 16;
ac4b32df 2473 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
951455c1 2474 if (s->deblock_filter)
ac4b32df 2475 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
951455c1
DK
2476 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2477
2478 s->mv_min.y -= 64;
2479 s->mv_max.y -= 64;
2480
2481 if (avctx->active_thread_type == FF_THREAD_FRAME)
759001c5 2482 ff_thread_report_progress(&curframe->tf, mb_y, 0);
337ade52 2483 }
951455c1
DK
2484
2485 return 0;
337ade52
DK
2486}
2487
ac4b32df
PR
2488static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2489 int jobnr, int threadnr)
2490{
2491 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2492}
2493
2494static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2495 int jobnr, int threadnr)
2496{
2497 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2498}
2499
ac4b32df
PR
2500static av_always_inline
2501int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2502 AVPacket *avpkt, int is_vp7)
3b636f21
DC
2503{
2504 VP8Context *s = avctx->priv_data;
951455c1 2505 int ret, i, referenced, num_jobs;
3b636f21 2506 enum AVDiscard skip_thresh;
759001c5 2507 VP8Frame *av_uninit(curframe), *prev_frame;
ce42a048 2508
ac4b32df
PR
2509 if (is_vp7)
2510 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2511 else
2512 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2513
2514 if (ret < 0)
fb90785e 2515 goto err;
3b636f21 2516
4e528206
MT
2517 if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2518 enum AVPixelFormat pix_fmts[] = {
a9fb1347
MT
2519#if CONFIG_VP8_VAAPI_HWACCEL
2520 AV_PIX_FMT_VAAPI,
2521#endif
4e528206
MT
2522 AV_PIX_FMT_YUV420P,
2523 AV_PIX_FMT_NONE,
2524 };
2525
2526 s->pix_fmt = ff_get_format(s->avctx, pix_fmts);
2527 if (s->pix_fmt < 0) {
2528 ret = AVERROR(EINVAL);
2529 goto err;
2530 }
2531 avctx->pix_fmt = s->pix_fmt;
2532 }
2533
e02dec25
AC
2534 prev_frame = s->framep[VP56_FRAME_CURRENT];
2535
53c20f17
VG
2536 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2537 s->update_altref == VP56_FRAME_CURRENT;
3b636f21 2538
53c20f17
VG
2539 skip_thresh = !referenced ? AVDISCARD_NONREF
2540 : !s->keyframe ? AVDISCARD_NONKEY
2541 : AVDISCARD_ALL;
3b636f21
DC
2542
2543 if (avctx->skip_frame >= skip_thresh) {
2544 s->invisible = 1;
fb90785e 2545 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
3b636f21
DC
2546 goto skip_decode;
2547 }
9ac831c2 2548 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
3b636f21 2549
4773d904
RB
2550 // release no longer referenced frames
2551 for (i = 0; i < 5; i++)
759001c5 2552 if (s->frames[i].tf.f->data[0] &&
4773d904
RB
2553 &s->frames[i] != prev_frame &&
2554 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
53c20f17 2555 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
4773d904 2556 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
759001c5 2557 vp8_release_frame(s, &s->frames[i]);
4773d904 2558
ac4b32df 2559 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
3b636f21 2560
65875a8b
VG
2561 if (!s->colorspace)
2562 avctx->colorspace = AVCOL_SPC_BT470BG;
2563 if (s->fullrange)
2564 avctx->color_range = AVCOL_RANGE_JPEG;
2565 else
2566 avctx->color_range = AVCOL_RANGE_MPEG;
2567
53c20f17
VG
2568 /* Given that arithmetic probabilities are updated every frame, it's quite
2569 * likely that the values we have on a random interframe are complete
 2570 * junk if we didn't start decoding on a keyframe. So just don't display
2571 * anything rather than junk. */
fb90785e 2572 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
53c20f17 2573 !s->framep[VP56_FRAME_GOLDEN] ||
fb90785e 2574 !s->framep[VP56_FRAME_GOLDEN2])) {
53c20f17
VG
2575 av_log(avctx, AV_LOG_WARNING,
2576 "Discarding interframe without a prior keyframe!\n");
fb90785e
RB
2577 ret = AVERROR_INVALIDDATA;
2578 goto err;
2579 }
2580
759001c5 2581 curframe->tf.f->key_frame = s->keyframe;
53c20f17
VG
2582 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2583 : AV_PICTURE_TYPE_P;
759001c5 2584 if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
3b636f21 2585 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
fb90785e 2586 goto err;
3b636f21
DC
2587 }
2588
4773d904 2589 // check if golden and altref are swapped
53c20f17
VG
2590 if (s->update_altref != VP56_FRAME_NONE)
2591 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2592 else
2593 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2594
2595 if (s->update_golden != VP56_FRAME_NONE)
2596 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2597 else
2598 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2599
2600 if (s->update_last)
4773d904 2601 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
53c20f17 2602 else
4773d904 2603 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
53c20f17
VG
2604
2605 s->next_framep[VP56_FRAME_CURRENT] = curframe;
4773d904
RB
2606
2607 ff_thread_finish_setup(avctx);
2608
4e528206
MT
2609 if (avctx->hwaccel) {
2610 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2611 if (ret < 0)
2612 goto err;
3b636f21 2613
4e528206
MT
2614 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2615 if (ret < 0)
2616 goto err;
c55e0d34 2617
4e528206
MT
2618 ret = avctx->hwaccel->end_frame(avctx);
2619 if (ret < 0)
2620 goto err;
3b636f21 2621
4e528206
MT
2622 } else {
2623 s->linesize = curframe->tf.f->linesize[0];
2624 s->uvlinesize = curframe->tf.f->linesize[1];
2625
2626 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2627 /* Zero macroblock structures for top/top-left prediction
2628 * from outside the frame. */
2629 if (!s->mb_layout)
2630 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2631 (s->mb_width + 1) * sizeof(*s->macroblocks));
2632 if (!s->mb_layout && s->keyframe)
2633 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2634
2635 memset(s->ref_count, 0, sizeof(s->ref_count));
2636
2637 if (s->mb_layout == 1) {
2638 // Make sure the previous frame has read its segmentation map,
2639 // if we re-use the same map.
2640 if (prev_frame && s->segmentation.enabled &&
2641 !s->segmentation.update_map)
2642 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2643 if (is_vp7)
2644 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2645 else
2646 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2647 }
2648
2649 if (avctx->active_thread_type == FF_THREAD_FRAME)
2650 num_jobs = 1;
ac4b32df 2651 else
4e528206
MT
2652 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2653 s->num_jobs = num_jobs;
2654 s->curframe = curframe;
2655 s->prev_frame = prev_frame;
2656 s->mv_min.y = -MARGIN;
2657 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2658 for (i = 0; i < MAX_THREADS; i++) {
2659 s->thread_data[i].thread_mb_pos = 0;
2660 s->thread_data[i].wait_mb_pos = INT_MAX;
2661 }
4773d904 2662
4e528206
MT
2663 if (is_vp7)
2664 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2665 num_jobs);
2666 else
2667 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2668 num_jobs);
951455c1 2669 }
3b636f21 2670
759001c5 2671 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
fb90785e
RB
2672 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2673
3b636f21
DC
2674skip_decode:
2675 // if future frames don't use the updated probabilities,
2676 // reset them to the values we saved
2677 if (!s->update_probabilities)
2678 s->prob[0] = s->prob[1];
2679
3b636f21 2680 if (!s->invisible) {
759001c5
AK
2681 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2682 return ret;
53c20f17 2683 *got_frame = 1;
3b636f21
DC
2684 }
2685
2686 return avpkt->size;
fb90785e
RB
2687err:
2688 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2689 return ret;
3b636f21
DC
2690}
2691
ac4b32df
PR
2692int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2693 AVPacket *avpkt)
2694{
2695 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2696}
2697
2698#if CONFIG_VP7_DECODER
2699static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2700 AVPacket *avpkt)
2701{
2702 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2703}
2704#endif /* CONFIG_VP7_DECODER */
2705
c4bfa098 2706av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
759001c5
AK
2707{
2708 VP8Context *s = avctx->priv_data;
2709 int i;
2710
2711 vp8_decode_flush_impl(avctx, 1);
2712 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2713 av_frame_free(&s->frames[i].tf.f);
2714
2715 return 0;
2716}
2717
2718static av_cold int vp8_init_frames(VP8Context *s)
2719{
2720 int i;
2721 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2722 s->frames[i].tf.f = av_frame_alloc();
2723 if (!s->frames[i].tf.f)
2724 return AVERROR(ENOMEM);
2725 }
2726 return 0;
2727}
2728
ac4b32df
PR
2729static av_always_inline
2730int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
3b636f21
DC
2731{
2732 VP8Context *s = avctx->priv_data;
759001c5 2733 int ret;
3b636f21
DC
2734
2735 s->avctx = avctx;
4e528206 2736 s->pix_fmt = AV_PIX_FMT_NONE;
716d413c 2737 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
759001c5 2738 avctx->internal->allocate_progress = 1;
3b636f21 2739
8c53d39e 2740 ff_videodsp_init(&s->vdsp, 8);
ac4b32df
PR
2741
2742 ff_vp78dsp_init(&s->vp8dsp);
2743 if (CONFIG_VP7_DECODER && is_vp7) {
2744 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2745 ff_vp7dsp_init(&s->vp8dsp);
2746 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2747 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2748 ff_vp8dsp_init(&s->vp8dsp);
2749 }
2750
2751 /* does not change for VP8 */
f4d581cd 2752 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
3b636f21 2753
759001c5 2754 if ((ret = vp8_init_frames(s)) < 0) {
c4bfa098 2755 ff_vp8_decode_free(avctx);
759001c5
AK
2756 return ret;
2757 }
3b636f21 2758
3b636f21
DC
2759 return 0;
2760}
2761
ac4b32df
PR
2762#if CONFIG_VP7_DECODER
2763static int vp7_decode_init(AVCodecContext *avctx)
2764{
2765 return vp78_decode_init(avctx, IS_VP7);
2766}
2767#endif /* CONFIG_VP7_DECODER */
2768
2769av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2770{
2771 return vp78_decode_init(avctx, IS_VP8);
2772}
2773
2774#if CONFIG_VP8_DECODER
4773d904
RB
2775static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2776{
2777 VP8Context *s = avctx->priv_data;
759001c5 2778 int ret;
4773d904
RB
2779
2780 s->avctx = avctx;
2781
759001c5 2782 if ((ret = vp8_init_frames(s)) < 0) {
c4bfa098 2783 ff_vp8_decode_free(avctx);
759001c5
AK
2784 return ret;
2785 }
2786
4773d904
RB
2787 return 0;
2788}
2789
53c20f17 2790#define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
4773d904 2791
53c20f17
VG
2792static int vp8_decode_update_thread_context(AVCodecContext *dst,
2793 const AVCodecContext *src)
4773d904
RB
2794{
2795 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
759001c5 2796 int i;
4773d904 2797
56535793
RB
2798 if (s->macroblocks_base &&
2799 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2800 free_buffers(s);
82a0497c
RB
2801 s->mb_width = s_src->mb_width;
2802 s->mb_height = s_src->mb_height;
56535793
RB
2803 }
2804
53c20f17 2805 s->prob[0] = s_src->prob[!s_src->update_probabilities];
4773d904 2806 s->segmentation = s_src->segmentation;
53c20f17 2807 s->lf_delta = s_src->lf_delta;
4773d904
RB
2808 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2809
759001c5
AK
2810 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2811 if (s_src->frames[i].tf.f->data[0]) {
2812 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2813 if (ret < 0)
2814 return ret;
2815 }
2816 }
2817
4773d904
RB
2818 s->framep[0] = REBASE(s_src->next_framep[0]);
2819 s->framep[1] = REBASE(s_src->next_framep[1]);
2820 s->framep[2] = REBASE(s_src->next_framep[2]);
2821 s->framep[3] = REBASE(s_src->next_framep[3]);
2822
2823 return 0;
2824}
ac4b32df
PR
2825#endif /* CONFIG_VP8_DECODER */
2826
2827#if CONFIG_VP7_DECODER
2828AVCodec ff_vp7_decoder = {
2829 .name = "vp7",
2830 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2831 .type = AVMEDIA_TYPE_VIDEO,
2832 .id = AV_CODEC_ID_VP7,
2833 .priv_data_size = sizeof(VP8Context),
2834 .init = vp7_decode_init,
2835 .close = ff_vp8_decode_free,
2836 .decode = vp7_decode_frame,
def97856 2837 .capabilities = AV_CODEC_CAP_DR1,
ac4b32df
PR
2838 .flush = vp8_decode_flush,
2839};
2840#endif /* CONFIG_VP7_DECODER */
4773d904 2841
ac4b32df 2842#if CONFIG_VP8_DECODER
d36beb3f 2843AVCodec ff_vp8_decoder = {
00c3b67b 2844 .name = "vp8",
b2bed932 2845 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
00c3b67b 2846 .type = AVMEDIA_TYPE_VIDEO,
36ef5369 2847 .id = AV_CODEC_ID_VP8,
00c3b67b 2848 .priv_data_size = sizeof(VP8Context),
c4bfa098
JR
2849 .init = ff_vp8_decode_init,
2850 .close = ff_vp8_decode_free,
2851 .decode = ff_vp8_decode_frame,
def97856
VG
2852 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2853 AV_CODEC_CAP_SLICE_THREADS,
00c3b67b 2854 .flush = vp8_decode_flush,
4773d904
RB
2855 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2856 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
3b636f21 2857};
ac4b32df 2858#endif /* CONFIG_VP8_DECODER */