vp9: split superframes in the filtering stage before actual decoding
[libav.git] / libavcodec / vp9.c
1/*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#include "libavutil/avassert.h"
25
26#include "avcodec.h"
27#include "get_bits.h"
28#include "internal.h"
29#include "videodsp.h"
30#include "vp56.h"
31#include "vp9.h"
32#include "vp9data.h"
33
34#define VP9_SYNCCODE 0x498342
35#define MAX_PROB 255
36
37static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
38{
39 ff_thread_release_buffer(avctx, &f->tf);
40 av_buffer_unref(&f->segmentation_map_buf);
41 av_buffer_unref(&f->mv_buf);
42 f->segmentation_map = NULL;
43 f->mv = NULL;
44}
45
46static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
47{
48 VP9Context *s = avctx->priv_data;
49 int ret, sz;
50
51 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
52 if (ret < 0)
53 return ret;
54
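    /* note: one segmentation-map byte and one motion-vector pair are kept
     * per 8x8 block; a 64x64 superblock contains 8 * 8 = 64 such blocks */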
55 sz = 64 * s->sb_cols * s->sb_rows;
56 f->segmentation_map_buf = av_buffer_allocz(sz * sizeof(*f->segmentation_map));
57 f->mv_buf = av_buffer_allocz(sz * sizeof(*f->mv));
58 if (!f->segmentation_map_buf || !f->mv_buf) {
59 vp9_frame_unref(avctx, f);
60 return AVERROR(ENOMEM);
61 }
62
63 f->segmentation_map = f->segmentation_map_buf->data;
64 f->mv = (VP9MVRefPair*)f->mv_buf->data;
65
66 if (s->segmentation.enabled && !s->segmentation.update_map &&
67 !s->keyframe && !s->intraonly && !s->errorres)
68 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
69
70 return 0;
71}
72
73static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
74{
75 int ret;
76
77 dst->segmentation_map_buf = av_buffer_ref(src->segmentation_map_buf);
78 dst->mv_buf = av_buffer_ref(src->mv_buf);
79 if (!dst->segmentation_map_buf || !dst->mv_buf) {
80 ret = AVERROR(ENOMEM);
81 goto fail;
82 }
83
84 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
85 if (ret < 0)
86 goto fail;
87
88 dst->segmentation_map = src->segmentation_map;
89 dst->mv = src->mv;
90
91 return 0;
92fail:
93 av_buffer_unref(&dst->segmentation_map_buf);
94 av_buffer_unref(&dst->mv_buf);
95 return ret;
96}
97
98static void vp9_decode_flush(AVCodecContext *avctx)
99{
100 VP9Context *s = avctx->priv_data;
101 int i;
102
103 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
104 vp9_frame_unref(avctx, &s->frames[i]);
105
106 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
107 ff_thread_release_buffer(avctx, &s->refs[i]);
108
109 s->use_last_frame_mvs = 0;
110
111 s->alloc_width = 0;
112 s->alloc_height = 0;
113}
114
115static int update_size(AVCodecContext *avctx, int w, int h)
116{
117 VP9Context *s = avctx->priv_data;
118 uint8_t *p;
119 int nb_blocks, nb_superblocks;
120
121 if (s->above_partition_ctx && w == s->alloc_width && h == s->alloc_height)
122 return 0;
123
124 vp9_decode_flush(avctx);
125
126 if (w <= 0 || h <= 0)
127 return AVERROR_INVALIDDATA;
128
129 avctx->width = w;
130 avctx->height = h;
131 s->sb_cols = (w + 63) >> 6;
132 s->sb_rows = (h + 63) >> 6;
133 s->cols = (w + 7) >> 3;
134 s->rows = (h + 7) >> 3;
135
136#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
137 av_free(s->above_partition_ctx);
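    /* 240 is the total per-sb-column size in bytes of the uint8_t context
     * arrays assigned below: 8+8+8+16+16+8+8+64+32+32+8+8+8+8+8 */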
138 p = av_malloc(s->sb_cols *
139 (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
140 if (!p)
141 return AVERROR(ENOMEM);
142 assign(s->above_partition_ctx, uint8_t *, 8);
143 assign(s->above_skip_ctx, uint8_t *, 8);
144 assign(s->above_txfm_ctx, uint8_t *, 8);
145 assign(s->above_mode_ctx, uint8_t *, 16);
146 assign(s->above_y_nnz_ctx, uint8_t *, 16);
147 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
148 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
149 assign(s->intra_pred_data[0], uint8_t *, 64);
150 assign(s->intra_pred_data[1], uint8_t *, 32);
151 assign(s->intra_pred_data[2], uint8_t *, 32);
152 assign(s->above_segpred_ctx, uint8_t *, 8);
153 assign(s->above_intra_ctx, uint8_t *, 8);
154 assign(s->above_comp_ctx, uint8_t *, 8);
155 assign(s->above_ref_ctx, uint8_t *, 8);
156 assign(s->above_filter_ctx, uint8_t *, 8);
157 assign(s->lflvl, VP9Filter *, 1);
158 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
159#undef assign
160
161 av_freep(&s->b_base);
162 av_freep(&s->block_base);
163
164 if (avctx->active_thread_type & FF_THREAD_FRAME) {
165 nb_blocks = s->cols * s->rows;
166 nb_superblocks = s->sb_cols * s->sb_rows;
167 } else {
168 nb_blocks = nb_superblocks = 1;
169 }
170
171 s->b_base = av_malloc_array(nb_blocks, sizeof(*s->b_base));
172 s->block_base = av_mallocz_array(nb_superblocks, (64 * 64 + 128) * 3);
173 if (!s->b_base || !s->block_base)
174 return AVERROR(ENOMEM);
175 s->uvblock_base[0] = s->block_base + nb_superblocks * 64 * 64;
176 s->uvblock_base[1] = s->uvblock_base[0] + nb_superblocks * 32 * 32;
177 s->eob_base = (uint8_t *)(s->uvblock_base[1] + nb_superblocks * 32 * 32);
178 s->uveob_base[0] = s->eob_base + nb_superblocks * 256;
179 s->uveob_base[1] = s->uveob_base[0] + nb_superblocks * 64;
180
181 s->alloc_width = w;
182 s->alloc_height = h;
183
184 return 0;
185}
186
187// The sign bit is at the end, not the start, of a bit sequence
188static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
189{
190 int v = get_bits(gb, n);
191 return get_bits1(gb) ? -v : v;
192}
193
194static av_always_inline int inv_recenter_nonneg(int v, int m)
195{
196 if (v > 2 * m)
197 return v;
198 if (v & 1)
199 return m - ((v + 1) >> 1);
200 return m + (v >> 1);
201}
202
203// differential forward probability updates
204static int update_prob(VP56RangeCoder *c, int p)
205{
206 static const int inv_map_table[MAX_PROB - 1] = {
207 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
208 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
209 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
210 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
211 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
212 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
213 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
214 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
215 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
216 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
217 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
218 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
219 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
220 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
221 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
222 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
223 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
224 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
225 252, 253,
226 };
227 int d;
228
229 /* This code is trying to do a differential probability update. For a
230 * current probability A in the range [1, 255], the difference to a new
231 * probability of any value can be expressed differentially as 1-A, 255-A
232 * where some part of this (absolute range) exists both in positive as
233 * well as the negative part, whereas another part only exists in one
234 * half. We're trying to code this shared part differentially, i.e.
235 * times two where the value of the lowest bit specifies the sign, and
236 * the single part is then coded on top of this. This absolute difference
237 * then again has a value of [0, 254], but a bigger value in this range
238 * indicates that we're further away from the original value A, so we
239 * can code this as a VLC code, since higher values are increasingly
240 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
241 * updates vs. the 'fine, exact' updates further down the range, which
242 * adds one extra dimension to this differential update model. */
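    /* Worked example (illustrative): for d = 0 the table gives
     * inv_map_table[0] = 7. With a current probability p = 100 (<= 128)
     * the result is 1 + inv_recenter_nonneg(7, 99); 7 is odd and not
     * larger than 2 * 99, so this is 1 + (99 - 4) = 96. Odd recentered
     * values move the probability down, even ones move it up, in both
     * cases by roughly half the table value. */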
243
244 if (!vp8_rac_get(c)) {
245 d = vp8_rac_get_uint(c, 4) + 0;
246 } else if (!vp8_rac_get(c)) {
247 d = vp8_rac_get_uint(c, 4) + 16;
248 } else if (!vp8_rac_get(c)) {
249 d = vp8_rac_get_uint(c, 5) + 32;
250 } else {
251 d = vp8_rac_get_uint(c, 7);
252 if (d >= 65) {
253 d = (d << 1) - 65 + vp8_rac_get(c);
254 d = av_clip(d, 0, MAX_PROB - 65 - 1);
255 }
256 d += 64;
257 }
258
259 return p <= 128
260 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
261 : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
262}
263
264static int decode_frame_header(AVCodecContext *avctx,
265 const uint8_t *data, int size, int *ref)
266{
267 VP9Context *s = avctx->priv_data;
268 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
269 int last_invisible;
270 const uint8_t *data2;
271
272 /* general header */
273 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
274 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
275 return ret;
276 }
277 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
278 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
279 return AVERROR_INVALIDDATA;
280 }
281 s->profile = get_bits1(&s->gb);
282 if (get_bits1(&s->gb)) { // reserved bit
283 av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
284 return AVERROR_INVALIDDATA;
285 }
286 if (get_bits1(&s->gb)) {
287 *ref = get_bits(&s->gb, 3);
288 return 0;
289 }
290
291 s->last_keyframe = s->keyframe;
292 s->keyframe = !get_bits1(&s->gb);
293
294 last_invisible = s->invisible;
295 s->invisible = !get_bits1(&s->gb);
296 s->errorres = get_bits1(&s->gb);
297 s->use_last_frame_mvs = !s->errorres && !last_invisible;
298
299 if (s->keyframe) {
300 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
301 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
302 return AVERROR_INVALIDDATA;
303 }
304 s->colorspace = get_bits(&s->gb, 3);
305 if (s->colorspace == 7) { // RGB = profile 1
306 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
307 return AVERROR_INVALIDDATA;
308 }
309 s->fullrange = get_bits1(&s->gb);
310
311 // subsampling bits
312 if (s->profile == 1 || s->profile == 3) {
313 s->sub_x = get_bits1(&s->gb);
314 s->sub_y = get_bits1(&s->gb);
315 if (s->sub_x && s->sub_y) {
316 av_log(avctx, AV_LOG_ERROR,
317 "4:2:0 color not supported in profile 1 or 3\n");
318 return AVERROR_INVALIDDATA;
319 }
320 if (get_bits1(&s->gb)) { // reserved bit
321 av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
322 return AVERROR_INVALIDDATA;
323 }
324 } else {
325 s->sub_x = s->sub_y = 1;
326 }
327 if (!s->sub_x || !s->sub_y) {
328 avpriv_report_missing_feature(avctx, "Subsampling %d:%d",
329 s->sub_x, s->sub_y);
330 return AVERROR_PATCHWELCOME;
331 }
332
333 s->refreshrefmask = 0xff;
334 w = get_bits(&s->gb, 16) + 1;
335 h = get_bits(&s->gb, 16) + 1;
336 if (get_bits1(&s->gb)) // display size
337 skip_bits(&s->gb, 32);
338 } else {
339 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
340 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
341 if (s->intraonly) {
342 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
343 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
344 return AVERROR_INVALIDDATA;
345 }
346 s->refreshrefmask = get_bits(&s->gb, 8);
347 w = get_bits(&s->gb, 16) + 1;
348 h = get_bits(&s->gb, 16) + 1;
349 if (get_bits1(&s->gb)) // display size
350 skip_bits(&s->gb, 32);
351 } else {
352 s->refreshrefmask = get_bits(&s->gb, 8);
353 s->refidx[0] = get_bits(&s->gb, 3);
354 s->signbias[0] = get_bits1(&s->gb);
355 s->refidx[1] = get_bits(&s->gb, 3);
356 s->signbias[1] = get_bits1(&s->gb);
357 s->refidx[2] = get_bits(&s->gb, 3);
358 s->signbias[2] = get_bits1(&s->gb);
359 if (!s->refs[s->refidx[0]].f->buf[0] ||
360 !s->refs[s->refidx[1]].f->buf[0] ||
361 !s->refs[s->refidx[2]].f->buf[0]) {
362 av_log(avctx, AV_LOG_ERROR,
363 "Not all references are available\n");
364 return AVERROR_INVALIDDATA;
365 }
366 if (get_bits1(&s->gb)) {
367 w = s->refs[s->refidx[0]].f->width;
368 h = s->refs[s->refidx[0]].f->height;
369 } else if (get_bits1(&s->gb)) {
370 w = s->refs[s->refidx[1]].f->width;
371 h = s->refs[s->refidx[1]].f->height;
372 } else if (get_bits1(&s->gb)) {
373 w = s->refs[s->refidx[2]].f->width;
374 h = s->refs[s->refidx[2]].f->height;
375 } else {
376 w = get_bits(&s->gb, 16) + 1;
377 h = get_bits(&s->gb, 16) + 1;
378 }
379 if (get_bits1(&s->gb)) // display size
380 skip_bits(&s->gb, 32);
381 s->highprecisionmvs = get_bits1(&s->gb);
382 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
383 get_bits(&s->gb, 2);
384 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
385 s->signbias[0] != s->signbias[2];
386 if (s->allowcompinter) {
387 if (s->signbias[0] == s->signbias[1]) {
388 s->fixcompref = 2;
389 s->varcompref[0] = 0;
390 s->varcompref[1] = 1;
391 } else if (s->signbias[0] == s->signbias[2]) {
392 s->fixcompref = 1;
393 s->varcompref[0] = 0;
394 s->varcompref[1] = 2;
395 } else {
396 s->fixcompref = 0;
397 s->varcompref[0] = 1;
398 s->varcompref[1] = 2;
399 }
400 }
401 }
402 }
403
404 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
405 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
406 s->framectxid = c = get_bits(&s->gb, 2);
407
408 /* loopfilter header data */
409 s->filter.level = get_bits(&s->gb, 6);
410 sharp = get_bits(&s->gb, 3);
411 /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
412 * keep the old cache values since they are still valid. */
413 if (s->filter.sharpness != sharp)
414 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
415 s->filter.sharpness = sharp;
416 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
417 if (get_bits1(&s->gb)) {
418 for (i = 0; i < 4; i++)
419 if (get_bits1(&s->gb))
420 s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
421 for (i = 0; i < 2; i++)
422 if (get_bits1(&s->gb))
423 s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
424 }
425 } else {
426 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
427 }
428
429 /* quantization header data */
430 s->yac_qi = get_bits(&s->gb, 8);
431 s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
432 s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
433 s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
434 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
435 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
436
437 /* segmentation header info */
438 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
439 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
440 for (i = 0; i < 7; i++)
441 s->prob.seg[i] = get_bits1(&s->gb) ?
442 get_bits(&s->gb, 8) : 255;
443 if ((s->segmentation.temporal = get_bits1(&s->gb)))
444 for (i = 0; i < 3; i++)
445 s->prob.segpred[i] = get_bits1(&s->gb) ?
446 get_bits(&s->gb, 8) : 255;
447 }
448
449 if (get_bits1(&s->gb)) {
450 s->segmentation.absolute_vals = get_bits1(&s->gb);
451 for (i = 0; i < 8; i++) {
452 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
453 s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
454 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
455 s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
456 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
457 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
458 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
459 }
460 }
461 } else {
462 s->segmentation.feat[0].q_enabled = 0;
463 s->segmentation.feat[0].lf_enabled = 0;
464 s->segmentation.feat[0].skip_enabled = 0;
465 s->segmentation.feat[0].ref_enabled = 0;
466 }
467
468 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
469 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
470 int qyac, qydc, quvac, quvdc, lflvl, sh;
471
472 if (s->segmentation.feat[i].q_enabled) {
473 if (s->segmentation.absolute_vals)
474 qyac = s->segmentation.feat[i].q_val;
475 else
476 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
477 } else {
478 qyac = s->yac_qi;
479 }
480 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
481 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
482 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
483 qyac = av_clip_uintp2(qyac, 8);
484
485 s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
486 s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
487 s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
488 s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];
489
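        /* note: the per-reference/per-mode loop filter deltas below are
         * applied doubled (<< sh) once the base filter level reaches 32 */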
490 sh = s->filter.level >= 32;
491 if (s->segmentation.feat[i].lf_enabled) {
492 if (s->segmentation.absolute_vals)
493 lflvl = s->segmentation.feat[i].lf_val;
494 else
495 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
496 } else {
497 lflvl = s->filter.level;
498 }
499 s->segmentation.feat[i].lflvl[0][0] =
500 s->segmentation.feat[i].lflvl[0][1] =
501 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
502 for (j = 1; j < 4; j++) {
503 s->segmentation.feat[i].lflvl[j][0] =
504 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
505 s->lf_delta.mode[0]) << sh), 6);
506 s->segmentation.feat[i].lflvl[j][1] =
507 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
508 s->lf_delta.mode[1]) << sh), 6);
509 }
510 }
511
512 /* tiling info */
513 if ((ret = update_size(avctx, w, h)) < 0) {
514 av_log(avctx, AV_LOG_ERROR,
515 "Failed to initialize decoder for %dx%d\n", w, h);
516 return ret;
517 }
518 for (s->tiling.log2_tile_cols = 0;
519 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
520 s->tiling.log2_tile_cols++) ;
521 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
522 max = FFMAX(0, max - 1);
523 while (max > s->tiling.log2_tile_cols) {
524 if (get_bits1(&s->gb))
525 s->tiling.log2_tile_cols++;
526 else
527 break;
528 }
529 s->tiling.log2_tile_rows = decode012(&s->gb);
530 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
531 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
532 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
533 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
534 sizeof(VP56RangeCoder) *
535 s->tiling.tile_cols);
536 if (!s->c_b) {
537 av_log(avctx, AV_LOG_ERROR,
538 "Ran out of memory during range coder init\n");
539 return AVERROR(ENOMEM);
540 }
541 }
542
543 if (s->keyframe || s->errorres || s->intraonly) {
544 s->prob_ctx[0].p =
545 s->prob_ctx[1].p =
546 s->prob_ctx[2].p =
547 s->prob_ctx[3].p = ff_vp9_default_probs;
548 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
549 sizeof(ff_vp9_default_coef_probs));
550 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
551 sizeof(ff_vp9_default_coef_probs));
552 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
553 sizeof(ff_vp9_default_coef_probs));
554 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
555 sizeof(ff_vp9_default_coef_probs));
556 }
557
558 // next 16 bits is size of the rest of the header (arith-coded)
559 size2 = get_bits(&s->gb, 16);
560 data2 = align_get_bits(&s->gb);
561 if (size2 > size - (data2 - data)) {
562 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
563 return AVERROR_INVALIDDATA;
564 }
565 ff_vp56_init_range_decoder(&s->c, data2, size2);
566 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
567 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
568 return AVERROR_INVALIDDATA;
569 }
570
571 if (s->keyframe || s->intraonly)
572 memset(s->counts.coef, 0,
573 sizeof(s->counts.coef) + sizeof(s->counts.eob));
574 else
575 memset(&s->counts, 0, sizeof(s->counts));
576
577 /* FIXME is it faster to not copy here, but do it down in the fw updates
578 * as explicit copies if the fw update is missing (and skip the copy upon
579 * fw update)? */
580 s->prob.p = s->prob_ctx[c].p;
581
582 // txfm updates
583 if (s->lossless) {
584 s->txfmmode = TX_4X4;
585 } else {
586 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
587 if (s->txfmmode == 3)
588 s->txfmmode += vp8_rac_get(&s->c);
589
590 if (s->txfmmode == TX_SWITCHABLE) {
591 for (i = 0; i < 2; i++)
592 if (vp56_rac_get_prob_branchy(&s->c, 252))
593 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
594 for (i = 0; i < 2; i++)
595 for (j = 0; j < 2; j++)
596 if (vp56_rac_get_prob_branchy(&s->c, 252))
597 s->prob.p.tx16p[i][j] =
598 update_prob(&s->c, s->prob.p.tx16p[i][j]);
599 for (i = 0; i < 2; i++)
600 for (j = 0; j < 3; j++)
601 if (vp56_rac_get_prob_branchy(&s->c, 252))
602 s->prob.p.tx32p[i][j] =
603 update_prob(&s->c, s->prob.p.tx32p[i][j]);
604 }
605 }
606
607 // coef updates
608 for (i = 0; i < 4; i++) {
609 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
610 if (vp8_rac_get(&s->c)) {
611 for (j = 0; j < 2; j++)
612 for (k = 0; k < 2; k++)
613 for (l = 0; l < 6; l++)
614 for (m = 0; m < 6; m++) {
615 uint8_t *p = s->prob.coef[i][j][k][l][m];
616 uint8_t *r = ref[j][k][l][m];
617 if (m >= 3 && l == 0) // dc only has 3 pt
618 break;
619 for (n = 0; n < 3; n++) {
620 if (vp56_rac_get_prob_branchy(&s->c, 252))
621 p[n] = update_prob(&s->c, r[n]);
622 else
623 p[n] = r[n];
624 }
625 p[3] = 0;
626 }
627 } else {
628 for (j = 0; j < 2; j++)
629 for (k = 0; k < 2; k++)
630 for (l = 0; l < 6; l++)
631 for (m = 0; m < 6; m++) {
632 uint8_t *p = s->prob.coef[i][j][k][l][m];
633 uint8_t *r = ref[j][k][l][m];
634 if (m > 3 && l == 0) // dc only has 3 pt
635 break;
636 memcpy(p, r, 3);
637 p[3] = 0;
638 }
639 }
640 if (s->txfmmode == i)
641 break;
642 }
643
644 // mode updates
645 for (i = 0; i < 3; i++)
646 if (vp56_rac_get_prob_branchy(&s->c, 252))
647 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
648 if (!s->keyframe && !s->intraonly) {
649 for (i = 0; i < 7; i++)
650 for (j = 0; j < 3; j++)
651 if (vp56_rac_get_prob_branchy(&s->c, 252))
652 s->prob.p.mv_mode[i][j] =
653 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
654
655 if (s->filtermode == FILTER_SWITCHABLE)
656 for (i = 0; i < 4; i++)
657 for (j = 0; j < 2; j++)
658 if (vp56_rac_get_prob_branchy(&s->c, 252))
659 s->prob.p.filter[i][j] =
660 update_prob(&s->c, s->prob.p.filter[i][j]);
661
662 for (i = 0; i < 4; i++)
663 if (vp56_rac_get_prob_branchy(&s->c, 252))
664 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
665
666 if (s->allowcompinter) {
667 s->comppredmode = vp8_rac_get(&s->c);
668 if (s->comppredmode)
669 s->comppredmode += vp8_rac_get(&s->c);
670 if (s->comppredmode == PRED_SWITCHABLE)
671 for (i = 0; i < 5; i++)
672 if (vp56_rac_get_prob_branchy(&s->c, 252))
673 s->prob.p.comp[i] =
674 update_prob(&s->c, s->prob.p.comp[i]);
675 } else {
676 s->comppredmode = PRED_SINGLEREF;
677 }
678
679 if (s->comppredmode != PRED_COMPREF) {
680 for (i = 0; i < 5; i++) {
681 if (vp56_rac_get_prob_branchy(&s->c, 252))
682 s->prob.p.single_ref[i][0] =
683 update_prob(&s->c, s->prob.p.single_ref[i][0]);
684 if (vp56_rac_get_prob_branchy(&s->c, 252))
685 s->prob.p.single_ref[i][1] =
686 update_prob(&s->c, s->prob.p.single_ref[i][1]);
687 }
688 }
689
690 if (s->comppredmode != PRED_SINGLEREF) {
691 for (i = 0; i < 5; i++)
692 if (vp56_rac_get_prob_branchy(&s->c, 252))
693 s->prob.p.comp_ref[i] =
694 update_prob(&s->c, s->prob.p.comp_ref[i]);
695 }
696
697 for (i = 0; i < 4; i++)
698 for (j = 0; j < 9; j++)
699 if (vp56_rac_get_prob_branchy(&s->c, 252))
700 s->prob.p.y_mode[i][j] =
701 update_prob(&s->c, s->prob.p.y_mode[i][j]);
702
703 for (i = 0; i < 4; i++)
704 for (j = 0; j < 4; j++)
705 for (k = 0; k < 3; k++)
706 if (vp56_rac_get_prob_branchy(&s->c, 252))
707 s->prob.p.partition[3 - i][j][k] =
708 update_prob(&s->c,
709 s->prob.p.partition[3 - i][j][k]);
710
711 // mv fields don't use the update_prob subexp model for some reason
712 for (i = 0; i < 3; i++)
713 if (vp56_rac_get_prob_branchy(&s->c, 252))
714 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
715
716 for (i = 0; i < 2; i++) {
717 if (vp56_rac_get_prob_branchy(&s->c, 252))
718 s->prob.p.mv_comp[i].sign =
719 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
720
721 for (j = 0; j < 10; j++)
722 if (vp56_rac_get_prob_branchy(&s->c, 252))
723 s->prob.p.mv_comp[i].classes[j] =
724 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
725
726 if (vp56_rac_get_prob_branchy(&s->c, 252))
727 s->prob.p.mv_comp[i].class0 =
728 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
729
730 for (j = 0; j < 10; j++)
731 if (vp56_rac_get_prob_branchy(&s->c, 252))
732 s->prob.p.mv_comp[i].bits[j] =
733 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
734 }
735
736 for (i = 0; i < 2; i++) {
737 for (j = 0; j < 2; j++)
738 for (k = 0; k < 3; k++)
739 if (vp56_rac_get_prob_branchy(&s->c, 252))
740 s->prob.p.mv_comp[i].class0_fp[j][k] =
741 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
742
743 for (j = 0; j < 3; j++)
744 if (vp56_rac_get_prob_branchy(&s->c, 252))
745 s->prob.p.mv_comp[i].fp[j] =
746 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
747 }
748
749 if (s->highprecisionmvs) {
750 for (i = 0; i < 2; i++) {
751 if (vp56_rac_get_prob_branchy(&s->c, 252))
752 s->prob.p.mv_comp[i].class0_hp =
753 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
754
755 if (vp56_rac_get_prob_branchy(&s->c, 252))
756 s->prob.p.mv_comp[i].hp =
757 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
758 }
759 }
760 }
761
762 return (data2 - data) + size2;
763}
764
765static int decode_subblock(AVCodecContext *avctx, int row, int col,
766 VP9Filter *lflvl,
767 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
768{
769 VP9Context *s = avctx->priv_data;
770 AVFrame *f = s->frames[CUR_FRAME].tf.f;
771 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
772 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
773 int ret;
774 const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
775 : s->prob.p.partition[bl][c];
776 enum BlockPartition bp;
777 ptrdiff_t hbs = 4 >> bl;
778
779 if (bl == BL_8X8) {
780 bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
781 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
782 } else if (col + hbs < s->cols) {
783 if (row + hbs < s->rows) {
784 bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
785 switch (bp) {
786 case PARTITION_NONE:
787 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
788 bl, bp);
789 break;
790 case PARTITION_H:
791 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
792 bl, bp);
793 if (!ret) {
794 yoff += hbs * 8 * f->linesize[0];
795 uvoff += hbs * 4 * f->linesize[1];
796 ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
797 yoff, uvoff, bl, bp);
798 }
799 break;
800 case PARTITION_V:
801 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
802 bl, bp);
803 if (!ret) {
804 yoff += hbs * 8;
805 uvoff += hbs * 4;
806 ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
807 yoff, uvoff, bl, bp);
808 }
809 break;
810 case PARTITION_SPLIT:
811 ret = decode_subblock(avctx, row, col, lflvl,
812 yoff, uvoff, bl + 1);
813 if (!ret) {
814 ret = decode_subblock(avctx, row, col + hbs, lflvl,
815 yoff + 8 * hbs, uvoff + 4 * hbs,
816 bl + 1);
817 if (!ret) {
818 yoff += hbs * 8 * f->linesize[0];
819 uvoff += hbs * 4 * f->linesize[1];
820 ret = decode_subblock(avctx, row + hbs, col, lflvl,
821 yoff, uvoff, bl + 1);
822 if (!ret) {
823 ret = decode_subblock(avctx, row + hbs, col + hbs,
824 lflvl, yoff + 8 * hbs,
825 uvoff + 4 * hbs, bl + 1);
826 }
827 }
828 }
829 break;
830 default:
831 av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
832 return AVERROR_INVALIDDATA;
833 }
834 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
835 bp = PARTITION_SPLIT;
836 ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
837 if (!ret)
838 ret = decode_subblock(avctx, row, col + hbs, lflvl,
839 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
840 } else {
841 bp = PARTITION_H;
842 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
843 bl, bp);
844 }
845 } else if (row + hbs < s->rows) {
846 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
847 bp = PARTITION_SPLIT;
848 ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
849 if (!ret) {
850 yoff += hbs * 8 * f->linesize[0];
851 uvoff += hbs * 4 * f->linesize[1];
852 ret = decode_subblock(avctx, row + hbs, col, lflvl,
853 yoff, uvoff, bl + 1);
854 }
855 } else {
856 bp = PARTITION_V;
857 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
858 bl, bp);
859 }
860 } else {
861 bp = PARTITION_SPLIT;
862 ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
863 }
864 s->counts.partition[bl][c][bp]++;
865
866 return ret;
867}
868
869static int decode_superblock_mem(AVCodecContext *avctx, int row, int col, struct VP9Filter *lflvl,
870 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
871{
872 VP9Context *s = avctx->priv_data;
873 VP9Block *b = s->b;
874 ptrdiff_t hbs = 4 >> bl;
875 AVFrame *f = s->frames[CUR_FRAME].tf.f;
876 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
877 int res;
878
879 if (bl == BL_8X8) {
880 av_assert2(b->bl == BL_8X8);
881 res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
882 } else if (s->b->bl == bl) {
883 if ((res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp)) < 0)
884 return res;
885 if (b->bp == PARTITION_H && row + hbs < s->rows) {
886 yoff += hbs * 8 * y_stride;
887 uvoff += hbs * 4 * uv_stride;
888 res = ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
889 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
890 yoff += hbs * 8;
891 uvoff += hbs * 4;
892 res = ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
893 }
894 } else {
895 if ((res = decode_superblock_mem(avctx, row, col, lflvl, yoff, uvoff, bl + 1)) < 0)
896 return res;
897 if (col + hbs < s->cols) { // FIXME why not <=?
898 if (row + hbs < s->rows) {
899 if ((res = decode_superblock_mem(avctx, row, col + hbs, lflvl, yoff + 8 * hbs,
900 uvoff + 4 * hbs, bl + 1)) < 0)
901 return res;
902 yoff += hbs * 8 * y_stride;
903 uvoff += hbs * 4 * uv_stride;
904 if ((res = decode_superblock_mem(avctx, row + hbs, col, lflvl, yoff,
905 uvoff, bl + 1)) < 0)
906 return res;
907 res = decode_superblock_mem(avctx, row + hbs, col + hbs, lflvl,
908 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
909 } else {
910 yoff += hbs * 8;
911 uvoff += hbs * 4;
912 res = decode_superblock_mem(avctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
913 }
914 } else if (row + hbs < s->rows) {
915 yoff += hbs * 8 * y_stride;
916 uvoff += hbs * 4 * uv_stride;
917 res = decode_superblock_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
918 }
919 }
920
921 return res;
922}
923
924static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
925 int row, int col,
926 ptrdiff_t yoff, ptrdiff_t uvoff)
927{
928 VP9Context *s = avctx->priv_data;
929 AVFrame *f = s->frames[CUR_FRAME].tf.f;
930 uint8_t *dst = f->data[0] + yoff;
931 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
932 uint8_t *lvl = lflvl->level;
933 int y, x, p;
934
935 /* FIXME: To what extent can we interleave the v/h loopfilter calls? E.g.
936 * if you think of them as acting on a 8x8 block max, we can interleave
937 * each v/h within the single x loop, but that only works if we work on
938 * 8 pixel blocks, and we won't always do that (we want at least 16px
939 * to use SSE2 optimizations, perhaps 32 for AVX2). */
940
941 // filter edges between columns, Y plane (e.g. block1 | block2)
942 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
943 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
944 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
945 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
946 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
947 unsigned hm = hm1 | hm2 | hm13 | hm23;
948
949 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
950 if (hm1 & x) {
951 int L = *l, H = L >> 4;
952 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
953
954 if (col || x > 1) {
955 if (hmask1[0] & x) {
956 if (hmask2[0] & x) {
957 av_assert2(l[8] == L);
958 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
959 } else {
960 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
961 }
962 } else if (hm2 & x) {
963 L = l[8];
964 H |= (L >> 4) << 8;
965 E |= s->filter.mblim_lut[L] << 8;
966 I |= s->filter.lim_lut[L] << 8;
967 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
968 [!!(hmask2[1] & x)]
969 [0](ptr, ls_y, E, I, H);
970 } else {
971 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
972 [0](ptr, ls_y, E, I, H);
973 }
974 }
975 } else if (hm2 & x) {
976 int L = l[8], H = L >> 4;
977 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
978
979 if (col || x > 1) {
980 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
981 [0](ptr + 8 * ls_y, ls_y, E, I, H);
982 }
983 }
984 if (hm13 & x) {
985 int L = *l, H = L >> 4;
986 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
987
988 if (hm23 & x) {
989 L = l[8];
990 H |= (L >> 4) << 8;
991 E |= s->filter.mblim_lut[L] << 8;
992 I |= s->filter.lim_lut[L] << 8;
993 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
994 } else {
995 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
996 }
997 } else if (hm23 & x) {
998 int L = l[8], H = L >> 4;
999 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
1000
1001 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
1002 }
1003 }
1004 }
1005
1006 // block1
1007 // filter edges between rows, Y plane (e.g. ------)
1008 // block2
1009 dst = f->data[0] + yoff;
1010 lvl = lflvl->level;
1011 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
1012 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
1013 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
1014
1015 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
1016 if (row || y) {
1017 if (vm & x) {
1018 int L = *l, H = L >> 4;
1019 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
1020
1021 if (vmask[0] & x) {
1022 if (vmask[0] & (x << 1)) {
1023 av_assert2(l[1] == L);
1024 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
1025 } else {
1026 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
1027 }
1028 } else if (vm & (x << 1)) {
1029 L = l[1];
1030 H |= (L >> 4) << 8;
1031 E |= s->filter.mblim_lut[L] << 8;
1032 I |= s->filter.lim_lut[L] << 8;
1033 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
1034 [!!(vmask[1] & (x << 1))]
1035 [1](ptr, ls_y, E, I, H);
1036 } else {
1037 s->dsp.loop_filter_8[!!(vmask[1] & x)]
1038 [1](ptr, ls_y, E, I, H);
1039 }
1040 } else if (vm & (x << 1)) {
1041 int L = l[1], H = L >> 4;
1042 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
1043
1044 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
1045 [1](ptr + 8, ls_y, E, I, H);
1046 }
1047 }
1048 if (vm3 & x) {
1049 int L = *l, H = L >> 4;
1050 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
1051
1052 if (vm3 & (x << 1)) {
1053 L = l[1];
1054 H |= (L >> 4) << 8;
1055 E |= s->filter.mblim_lut[L] << 8;
1056 I |= s->filter.lim_lut[L] << 8;
1057 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
1058 } else {
1059 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
1060 }
1061 } else if (vm3 & (x << 1)) {
1062 int L = l[1], H = L >> 4;
1063 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
1064
1065 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
1066 }
1067 }
1068 }
1069
1070 // same principle but for U/V planes
1071 for (p = 0; p < 2; p++) {
1072 lvl = lflvl->level;
1073 dst = f->data[1 + p] + uvoff;
1074 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
1075 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
1076 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
1077 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
1078 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
1079
1080 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
1081 if (col || x > 1) {
1082 if (hm1 & x) {
1083 int L = *l, H = L >> 4;
1084 int E = s->filter.mblim_lut[L];
1085 int I = s->filter.lim_lut[L];
1086
1087 if (hmask1[0] & x) {
1088 if (hmask2[0] & x) {
1089 av_assert2(l[16] == L);
1090 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
1091 } else {
1092 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
1093 }
1094 } else if (hm2 & x) {
1095 L = l[16];
1096 H |= (L >> 4) << 8;
1097 E |= s->filter.mblim_lut[L] << 8;
1098 I |= s->filter.lim_lut[L] << 8;
1099 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
1100 [!!(hmask2[1] & x)]
1101 [0](ptr, ls_uv, E, I, H);
1102 } else {
1103 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
1104 [0](ptr, ls_uv, E, I, H);
1105 }
1106 } else if (hm2 & x) {
1107 int L = l[16], H = L >> 4;
1108 int E = s->filter.mblim_lut[L];
1109 int I = s->filter.lim_lut[L];
1110
1111 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
1112 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
1113 }
1114 }
1115 if (x & 0xAA)
1116 l += 2;
1117 }
1118 }
1119 lvl = lflvl->level;
1120 dst = f->data[1 + p] + uvoff;
1121 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
1122 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
1123 unsigned vm = vmask[0] | vmask[1] | vmask[2];
1124
1125 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
1126 if (row || y) {
1127 if (vm & x) {
1128 int L = *l, H = L >> 4;
1129 int E = s->filter.mblim_lut[L];
1130 int I = s->filter.lim_lut[L];
1131
1132 if (vmask[0] & x) {
1133 if (vmask[0] & (x << 2)) {
1134 av_assert2(l[2] == L);
1135 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
1136 } else {
1137 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
1138 }
1139 } else if (vm & (x << 2)) {
1140 L = l[2];
1141 H |= (L >> 4) << 8;
1142 E |= s->filter.mblim_lut[L] << 8;
1143 I |= s->filter.lim_lut[L] << 8;
1144 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
1145 [!!(vmask[1] & (x << 2))]
1146 [1](ptr, ls_uv, E, I, H);
1147 } else {
1148 s->dsp.loop_filter_8[!!(vmask[1] & x)]
1149 [1](ptr, ls_uv, E, I, H);
1150 }
1151 } else if (vm & (x << 2)) {
1152 int L = l[2], H = L >> 4;
1153 int E = s->filter.mblim_lut[L];
1154 int I = s->filter.lim_lut[L];
1155
1156 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
1157 [1](ptr + 8, ls_uv, E, I, H);
1158 }
1159 }
1160 }
1161 if (y & 1)
1162 lvl += 16;
1163 }
1164 }
1165}
1166
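/* Example (illustrative): with log2_n = 1 (two tiles) and n = 10 superblocks,
 * tile 0 spans superblocks [0, 5) and tile 1 spans [5, 10); the << 3 converts
 * superblock units into the 8x8-block units used by the decode loops. */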
1167static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
1168{
1169 int sb_start = (idx * n) >> log2_n;
1170 int sb_end = ((idx + 1) * n) >> log2_n;
1171 *start = FFMIN(sb_start, n) << 3;
1172 *end = FFMIN(sb_end, n) << 3;
1173}
1174
1175static int update_refs(AVCodecContext *avctx)
1176{
1177 VP9Context *s = avctx->priv_data;
1178 int i, ret;
1179
1180 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
1181 if (s->refreshrefmask & (1 << i)) {
1182 ff_thread_release_buffer(avctx, &s->refs[i]);
1183 ret = ff_thread_ref_frame(&s->refs[i], &s->frames[CUR_FRAME].tf);
1184 if (ret < 0)
1185 return ret;
1186 }
1187
1188 return 0;
1189}
1190
1191static int vp9_decode_frame(AVCodecContext *avctx, void *output,
1192 int *got_frame, AVPacket *pkt)
1193{
1194 VP9Context *s = avctx->priv_data;
1195 AVFrame *frame = output;
1196 const uint8_t *data = pkt->data;
1197 int size = pkt->size;
1198 AVFrame *f;
1199 int ret, tile_row, tile_col, i, ref = -1, row, col;
1200
1201 s->setup_finished = 0;
1202
1203 ret = decode_frame_header(avctx, data, size, &ref);
1204 if (ret < 0) {
1205 return ret;
1206 } else if (!ret) {
1207 if (!s->refs[ref].f->buf[0]) {
1208 av_log(avctx, AV_LOG_ERROR,
1209 "Requested reference %d not available\n", ref);
1210 return AVERROR_INVALIDDATA;
1211 }
1212
1213 ret = av_frame_ref(frame, s->refs[ref].f);
1214 if (ret < 0)
1215 return ret;
1216 *got_frame = 1;
1217 return pkt->size;
1218 }
1219 data += ret;
1220 size -= ret;
1221
1222 vp9_frame_unref(avctx, &s->frames[LAST_FRAME]);
1223 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->buf[0]) {
1224 ret = vp9_frame_ref(&s->frames[LAST_FRAME], &s->frames[CUR_FRAME]);
1225 if (ret < 0)
1226 return ret;
1227 }
1228
1229 vp9_frame_unref(avctx, &s->frames[CUR_FRAME]);
1230 ret = vp9_frame_alloc(avctx, &s->frames[CUR_FRAME]);
1231 if (ret < 0)
1232 return ret;
1233
1234 f = s->frames[CUR_FRAME].tf.f;
1235 f->key_frame = s->keyframe;
1236 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1237
1238 if (s->fullrange)
1239 avctx->color_range = AVCOL_RANGE_JPEG;
1240 else
1241 avctx->color_range = AVCOL_RANGE_MPEG;
1242
1243 switch (s->colorspace) {
1244 case 1: avctx->colorspace = AVCOL_SPC_BT470BG; break;
1245 case 2: avctx->colorspace = AVCOL_SPC_BT709; break;
1246 case 3: avctx->colorspace = AVCOL_SPC_SMPTE170M; break;
1247 case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
1248 }
1249
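    /* When frame threading is active and this frame adapts its probability
     * context (refreshctx && !parallelmode), decoding runs in two passes:
     * pass 1 parses the whole frame so the adapted context can be handed to
     * the next frame thread early, pass 2 does reconstruction and loop
     * filtering (see decode_superblock_mem() and the s->pass checks below). */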
1250 s->pass = s->uses_2pass =
1251 avctx->active_thread_type & FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
1252
1253 if (s->refreshctx && s->parallelmode) {
1254 int j, k, l, m;
1255 for (i = 0; i < 4; i++) {
1256 for (j = 0; j < 2; j++)
1257 for (k = 0; k < 2; k++)
1258 for (l = 0; l < 6; l++)
1259 for (m = 0; m < 6; m++)
1260 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
1261 s->prob.coef[i][j][k][l][m], 3);
1262 if (s->txfmmode == i)
1263 break;
1264 }
1265 s->prob_ctx[s->framectxid].p = s->prob.p;
1266 }
1267 if ((s->parallelmode || !s->refreshctx) &&
1268 avctx->active_thread_type & FF_THREAD_FRAME) {
1269 ff_thread_finish_setup(avctx);
1270 s->setup_finished = 1;
1271 }
1272
1273 // main tile decode loop
1274 memset(s->above_partition_ctx, 0, s->cols);
1275 memset(s->above_skip_ctx, 0, s->cols);
1276 if (s->keyframe || s->intraonly)
1277 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1278 else
1279 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1280 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1281 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
1282 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
1283 memset(s->above_segpred_ctx, 0, s->cols);
1284
1285 do {
1286 ptrdiff_t yoff = 0, uvoff = 0;
1287 s->b = s->b_base;
1288 s->block = s->block_base;
1289 s->uvblock[0] = s->uvblock_base[0];
1290 s->uvblock[1] = s->uvblock_base[1];
1291 s->eob = s->eob_base;
1292 s->uveob[0] = s->uveob_base[0];
1293 s->uveob[1] = s->uveob_base[1];
1294
1295 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
1296 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
1297 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
1298
1299 if (s->pass != 2) {
1300 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
1301 int64_t tile_size;
1302
1303 if (tile_col == s->tiling.tile_cols - 1 &&
1304 tile_row == s->tiling.tile_rows - 1) {
1305 tile_size = size;
1306 } else {
1307 tile_size = AV_RB32(data);
1308 data += 4;
1309 size -= 4;
1310 }
1311 if (tile_size > size) {
1312 ret = AVERROR_INVALIDDATA;
1313 goto fail;
1314 }
1315 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
1316 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
1317 ret = AVERROR_INVALIDDATA;
1318 goto fail;
1319 }
1320 data += tile_size;
1321 size -= tile_size;
1322 }
1323 }
1324
1325 for (row = s->tiling.tile_row_start;
1326 row < s->tiling.tile_row_end;
1327 row += 8, yoff += f->linesize[0] * 64,
1328 uvoff += f->linesize[1] * 32) {
1329 VP9Filter *lflvl = s->lflvl;
1330 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1331
1332 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
1333 set_tile_offset(&s->tiling.tile_col_start,
1334 &s->tiling.tile_col_end,
1335 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
1336
1337 memset(s->left_partition_ctx, 0, 8);
1338 memset(s->left_skip_ctx, 0, 8);
1339 if (s->keyframe || s->intraonly)
1340 memset(s->left_mode_ctx, DC_PRED, 16);
1341 else
1342 memset(s->left_mode_ctx, NEARESTMV, 8);
1343 memset(s->left_y_nnz_ctx, 0, 16);
1344 memset(s->left_uv_nnz_ctx, 0, 16);
1345 memset(s->left_segpred_ctx, 0, 8);
1346
1347 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
1348 for (col = s->tiling.tile_col_start;
1349 col < s->tiling.tile_col_end;
1350 col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
1351 // FIXME integrate with lf code (i.e. zero after each
1352 // use, similar to invtxfm coefficients, or similar)
1353 if (s->pass != 1)
1354 memset(lflvl->mask, 0, sizeof(lflvl->mask));
1355
1356 if (s->pass == 2) {
1357 ret = decode_superblock_mem(avctx, row, col, lflvl,
1358 yoff2, uvoff2, BL_64X64);
1359 } else {
1360 ret = decode_subblock(avctx, row, col, lflvl,
1361 yoff2, uvoff2, BL_64X64);
1362 }
1363 if (ret < 0)
1364 goto fail;
1365 }
1366 if (s->pass != 2)
1367 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
1368 }
1369
1370 if (s->pass == 1)
1371 continue;
1372
1373 // backup pre-loopfilter reconstruction data for intra
1374 // prediction of next row of sb64s
1375 if (row + 8 < s->rows) {
1376 memcpy(s->intra_pred_data[0],
1377 f->data[0] + yoff +
1378 63 * f->linesize[0],
1379 8 * s->cols);
1380 memcpy(s->intra_pred_data[1],
1381 f->data[1] + uvoff +
1382 31 * f->linesize[1],
1383 4 * s->cols);
1384 memcpy(s->intra_pred_data[2],
1385 f->data[2] + uvoff +
1386 31 * f->linesize[2],
1387 4 * s->cols);
1388 }
1389
1390 // loopfilter one row
1391 if (s->filter.level) {
1392 yoff2 = yoff;
1393 uvoff2 = uvoff;
1394 lflvl = s->lflvl;
1395 for (col = 0; col < s->cols;
1396 col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
1397 loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
1398 }
1399
1400 // FIXME maybe we can make this more finegrained by running the
1401 // loopfilter per-block instead of after each sbrow
1402 // In fact that would also make intra pred left preparation easier?
1403 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
1404 }
1405 }
1406
1407 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
1408 ff_vp9_adapt_probs(s);
1409 if (avctx->active_thread_type & FF_THREAD_FRAME) {
1410 ff_thread_finish_setup(avctx);
1411 s->setup_finished = 1;
1412 }
1413 }
1414 } while (s->pass++ == 1);
1415fail:
1416 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
1417 if (ret < 0)
1418 return ret;
1419
1420 // ref frame setup
1421 if (!s->setup_finished) {
1422 ret = update_refs(avctx);
1423 if (ret < 0)
1424 return ret;
1425 }
1426
1427 if (!s->invisible) {
1428 av_frame_unref(frame);
1429 ret = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f);
1430 if (ret < 0)
1431 return ret;
1432 *got_frame = 1;
1433 }
1434
1435 return pkt->size;
1436}
1437
1438static av_cold int vp9_decode_free(AVCodecContext *avctx)
1439{
1440 VP9Context *s = avctx->priv_data;
1441 int i;
1442
1443 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
1444 vp9_frame_unref(avctx, &s->frames[i]);
1445 av_frame_free(&s->frames[i].tf.f);
1446 }
1447
1448 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1449 ff_thread_release_buffer(avctx, &s->refs[i]);
1450 av_frame_free(&s->refs[i].f);
1451 }
1452
1453 av_freep(&s->c_b);
1454 av_freep(&s->above_partition_ctx);
1455 av_freep(&s->b_base);
1456 av_freep(&s->block_base);
1457
1458 return 0;
1459}
1460
1461static av_cold int vp9_decode_init(AVCodecContext *avctx)
1462{
1463 VP9Context *s = avctx->priv_data;
1464 int i;
1465
1466 memset(s, 0, sizeof(*s));
1467
1468 avctx->internal->allocate_progress = 1;
1469
1470 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1471
1472 ff_vp9dsp_init(&s->dsp);
1473 ff_videodsp_init(&s->vdsp, 8);
1474
1475 s->frames[0].tf.f = av_frame_alloc();
1476 s->frames[1].tf.f = av_frame_alloc();
1477 if (!s->frames[0].tf.f || !s->frames[1].tf.f)
1478 goto fail;
1479
1480 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1481 s->refs[i].f = av_frame_alloc();
1482 if (!s->refs[i].f)
1483 goto fail;
1484 }
1485
1486 s->filter.sharpness = -1;
1487
1488 return 0;
1489fail:
1490 vp9_decode_free(avctx);
1491 return AVERROR(ENOMEM);
1492}
1493
1494static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1495{
1496 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1497 int i, ret;
1498
1499 ret = update_size(dst, ssrc->alloc_width, ssrc->alloc_height);
1500 if (ret < 0)
1501 return ret;
1502
1503 for (i = 0; i < 2; i++) {
1504 if (s->frames[i].tf.f->data[0])
1505 vp9_frame_unref(dst, &s->frames[i]);
1506 if (ssrc->frames[i].tf.f->data[0]) {
1507 if ((ret = vp9_frame_ref(&s->frames[i], &ssrc->frames[i])) < 0)
1508 return ret;
1509 }
1510 }
1511 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1512 ff_thread_release_buffer(dst, &s->refs[i]);
1513 if (ssrc->refs[i].f->buf[0]) {
1514 ret = ff_thread_ref_frame(&s->refs[i], &ssrc->refs[i]);
1515 if (ret < 0)
1516 return ret;
1517 }
1518 }
1519
1520 s->refreshrefmask = ssrc->refreshrefmask;
1521 ret = update_refs(dst);
1522 if (ret < 0)
1523 return ret;
1524
1525 s->invisible = ssrc->invisible;
1526 s->keyframe = ssrc->keyframe;
1527 s->last_uses_2pass = ssrc->uses_2pass;
1528
1529 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1530 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
1531 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
1532 sizeof(s->segmentation.feat));
1533
1534 return 0;
1535}
1536
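/* Superframes (several compressed frames packed into one packet) are split by
 * the vp9_superframe_split bitstream filter declared below, so by the time a
 * packet reaches vp9_decode_frame() it contains a single frame. */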
1537AVCodec ff_vp9_decoder = {
1538 .name = "vp9",
1539 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1540 .type = AVMEDIA_TYPE_VIDEO,
1541 .id = AV_CODEC_ID_VP9,
1542 .priv_data_size = sizeof(VP9Context),
1543 .init = vp9_decode_init,
1544 .decode = vp9_decode_frame,
1545 .flush = vp9_decode_flush,
1546 .close = vp9_decode_free,
1547 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
1548 .init_thread_copy = vp9_decode_init,
1549 .update_thread_context = vp9_decode_update_thread_context,
1550 .bsfs = "vp9_superframe_split",
1551};