vp9: Return the correct size when decoding a superframe
[libav.git] / libavcodec / vp9.c
1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "libavutil/avassert.h"
25
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33
34 #define VP9_SYNCCODE 0x498342
35 #define MAX_PROB 255
36
37 static void vp9_decode_flush(AVCodecContext *avctx)
38 {
39 VP9Context *s = avctx->priv_data;
40 int i;
41
42 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
43 av_frame_unref(s->refs[i]);
44 }
45
/**
 * (Re)allocate the per-frame context buffers for a new coded size.
 *
 * Returns 0 on success (including the fast path where the size is unchanged
 * and the buffers already exist), AVERROR_INVALIDDATA for non-positive
 * dimensions, or AVERROR(ENOMEM) on allocation failure.
 */
static int update_size(AVCodecContext *avctx, int w, int h)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *p;

    // nothing to do if the size is unchanged and buffers were already set up
    if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
        return 0;

    // a resolution change invalidates all reference frames
    vp9_decode_flush(avctx);

    if (w <= 0 || h <= 0)
        return AVERROR_INVALIDDATA;

    avctx->width  = w;
    avctx->height = h;
    s->sb_cols    = (w + 63) >> 6; // width in 64x64 superblocks
    s->sb_rows    = (h + 63) >> 6; // height in 64x64 superblocks
    s->cols       = (w + 7) >> 3;  // width in 8x8 blocks
    s->rows       = (h + 7) >> 3;  // height in 8x8 blocks

    /* All context arrays are carved out of a single allocation; each
     * assign() hands out s->sb_cols * n elements and advances p. The 240
     * constant is the summed per-sb_col element count of the uint8_t arrays
     * below (8+8+8+16+16+8+8+64+32+32+8+8+8+8+8 = 240); the remaining terms
     * cover lflvl, above_mv_ctx and the three 64*sb_rows-sized per-frame
     * arrays (segmentation_map and the two mv planes). */
#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    p = av_malloc(s->sb_cols *
                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
                   64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx,      uint8_t *, 8);
    assign(s->above_txfm_ctx,      uint8_t *, 8);
    assign(s->above_mode_ctx,      uint8_t *, 16);
    assign(s->above_y_nnz_ctx,     uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0],  uint8_t *, 64);
    assign(s->intra_pred_data[1],  uint8_t *, 32);
    assign(s->intra_pred_data[2],  uint8_t *, 32);
    assign(s->above_segpred_ctx,   uint8_t *, 8);
    assign(s->above_intra_ctx,     uint8_t *, 8);
    assign(s->above_comp_ctx,      uint8_t *, 8);
    assign(s->above_ref_ctx,       uint8_t *, 8);
    assign(s->above_filter_ctx,    uint8_t *, 8);
    assign(s->lflvl,               VP9Filter *, 1);
    assign(s->above_mv_ctx,        VP56mv(*)[2], 16);
    assign(s->segmentation_map,    uint8_t *, 64 * s->sb_rows);
    assign(s->mv[0],               VP9MVRefPair *, 64 * s->sb_rows);
    assign(s->mv[1],               VP9MVRefPair *, 64 * s->sb_rows);
#undef assign

    return 0;
}
97
98 // The sign bit is at the end, not the start, of a bit sequence
99 static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
100 {
101 int v = get_bits(gb, n);
102 return get_bits1(gb) ? -v : v;
103 }
104
105 static av_always_inline int inv_recenter_nonneg(int v, int m)
106 {
107 if (v > 2 * m)
108 return v;
109 if (v & 1)
110 return m - ((v + 1) >> 1);
111 return m + (v >> 1);
112 }
113
114 // differential forward probability updates
/**
 * Read one differentially-coded forward probability update from the
 * range coder.
 *
 * @param c range coder the update VLC is read from
 * @param p current probability, in [1, 255]
 * @return the updated probability, in [1, 255]
 */
static int update_prob(VP56RangeCoder *c, int p)
{
    // maps the decoded VLC index onto the absolute probability delta;
    // the first 20 entries are the coarse "cheap" deltas (see below)
    static const int inv_map_table[MAX_PROB - 1] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // unary-prefixed VLC: each extra 1-bit selects a higher offset range
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65) {
            // values >= 65 carry one extra resolution bit
            d = (d << 1) - 65 + vp8_rac_get(c);
            d = av_clip(d, 0, MAX_PROB - 65 - 1);
        }
        d += 64;
    }

    // recenter around p, mirroring into whichever half has room
    return p <= 128
           ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
           : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
174
175 static int decode_frame_header(AVCodecContext *avctx,
176 const uint8_t *data, int size, int *ref)
177 {
178 VP9Context *s = avctx->priv_data;
179 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
180 int last_invisible;
181 const uint8_t *data2;
182
183 /* general header */
184 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
185 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
186 return ret;
187 }
188 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
189 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
190 return AVERROR_INVALIDDATA;
191 }
192 s->profile = get_bits1(&s->gb);
193 if (get_bits1(&s->gb)) { // reserved bit
194 av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
195 return AVERROR_INVALIDDATA;
196 }
197 if (get_bits1(&s->gb)) {
198 *ref = get_bits(&s->gb, 3);
199 return 0;
200 }
201
202 s->last_keyframe = s->keyframe;
203 s->keyframe = !get_bits1(&s->gb);
204
205 last_invisible = s->invisible;
206 s->invisible = !get_bits1(&s->gb);
207 s->errorres = get_bits1(&s->gb);
208 // FIXME disable this upon resolution change
209 s->use_last_frame_mvs = !s->errorres && !last_invisible;
210
211 if (s->keyframe) {
212 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
213 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
214 return AVERROR_INVALIDDATA;
215 }
216 s->colorspace = get_bits(&s->gb, 3);
217 if (s->colorspace == 7) { // RGB = profile 1
218 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
219 return AVERROR_INVALIDDATA;
220 }
221 s->fullrange = get_bits1(&s->gb);
222
223 // subsampling bits
224 if (s->profile == 1 || s->profile == 3) {
225 s->sub_x = get_bits1(&s->gb);
226 s->sub_y = get_bits1(&s->gb);
227 if (s->sub_x && s->sub_y) {
228 av_log(avctx, AV_LOG_ERROR,
229 "4:2:0 color not supported in profile 1 or 3\n");
230 return AVERROR_INVALIDDATA;
231 }
232 if (get_bits1(&s->gb)) { // reserved bit
233 av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
234 return AVERROR_INVALIDDATA;
235 }
236 } else {
237 s->sub_x = s->sub_y = 1;
238 }
239 if (!s->sub_x || !s->sub_y) {
240 avpriv_report_missing_feature(avctx, "Subsampling %d:%d",
241 s->sub_x, s->sub_y);
242 return AVERROR_PATCHWELCOME;
243 }
244
245 s->refreshrefmask = 0xff;
246 w = get_bits(&s->gb, 16) + 1;
247 h = get_bits(&s->gb, 16) + 1;
248 if (get_bits1(&s->gb)) // display size
249 skip_bits(&s->gb, 32);
250 } else {
251 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
252 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
253 if (s->intraonly) {
254 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
255 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
256 return AVERROR_INVALIDDATA;
257 }
258 s->refreshrefmask = get_bits(&s->gb, 8);
259 w = get_bits(&s->gb, 16) + 1;
260 h = get_bits(&s->gb, 16) + 1;
261 if (get_bits1(&s->gb)) // display size
262 skip_bits(&s->gb, 32);
263 } else {
264 s->refreshrefmask = get_bits(&s->gb, 8);
265 s->refidx[0] = get_bits(&s->gb, 3);
266 s->signbias[0] = get_bits1(&s->gb);
267 s->refidx[1] = get_bits(&s->gb, 3);
268 s->signbias[1] = get_bits1(&s->gb);
269 s->refidx[2] = get_bits(&s->gb, 3);
270 s->signbias[2] = get_bits1(&s->gb);
271 if (!s->refs[s->refidx[0]]->buf[0] ||
272 !s->refs[s->refidx[1]]->buf[0] ||
273 !s->refs[s->refidx[2]]->buf[0]) {
274 av_log(avctx, AV_LOG_ERROR,
275 "Not all references are available\n");
276 return AVERROR_INVALIDDATA;
277 }
278 if (get_bits1(&s->gb)) {
279 w = s->refs[s->refidx[0]]->width;
280 h = s->refs[s->refidx[0]]->height;
281 } else if (get_bits1(&s->gb)) {
282 w = s->refs[s->refidx[1]]->width;
283 h = s->refs[s->refidx[1]]->height;
284 } else if (get_bits1(&s->gb)) {
285 w = s->refs[s->refidx[2]]->width;
286 h = s->refs[s->refidx[2]]->height;
287 } else {
288 w = get_bits(&s->gb, 16) + 1;
289 h = get_bits(&s->gb, 16) + 1;
290 }
291 if (get_bits1(&s->gb)) // display size
292 skip_bits(&s->gb, 32);
293 s->highprecisionmvs = get_bits1(&s->gb);
294 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
295 get_bits(&s->gb, 2);
296 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
297 s->signbias[0] != s->signbias[2];
298 if (s->allowcompinter) {
299 if (s->signbias[0] == s->signbias[1]) {
300 s->fixcompref = 2;
301 s->varcompref[0] = 0;
302 s->varcompref[1] = 1;
303 } else if (s->signbias[0] == s->signbias[2]) {
304 s->fixcompref = 1;
305 s->varcompref[0] = 0;
306 s->varcompref[1] = 2;
307 } else {
308 s->fixcompref = 0;
309 s->varcompref[0] = 1;
310 s->varcompref[1] = 2;
311 }
312 }
313 }
314 }
315
316 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
317 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
318 s->framectxid = c = get_bits(&s->gb, 2);
319
320 /* loopfilter header data */
321 s->filter.level = get_bits(&s->gb, 6);
322 sharp = get_bits(&s->gb, 3);
323 /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
324 * keep the old cache values since they are still valid. */
325 if (s->filter.sharpness != sharp)
326 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
327 s->filter.sharpness = sharp;
328 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
329 if (get_bits1(&s->gb)) {
330 for (i = 0; i < 4; i++)
331 if (get_bits1(&s->gb))
332 s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
333 for (i = 0; i < 2; i++)
334 if (get_bits1(&s->gb))
335 s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
336 }
337 } else {
338 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
339 }
340
341 /* quantization header data */
342 s->yac_qi = get_bits(&s->gb, 8);
343 s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
344 s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
345 s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
346 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
347 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
348
349 /* segmentation header info */
350 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
351 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
352 for (i = 0; i < 7; i++)
353 s->prob.seg[i] = get_bits1(&s->gb) ?
354 get_bits(&s->gb, 8) : 255;
355 if ((s->segmentation.temporal = get_bits1(&s->gb)))
356 for (i = 0; i < 3; i++)
357 s->prob.segpred[i] = get_bits1(&s->gb) ?
358 get_bits(&s->gb, 8) : 255;
359 }
360
361 if (get_bits1(&s->gb)) {
362 s->segmentation.absolute_vals = get_bits1(&s->gb);
363 for (i = 0; i < 8; i++) {
364 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
365 s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
366 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
367 s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
368 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
369 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
370 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
371 }
372 }
373 } else {
374 s->segmentation.feat[0].q_enabled = 0;
375 s->segmentation.feat[0].lf_enabled = 0;
376 s->segmentation.feat[0].skip_enabled = 0;
377 s->segmentation.feat[0].ref_enabled = 0;
378 }
379
380 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
381 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
382 int qyac, qydc, quvac, quvdc, lflvl, sh;
383
384 if (s->segmentation.feat[i].q_enabled) {
385 if (s->segmentation.absolute_vals)
386 qyac = s->segmentation.feat[i].q_val;
387 else
388 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
389 } else {
390 qyac = s->yac_qi;
391 }
392 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
393 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
394 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
395 qyac = av_clip_uintp2(qyac, 8);
396
397 s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
398 s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
399 s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
400 s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];
401
402 sh = s->filter.level >= 32;
403 if (s->segmentation.feat[i].lf_enabled) {
404 if (s->segmentation.absolute_vals)
405 lflvl = s->segmentation.feat[i].lf_val;
406 else
407 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
408 } else {
409 lflvl = s->filter.level;
410 }
411 s->segmentation.feat[i].lflvl[0][0] =
412 s->segmentation.feat[i].lflvl[0][1] =
413 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
414 for (j = 1; j < 4; j++) {
415 s->segmentation.feat[i].lflvl[j][0] =
416 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
417 s->lf_delta.mode[0]) << sh), 6);
418 s->segmentation.feat[i].lflvl[j][1] =
419 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
420 s->lf_delta.mode[1]) << sh), 6);
421 }
422 }
423
424 /* tiling info */
425 if ((ret = update_size(avctx, w, h)) < 0) {
426 av_log(avctx, AV_LOG_ERROR,
427 "Failed to initialize decoder for %dx%d\n", w, h);
428 return ret;
429 }
430 for (s->tiling.log2_tile_cols = 0;
431 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
432 s->tiling.log2_tile_cols++) ;
433 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
434 max = FFMAX(0, max - 1);
435 while (max > s->tiling.log2_tile_cols) {
436 if (get_bits1(&s->gb))
437 s->tiling.log2_tile_cols++;
438 else
439 break;
440 }
441 s->tiling.log2_tile_rows = decode012(&s->gb);
442 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
443 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
444 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
445 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
446 sizeof(VP56RangeCoder) *
447 s->tiling.tile_cols);
448 if (!s->c_b) {
449 av_log(avctx, AV_LOG_ERROR,
450 "Ran out of memory during range coder init\n");
451 return AVERROR(ENOMEM);
452 }
453 }
454
455 if (s->keyframe || s->errorres || s->intraonly) {
456 s->prob_ctx[0].p =
457 s->prob_ctx[1].p =
458 s->prob_ctx[2].p =
459 s->prob_ctx[3].p = ff_vp9_default_probs;
460 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
461 sizeof(ff_vp9_default_coef_probs));
462 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
463 sizeof(ff_vp9_default_coef_probs));
464 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
465 sizeof(ff_vp9_default_coef_probs));
466 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
467 sizeof(ff_vp9_default_coef_probs));
468 }
469
470 // next 16 bits is size of the rest of the header (arith-coded)
471 size2 = get_bits(&s->gb, 16);
472 data2 = align_get_bits(&s->gb);
473 if (size2 > size - (data2 - data)) {
474 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
475 return AVERROR_INVALIDDATA;
476 }
477 ff_vp56_init_range_decoder(&s->c, data2, size2);
478 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
479 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
480 return AVERROR_INVALIDDATA;
481 }
482
483 if (s->keyframe || s->intraonly)
484 memset(s->counts.coef, 0,
485 sizeof(s->counts.coef) + sizeof(s->counts.eob));
486 else
487 memset(&s->counts, 0, sizeof(s->counts));
488
489 /* FIXME is it faster to not copy here, but do it down in the fw updates
490 * as explicit copies if the fw update is missing (and skip the copy upon
491 * fw update)? */
492 s->prob.p = s->prob_ctx[c].p;
493
494 // txfm updates
495 if (s->lossless) {
496 s->txfmmode = TX_4X4;
497 } else {
498 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
499 if (s->txfmmode == 3)
500 s->txfmmode += vp8_rac_get(&s->c);
501
502 if (s->txfmmode == TX_SWITCHABLE) {
503 for (i = 0; i < 2; i++)
504 if (vp56_rac_get_prob_branchy(&s->c, 252))
505 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
506 for (i = 0; i < 2; i++)
507 for (j = 0; j < 2; j++)
508 if (vp56_rac_get_prob_branchy(&s->c, 252))
509 s->prob.p.tx16p[i][j] =
510 update_prob(&s->c, s->prob.p.tx16p[i][j]);
511 for (i = 0; i < 2; i++)
512 for (j = 0; j < 3; j++)
513 if (vp56_rac_get_prob_branchy(&s->c, 252))
514 s->prob.p.tx32p[i][j] =
515 update_prob(&s->c, s->prob.p.tx32p[i][j]);
516 }
517 }
518
519 // coef updates
520 for (i = 0; i < 4; i++) {
521 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
522 if (vp8_rac_get(&s->c)) {
523 for (j = 0; j < 2; j++)
524 for (k = 0; k < 2; k++)
525 for (l = 0; l < 6; l++)
526 for (m = 0; m < 6; m++) {
527 uint8_t *p = s->prob.coef[i][j][k][l][m];
528 uint8_t *r = ref[j][k][l][m];
529 if (m >= 3 && l == 0) // dc only has 3 pt
530 break;
531 for (n = 0; n < 3; n++) {
532 if (vp56_rac_get_prob_branchy(&s->c, 252))
533 p[n] = update_prob(&s->c, r[n]);
534 else
535 p[n] = r[n];
536 }
537 p[3] = 0;
538 }
539 } else {
540 for (j = 0; j < 2; j++)
541 for (k = 0; k < 2; k++)
542 for (l = 0; l < 6; l++)
543 for (m = 0; m < 6; m++) {
544 uint8_t *p = s->prob.coef[i][j][k][l][m];
545 uint8_t *r = ref[j][k][l][m];
546 if (m > 3 && l == 0) // dc only has 3 pt
547 break;
548 memcpy(p, r, 3);
549 p[3] = 0;
550 }
551 }
552 if (s->txfmmode == i)
553 break;
554 }
555
556 // mode updates
557 for (i = 0; i < 3; i++)
558 if (vp56_rac_get_prob_branchy(&s->c, 252))
559 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
560 if (!s->keyframe && !s->intraonly) {
561 for (i = 0; i < 7; i++)
562 for (j = 0; j < 3; j++)
563 if (vp56_rac_get_prob_branchy(&s->c, 252))
564 s->prob.p.mv_mode[i][j] =
565 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
566
567 if (s->filtermode == FILTER_SWITCHABLE)
568 for (i = 0; i < 4; i++)
569 for (j = 0; j < 2; j++)
570 if (vp56_rac_get_prob_branchy(&s->c, 252))
571 s->prob.p.filter[i][j] =
572 update_prob(&s->c, s->prob.p.filter[i][j]);
573
574 for (i = 0; i < 4; i++)
575 if (vp56_rac_get_prob_branchy(&s->c, 252))
576 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
577
578 if (s->allowcompinter) {
579 s->comppredmode = vp8_rac_get(&s->c);
580 if (s->comppredmode)
581 s->comppredmode += vp8_rac_get(&s->c);
582 if (s->comppredmode == PRED_SWITCHABLE)
583 for (i = 0; i < 5; i++)
584 if (vp56_rac_get_prob_branchy(&s->c, 252))
585 s->prob.p.comp[i] =
586 update_prob(&s->c, s->prob.p.comp[i]);
587 } else {
588 s->comppredmode = PRED_SINGLEREF;
589 }
590
591 if (s->comppredmode != PRED_COMPREF) {
592 for (i = 0; i < 5; i++) {
593 if (vp56_rac_get_prob_branchy(&s->c, 252))
594 s->prob.p.single_ref[i][0] =
595 update_prob(&s->c, s->prob.p.single_ref[i][0]);
596 if (vp56_rac_get_prob_branchy(&s->c, 252))
597 s->prob.p.single_ref[i][1] =
598 update_prob(&s->c, s->prob.p.single_ref[i][1]);
599 }
600 }
601
602 if (s->comppredmode != PRED_SINGLEREF) {
603 for (i = 0; i < 5; i++)
604 if (vp56_rac_get_prob_branchy(&s->c, 252))
605 s->prob.p.comp_ref[i] =
606 update_prob(&s->c, s->prob.p.comp_ref[i]);
607 }
608
609 for (i = 0; i < 4; i++)
610 for (j = 0; j < 9; j++)
611 if (vp56_rac_get_prob_branchy(&s->c, 252))
612 s->prob.p.y_mode[i][j] =
613 update_prob(&s->c, s->prob.p.y_mode[i][j]);
614
615 for (i = 0; i < 4; i++)
616 for (j = 0; j < 4; j++)
617 for (k = 0; k < 3; k++)
618 if (vp56_rac_get_prob_branchy(&s->c, 252))
619 s->prob.p.partition[3 - i][j][k] =
620 update_prob(&s->c,
621 s->prob.p.partition[3 - i][j][k]);
622
623 // mv fields don't use the update_prob subexp model for some reason
624 for (i = 0; i < 3; i++)
625 if (vp56_rac_get_prob_branchy(&s->c, 252))
626 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
627
628 for (i = 0; i < 2; i++) {
629 if (vp56_rac_get_prob_branchy(&s->c, 252))
630 s->prob.p.mv_comp[i].sign =
631 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
632
633 for (j = 0; j < 10; j++)
634 if (vp56_rac_get_prob_branchy(&s->c, 252))
635 s->prob.p.mv_comp[i].classes[j] =
636 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
637
638 if (vp56_rac_get_prob_branchy(&s->c, 252))
639 s->prob.p.mv_comp[i].class0 =
640 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
641
642 for (j = 0; j < 10; j++)
643 if (vp56_rac_get_prob_branchy(&s->c, 252))
644 s->prob.p.mv_comp[i].bits[j] =
645 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
646 }
647
648 for (i = 0; i < 2; i++) {
649 for (j = 0; j < 2; j++)
650 for (k = 0; k < 3; k++)
651 if (vp56_rac_get_prob_branchy(&s->c, 252))
652 s->prob.p.mv_comp[i].class0_fp[j][k] =
653 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
654
655 for (j = 0; j < 3; j++)
656 if (vp56_rac_get_prob_branchy(&s->c, 252))
657 s->prob.p.mv_comp[i].fp[j] =
658 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
659 }
660
661 if (s->highprecisionmvs) {
662 for (i = 0; i < 2; i++) {
663 if (vp56_rac_get_prob_branchy(&s->c, 252))
664 s->prob.p.mv_comp[i].class0_hp =
665 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
666
667 if (vp56_rac_get_prob_branchy(&s->c, 252))
668 s->prob.p.mv_comp[i].hp =
669 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
670 }
671 }
672 }
673
674 return (data2 - data) + size2;
675 }
676
/**
 * Recursively parse the partition tree for one block and decode the
 * resulting leaf blocks.
 *
 * @param row    block row, in 8x8-block units
 * @param col    block column, in 8x8-block units
 * @param lflvl  per-superblock loopfilter state filled in during decoding
 * @param yoff   byte offset of this block into the luma plane
 * @param uvoff  byte offset of this block into the chroma planes
 * @param bl     current block level (split depth)
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_subblock(AVCodecContext *avctx, int row, int col,
                           VP9Filter *lflvl,
                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    // partition probability context: bit 0 from the above neighbour,
    // bit 1 from the left neighbour
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    int ret;
    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
                                   : s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; // half the block size, in 8x8-block units

    if (bl == BL_8X8) {
        // smallest level: the partition is coded but cannot split further
        bp  = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            // block fully inside the frame: all four partitions possible
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                break;
            case PARTITION_H:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    // step down to the lower half (hbs 8x8 rows = hbs*8 luma
                    // lines, hbs*4 chroma lines at 4:2:0)
                    yoff  += hbs * 8 * s->cur_frame->linesize[0];
                    uvoff += hbs * 4 * s->cur_frame->linesize[1];
                    ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    // step right to the second half
                    yoff  += hbs * 8;
                    uvoff += hbs * 4;
                    ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                // recurse into the four quadrants in raster order
                ret = decode_subblock(avctx, row, col, lflvl,
                                      yoff, uvoff, bl + 1);
                if (!ret) {
                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs,
                                          bl + 1);
                    if (!ret) {
                        yoff  += hbs * 8 * s->cur_frame->linesize[0];
                        uvoff += hbs * 4 * s->cur_frame->linesize[1];
                        ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1);
                        if (!ret) {
                            ret = decode_subblock(avctx, row + hbs, col + hbs,
                                                  lflvl, yoff + 8 * hbs,
                                                  uvoff + 4 * hbs, bl + 1);
                        }
                    }
                }
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
                return AVERROR_INVALIDDATA;
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            // bottom half is outside the frame: only split or horizontal;
            // a single bool against the H probability selects between them
            bp  = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret)
                ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp  = PARTITION_H;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else if (row + hbs < s->rows) {
        // right half is outside the frame: only split or vertical
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp  = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret) {
                yoff  += hbs * 8 * s->cur_frame->linesize[0];
                uvoff += hbs * 4 * s->cur_frame->linesize[1];
                ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                      yoff, uvoff, bl + 1);
            }
        } else {
            bp  = PARTITION_V;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else {
        // both halves outside the frame: forced split, no bits read
        bp  = PARTITION_SPLIT;
        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return ret;
}
779
/**
 * Apply the in-loop deblocking filter to one 64x64 superblock.
 *
 * lflvl holds the per-8x8-block filter levels and the edge bitmasks built
 * up while decoding the superblock (mask[0] = luma, mask[1] = chroma;
 * [..][0] = column/vertical edges, [..][1] = row/horizontal edges).
 * row/col give the superblock position in 8x8-block units; yoff/uvoff are
 * byte offsets into the current frame's luma/chroma planes.
 */
static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                int row, int col,
                                ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *dst = s->cur_frame->data[0] + yoff, *lvl = lflvl->level;
    ptrdiff_t ls_y = s->cur_frame->linesize[0], ls_uv = s->cur_frame->linesize[1];
    int y, x, p;

    /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
     * if you think of them as acting on a 8x8 block max, we can interleave
     * each v/h within the single x loop, but that only works if we work on
     * 8 pixel blocks, and we won't always do that (we want at least 16px
     * to use SSE2 optimizations, perhaps 32 for AVX2). */

    // filter edges between columns, Y plane (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        // hmask[0..2] select full/16/8-wide edges, hmask[3] 4px inner edges
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm  = hm1 | hm2 | hm13 | hm23;

        // walk one mask bit (= 8 luma pixels) at a time until no bits remain
        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) { // never filter the frame's left edge
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        // pack the second row's level into the high byte so
                        // one mix2 call filters both 8px rows
                        L  = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            // 4px inner edges (transform-size 4x4), offset by 4 pixels
            if (hm13 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L  = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          block1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          block2
    dst = s->cur_frame->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        // two mask bits per step: bit x and bit x<<1 cover two adjacent
        // 8px columns, handled together (possibly via a mix2 call)
        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            if (row || y) { // never filter the frame's top edge
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        L  = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            // 4px inner horizontal edges, offset by 4 lines
            if (vm3 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L  = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        // chroma is half resolution: one mask bit covers 4 chroma pixels,
        // and levels are stored per 8x8 luma block (hence the l stepping)
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) {
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L  = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                // advance the level pointer every other mask bit (two 4px
                // chroma columns share one 8x8 luma block's level)
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L  = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            // every other chroma row advances to the next level row
            if (y & 1)
                lvl += 16;
        }
    }
}
1020
/* Compute the pixel extent [*start, *end) of tile number idx along one
 * dimension: n superblocks are split evenly into 2^log2_n tiles, and the
 * resulting superblock indices are clamped to n and converted to 8x8-block
 * units (one superblock = 8 blocks). */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    *start = sb_start << 3;
    *end   = sb_end << 3;
}
1028
/*
 * Decode one complete VP9 frame (not a superframe) from data/size, which
 * must begin with a frame header.
 *
 * Returns 0 on success (*got_frame is set iff a displayable frame was
 * produced in *frame) or a negative error code.  A "show existing frame"
 * header re-outputs a stored reference frame without any decoding.
 */
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    int ret, tile_row, tile_col, i, ref = -1, row, col;
    ptrdiff_t yoff = 0, uvoff = 0;

    /* ret > 0 is the header size in bytes; ret == 0 means the header only
     * requests re-display of reference frame 'ref'. */
    ret = decode_frame_header(avctx, data, size, &ref);
    if (ret < 0) {
        return ret;
    } else if (!ret) {
        if (!s->refs[ref]->buf[0]) {
            av_log(avctx, AV_LOG_ERROR,
                   "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }

        ret = av_frame_ref(frame, s->refs[ref]);
        if (ret < 0)
            return ret;
        *got_frame = 1;
        return 0;
    }
    /* skip past the compressed header; the rest is tile data */
    data += ret;
    size -= ret;

    s->cur_frame = frame;

    /* request a reference-counted buffer whenever this frame will be kept
     * in the reference pool afterwards */
    av_frame_unref(s->cur_frame);
    if ((ret = ff_get_buffer(avctx, s->cur_frame,
                             s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    s->cur_frame->key_frame = s->keyframe;
    s->cur_frame->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                          : AV_PICTURE_TYPE_P;

    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* colorspace index from the bitstream; any other value leaves the
     * context's current setting untouched */
    switch (s->colorspace) {
    case 1: avctx->colorspace = AVCOL_SPC_BT470BG; break;
    case 2: avctx->colorspace = AVCOL_SPC_BT709; break;
    case 3: avctx->colorspace = AVCOL_SPC_SMPTE170M; break;
    case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
    }

    // main tile decode loop
    /* reset the per-column ("above") prediction contexts for the new frame */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly)
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    else
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);
        /* set up one range decoder per tile column; every tile except the
         * final one of the frame is prefixed with a 32-bit big-endian size,
         * the final tile simply uses the remaining buffer */
        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->tiling.tile_cols - 1 &&
                tile_row == s->tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size)
                return AVERROR_INVALIDDATA;
            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
                return AVERROR_INVALIDDATA;
            data += tile_size;
            size -= tile_size;
        }

        /* decode one superblock row (64 luma pixels) at a time, sweeping
         * across all tile columns, so each row can be loopfiltered as soon
         * as it is reconstructed */
        for (row = s->tiling.tile_row_start;
             row < s->tiling.tile_row_end;
             row += 8, yoff += s->cur_frame->linesize[0] * 64,
             uvoff += s->cur_frame->linesize[1] * 32) {
            VP9Filter *lflvl = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                set_tile_offset(&s->tiling.tile_col_start,
                                &s->tiling.tile_col_end,
                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                /* reset the per-row ("left") contexts at each tile edge */
                memset(s->left_partition_ctx, 0, 8);
                memset(s->left_skip_ctx, 0, 8);
                if (s->keyframe || s->intraonly)
                    memset(s->left_mode_ctx, DC_PRED, 16);
                else
                    memset(s->left_mode_ctx, NEARESTMV, 8);
                memset(s->left_y_nnz_ctx, 0, 16);
                memset(s->left_uv_nnz_ctx, 0, 16);
                memset(s->left_segpred_ctx, 0, 8);

                /* switch to this tile's saved range decoder state, and
                 * store it back after the superblock row is done */
                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                for (col = s->tiling.tile_col_start;
                     col < s->tiling.tile_col_end;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    memset(lflvl->mask, 0, sizeof(lflvl->mask));

                    if ((ret = decode_subblock(avctx, row, col, lflvl,
                                               yoff2, uvoff2, BL_64X64)) < 0)
                        return ret;
                }
                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       s->cur_frame->data[0] + yoff +
                       63 * s->cur_frame->linesize[0],
                       8 * s->cols);
                memcpy(s->intra_pred_data[1],
                       s->cur_frame->data[1] + uvoff +
                       31 * s->cur_frame->linesize[1],
                       4 * s->cols);
                memcpy(s->intra_pred_data[2],
                       s->cur_frame->data[2] + uvoff +
                       31 * s->cur_frame->linesize[2],
                       4 * s->cols);
            }

            // loopfilter one row
            if (s->filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
                    loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
            }
        }
    }

    // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
    // probability maintenance between frames)
    if (s->refreshctx) {
        if (s->parallelmode) {
            int j, k, l, m;
            /* parallel mode: copy the forward-updated probabilities as-is;
             * coef probs only up to the active tx size */
            for (i = 0; i < 4; i++) {
                for (j = 0; j < 2; j++)
                    for (k = 0; k < 2; k++)
                        for (l = 0; l < 6; l++)
                            for (m = 0; m < 6; m++)
                                memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                       s->prob.coef[i][j][k][l][m], 3);
                if (s->txfmmode == i)
                    break;
            }
            s->prob_ctx[s->framectxid].p = s->prob.p;
        } else {
            ff_vp9_adapt_probs(s);
        }
    }
    /* swap current/previous motion vector planes for the next frame */
    FFSWAP(VP9MVRefPair *, s->mv[0], s->mv[1]);

    // ref frame setup
    for (i = 0; i < 8; i++)
        if (s->refreshrefmask & (1 << i)) {
            av_frame_unref(s->refs[i]);
            ret = av_frame_ref(s->refs[i], s->cur_frame);
            if (ret < 0)
                return ret;
        }

    /* invisible frames only refresh references; nothing is output */
    if (s->invisible)
        av_frame_unref(s->cur_frame);
    else
        *got_frame = 1;

    return 0;
}
1215
1216 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1217 int *got_frame, AVPacket *avpkt)
1218 {
1219 const uint8_t *data = avpkt->data;
1220 int size = avpkt->size;
1221 int marker, ret;
1222
1223 /* Read superframe index - this is a collection of individual frames
1224 * that together lead to one visible frame */
1225 marker = data[size - 1];
1226 if ((marker & 0xe0) == 0xc0) {
1227 int nbytes = 1 + ((marker >> 3) & 0x3);
1228 int n_frames = 1 + (marker & 0x7);
1229 int idx_sz = 2 + n_frames * nbytes;
1230
1231 if (size >= idx_sz && data[size - idx_sz] == marker) {
1232 const uint8_t *idx = data + size + 1 - idx_sz;
1233
1234 while (n_frames--) {
1235 unsigned sz = AV_RL32(idx);
1236
1237 if (nbytes < 4)
1238 sz &= (1 << (8 * nbytes)) - 1;
1239 idx += nbytes;
1240
1241 if (sz > size) {
1242 av_log(avctx, AV_LOG_ERROR,
1243 "Superframe packet size too big: %u > %d\n",
1244 sz, size);
1245 return AVERROR_INVALIDDATA;
1246 }
1247
1248 ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
1249 if (ret < 0)
1250 return ret;
1251 data += sz;
1252 size -= sz;
1253 }
1254 return avpkt->size;
1255 }
1256 }
1257
1258 /* If we get here, there was no valid superframe index, i.e. this is just
1259 * one whole single frame. Decode it as such from the complete input buf. */
1260 if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
1261 return ret;
1262 return size;
1263 }
1264
1265 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1266 {
1267 VP9Context *s = avctx->priv_data;
1268 int i;
1269
1270 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
1271 av_frame_free(&s->refs[i]);
1272
1273 av_freep(&s->c_b);
1274 av_freep(&s->above_partition_ctx);
1275
1276 return 0;
1277 }
1278
1279 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1280 {
1281 VP9Context *s = avctx->priv_data;
1282 int i;
1283
1284 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1285
1286 ff_vp9dsp_init(&s->dsp);
1287 ff_videodsp_init(&s->vdsp, 8);
1288
1289 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1290 s->refs[i] = av_frame_alloc();
1291 if (!s->refs[i]) {
1292 vp9_decode_free(avctx);
1293 return AVERROR(ENOMEM);
1294 }
1295 }
1296
1297 s->filter.sharpness = -1;
1298
1299 return 0;
1300 }
1301
/* Decoder registration: native VP9 decoder with direct-rendering (DR1)
 * buffer support. */
AVCodec ff_vp9_decoder = {
    .name           = "vp9",
    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP9,
    .priv_data_size = sizeof(VP9Context),
    .init           = vp9_decode_init,
    .decode         = vp9_decode_packet,
    .flush          = vp9_decode_flush,
    .close          = vp9_decode_free,
    .capabilities   = AV_CODEC_CAP_DR1,
};