lavc: VP9 decoder
[libav.git] / libavcodec / vp9.c
1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "libavutil/avassert.h"
25
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33
34 #define VP9_SYNCCODE 0x498342
35 #define MAX_PROB 255
36
37 static void vp9_decode_flush(AVCodecContext *avctx)
38 {
39 VP9Context *s = avctx->priv_data;
40 int i;
41
42 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
43 av_frame_unref(s->refs[i]);
44 }
45
/*
 * (Re)allocate all per-frame context buffers for a w x h frame.
 *
 * Returns 0 on success (including the no-op case where the size is
 * unchanged and buffers already exist), AVERROR_INVALIDDATA for a
 * non-positive size, or AVERROR(ENOMEM) on allocation failure.
 */
static int update_size(AVCodecContext *avctx, int w, int h)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *p;

    /* Fast path: buffers already allocated for exactly this resolution. */
    if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
        return 0;

    /* Size changed: previously decoded references are unusable. */
    vp9_decode_flush(avctx);

    if (w <= 0 || h <= 0)
        return AVERROR_INVALIDDATA;

    avctx->width = w;
    avctx->height = h;
    s->sb_cols = (w + 63) >> 6; /* grid of 64x64 superblocks */
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;     /* grid of 8x8 blocks */
    s->rows = (h + 7) >> 3;

    /* All "above"-row context arrays, loop-filter levels, the segmentation
     * map and both MV planes are carved out of a single allocation; each
     * assign() hands out n elements per superblock column and advances p.
     * The constant 240 in the malloc below is the sum of the per-sb_col
     * byte counts of all plain uint8_t arrays assigned here
     * (8+8+8+16+16+8+8+64+32+32+8+8+8+8+8 = 240).
     * NOTE(review): the malloc size must be kept in sync with the
     * assign() list below. */
#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    p = av_malloc(s->sb_cols *
                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
                   64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, 1);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    /* The remaining arrays scale with frame height as well. */
    assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
    assign(s->mv[0], VP9MVRefPair *, 64 * s->sb_rows);
    assign(s->mv[1], VP9MVRefPair *, 64 * s->sb_rows);
#undef assign

    return 0;
}
97
98 // The sign bit is at the end, not the start, of a bit sequence
99 static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
100 {
101 int v = get_bits(gb, n);
102 return get_bits1(gb) ? -v : v;
103 }
104
105 static av_always_inline int inv_recenter_nonneg(int v, int m)
106 {
107 if (v > 2 * m)
108 return v;
109 if (v & 1)
110 return m - ((v + 1) >> 1);
111 return m + (v >> 1);
112 }
113
// differential forward probability updates
/*
 * Decode a differential update for probability p (range [1, 255]) from
 * range coder c and return the new probability.
 */
static int update_prob(VP56RangeCoder *c, int p)
{
    /* Inverse mapping from the decoded VLC index to the absolute
     * difference; the first 20 entries are the coarse "cheap" updates. */
    static const int inv_map_table[MAX_PROB - 1] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    /* Subexponential decode of the VLC index d: three escalating
     * fixed-width classes, then the open-ended tail. */
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65) {
            d = (d << 1) - 65 + vp8_rac_get(c);
            /* clamp to keep the table index below in bounds */
            d = av_clip(d, 0, MAX_PROB - 65 - 1);
        }
        d += 64;
    }

    /* Recenter the difference around the old probability; the direction
     * depends on which half of the range p lies in. */
    return p <= 128
           ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
           : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
174
/*
 * Parse the uncompressed frame header followed by the arith-coded
 * ("compressed") probability-update header.
 *
 * On success returns the total number of header bytes consumed, so the
 * caller knows where tile data begins. Returns 0 when the packet merely
 * requests re-display of reference frame *ref (show_existing_frame), or
 * a negative AVERROR code on malformed data.
 */
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        /* show_existing_frame: only a reference index follows */
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);

    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    // FIXME disable this upon resolution change
    s->use_last_frame_mvs = !s->errorres && !last_invisible;

    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff; /* keyframes refresh every reference slot */
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            /* regular inter frame: three reference slots with sign bias */
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]]->buf[0] ||
                !s->refs[s->refidx[1]]->buf[0] ||
                !s->refs[s->refidx[2]]->buf[0]) {
                av_log(avctx, AV_LOG_ERROR,
                       "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            /* frame size: either inherited from one of the references or
             * coded explicitly */
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]]->width;
                h = s->refs[s->refidx[0]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]]->width;
                h = s->refs[s->refidx[1]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]]->width;
                h = s->refs[s->refidx[2]]->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            /* compound prediction is only possible when the references do
             * not all point in the same temporal direction */
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                /* the ref with the odd-one-out sign bias becomes the fixed
                 * compound reference, the other two the variable ones */
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
     * keep the old cache values since they are still valid. */
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            /* per-reference and per-mode filter level deltas */
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    /* all-zero quantizers signal lossless coding */
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            /* per-segment feature data (quantizer, filter level,
             * reference, skip) */
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];

        /* high filter levels apply the lf deltas at double weight */
        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to initialize decoder for %dx%d\n", w, h);
        return ret;
    }
    /* minimum tile columns such that no tile exceeds 64 superblocks wide */
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    /* maximum tile columns such that each tile is at least 4 sbs wide */
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        /* one range coder per tile column */
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) *
                                 s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(avctx, AV_LOG_ERROR,
                   "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    /* keyframes, intra-only and error-resilient frames reset all four
     * stored probability contexts to the defaults */
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p =
        s->prob_ctx[1].p =
        s->prob_ctx[2].p =
        s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    /* intra frames only ever update the coef/eob counters, so only those
     * need clearing; this relies on coef and eob being adjacent in
     * s->counts — NOTE(review): layout-dependent, keep fields adjacent */
    if (s->keyframe || s->intraonly)
        memset(s->counts.coef, 0,
               sizeof(s->counts.coef) + sizeof(s->counts.eob));
    else
        memset(&s->counts, 0, sizeof(s->counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * fw update)? */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            /* explicit forward updates against the stored context */
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            p[3] = 0;
                        }
        } else {
            /* no update: copy the stored context as-is */
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        /* no coef updates are coded for tx sizes above the frame's mode */
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        /* inter-frame-only probability updates */
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    /* total header size = uncompressed part + compressed part */
    return (data2 - data) + size2;
}
654
/*
 * Recursively decode the partition tree for one block at level bl,
 * splitting into up to four sub-blocks down to 8x8 (BL_8X8).
 *
 * yoff/uvoff are byte offsets of the block within the luma/chroma planes.
 * Returns 0 on success or a negative error code from ff_vp9_decode_block().
 */
static int decode_subblock(AVCodecContext *avctx, int row, int col,
                           VP9Filter *lflvl,
                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    /* partition probability context from the above/left partition maps */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    int ret;
    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
                                   : s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; /* half block size, in 8x8 units */

    if (bl == BL_8X8) {
        /* leaf level: no further split possible */
        bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            /* block fully inside the frame: all four partitions possible */
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                break;
            case PARTITION_H:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    /* advance to the bottom half */
                    yoff += hbs * 8 * s->cur_frame->linesize[0];
                    uvoff += hbs * 4 * s->cur_frame->linesize[1];
                    ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    /* advance to the right half */
                    yoff += hbs * 8;
                    uvoff += hbs * 4;
                    ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                /* recurse into the four quadrants: TL, TR, BL, BR */
                ret = decode_subblock(avctx, row, col, lflvl,
                                      yoff, uvoff, bl + 1);
                if (!ret) {
                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs,
                                          bl + 1);
                    if (!ret) {
                        yoff += hbs * 8 * s->cur_frame->linesize[0];
                        uvoff += hbs * 4 * s->cur_frame->linesize[1];
                        ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1);
                        if (!ret) {
                            ret = decode_subblock(avctx, row + hbs, col + hbs,
                                                  lflvl, yoff + 8 * hbs,
                                                  uvoff + 4 * hbs, bl + 1);
                        }
                    }
                }
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
                return AVERROR_INVALIDDATA;
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            /* bottom edge: only SPLIT or H make sense */
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret)
                ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else if (row + hbs < s->rows) {
        /* right edge: only SPLIT or V make sense */
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret) {
                yoff += hbs * 8 * s->cur_frame->linesize[0];
                uvoff += hbs * 4 * s->cur_frame->linesize[1];
                ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                      yoff, uvoff, bl + 1);
            }
        } else {
            bp = PARTITION_V;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else {
        /* bottom-right corner: forced split, only the TL quadrant exists */
        bp = PARTITION_SPLIT;
        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return ret;
}
757
/*
 * Apply the in-loop deblocking filter to one 64x64 superblock at
 * (row, col), using the per-edge filter masks and levels gathered in
 * lflvl during block decoding. Edges between columns (vertical edges)
 * are filtered first, then edges between rows, for Y and then U/V.
 *
 * Throughout: L is the 8-bit filter level (strength in the low nibble),
 * H/E/I are the threshold triplet passed to the dsp filter functions;
 * when two adjacent 8px edges share a filter width they are packed into
 * one 16px call (loop_filter_16 / loop_filter_mix2) with the second
 * edge's thresholds in bits 8-15.
 */
static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                int row, int col,
                                ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *dst = s->cur_frame->data[0] + yoff, *lvl = lflvl->level;
    ptrdiff_t ls_y = s->cur_frame->linesize[0], ls_uv = s->cur_frame->linesize[1];
    int y, x, p;

    /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
     * if you think of them as acting on a 8x8 block max, we can interleave
     * each v/h within the single x loop, but that only works if we work on
     * 8 pixel blocks, and we won't always do that (we want at least 16px
     * to use SSE2 optimizations, perhaps 32 for AVX2). */

    // filter edges between columns, Y plane (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        /* hmask1/hmask2: per-filter-width edge masks for the two 8px rows
         * handled in this iteration; index 3 is the intra-block 4px edge */
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                /* skip the leftmost edge of the frame (col == 0, x == 1) */
                if (col || x > 1) {
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        /* pack the second row's thresholds into the high byte */
                        L = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                /* only the second 8px row has an edge here */
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            /* 4px-offset inner edges (mask index 3) */
            if (hm13 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          block1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          block2
    dst = s->cur_frame->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        /* two horizontal 8px edges are handled per iteration (x and x<<1) */
        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            /* skip the top edge of the frame (row == 0, y == 0) */
            if (row || y) {
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        L = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            /* 4px-offset inner edges (mask index 3) */
            if (vm3 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        /* chroma is subsampled: 4 luma rows / 16 level entries per pass */
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) {
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                /* advance the level pointer every other mask bit
                 * (two mask bits map to one chroma level entry) */
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            /* every other chroma row advances the level pointer by a
             * full luma row of 16 entries */
            if (y & 1)
                lvl += 16;
        }
    }
}
998
/**
 * Compute the extent of one tile along a single dimension.
 *
 * @param start   receives the tile's first position (in 8x8-block units)
 * @param end     receives one past the tile's last position (same units)
 * @param idx     tile index along this dimension
 * @param log2_n  log2 of the number of tiles along this dimension
 * @param n       total size of the dimension in 64x64 superblocks
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_from = (idx * n) >> log2_n;
    int sb_to   = ((idx + 1) * n) >> log2_n;

    /* Clamp to the superblock count, then convert superblock units to
     * 8x8-block units (each superblock spans 8 blocks, hence << 3). */
    *start = (sb_from < n ? sb_from : n) << 3;
    *end   = (sb_to   < n ? sb_to   : n) << 3;
}
1006
/**
 * Decode a single VP9 frame from a (possibly superframe-embedded) buffer.
 *
 * Parses the uncompressed header, allocates the output buffer, runs the
 * per-tile decode and loop-filter passes, then performs probability
 * adaptation and reference-frame bookkeeping.
 *
 * @param avctx     codec context (priv_data is VP9Context)
 * @param frame     frame to decode into (or to alias a reference into, for
 *                  show-existing-frame packets)
 * @param got_frame set to 1 when a displayable frame was produced
 * @param data      compressed bitstream
 * @param size      size of data in bytes
 * @return 0 on success, a negative AVERROR on failure
 */
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    int ret, tile_row, tile_col, i, ref = -1, row, col;
    ptrdiff_t yoff = 0, uvoff = 0;

    ret = decode_frame_header(avctx, data, size, &ref);
    if (ret < 0) {
        return ret;
    } else if (!ret) {
        /* ret == 0: "show existing frame" — no coded data follows; just
         * re-output the requested reference frame. */
        if (!s->refs[ref]->buf[0]) {
            av_log(avctx, AV_LOG_ERROR,
                   "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }

        ret = av_frame_ref(frame, s->refs[ref]);
        if (ret < 0)
            return ret;
        *got_frame = 1;
        return 0;
    }
    /* Skip past the uncompressed header (ret == header size in bytes). */
    data += ret;
    size -= ret;

    s->cur_frame = frame;

    av_frame_unref(s->cur_frame);
    /* Request a reference-counted buffer only if this frame will be kept
     * as a reference for later frames. */
    if ((ret = ff_get_buffer(avctx, s->cur_frame,
                             s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    s->cur_frame->key_frame = s->keyframe;
    s->cur_frame->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                          : AV_PICTURE_TYPE_P;

    // main tile decode loop
    /* Reset the "above" entropy/prediction contexts for the whole first
     * row of superblocks before tile decoding begins. */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly)
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    else
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);
        /* First pass over this tile row: locate each tile's compressed
         * payload and initialize one range decoder per tile column. */
        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            /* The last tile has no explicit size field; it spans the
             * remainder of the packet. */
            if (tile_col == s->tiling.tile_cols - 1 &&
                tile_row == s->tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size)
                return AVERROR_INVALIDDATA;
            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
                return AVERROR_INVALIDDATA;
            data += tile_size;
            size -= tile_size;
        }

        /* Decode this tile row one 64-pixel superblock row at a time,
         * iterating over all tile columns within each sb row so the
         * loop filter can run on complete rows below. */
        for (row = s->tiling.tile_row_start;
             row < s->tiling.tile_row_end;
             row += 8, yoff += s->cur_frame->linesize[0] * 64,
             uvoff += s->cur_frame->linesize[1] * 32) {
            VP9Filter *lflvl = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                set_tile_offset(&s->tiling.tile_col_start,
                                &s->tiling.tile_col_end,
                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                /* Reset the "left" contexts at each tile-column boundary;
                 * tiles are independently decodable horizontally. */
                memset(s->left_partition_ctx, 0, 8);
                memset(s->left_skip_ctx, 0, 8);
                if (s->keyframe || s->intraonly)
                    memset(s->left_mode_ctx, DC_PRED, 16);
                else
                    memset(s->left_mode_ctx, NEARESTMV, 8);
                memset(s->left_y_nnz_ctx, 0, 16);
                memset(s->left_uv_nnz_ctx, 0, 16);
                memset(s->left_segpred_ctx, 0, 8);

                /* Swap in this tile's range decoder state, decode one sb
                 * row of the tile, then save the state back for the next
                 * sb row of the same tile. */
                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                for (col = s->tiling.tile_col_start;
                     col < s->tiling.tile_col_end;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    memset(lflvl->mask, 0, sizeof(lflvl->mask));

                    if ((ret = decode_subblock(avctx, row, col, lflvl,
                                               yoff2, uvoff2, BL_64X64)) < 0)
                        return ret;
                }
                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       s->cur_frame->data[0] + yoff +
                       63 * s->cur_frame->linesize[0],
                       8 * s->cols);
                memcpy(s->intra_pred_data[1],
                       s->cur_frame->data[1] + uvoff +
                       31 * s->cur_frame->linesize[1],
                       4 * s->cols);
                memcpy(s->intra_pred_data[2],
                       s->cur_frame->data[2] + uvoff +
                       31 * s->cur_frame->linesize[2],
                       4 * s->cols);
            }

            // loopfilter one row
            if (s->filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
                    loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
            }
        }
    }

    // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
    // probability maintenance between frames)
    if (s->refreshctx) {
        if (s->parallelmode) {
            /* Parallel mode: store the (unadapted) probabilities straight
             * into the selected frame context slot. */
            memcpy(s->prob_ctx[s->framectxid].coef, s->prob.coef,
                   sizeof(s->prob.coef));
            s->prob_ctx[s->framectxid].p = s->prob.p;
        } else {
            ff_vp9_adapt_probs(s);
        }
    }
    /* The just-decoded MVs become the "previous frame" MVs for the next
     * frame's temporal MV prediction. */
    FFSWAP(VP9MVRefPair *, s->mv[0], s->mv[1]);

    // ref frame setup
    for (i = 0; i < 8; i++)
        if (s->refreshrefmask & (1 << i)) {
            av_frame_unref(s->refs[i]);
            ret = av_frame_ref(s->refs[i], s->cur_frame);
            if (ret < 0)
                return ret;
        }

    /* Invisible (alt-ref) frames update references but are not output. */
    if (s->invisible)
        av_frame_unref(s->cur_frame);
    else
        *got_frame = 1;

    return 0;
}
1172
1173 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1174 int *got_frame, AVPacket *avpkt)
1175 {
1176 const uint8_t *data = avpkt->data;
1177 int size = avpkt->size;
1178 int marker, ret;
1179
1180 /* Read superframe index - this is a collection of individual frames
1181 * that together lead to one visible frame */
1182 marker = data[size - 1];
1183 if ((marker & 0xe0) == 0xc0) {
1184 int nbytes = 1 + ((marker >> 3) & 0x3);
1185 int n_frames = 1 + (marker & 0x7);
1186 int idx_sz = 2 + n_frames * nbytes;
1187
1188 if (size >= idx_sz && data[size - idx_sz] == marker) {
1189 const uint8_t *idx = data + size + 1 - idx_sz;
1190
1191 while (n_frames--) {
1192 int sz = AV_RL32(idx);
1193
1194 if (nbytes < 4)
1195 sz &= (1 << (8 * nbytes)) - 1;
1196 idx += nbytes;
1197
1198 if (sz > size) {
1199 av_log(avctx, AV_LOG_ERROR,
1200 "Superframe packet size too big: %d > %d\n",
1201 sz, size);
1202 return AVERROR_INVALIDDATA;
1203 }
1204
1205 ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
1206 if (ret < 0)
1207 return ret;
1208 data += sz;
1209 size -= sz;
1210 }
1211 return size;
1212 }
1213 }
1214
1215 /* If we get here, there was no valid superframe index, i.e. this is just
1216 * one whole single frame. Decode it as such from the complete input buf. */
1217 if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
1218 return ret;
1219 return size;
1220 }
1221
1222 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1223 {
1224 VP9Context *s = avctx->priv_data;
1225 int i;
1226
1227 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
1228 av_frame_free(&s->refs[i]);
1229
1230 av_freep(&s->c_b);
1231 av_freep(&s->above_partition_ctx);
1232
1233 return 0;
1234 }
1235
1236 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1237 {
1238 VP9Context *s = avctx->priv_data;
1239 int i;
1240
1241 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1242
1243 ff_vp9dsp_init(&s->dsp);
1244 ff_videodsp_init(&s->vdsp, 8);
1245
1246 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1247 s->refs[i] = av_frame_alloc();
1248 if (!s->refs[i]) {
1249 vp9_decode_free(avctx);
1250 return AVERROR(ENOMEM);
1251 }
1252 }
1253
1254 s->filter.sharpness = -1;
1255
1256 return 0;
1257 }
1258
/* Public codec registration for the native VP9 decoder. CODEC_CAP_DR1
 * advertises support for user-supplied (direct-rendering) buffers via
 * the get_buffer mechanism. */
AVCodec ff_vp9_decoder = {
    .name           = "vp9",
    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP9,
    .priv_data_size = sizeof(VP9Context),
    .init           = vp9_decode_init,
    .decode         = vp9_decode_packet,
    .flush          = vp9_decode_flush,
    .close          = vp9_decode_free,
    .capabilities   = CODEC_CAP_DR1,
};