50b84ae97e6a85482e68f747df38606e0d038d5e
[libav.git] / libavcodec / vp9.c
1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "libavutil/avassert.h"
25
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33
34 #define VP9_SYNCCODE 0x498342
35 #define MAX_PROB 255
36
37 static void vp9_decode_flush(AVCodecContext *avctx)
38 {
39 VP9Context *s = avctx->priv_data;
40 int i;
41
42 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
43 av_frame_unref(s->refs[i]);
44 }
45
/* (Re)allocate all per-frame context buffers for a w x h frame.
 * Returns 0 on success (or if the size is unchanged and buffers exist),
 * AVERROR_INVALIDDATA for non-positive dimensions, AVERROR(ENOMEM) on
 * allocation failure. */
static int update_size(AVCodecContext *avctx, int w, int h)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *p;

    // Fast path: buffers already allocated for exactly this resolution.
    if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
        return 0;

    // Resolution changed: existing reference frames are no longer usable.
    vp9_decode_flush(avctx);

    if (w <= 0 || h <= 0)
        return AVERROR_INVALIDDATA;

    avctx->width = w;
    avctx->height = h;
    s->sb_cols = (w + 63) >> 6; // frame size in 64x64 superblocks
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;     // frame size in 8x8 blocks
    s->rows = (h + 7) >> 3;

    /* All context arrays live in ONE allocation; assign() carves out n
     * entries per superblock column for each array in turn, so the order
     * and counts below must match the size computation exactly. */
#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    /* 240 = sum of the per-sb-column counts of all uint8_t arrays assigned
     * below: 8*10 + 16*2 + 64 + 32*2.  Keep in sync with the assign list. */
    p = av_malloc(s->sb_cols *
                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
                   64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, 1);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    // the last three are per-row as well (64 8x8 blocks per sb row)
    assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
    assign(s->mv[0], VP9MVRefPair *, 64 * s->sb_rows);
    assign(s->mv[1], VP9MVRefPair *, 64 * s->sb_rows);
#undef assign

    return 0;
}
97
98 // The sign bit is at the end, not the start, of a bit sequence
99 static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
100 {
101 int v = get_bits(gb, n);
102 return get_bits1(gb) ? -v : v;
103 }
104
105 static av_always_inline int inv_recenter_nonneg(int v, int m)
106 {
107 if (v > 2 * m)
108 return v;
109 if (v & 1)
110 return m - ((v + 1) >> 1);
111 return m + (v >> 1);
112 }
113
// differential forward probability updates
/* Decode a differential update for probability p (in [1, 255]) and return
 * the new probability.  The difference is coded as a subexponential VLC
 * whose index is mapped through inv_map_table[]. */
static int update_prob(VP56RangeCoder *c, int p)
{
    /* Maps the decoded VLC index onto an absolute difference.  The first 20
     * entries are the 'coarse' update values (see the comment below); valid
     * indices are 0..253, matching the table size of MAX_PROB - 1. */
    static const int inv_map_table[MAX_PROB - 1] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // Three fixed-size buckets (4/4/5 bits), then a 7-bit escape range.
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65) {
            // extend by one extra bit, clamped so d + 64 stays <= 253
            d = (d << 1) - 65 + vp8_rac_get(c);
            d = av_clip(d, 0, MAX_PROB - 65 - 1);
        }
        d += 64;
    }

    // Recenter the difference around p, keeping the result in [1, 255].
    return p <= 128
        ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
        : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
174
/* Parse the uncompressed frame header and the arithmetic-coded ("compressed")
 * header of a VP9 frame.
 *
 * Returns the total number of header bytes consumed on success; returns 0 for
 * a "show existing frame" packet (with *ref set to the reference slot to
 * display); returns a negative AVERROR on bitstream errors. */
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    /* NOTE(review): only one profile bit is read here, so only profiles 0/1
     * can be distinguished — presumably fine for the profile-0-only support
     * this decoder advertises below; confirm before extending. */
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        // show-existing-frame: no frame data follows, just a slot index
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);

    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    // FIXME disable this upon resolution change
    s->use_last_frame_mvs = !s->errorres && !last_invisible;

    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff; // keyframes refresh all reference slots
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            // regular inter frame: 3 reference slots plus sign biases
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]]->buf[0] ||
                !s->refs[s->refidx[1]]->buf[0] ||
                !s->refs[s->refidx[2]]->buf[0]) {
                av_log(avctx, AV_LOG_ERROR,
                       "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            // frame size is either inherited from a reference or explicit
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]]->width;
                h = s->refs[s->refidx[0]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]]->width;
                h = s->refs[s->refidx[1]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]]->width;
                h = s->refs[s->refidx[2]]->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            // compound prediction needs refs with differing sign biases
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                /* the ref whose bias differs from the other two is the
                 * fixed compound ref; the other two are variable */
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
     * keep the old cache values since they are still valid. */
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) { // per-ref / per-mode delta update flag
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    // all-zero quantizers signal lossless coding (forces TX_4X4 below)
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) { // update segmentation feature data
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        // segmentation off: only feat[0] is consulted below, so clear it
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];

        // high filter levels apply the lf deltas at double strength
        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to initialize decoder for %dx%d\n", w, h);
        return ret;
    }
    // minimum tile columns such that each tile is at most 64 sbs wide
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    // maximum tile columns such that each tile is at least 4 sbs wide
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        // tile column count changed: need one range coder per tile column
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) *
                                 s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(avctx, AV_LOG_ERROR,
                   "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    // reset all four probability contexts to the spec defaults
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p =
        s->prob_ctx[1].p =
        s->prob_ctx[2].p =
        s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    /* NOTE(review): the first branch assumes counts.eob immediately follows
     * counts.coef in the VP9Context layout (one memset covers both) —
     * verify against the struct definition before reordering fields. */
    if (s->keyframe || s->intraonly)
        memset(s->counts.coef, 0,
               sizeof(s->counts.coef) + sizeof(s->counts.eob));
    else
        memset(&s->counts, 0, sizeof(s->counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * fw update)? */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        // NOTE(review): this 'ref' shadows the function's 'ref' parameter
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            /* NOTE(review): '>' here vs '>=' in the update
                             * branch above — this copies one extra band
                             * entry at m == 3; looks intentional (harmless
                             * copy) but worth confirming against upstream */
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        // coef probs are only coded up to the current txfm mode's size
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        // inter-frame-only probability updates
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    // total header bytes consumed: uncompressed part + compressed part
    return (data2 - data) + size2;
}
654
/* Recursively parse the partition tree for one block at level bl and decode
 * the resulting leaf blocks via ff_vp9_decode_block().  row/col are in 8x8
 * block units; yoff/uvoff are byte offsets into the current frame planes.
 * Returns 0 on success or a negative AVERROR from the leaf decoder. */
static int decode_subblock(AVCodecContext *avctx, int row, int col,
                           VP9Filter *lflvl,
                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    // partition probability context from the above and left neighbours
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    int ret;
    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
                                   : s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; // half the block size, in 8x8 units

    if (bl == BL_8X8) {
        // smallest level: partition choice is decoded but not recursed into
        bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            // block fully inside the frame: all four partitions are possible
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                break;
            case PARTITION_H:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    // advance to the lower half (hbs 8x8 rows down)
                    yoff += hbs * 8 * s->cur_frame->linesize[0];
                    uvoff += hbs * 4 * s->cur_frame->linesize[1];
                    ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    // advance to the right half (hbs 8x8 columns over)
                    yoff += hbs * 8;
                    uvoff += hbs * 4;
                    ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                // recurse into the four quadrants at the next level
                ret = decode_subblock(avctx, row, col, lflvl,
                                      yoff, uvoff, bl + 1);
                if (!ret) {
                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs,
                                          bl + 1);
                    if (!ret) {
                        yoff += hbs * 8 * s->cur_frame->linesize[0];
                        uvoff += hbs * 4 * s->cur_frame->linesize[1];
                        ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1);
                        if (!ret) {
                            ret = decode_subblock(avctx, row + hbs, col + hbs,
                                                  lflvl, yoff + 8 * hbs,
                                                  uvoff + 4 * hbs, bl + 1);
                        }
                    }
                }
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
                return AVERROR_INVALIDDATA;
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            // bottom edge: only SPLIT or H are valid, coded with one bit
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret)
                ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else if (row + hbs < s->rows) {
        // right edge: only SPLIT or V are valid, coded with one bit
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret) {
                yoff += hbs * 8 * s->cur_frame->linesize[0];
                uvoff += hbs * 4 * s->cur_frame->linesize[1];
                ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                      yoff, uvoff, bl + 1);
            }
        } else {
            bp = PARTITION_V;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else {
        // bottom-right corner: SPLIT is implied, no bits are read
        bp = PARTITION_SPLIT;
        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return ret;
}
757
/* Apply the in-loop deblocking filter to one 64x64 superblock.  lflvl holds
 * the per-8x8 filter levels and the edge masks built during block decoding;
 * yoff/uvoff are byte offsets of this superblock into the frame planes.
 * Within each plane, vertical (column) edges are filtered first, then
 * horizontal (row) edges, matching the spec's ordering. */
static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                int row, int col,
                                ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *dst = s->cur_frame->data[0] + yoff, *lvl = lflvl->level;
    ptrdiff_t ls_y = s->cur_frame->linesize[0], ls_uv = s->cur_frame->linesize[1];
    int y, x, p;

    /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
     * if you think of them as acting on a 8x8 block max, we can interleave
     * each v/h within the single x loop, but that only works if we work on
     * 8 pixel blocks, and we won't always do that (we want at least 16px
     * to use SSE2 optimizations, perhaps 32 for AVX2). */

    // filter edges between columns, Y plane (e.g. block1 | block2)
    // processes two 8px rows of 8x8 blocks per iteration (hmask1/hmask2)
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        // mask[..][0..2] select the filter size (16/8/4); [3] is inner 4px edges
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) { // never filter the frame's left edge
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        // pack the second row's L/E/I/H into the high byte
                        L = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            // inner 4px edges (offset +4 within the 8x8 block)
            if (hm13 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          block1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          block2
    dst = s->cur_frame->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        // two mask bits per 8px column, hence x <<= 2 and l += 2
        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            if (row || y) { // never filter the frame's top edge
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        L = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            // inner 4px edges (offset 4 lines down)
            if (vm3 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes (half resolution, so the level
    // array is walked at twice the stride per chroma pixel step)
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) {
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                // advance the level pointer on every second mask bit
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            // chroma rows map 2:1 onto luma level rows
            if (y & 1)
                lvl += 16;
        }
    }
}
998
/* Compute the pixel span [*start, *end) covered by tile number idx,
 * given the total superblock count n and log2 of the tile count.
 * Superblock indices are clamped to n, then scaled by 8 (one
 * superblock-row/col unit is 8 pixels here). */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    *start = first_sb << 3;
    *end   = last_sb << 3;
}
1006
/**
 * Decode one VP9 frame (a single element of a superframe) from data/size.
 *
 * On a "show existing frame" header (decode_frame_header() returns 0) the
 * requested reference is output directly and no tile data is parsed.
 * Otherwise the tile data is decoded into a freshly allocated buffer,
 * loop-filtered row by row, and the reference slots selected by
 * s->refreshrefmask are updated.
 *
 * Returns 0 on success, a negative AVERROR code on failure; *got_frame is
 * set to 1 when a displayable frame was produced.
 */
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    int ret, tile_row, tile_col, i, ref = -1, row, col;
    ptrdiff_t yoff = 0, uvoff = 0;

    ret = decode_frame_header(avctx, data, size, &ref);
    if (ret < 0) {
        return ret;
    } else if (!ret) {
        // ret == 0: "show existing frame" — output the reference named by
        // the header; there is no coded frame data to decode.
        if (!s->refs[ref]->buf[0]) {
            av_log(avctx, AV_LOG_ERROR,
                   "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }

        ret = av_frame_ref(frame, s->refs[ref]);
        if (ret < 0)
            return ret;
        *got_frame = 1;
        return 0;
    }
    // ret > 0: number of header bytes consumed; the remainder is tile data.
    data += ret;
    size -= ret;

    s->cur_frame = frame;

    av_frame_unref(s->cur_frame);
    // Request a ref-counted buffer when this frame will be kept as a
    // reference for later frames.
    if ((ret = ff_get_buffer(avctx, s->cur_frame,
                             s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    s->cur_frame->key_frame = s->keyframe;
    s->cur_frame->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                          : AV_PICTURE_TYPE_P;

    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    // Map the 3-bit bitstream colorspace code to lavu colorspace values;
    // codes outside 1..4 leave avctx->colorspace untouched.
    switch (s->colorspace) {
    case 1: avctx->colorspace = AVCOL_SPC_BT470BG; break;
    case 2: avctx->colorspace = AVCOL_SPC_BT709; break;
    case 3: avctx->colorspace = AVCOL_SPC_SMTPE170M; break;
    case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
    }

    // main tile decode loop
    // Reset the "above" (top-row) contexts before decoding the frame.
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly)
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    else
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);
        // First pass over this tile row: locate each tile's compressed data
        // and set up one range decoder per tile column.
        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->tiling.tile_cols - 1 &&
                tile_row == s->tiling.tile_rows - 1) {
                // The last tile has no explicit size; it spans the rest of
                // the packet.
                tile_size = size;
            } else {
                // NOTE(review): no check that at least 4 bytes remain before
                // this read — TODO confirm the header parser guarantees it.
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size)
                return AVERROR_INVALIDDATA;
            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
                return AVERROR_INVALIDDATA;
            data += tile_size;
            size -= tile_size;
        }

        // Second pass: decode the tile row one superblock row at a time,
        // interleaving all tile columns so the loop filter can run per row.
        for (row = s->tiling.tile_row_start;
             row < s->tiling.tile_row_end;
             row += 8, yoff += s->cur_frame->linesize[0] * 64,
             uvoff += s->cur_frame->linesize[1] * 32) {
            VP9Filter *lflvl = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                set_tile_offset(&s->tiling.tile_col_start,
                                &s->tiling.tile_col_end,
                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                // Reset the "left" contexts at each tile-column boundary.
                memset(s->left_partition_ctx, 0, 8);
                memset(s->left_skip_ctx, 0, 8);
                if (s->keyframe || s->intraonly)
                    memset(s->left_mode_ctx, DC_PRED, 16);
                else
                    memset(s->left_mode_ctx, NEARESTMV, 8);
                memset(s->left_y_nnz_ctx, 0, 16);
                memset(s->left_uv_nnz_ctx, 0, 16);
                memset(s->left_segpred_ctx, 0, 8);

                // Swap in this tile's range decoder state, decode one row of
                // 64x64 superblocks, then save the state back.
                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                for (col = s->tiling.tile_col_start;
                     col < s->tiling.tile_col_end;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    memset(lflvl->mask, 0, sizeof(lflvl->mask));

                    if ((ret = decode_subblock(avctx, row, col, lflvl,
                                               yoff2, uvoff2, BL_64X64)) < 0)
                        return ret;
                }
                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       s->cur_frame->data[0] + yoff +
                       63 * s->cur_frame->linesize[0],
                       8 * s->cols);
                memcpy(s->intra_pred_data[1],
                       s->cur_frame->data[1] + uvoff +
                       31 * s->cur_frame->linesize[1],
                       4 * s->cols);
                memcpy(s->intra_pred_data[2],
                       s->cur_frame->data[2] + uvoff +
                       31 * s->cur_frame->linesize[2],
                       4 * s->cols);
            }

            // loopfilter one row
            if (s->filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
                    loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
            }
        }
    }

    // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
    // probability maintenance between frames)
    if (s->refreshctx) {
        if (s->parallelmode) {
            int j, k, l, m;
            // In parallel mode the decoded probabilities are stored as-is
            // (forward adaptation); only tx sizes up to s->txfmmode exist.
            for (i = 0; i < 4; i++) {
                for (j = 0; j < 2; j++)
                    for (k = 0; k < 2; k++)
                        for (l = 0; l < 6; l++)
                            for (m = 0; m < 6; m++)
                                memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                       s->prob.coef[i][j][k][l][m], 3);
                if (s->txfmmode == i)
                    break;
            }
            s->prob_ctx[s->framectxid].p = s->prob.p;
        } else {
            ff_vp9_adapt_probs(s);
        }
    }
    // Current-frame MVs become the previous-frame MVs for the next frame.
    FFSWAP(VP9MVRefPair *, s->mv[0], s->mv[1]);

    // ref frame setup
    for (i = 0; i < 8; i++)
        if (s->refreshrefmask & (1 << i)) {
            av_frame_unref(s->refs[i]);
            ret = av_frame_ref(s->refs[i], s->cur_frame);
            if (ret < 0)
                return ret;
        }

    if (s->invisible)
        av_frame_unref(s->cur_frame);
    else
        *got_frame = 1;

    return 0;
}
1193
1194 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1195 int *got_frame, AVPacket *avpkt)
1196 {
1197 const uint8_t *data = avpkt->data;
1198 int size = avpkt->size;
1199 int marker, ret;
1200
1201 /* Read superframe index - this is a collection of individual frames
1202 * that together lead to one visible frame */
1203 marker = data[size - 1];
1204 if ((marker & 0xe0) == 0xc0) {
1205 int nbytes = 1 + ((marker >> 3) & 0x3);
1206 int n_frames = 1 + (marker & 0x7);
1207 int idx_sz = 2 + n_frames * nbytes;
1208
1209 if (size >= idx_sz && data[size - idx_sz] == marker) {
1210 const uint8_t *idx = data + size + 1 - idx_sz;
1211
1212 while (n_frames--) {
1213 unsigned sz = AV_RL32(idx);
1214
1215 if (nbytes < 4)
1216 sz &= (1 << (8 * nbytes)) - 1;
1217 idx += nbytes;
1218
1219 if (sz > size) {
1220 av_log(avctx, AV_LOG_ERROR,
1221 "Superframe packet size too big: %u > %d\n",
1222 sz, size);
1223 return AVERROR_INVALIDDATA;
1224 }
1225
1226 ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
1227 if (ret < 0)
1228 return ret;
1229 data += sz;
1230 size -= sz;
1231 }
1232 return size;
1233 }
1234 }
1235
1236 /* If we get here, there was no valid superframe index, i.e. this is just
1237 * one whole single frame. Decode it as such from the complete input buf. */
1238 if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
1239 return ret;
1240 return size;
1241 }
1242
1243 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1244 {
1245 VP9Context *s = avctx->priv_data;
1246 int i;
1247
1248 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
1249 av_frame_free(&s->refs[i]);
1250
1251 av_freep(&s->c_b);
1252 av_freep(&s->above_partition_ctx);
1253
1254 return 0;
1255 }
1256
1257 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1258 {
1259 VP9Context *s = avctx->priv_data;
1260 int i;
1261
1262 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1263
1264 ff_vp9dsp_init(&s->dsp);
1265 ff_videodsp_init(&s->vdsp, 8);
1266
1267 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1268 s->refs[i] = av_frame_alloc();
1269 if (!s->refs[i]) {
1270 vp9_decode_free(avctx);
1271 return AVERROR(ENOMEM);
1272 }
1273 }
1274
1275 s->filter.sharpness = -1;
1276
1277 return 0;
1278 }
1279
/* Registration entry for the native VP9 decoder. CODEC_CAP_DR1 advertises
 * that the decoder uses get_buffer() and supports custom allocators. */
AVCodec ff_vp9_decoder = {
    .name           = "vp9",
    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP9,
    .priv_data_size = sizeof(VP9Context),
    .init           = vp9_decode_init,
    .decode         = vp9_decode_packet,
    .flush          = vp9_decode_flush,
    .close          = vp9_decode_free,
    .capabilities   = CODEC_CAP_DR1,
};