vp9: split last/cur_frame from the reference buffers.
[libav.git] / libavcodec / vp9.c
1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "libavutil/avassert.h"
25
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33
34 #define VP9_SYNCCODE 0x498342
35 #define MAX_PROB 255
36
37 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
38 {
39 ff_thread_release_buffer(avctx, &f->tf);
40 av_buffer_unref(&f->segmentation_map_buf);
41 av_buffer_unref(&f->mv_buf);
42 f->segmentation_map = NULL;
43 f->mv = NULL;
44 }
45
46 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
47 {
48 VP9Context *s = avctx->priv_data;
49 int ret, sz;
50
51 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
52 if (ret < 0)
53 return ret;
54
55 sz = 64 * s->sb_cols * s->sb_rows;
56 f->segmentation_map_buf = av_buffer_allocz(sz * sizeof(*f->segmentation_map));
57 f->mv_buf = av_buffer_allocz(sz * sizeof(*f->mv));
58 if (!f->segmentation_map_buf || !f->mv_buf) {
59 vp9_frame_unref(avctx, f);
60 return AVERROR(ENOMEM);
61 }
62
63 f->segmentation_map = f->segmentation_map_buf->data;
64 f->mv = (VP9MVRefPair*)f->mv_buf->data;
65
66 if (s->segmentation.enabled && !s->segmentation.update_map &&
67 !s->keyframe && !s->intraonly)
68 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
69
70 return 0;
71 }
72
73 static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
74 {
75 int ret;
76
77 dst->segmentation_map_buf = av_buffer_ref(src->segmentation_map_buf);
78 dst->mv_buf = av_buffer_ref(src->mv_buf);
79 if (!dst->segmentation_map_buf || !dst->mv_buf) {
80 ret = AVERROR(ENOMEM);
81 goto fail;
82 }
83
84 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
85 if (ret < 0)
86 goto fail;
87
88 dst->segmentation_map = src->segmentation_map;
89 dst->mv = src->mv;
90
91 return 0;
92 fail:
93 av_buffer_unref(&dst->segmentation_map_buf);
94 av_buffer_unref(&dst->mv_buf);
95 return ret;
96 }
97
98 static void vp9_decode_flush(AVCodecContext *avctx)
99 {
100 VP9Context *s = avctx->priv_data;
101 int i;
102
103 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
104 vp9_frame_unref(avctx, &s->frames[i]);
105
106 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
107 ff_thread_release_buffer(avctx, &s->refs[i]);
108 }
109
/* (Re)initialize the decoder for a new coded frame size: sets the
 * superblock/block grid dimensions and (re)allocates all "above" context
 * arrays in a single slab. Returns 0 on success or a negative AVERROR. */
static int update_size(AVCodecContext *avctx, int w, int h)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *p;

    /* fast path: context already allocated for exactly this size */
    if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
        return 0;

    /* a size change invalidates every cached frame and reference */
    vp9_decode_flush(avctx);

    if (w <= 0 || h <= 0)
        return AVERROR_INVALIDDATA;

    avctx->width  = w;
    avctx->height = h;
    s->sb_cols = (w + 63) >> 6; /* 64x64 superblock columns */
    s->sb_rows = (h + 63) >> 6; /* 64x64 superblock rows */
    s->cols = (w + 7) >> 3;     /* 8x8 block columns */
    s->rows = (h + 7) >> 3;     /* 8x8 block rows */

    /* Carve all per-column context arrays out of one allocation; each
     * assign() hands out n entries per superblock column and advances p.
     * 240 is the sum of the per-sb-column byte counts of the uint8_t
     * arrays below: 8+8+8+16+16+8+8+64+32+32+8+8+8+8+8. */
#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    p = av_malloc(s->sb_cols *
                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, 1);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
#undef assign

    return 0;
}
157
158 // The sign bit is at the end, not the start, of a bit sequence
159 static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
160 {
161 int v = get_bits(gb, n);
162 return get_bits1(gb) ? -v : v;
163 }
164
165 static av_always_inline int inv_recenter_nonneg(int v, int m)
166 {
167 if (v > 2 * m)
168 return v;
169 if (v & 1)
170 return m - ((v + 1) >> 1);
171 return m + (v >> 1);
172 }
173
174 // differential forward probability updates
175 static int update_prob(VP56RangeCoder *c, int p)
176 {
177 static const int inv_map_table[MAX_PROB - 1] = {
178 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
179 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
180 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
181 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
182 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
183 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
184 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
185 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
186 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
187 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
188 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
189 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
190 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
191 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
192 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
193 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
194 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
195 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
196 252, 253,
197 };
198 int d;
199
200 /* This code is trying to do a differential probability update. For a
201 * current probability A in the range [1, 255], the difference to a new
202 * probability of any value can be expressed differentially as 1-A, 255-A
203 * where some part of this (absolute range) exists both in positive as
204 * well as the negative part, whereas another part only exists in one
205 * half. We're trying to code this shared part differentially, i.e.
206 * times two where the value of the lowest bit specifies the sign, and
207 * the single part is then coded on top of this. This absolute difference
208 * then again has a value of [0, 254], but a bigger value in this range
209 * indicates that we're further away from the original value A, so we
210 * can code this as a VLC code, since higher values are increasingly
211 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
212 * updates vs. the 'fine, exact' updates further down the range, which
213 * adds one extra dimension to this differential update model. */
214
215 if (!vp8_rac_get(c)) {
216 d = vp8_rac_get_uint(c, 4) + 0;
217 } else if (!vp8_rac_get(c)) {
218 d = vp8_rac_get_uint(c, 4) + 16;
219 } else if (!vp8_rac_get(c)) {
220 d = vp8_rac_get_uint(c, 5) + 32;
221 } else {
222 d = vp8_rac_get_uint(c, 7);
223 if (d >= 65) {
224 d = (d << 1) - 65 + vp8_rac_get(c);
225 d = av_clip(d, 0, MAX_PROB - 65 - 1);
226 }
227 d += 64;
228 }
229
230 return p <= 128
231 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
232 : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
233 }
234
/* Parse the VP9 uncompressed frame header and the arithmetic-coded
 * compressed header (probability updates).
 * On success returns the total number of header bytes consumed; for a
 * show-existing-frame packet, sets *ref to the reference slot to display
 * and returns 0. Returns a negative AVERROR code on invalid input. */
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    // show_existing_frame: nothing to decode, just report which slot to show
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);

    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    // FIXME disable this upon resolution change
    s->use_last_frame_mvs = !s->errorres && !last_invisible;

    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);

        // subsampling bits
        if (s->profile == 1 || s->profile == 3) {
            s->sub_x = get_bits1(&s->gb);
            s->sub_y = get_bits1(&s->gb);
            if (s->sub_x && s->sub_y) {
                av_log(avctx, AV_LOG_ERROR,
                       "4:2:0 color not supported in profile 1 or 3\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) { // reserved bit
                av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            s->sub_x = s->sub_y = 1;
        }
        // only 4:2:0 is implemented in this decoder
        if (!s->sub_x || !s->sub_y) {
            avpriv_report_missing_feature(avctx, "Subsampling %d:%d",
                                          s->sub_x, s->sub_y);
            return AVERROR_PATCHWELCOME;
        }

        s->refreshrefmask = 0xff; // keyframes refresh every reference slot
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            /* inter frame: 3 active references with per-reference sign bias */
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->buf[0] ||
                !s->refs[s->refidx[1]].f->buf[0] ||
                !s->refs[s->refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR,
                       "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            /* frame size: either inherited from one of the three
             * references, or coded explicitly */
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            /* compound prediction is only possible when the references do
             * not all share the same sign bias */
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                /* the odd-one-out reference becomes the fixed compound ref */
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
     * keep the old cache values since they are still valid. */
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            /* per-reference and per-mode loopfilter level deltas */
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            /* per-segment feature data: Q delta, LF delta, reference frame
             * override and skip flag */
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        /* segmentation disabled: only feat[0] is consulted below, so clear
         * just that entry */
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];

        /* deltas are scaled up for high base filter levels */
        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        /* lflvl[ref][mode]: ref 0 (intra) has a single mode entry */
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to initialize decoder for %dx%d\n", w, h);
        return ret;
    }
    /* minimum tile columns so each tile is at most 64 superblocks wide */
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    /* maximum: each tile must span at least 4 superblock columns */
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        /* tile column count changed: grow the per-tile range coder array */
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) *
                                 s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(avctx, AV_LOG_ERROR,
                   "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    /* keyframes, error-resilient and intra-only frames reset all four
     * probability contexts to the spec defaults */
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p =
        s->prob_ctx[1].p =
        s->prob_ctx[2].p =
        s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    /* intra frames only accumulate coef/eob counts; this relies on eob
     * directly following coef in the counts struct */
    if (s->keyframe || s->intraonly)
        memset(s->counts.coef, 0,
               sizeof(s->counts.coef) + sizeof(s->counts.eob));
    else
        memset(&s->counts, 0, sizeof(s->counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * fw update)? */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            /* forward updates coded in the bitstream */
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            p[3] = 0;
                        }
        } else {
            /* no updates: copy the saved context */
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            /* NOTE(review): this branch breaks at m == 4
                             * while the update branch above breaks at
                             * m == 3; copying the extra (unused) entry
                             * appears harmless — confirm against spec */
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        /* probs for larger transforms than the active mode are not coded */
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        /* inter-only probability updates follow */
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    /* total bytes consumed: uncompressed header + compressed header */
    return (data2 - data) + size2;
}
736
/* Recursively decode one partition subtree starting at (row, col) in 8x8
 * block units, at block level bl (BL_64X64 down to BL_8X8). Reads the
 * partition symbol (or infers it at the frame edge), decodes the resulting
 * block(s) and accumulates partition counts. Returns 0 or a negative
 * AVERROR code. */
static int decode_subblock(AVCodecContext *avctx, int row, int col,
                           VP9Filter *lflvl,
                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    /* partition context from the above/left neighbours */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    int ret;
    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
                                   : s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; /* half block size, in 8x8 units */

    if (bl == BL_8X8) {
        /* leaf level: the partition only selects the sub-8x8 split */
        bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            /* fully inside the frame: all four partition types possible */
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                break;
            case PARTITION_H:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    /* advance to the bottom half */
                    yoff  += hbs * 8 * f->linesize[0];
                    uvoff += hbs * 4 * f->linesize[1];
                    ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    /* advance to the right half */
                    yoff  += hbs * 8;
                    uvoff += hbs * 4;
                    ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                /* recurse into the four quadrants in raster order */
                ret = decode_subblock(avctx, row, col, lflvl,
                                      yoff, uvoff, bl + 1);
                if (!ret) {
                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs,
                                          bl + 1);
                    if (!ret) {
                        yoff  += hbs * 8 * f->linesize[0];
                        uvoff += hbs * 4 * f->linesize[1];
                        ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1);
                        if (!ret) {
                            ret = decode_subblock(avctx, row + hbs, col + hbs,
                                                  lflvl, yoff + 8 * hbs,
                                                  uvoff + 4 * hbs, bl + 1);
                        }
                    }
                }
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
                return AVERROR_INVALIDDATA;
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            /* bottom edge: only SPLIT or H fit; a single bit decides */
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret)
                ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else if (row + hbs < s->rows) {
        /* right edge: only SPLIT or V fit; a single bit decides */
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret) {
                yoff  += hbs * 8 * f->linesize[0];
                uvoff += hbs * 4 * f->linesize[1];
                ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                      yoff, uvoff, bl + 1);
            }
        } else {
            bp = PARTITION_V;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else {
        /* bottom-right corner: SPLIT is the only legal partition */
        bp = PARTITION_SPLIT;
        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return ret;
}
840
841 static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
842 int row, int col,
843 ptrdiff_t yoff, ptrdiff_t uvoff)
844 {
845 VP9Context *s = avctx->priv_data;
846 AVFrame *f = s->frames[CUR_FRAME].tf.f;
847 uint8_t *dst = f->data[0] + yoff;
848 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
849 uint8_t *lvl = lflvl->level;
850 int y, x, p;
851
852 /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
853 * if you think of them as acting on a 8x8 block max, we can interleave
854 * each v/h within the single x loop, but that only works if we work on
855 * 8 pixel blocks, and we won't always do that (we want at least 16px
856 * to use SSE2 optimizations, perhaps 32 for AVX2). */
857
858 // filter edges between columns, Y plane (e.g. block1 | block2)
859 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
860 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
861 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
862 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
863 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
864 unsigned hm = hm1 | hm2 | hm13 | hm23;
865
866 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
867 if (hm1 & x) {
868 int L = *l, H = L >> 4;
869 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
870
871 if (col || x > 1) {
872 if (hmask1[0] & x) {
873 if (hmask2[0] & x) {
874 av_assert2(l[8] == L);
875 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
876 } else {
877 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
878 }
879 } else if (hm2 & x) {
880 L = l[8];
881 H |= (L >> 4) << 8;
882 E |= s->filter.mblim_lut[L] << 8;
883 I |= s->filter.lim_lut[L] << 8;
884 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
885 [!!(hmask2[1] & x)]
886 [0](ptr, ls_y, E, I, H);
887 } else {
888 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
889 [0](ptr, ls_y, E, I, H);
890 }
891 }
892 } else if (hm2 & x) {
893 int L = l[8], H = L >> 4;
894 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
895
896 if (col || x > 1) {
897 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
898 [0](ptr + 8 * ls_y, ls_y, E, I, H);
899 }
900 }
901 if (hm13 & x) {
902 int L = *l, H = L >> 4;
903 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
904
905 if (hm23 & x) {
906 L = l[8];
907 H |= (L >> 4) << 8;
908 E |= s->filter.mblim_lut[L] << 8;
909 I |= s->filter.lim_lut[L] << 8;
910 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
911 } else {
912 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
913 }
914 } else if (hm23 & x) {
915 int L = l[8], H = L >> 4;
916 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
917
918 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
919 }
920 }
921 }
922
923 // block1
924 // filter edges between rows, Y plane (e.g. ------)
925 // block2
926 dst = f->data[0] + yoff;
927 lvl = lflvl->level;
928 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
929 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
930 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
931
932 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
933 if (row || y) {
934 if (vm & x) {
935 int L = *l, H = L >> 4;
936 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
937
938 if (vmask[0] & x) {
939 if (vmask[0] & (x << 1)) {
940 av_assert2(l[1] == L);
941 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
942 } else {
943 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
944 }
945 } else if (vm & (x << 1)) {
946 L = l[1];
947 H |= (L >> 4) << 8;
948 E |= s->filter.mblim_lut[L] << 8;
949 I |= s->filter.lim_lut[L] << 8;
950 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
951 [!!(vmask[1] & (x << 1))]
952 [1](ptr, ls_y, E, I, H);
953 } else {
954 s->dsp.loop_filter_8[!!(vmask[1] & x)]
955 [1](ptr, ls_y, E, I, H);
956 }
957 } else if (vm & (x << 1)) {
958 int L = l[1], H = L >> 4;
959 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
960
961 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
962 [1](ptr + 8, ls_y, E, I, H);
963 }
964 }
965 if (vm3 & x) {
966 int L = *l, H = L >> 4;
967 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
968
969 if (vm3 & (x << 1)) {
970 L = l[1];
971 H |= (L >> 4) << 8;
972 E |= s->filter.mblim_lut[L] << 8;
973 I |= s->filter.lim_lut[L] << 8;
974 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
975 } else {
976 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
977 }
978 } else if (vm3 & (x << 1)) {
979 int L = l[1], H = L >> 4;
980 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
981
982 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
983 }
984 }
985 }
986
987 // same principle but for U/V planes
988 for (p = 0; p < 2; p++) {
989 lvl = lflvl->level;
990 dst = f->data[1 + p] + uvoff;
991 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
992 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
993 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
994 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
995 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
996
997 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
998 if (col || x > 1) {
999 if (hm1 & x) {
1000 int L = *l, H = L >> 4;
1001 int E = s->filter.mblim_lut[L];
1002 int I = s->filter.lim_lut[L];
1003
1004 if (hmask1[0] & x) {
1005 if (hmask2[0] & x) {
1006 av_assert2(l[16] == L);
1007 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
1008 } else {
1009 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
1010 }
1011 } else if (hm2 & x) {
1012 L = l[16];
1013 H |= (L >> 4) << 8;
1014 E |= s->filter.mblim_lut[L] << 8;
1015 I |= s->filter.lim_lut[L] << 8;
1016 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
1017 [!!(hmask2[1] & x)]
1018 [0](ptr, ls_uv, E, I, H);
1019 } else {
1020 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
1021 [0](ptr, ls_uv, E, I, H);
1022 }
1023 } else if (hm2 & x) {
1024 int L = l[16], H = L >> 4;
1025 int E = s->filter.mblim_lut[L];
1026 int I = s->filter.lim_lut[L];
1027
1028 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
1029 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
1030 }
1031 }
1032 if (x & 0xAA)
1033 l += 2;
1034 }
1035 }
1036 lvl = lflvl->level;
1037 dst = f->data[1 + p] + uvoff;
1038 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
1039 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
1040 unsigned vm = vmask[0] | vmask[1] | vmask[2];
1041
1042 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
1043 if (row || y) {
1044 if (vm & x) {
1045 int L = *l, H = L >> 4;
1046 int E = s->filter.mblim_lut[L];
1047 int I = s->filter.lim_lut[L];
1048
1049 if (vmask[0] & x) {
1050 if (vmask[0] & (x << 2)) {
1051 av_assert2(l[2] == L);
1052 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
1053 } else {
1054 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
1055 }
1056 } else if (vm & (x << 2)) {
1057 L = l[2];
1058 H |= (L >> 4) << 8;
1059 E |= s->filter.mblim_lut[L] << 8;
1060 I |= s->filter.lim_lut[L] << 8;
1061 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
1062 [!!(vmask[1] & (x << 2))]
1063 [1](ptr, ls_uv, E, I, H);
1064 } else {
1065 s->dsp.loop_filter_8[!!(vmask[1] & x)]
1066 [1](ptr, ls_uv, E, I, H);
1067 }
1068 } else if (vm & (x << 2)) {
1069 int L = l[2], H = L >> 4;
1070 int E = s->filter.mblim_lut[L];
1071 int I = s->filter.lim_lut[L];
1072
1073 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
1074 [1](ptr + 8, ls_uv, E, I, H);
1075 }
1076 }
1077 }
1078 if (y & 1)
1079 lvl += 16;
1080 }
1081 }
1082 }
1083
/**
 * Compute the pixel range [start, end) covered by one tile.
 *
 * Tile boundaries are expressed in 64x64 superblocks: boundary i sits at
 * (i * n) >> log2_n, clamped to n, and each superblock is 8 units of 8px.
 *
 * @param start  receives the first coordinate of the tile (in 8px units * 8)
 * @param end    receives one past the last coordinate of the tile
 * @param idx    tile index
 * @param log2_n log2 of the tile count along this dimension
 * @param n      total number of superblocks along this dimension
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_lo = (idx * n) >> log2_n;
    int sb_hi = ((idx + 1) * n) >> log2_n;

    /* clamp to the picture edge, then scale superblocks to coordinates */
    if (sb_lo > n)
        sb_lo = n;
    if (sb_hi > n)
        sb_hi = n;
    *start = sb_lo << 3;
    *end   = sb_hi << 3;
}
1091
/**
 * Decode a single (possibly invisible) VP9 frame from data/size.
 *
 * On success, *got_frame is set and frame is filled when the frame is
 * visible (or when the header requested show-existing-frame, in which case
 * the referenced frame is output directly without decoding).
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f;
    int ret, tile_row, tile_col, i, ref = -1, row, col;
    ptrdiff_t yoff = 0, uvoff = 0;

    /* ret == 0 means "show existing frame": output reference 'ref' as-is */
    ret = decode_frame_header(avctx, data, size, &ref);
    if (ret < 0) {
        return ret;
    } else if (!ret) {
        if (!s->refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR,
                   "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }

        ret = av_frame_ref(frame, s->refs[ref].f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
        return 0;
    }
    /* skip past the uncompressed header that was just parsed */
    data += ret;
    size -= ret;

    /* rotate cur -> last frame (inter frames predict from the previous
     * decoded frame's segmentation map and mvs) */
    vp9_frame_unref(avctx, &s->frames[LAST_FRAME]);
    if (!s->keyframe && s->frames[CUR_FRAME].tf.f->buf[0]) {
        ret = vp9_frame_ref(&s->frames[LAST_FRAME], &s->frames[CUR_FRAME]);
        if (ret < 0)
            return ret;
    }

    /* allocate a fresh buffer (plus segmentation map / mv side data) for
     * the frame being decoded */
    vp9_frame_unref(avctx, &s->frames[CUR_FRAME]);
    ret = vp9_frame_alloc(avctx, &s->frames[CUR_FRAME]);
    if (ret < 0)
        return ret;

    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* map the bitstream color_space field to lavc colorspace values;
     * 0 (unknown) and 5+ intentionally leave avctx->colorspace untouched */
    switch (s->colorspace) {
    case 1: avctx->colorspace = AVCOL_SPC_BT470BG;   break;
    case 2: avctx->colorspace = AVCOL_SPC_BT709;     break;
    case 3: avctx->colorspace = AVCOL_SPC_SMPTE170M; break;
    case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
    }

    // main tile decode loop
    /* reset the above-row contexts used for entropy coding and intra pred */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly)
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    else
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);
        /* set up one range decoder per tile column; each tile is preceded
         * by a 32-bit size field, except the very last tile which simply
         * uses the remaining data */
        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->tiling.tile_cols - 1 &&
                tile_row == s->tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size)
                return AVERROR_INVALIDDATA;
            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
                return AVERROR_INVALIDDATA;
            data += tile_size;
            size -= tile_size;
        }

        /* decode this tile row one superblock (64px) row at a time,
         * iterating over all tile columns within it */
        for (row = s->tiling.tile_row_start;
             row < s->tiling.tile_row_end;
             row += 8, yoff += f->linesize[0] * 64,
             uvoff += f->linesize[1] * 32) {
            VP9Filter *lflvl = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                set_tile_offset(&s->tiling.tile_col_start,
                                &s->tiling.tile_col_end,
                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                /* reset per-tile left-edge contexts */
                memset(s->left_partition_ctx, 0, 8);
                memset(s->left_skip_ctx, 0, 8);
                if (s->keyframe || s->intraonly)
                    memset(s->left_mode_ctx, DC_PRED, 16);
                else
                    memset(s->left_mode_ctx, NEARESTMV, 8);
                memset(s->left_y_nnz_ctx, 0, 16);
                memset(s->left_uv_nnz_ctx, 0, 16);
                memset(s->left_segpred_ctx, 0, 8);

                /* swap in this tile column's range decoder state, decode
                 * one sb row of it, then save the state back */
                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                for (col = s->tiling.tile_col_start;
                     col < s->tiling.tile_col_end;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    memset(lflvl->mask, 0, sizeof(lflvl->mask));

                    if ((ret = decode_subblock(avctx, row, col, lflvl,
                                               yoff2, uvoff2, BL_64X64)) < 0)
                        return ret;
                }
                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff +
                       63 * f->linesize[0],
                       8 * s->cols);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff +
                       31 * f->linesize[1],
                       4 * s->cols);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff +
                       31 * f->linesize[2],
                       4 * s->cols);
            }

            // loopfilter one row
            if (s->filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
                    loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
            }
        }
    }

    // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
    // probability maintenance between frames)
    if (s->refreshctx) {
        if (s->parallelmode) {
            /* frame-parallel mode: store the probabilities signalled in the
             * header instead of adapting from decoded symbol counts */
            int j, k, l, m;
            for (i = 0; i < 4; i++) {
                for (j = 0; j < 2; j++)
                    for (k = 0; k < 2; k++)
                        for (l = 0; l < 6; l++)
                            for (m = 0; m < 6; m++)
                                memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                       s->prob.coef[i][j][k][l][m], 3);
                /* only tx sizes up to the frame's txfmmode carry coef probs */
                if (s->txfmmode == i)
                    break;
            }
            s->prob_ctx[s->framectxid].p = s->prob.p;
        } else {
            ff_vp9_adapt_probs(s);
        }
    }

    // ref frame setup
    /* store the new frame in every reference slot the header flagged */
    for (i = 0; i < 8; i++)
        if (s->refreshrefmask & (1 << i)) {
            ff_thread_release_buffer(avctx, &s->refs[i]);
            ret = ff_thread_ref_frame(&s->refs[i], &s->frames[CUR_FRAME].tf);
            if (ret < 0)
                return ret;
        }

    /* invisible (alt-ref) frames update references but are not output */
    if (!s->invisible) {
        av_frame_unref(frame);
        ret = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
    }

    return 0;
}
1287
1288 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1289 int *got_frame, AVPacket *avpkt)
1290 {
1291 const uint8_t *data = avpkt->data;
1292 int size = avpkt->size;
1293 int marker, ret;
1294
1295 /* Read superframe index - this is a collection of individual frames
1296 * that together lead to one visible frame */
1297 marker = data[size - 1];
1298 if ((marker & 0xe0) == 0xc0) {
1299 int nbytes = 1 + ((marker >> 3) & 0x3);
1300 int n_frames = 1 + (marker & 0x7);
1301 int idx_sz = 2 + n_frames * nbytes;
1302
1303 if (size >= idx_sz && data[size - idx_sz] == marker) {
1304 const uint8_t *idx = data + size + 1 - idx_sz;
1305
1306 while (n_frames--) {
1307 unsigned sz = AV_RL32(idx);
1308
1309 if (nbytes < 4)
1310 sz &= (1 << (8 * nbytes)) - 1;
1311 idx += nbytes;
1312
1313 if (sz > size) {
1314 av_log(avctx, AV_LOG_ERROR,
1315 "Superframe packet size too big: %u > %d\n",
1316 sz, size);
1317 return AVERROR_INVALIDDATA;
1318 }
1319
1320 ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
1321 if (ret < 0)
1322 return ret;
1323 data += sz;
1324 size -= sz;
1325 }
1326 return avpkt->size;
1327 }
1328 }
1329
1330 /* If we get here, there was no valid superframe index, i.e. this is just
1331 * one whole single frame. Decode it as such from the complete input buf. */
1332 if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
1333 return ret;
1334 return size;
1335 }
1336
1337 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1338 {
1339 VP9Context *s = avctx->priv_data;
1340 int i;
1341
1342 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
1343 vp9_frame_unref(avctx, &s->frames[i]);
1344 av_frame_free(&s->frames[i].tf.f);
1345 }
1346
1347 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1348 ff_thread_release_buffer(avctx, &s->refs[i]);
1349 av_frame_free(&s->refs[i].f);
1350 }
1351
1352 av_freep(&s->c_b);
1353 av_freep(&s->above_partition_ctx);
1354
1355 return 0;
1356 }
1357
1358 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1359 {
1360 VP9Context *s = avctx->priv_data;
1361 int i;
1362
1363 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1364
1365 ff_vp9dsp_init(&s->dsp);
1366 ff_videodsp_init(&s->vdsp, 8);
1367
1368 s->frames[0].tf.f = av_frame_alloc();
1369 s->frames[1].tf.f = av_frame_alloc();
1370 if (!s->frames[0].tf.f || !s->frames[1].tf.f)
1371 goto fail;
1372
1373 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1374 s->refs[i].f = av_frame_alloc();
1375 if (!s->refs[i].f)
1376 goto fail;
1377 }
1378
1379 s->filter.sharpness = -1;
1380
1381 return 0;
1382 fail:
1383 vp9_decode_free(avctx);
1384 return AVERROR(ENOMEM);
1385 }
1386
/* Codec registration entry for the native VP9 decoder.
 * AV_CODEC_CAP_DR1: the decoder uses get_buffer() for direct rendering. */
AVCodec ff_vp9_decoder = {
    .name           = "vp9",
    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP9,
    .priv_data_size = sizeof(VP9Context),
    .init           = vp9_decode_init,
    .decode         = vp9_decode_packet,
    .flush          = vp9_decode_flush,
    .close          = vp9_decode_free,
    .capabilities   = AV_CODEC_CAP_DR1,
};