vp9: add frame threading
[libav.git] / libavcodec / vp9.c
1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "libavutil/avassert.h"
25
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33
34 #define VP9_SYNCCODE 0x498342
35 #define MAX_PROB 255
36
37 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
38 {
39 ff_thread_release_buffer(avctx, &f->tf);
40 av_buffer_unref(&f->segmentation_map_buf);
41 av_buffer_unref(&f->mv_buf);
42 f->segmentation_map = NULL;
43 f->mv = NULL;
44 }
45
46 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
47 {
48 VP9Context *s = avctx->priv_data;
49 int ret, sz;
50
51 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
52 if (ret < 0)
53 return ret;
54
55 sz = 64 * s->sb_cols * s->sb_rows;
56 f->segmentation_map_buf = av_buffer_allocz(sz * sizeof(*f->segmentation_map));
57 f->mv_buf = av_buffer_allocz(sz * sizeof(*f->mv));
58 if (!f->segmentation_map_buf || !f->mv_buf) {
59 vp9_frame_unref(avctx, f);
60 return AVERROR(ENOMEM);
61 }
62
63 f->segmentation_map = f->segmentation_map_buf->data;
64 f->mv = (VP9MVRefPair*)f->mv_buf->data;
65
66 if (s->segmentation.enabled && !s->segmentation.update_map &&
67 !s->keyframe && !s->intraonly)
68 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
69
70 return 0;
71 }
72
73 static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
74 {
75 int ret;
76
77 dst->segmentation_map_buf = av_buffer_ref(src->segmentation_map_buf);
78 dst->mv_buf = av_buffer_ref(src->mv_buf);
79 if (!dst->segmentation_map_buf || !dst->mv_buf) {
80 ret = AVERROR(ENOMEM);
81 goto fail;
82 }
83
84 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
85 if (ret < 0)
86 goto fail;
87
88 dst->segmentation_map = src->segmentation_map;
89 dst->mv = src->mv;
90
91 return 0;
92 fail:
93 av_buffer_unref(&dst->segmentation_map_buf);
94 av_buffer_unref(&dst->mv_buf);
95 return ret;
96 }
97
98 static void vp9_decode_flush(AVCodecContext *avctx)
99 {
100 VP9Context *s = avctx->priv_data;
101 int i;
102
103 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
104 vp9_frame_unref(avctx, &s->frames[i]);
105
106 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
107 ff_thread_release_buffer(avctx, &s->refs[i]);
108
109 s->use_last_frame_mvs = 0;
110
111 s->alloc_width = 0;
112 s->alloc_height = 0;
113 }
114
/* (Re)allocate all size-dependent decoder buffers for coded size w x h:
 * the per-column "above" context arrays, loop-filter level storage, and
 * the block/coefficient scratch buffers. No-op if the size is unchanged.
 * Returns 0 on success or a negative AVERROR code. */
static int update_size(AVCodecContext *avctx, int w, int h)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int nb_blocks, nb_superblocks;

    /* fast path: current allocation already matches the requested size */
    if (s->above_partition_ctx && w == s->alloc_width && h == s->alloc_height)
        return 0;

    vp9_decode_flush(avctx);

    if (w <= 0 || h <= 0)
        return AVERROR_INVALIDDATA;

    avctx->width = w;
    avctx->height = h;
    s->sb_cols = (w + 63) >> 6; // 64x64 superblock grid
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;     // 8x8 block grid
    s->rows = (h + 7) >> 3;

    /* Carve all the per-superblock-column context arrays out of a single
     * allocation. The 240 bytes/column is the sum of the fixed-size uint8_t
     * assign()s below (10 arrays of 8 + 2 of 16 + 64 + 2 * 32); lflvl and
     * above_mv_ctx are accounted for separately via sizeof. */
#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    p = av_malloc(s->sb_cols *
                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, 1);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
#undef assign

    av_freep(&s->b_base);
    av_freep(&s->block_base);

    /* With frame threading, per-block data must persist for the whole frame
     * (loop filtering runs after decoding); otherwise one block/superblock
     * worth of scratch space is enough and gets reused. */
    if (avctx->active_thread_type & FF_THREAD_FRAME) {
        nb_blocks = s->cols * s->rows;
        nb_superblocks = s->sb_cols * s->sb_rows;
    } else {
        nb_blocks = nb_superblocks = 1;
    }

    s->b_base = av_malloc_array(nb_blocks, sizeof(*s->b_base));
    s->block_base = av_mallocz_array(nb_superblocks, (64 * 64 + 128) * 3);
    if (!s->b_base || !s->block_base)
        return AVERROR(ENOMEM);
    /* subdivide block_base: 64x64 luma coeffs, two 32x32 chroma coeff
     * planes, then the luma and chroma end-of-block arrays */
    s->uvblock_base[0] = s->block_base + nb_superblocks * 64 * 64;
    s->uvblock_base[1] = s->uvblock_base[0] + nb_superblocks * 32 * 32;
    s->eob_base = (uint8_t *)(s->uvblock_base[1] + nb_superblocks * 32 * 32);
    s->uveob_base[0] = s->eob_base + nb_superblocks * 256;
    s->uveob_base[1] = s->uveob_base[0] + nb_superblocks * 64;

    s->alloc_width = w;
    s->alloc_height = h;

    return 0;
}
186
187 // The sign bit is at the end, not the start, of a bit sequence
188 static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
189 {
190 int v = get_bits(gb, n);
191 return get_bits1(gb) ? -v : v;
192 }
193
194 static av_always_inline int inv_recenter_nonneg(int v, int m)
195 {
196 if (v > 2 * m)
197 return v;
198 if (v & 1)
199 return m - ((v + 1) >> 1);
200 return m + (v >> 1);
201 }
202
203 // differential forward probability updates
/* Read one differential probability update from the range coder and apply
 * it to the current probability p (range [1, 255]), returning the new
 * probability. The update is coded as a VLC'd absolute difference which is
 * then recentered around p via inv_recenter_nonneg(). */
static int update_prob(VP56RangeCoder *c, int p)
{
    /* maps the decoded difference index d back to the actual absolute
     * difference; the first 20 entries are the coarse "cheap" updates */
    static const int inv_map_table[MAX_PROB - 1] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    /* VLC: successive "escape" bits select ever-larger ranges of d */
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65) {
            // top range is coded with one extra half-step bit
            d = (d << 1) - 65 + vp8_rac_get(c);
            d = av_clip(d, 0, MAX_PROB - 65 - 1);
        }
        d += 64;
    }

    /* recenter around p, mirroring into whichever half has room */
    return p <= 128
        ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
        : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
263
/* Parse a VP9 frame header: first the raw-bit "uncompressed" header
 * (frame type, size, loop filter, quantizers, segmentation, tiling), then
 * the arith-coded "compressed" header containing the forward probability
 * updates. For a show-existing-frame packet, sets *ref to the reference
 * slot to display and returns 0; otherwise returns the total number of
 * header bytes consumed, or a negative AVERROR on error. */
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        // show-existing-frame: no frame data follows, just a slot index
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);

    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    // previous-frame MVs are only usable if the last frame was shown and
    // error-resilient mode is off
    s->use_last_frame_mvs = !s->errorres && !last_invisible;

    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);

        // subsampling bits
        if (s->profile == 1 || s->profile == 3) {
            s->sub_x = get_bits1(&s->gb);
            s->sub_y = get_bits1(&s->gb);
            if (s->sub_x && s->sub_y) {
                av_log(avctx, AV_LOG_ERROR,
                       "4:2:0 color not supported in profile 1 or 3\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) { // reserved bit
                av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            s->sub_x = s->sub_y = 1;
        }
        // only 4:2:0 is implemented here
        if (!s->sub_x || !s->sub_y) {
            avpriv_report_missing_feature(avctx, "Subsampling %d:%d",
                                          s->sub_x, s->sub_y);
            return AVERROR_PATCHWELCOME;
        }

        s->refreshrefmask = 0xff; // keyframes refresh all reference slots
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            /* regular inter frame: 3 active references with sign biases */
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->buf[0] ||
                !s->refs[s->refidx[1]].f->buf[0] ||
                !s->refs[s->refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR,
                       "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            // frame size: either inherited from one of the refs or explicit
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            // compound prediction needs refs pointing in both directions
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                // fixed ref is the one whose sign bias differs from the pair
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
     * keep the old cache values since they are still valid. */
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            // per-reference and per-mode filter level deltas
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    // all-zero quantizers signal lossless coding (forces 4x4 WHT)
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            // tree probabilities for coding the segmentation map
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            // per-segment feature data (quantizer, filter, ref, skip)
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        // segmentation off: only feature slot 0 is consulted, clear it
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];

        // deltas are scaled up for high base filter levels
        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        // precompute final filter level per [reference][mode]
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to initialize decoder for %dx%d\n", w, h);
        return ret;
    }
    // minimum tile columns: each tile may span at most 64 superblocks
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    // maximum tile columns: each tile must be at least 4 superblocks wide
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        // one range coder per tile column
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) *
                                 s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(avctx, AV_LOG_ERROR,
                   "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    // reset all four probability contexts to the spec defaults
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p =
        s->prob_ctx[1].p =
        s->prob_ctx[2].p =
        s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    // intra frames only gather coef/eob counts; relies on coef and eob
    // being adjacent members of s->counts
    if (s->keyframe || s->intraonly)
        memset(s->counts.coef, 0,
               sizeof(s->counts.coef) + sizeof(s->counts.eob));
    else
        memset(&s->counts, 0, sizeof(s->counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * fw update)? */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            // explicit forward updates on top of the stored context
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            p[3] = 0;
                        }
        } else {
            // no updates for this txfm size: just copy the stored context
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            // NOTE(review): '>' here vs '>=' in the update
                            // branch above — copies one extra (unused) entry;
                            // harmless, matches upstream, but asymmetric
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        // only txfm sizes up to the frame's txfmmode carry coef probs
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        // inter-frame-only probability updates
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}
764
/* Recursively parse the partition tree at (row, col) and decode the
 * resulting blocks. bl is the current block level; yoff/uvoff are byte
 * offsets into the current frame's luma/chroma planes. Blocks partially
 * outside the frame restrict which partition types can be coded (and some
 * are then implied rather than read). Returns 0 or a negative AVERROR. */
static int decode_subblock(AVCodecContext *avctx, int row, int col,
                           VP9Filter *lflvl,
                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    // 2-bit context from the above/left neighbours' partition state
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    int ret;
    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
                                   : s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; // half the block size, in 8x8 units

    if (bl == BL_8X8) {
        // leaf level: partition only selects a sub-8x8 split, no recursion
        bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            // block fully inside the frame: all four partitions possible
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                break;
            case PARTITION_H:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    yoff += hbs * 8 * f->linesize[0];
                    uvoff += hbs * 4 * f->linesize[1];
                    ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    yoff += hbs * 8;
                    uvoff += hbs * 4;
                    ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                // recurse into the four quadrants (TL, TR, BL, BR)
                ret = decode_subblock(avctx, row, col, lflvl,
                                      yoff, uvoff, bl + 1);
                if (!ret) {
                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs,
                                          bl + 1);
                    if (!ret) {
                        yoff += hbs * 8 * f->linesize[0];
                        uvoff += hbs * 4 * f->linesize[1];
                        ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1);
                        if (!ret) {
                            ret = decode_subblock(avctx, row + hbs, col + hbs,
                                                  lflvl, yoff + 8 * hbs,
                                                  uvoff + 4 * hbs, bl + 1);
                        }
                    }
                }
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
                return AVERROR_INVALIDDATA;
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            // bottom edge: only SPLIT or H are codable (one bit)
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret)
                ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else if (row + hbs < s->rows) {
        // right edge: only SPLIT or V are codable (one bit)
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret) {
                yoff += hbs * 8 * f->linesize[0];
                uvoff += hbs * 4 * f->linesize[1];
                ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                      yoff, uvoff, bl + 1);
            }
        } else {
            bp = PARTITION_V;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else {
        // bottom-right corner: SPLIT is implied, nothing is coded
        bp = PARTITION_SPLIT;
        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return ret;
}
868
/* Frame-threading companion to decode_subblock(): re-walks the partition
 * tree using the block level/partition (b->bl, b->bp) stored in the
 * per-block array during the parse pass, instead of reading them from the
 * range coder. s->b is advanced by ff_vp9_decode_block() as blocks are
 * consumed. Returns 0 or a negative AVERROR. */
static int decode_superblock_mem(AVCodecContext *avctx, int row, int col, struct VP9Filter *lflvl,
                                 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    ptrdiff_t hbs = 4 >> bl; // half the block size, in 8x8 units
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int res;

    if (bl == BL_8X8) {
        // leaf: the stored block must be at this level
        av_assert2(b->bl == BL_8X8);
        res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        // the stored block terminates at this level: NONE, H or V partition
        if ((res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp)) < 0)
            return res;
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            res = ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff += hbs * 8;
            uvoff += hbs * 4;
            res = ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        // stored block is deeper: recurse into the in-frame quadrants,
        // mirroring the edge handling of decode_subblock()
        if ((res = decode_superblock_mem(avctx, row, col, lflvl, yoff, uvoff, bl + 1)) < 0)
            return res;
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                if ((res = decode_superblock_mem(avctx, row, col + hbs, lflvl, yoff + 8 * hbs,
                                                 uvoff + 4 * hbs, bl + 1)) < 0)
                    return res;
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                if ((res = decode_superblock_mem(avctx, row + hbs, col, lflvl, yoff,
                                                 uvoff, bl + 1)) < 0)
                    return res;
                res = decode_superblock_mem(avctx, row + hbs, col + hbs, lflvl,
                                            yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
            } else {
                // bottom edge: only the top-right quadrant remains
                yoff += hbs * 8;
                uvoff += hbs * 4;
                res = decode_superblock_mem(avctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            // right edge: only the bottom-left quadrant remains
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            res = decode_superblock_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }

    return res;
}
923
/**
 * Apply the in-loop deblocking filter to one 64x64 superblock of the
 * current frame.
 *
 * lflvl->level holds one 8-bit filter level per 8x8 block (an 8x8 grid
 * per superblock).  lflvl->mask[plane][dir][y] holds edge bitmasks that
 * were filled in during block reconstruction: plane 0 is luma, plane 1
 * is shared by both chroma planes; dir 0 selects vertical (column)
 * edges, dir 1 horizontal (row) edges.  Within one mask, indices 0-2
 * select the filter width (see the loop_filter_16/_8[2]/_8[1]/_8[0]
 * dispatch below) and index 3 marks interior edges 4px inside an 8x8
 * block.
 *
 * row/col are the superblock position in 8x8-block units and are used
 * to suppress filtering across the top/left frame borders; yoff/uvoff
 * are the byte offsets of this superblock into the luma/chroma planes.
 */
static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                int row, int col,
                                ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    uint8_t *dst = f->data[0] + yoff;
    ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
    uint8_t *lvl = lflvl->level;
    int y, x, p;

    /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
     * if you think of them as acting on a 8x8 block max, we can interleave
     * each v/h within the single x loop, but that only works if we work on
     * 8 pixel blocks, and we won't always do that (we want at least 16px
     * to use SSE2 optimizations, perhaps 32 for AVX2). */

    // filter edges between columns, Y plane (e.g. block1 | block2)
    // two 8x8-block rows per iteration, so that vertically adjacent
    // edges with the same level can be merged into one 16px-high call
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23; // any edge at all in these rows?

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                // thresholds derived from the 8-bit level L:
                // E = edge limit, I = interior limit, H = hev threshold
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) { // skip the left frame border
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        // both 8x8 rows need this edge but with possibly
                        // different levels: pack the second row's
                        // thresholds into the high byte for the mix2 call
                        L = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                // only the second of the two 8x8 rows has an edge here
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            // interior edges 4px into the 8x8 block; these can never lie
            // on the left frame border, hence no col/x guard
            if (hm13 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          block1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          block2
    dst = f->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        // two mask bits per 8x8 block here, so x advances 2 bits / 16px
        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            if (row || y) { // skip the top frame border
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        // horizontally adjacent edges with different
                        // levels, merged via the mix2 variant
                        L = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            // interior row edges 4px down; never on the top frame border
            if (vm3 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes (4:2:0 subsampled: the level
    // array stays in luma 8x8 resolution, masks carry one bit per 4px)
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = f->data[1 + p] + uvoff;
        // vertical (column) edges
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) { // skip the left frame border
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                // level entries are in luma resolution: advance the level
                // pointer only on every second (odd) mask bit
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = f->data[1 + p] + uvoff;
        // horizontal (row) edges
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) { // skip the top frame border
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            // one luma-resolution level row covers two chroma strip rows
            if (y & 1)
                lvl += 16;
        }
    }
}
1166
/**
 * Compute the span covered by tile 'idx' out of 2^log2_n tiles spread
 * over n 64x64-superblock units.  The result is clamped to the frame
 * size and returned through *start (inclusive) / *end (exclusive) in
 * units of 8x8 blocks (one sb64 = 8 such blocks).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    // superblock units -> 8x8-block units
    *start = sb_start << 3;
    *end   = sb_end << 3;
}
1174
1175 static int update_refs(AVCodecContext *avctx)
1176 {
1177 VP9Context *s = avctx->priv_data;
1178 int i, ret;
1179
1180 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
1181 if (s->refreshrefmask & (1 << i)) {
1182 ff_thread_release_buffer(avctx, &s->refs[i]);
1183 ret = ff_thread_ref_frame(&s->refs[i], &s->frames[CUR_FRAME].tf);
1184 if (ret < 0)
1185 return ret;
1186 }
1187
1188 return 0;
1189 }
1190
/**
 * Decode one coded VP9 frame from data/size.
 *
 * *got_frame/frame are filled when a picture is to be output (a visible
 * frame, or a packet that merely re-displays an existing reference).
 * can_finish_setup tells us whether this call may invoke
 * ff_thread_finish_setup() — it must happen at most once per packet, so
 * only the last subframe of a superframe is allowed to.
 *
 * @return 0 on success, a negative AVERROR code on failure.
 */
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, const uint8_t *data, int size,
                            int can_finish_setup)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f;
    int ret, tile_row, tile_col, i, ref = -1, row, col;

    ret = decode_frame_header(avctx, data, size, &ref);
    if (ret < 0) {
        return ret;
    } else if (!ret) {
        // 0 means "show existing frame": no coded data follows, just
        // re-output reference slot 'ref'
        if (!s->refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR,
                   "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }

        ret = av_frame_ref(frame, s->refs[ref].f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
        return 0;
    }
    // skip the frame header (ret bytes); the rest is tile data
    data += ret;
    size -= ret;

    // rotate frame slots: the previously decoded frame becomes
    // LAST_FRAME (needed for inter prediction), then a fresh buffer is
    // allocated for CUR_FRAME
    vp9_frame_unref(avctx, &s->frames[LAST_FRAME]);
    if (!s->keyframe && s->frames[CUR_FRAME].tf.f->buf[0]) {
        ret = vp9_frame_ref(&s->frames[LAST_FRAME], &s->frames[CUR_FRAME]);
        if (ret < 0)
            return ret;
    }

    vp9_frame_unref(avctx, &s->frames[CUR_FRAME]);
    ret = vp9_frame_alloc(avctx, &s->frames[CUR_FRAME]);
    if (ret < 0)
        return ret;

    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    switch (s->colorspace) {
    case 1: avctx->colorspace = AVCOL_SPC_BT470BG; break;
    case 2: avctx->colorspace = AVCOL_SPC_BT709; break;
    case 3: avctx->colorspace = AVCOL_SPC_SMPTE170M; break;
    case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
    }

    // With frame threading and backward probability adaptation
    // (refreshctx && !parallelmode) we run two passes: pass 1 parses the
    // bitstream and adapts the probabilities, pass 2 reconstructs the
    // pixels from the stored per-block data.  Otherwise a single pass
    // (s->pass == 0) does everything at once.
    s->pass = s->uses_2pass =
        avctx->active_thread_type & FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;

    if (s->refreshctx && s->parallelmode) {
        // parallel mode: the frame context is refreshed with the coded
        // (forward-updated) probabilities instead of backward adaptation
        int j, k, l, m;
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->txfmmode == i)
                break;
        }
        s->prob_ctx[s->framectxid].p = s->prob.p;
    }
    // if the frame context will not change after tile decoding, other
    // threads may start depending on this context right away
    if ((s->parallelmode || !s->refreshctx) &&
        can_finish_setup && avctx->active_thread_type & FF_THREAD_FRAME) {
        ff_thread_finish_setup(avctx);
        s->setup_finished = 1;
    }

    // main tile decode loop
    // reset the above-row contexts for the new frame
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly)
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    else
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);

    do {
        ptrdiff_t yoff = 0, uvoff = 0;
        // rewind the per-frame block/coefficient storage for this pass
        s->b = s->b_base;
        s->block = s->block_base;
        s->uvblock[0] = s->uvblock_base[0];
        s->uvblock[1] = s->uvblock_base[1];
        s->eob = s->eob_base;
        s->uveob[0] = s->uveob_base[0];
        s->uveob[1] = s->uveob_base[1];

        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);

            if (s->pass != 2) {
                // pass 2 replays stored data, so the bitstream is only
                // split into per-tile range decoders in pass 0/1
                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    // the last tile has no explicit size; it spans the
                    // remainder of the packet
                    if (tile_col == s->tiling.tile_cols - 1 &&
                        tile_row == s->tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size) {
                        ret = AVERROR_INVALIDDATA;
                        goto fail;
                    }
                    ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                    if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                        ret = AVERROR_INVALIDDATA;
                        goto fail;
                    }
                    data += tile_size;
                    size -= tile_size;
                }
            }

            // decode one row of 64x64 superblocks across all tile columns
            for (row = s->tiling.tile_row_start;
                 row < s->tiling.tile_row_end;
                 row += 8, yoff += f->linesize[0] * 64,
                 uvoff += f->linesize[1] * 32) {
                VP9Filter *lflvl = s->lflvl;
                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    set_tile_offset(&s->tiling.tile_col_start,
                                    &s->tiling.tile_col_end,
                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                    // reset the left-edge contexts at each tile boundary
                    memset(s->left_partition_ctx, 0, 8);
                    memset(s->left_skip_ctx, 0, 8);
                    if (s->keyframe || s->intraonly)
                        memset(s->left_mode_ctx, DC_PRED, 16);
                    else
                        memset(s->left_mode_ctx, NEARESTMV, 8);
                    memset(s->left_y_nnz_ctx, 0, 16);
                    memset(s->left_uv_nnz_ctx, 0, 16);
                    memset(s->left_segpred_ctx, 0, 8);

                    // resume this tile's saved range decoder state
                    memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                    for (col = s->tiling.tile_col_start;
                         col < s->tiling.tile_col_end;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
                        // FIXME integrate with lf code (i.e. zero after each
                        // use, similar to invtxfm coefficients, or similar)
                        if (s->pass != 1)
                            memset(lflvl->mask, 0, sizeof(lflvl->mask));

                        if (s->pass == 2) {
                            // reconstruction pass: replay stored blocks
                            ret = decode_superblock_mem(avctx, row, col, lflvl,
                                                        yoff2, uvoff2, BL_64X64);
                        } else {
                            ret = decode_subblock(avctx, row, col, lflvl,
                                                  yoff2, uvoff2, BL_64X64);
                        }
                        if (ret < 0)
                            goto fail;
                    }
                    if (s->pass != 2)
                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
                }

                // pass 1 only parses; reconstruction, loopfiltering and
                // progress reporting happen in pass 2 (or the single pass)
                if (s->pass == 1)
                    continue;

                // backup pre-loopfilter reconstruction data for intra
                // prediction of next row of sb64s
                if (row + 8 < s->rows) {
                    memcpy(s->intra_pred_data[0],
                           f->data[0] + yoff +
                           63 * f->linesize[0],
                           8 * s->cols);
                    memcpy(s->intra_pred_data[1],
                           f->data[1] + uvoff +
                           31 * f->linesize[1],
                           4 * s->cols);
                    memcpy(s->intra_pred_data[2],
                           f->data[2] + uvoff +
                           31 * f->linesize[2],
                           4 * s->cols);
                }

                // loopfilter one row
                if (s->filter.level) {
                    yoff2 = yoff;
                    uvoff2 = uvoff;
                    lflvl = s->lflvl;
                    for (col = 0; col < s->cols;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
                        loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
                }

                // FIXME maybe we can make this more finegrained by running the
                // loopfilter per-block instead of after each sbrow
                // In fact that would also make intra pred left preparation easier?
                // progress is counted in sb64 rows (row is in 8x8 units)
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
            }
        }

        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
            // backward adaptation: update the frame context from the
            // observed symbol counts; only now may dependent threads use it
            ff_vp9_adapt_probs(s);
            if (can_finish_setup && avctx->active_thread_type & FF_THREAD_FRAME) {
                ff_thread_finish_setup(avctx);
                s->setup_finished = 1;
            }
        }
    } while (s->pass++ == 1);
fail:
    // always unblock waiting threads, even on error
    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
    if (ret < 0)
        return ret;

    // ref frame setup
    // if setup already finished, the next thread replays the reference
    // refresh itself via update_thread_context() -> update_refs()
    if (!s->setup_finished) {
        ret = update_refs(avctx);
        if (ret < 0)
            return ret;
    }

    if (!s->invisible) {
        av_frame_unref(frame);
        ret = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
    }

    return 0;
}
1433
1434 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1435 int *got_frame, AVPacket *avpkt)
1436 {
1437 VP9Context *s = avctx->priv_data;
1438 const uint8_t *data = avpkt->data;
1439 int size = avpkt->size;
1440 int marker, ret;
1441
1442 s->setup_finished = 0;
1443
1444 /* Read superframe index - this is a collection of individual frames
1445 * that together lead to one visible frame */
1446 marker = data[size - 1];
1447 if ((marker & 0xe0) == 0xc0) {
1448 int nbytes = 1 + ((marker >> 3) & 0x3);
1449 int n_frames = 1 + (marker & 0x7);
1450 int idx_sz = 2 + n_frames * nbytes;
1451
1452 if (size >= idx_sz && data[size - idx_sz] == marker) {
1453 const uint8_t *idx = data + size + 1 - idx_sz;
1454
1455 while (n_frames--) {
1456 unsigned sz = AV_RL32(idx);
1457
1458 if (nbytes < 4)
1459 sz &= (1 << (8 * nbytes)) - 1;
1460 idx += nbytes;
1461
1462 if (sz > size) {
1463 av_log(avctx, AV_LOG_ERROR,
1464 "Superframe packet size too big: %u > %d\n",
1465 sz, size);
1466 return AVERROR_INVALIDDATA;
1467 }
1468
1469 ret = vp9_decode_frame(avctx, frame, got_frame, data, sz,
1470 !n_frames);
1471 if (ret < 0)
1472 return ret;
1473 data += sz;
1474 size -= sz;
1475 }
1476 return avpkt->size;
1477 }
1478 }
1479
1480 /* If we get here, there was no valid superframe index, i.e. this is just
1481 * one whole single frame. Decode it as such from the complete input buf. */
1482 if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size, 1)) < 0)
1483 return ret;
1484 return size;
1485 }
1486
1487 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1488 {
1489 VP9Context *s = avctx->priv_data;
1490 int i;
1491
1492 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
1493 vp9_frame_unref(avctx, &s->frames[i]);
1494 av_frame_free(&s->frames[i].tf.f);
1495 }
1496
1497 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1498 ff_thread_release_buffer(avctx, &s->refs[i]);
1499 av_frame_free(&s->refs[i].f);
1500 }
1501
1502 av_freep(&s->c_b);
1503 av_freep(&s->above_partition_ctx);
1504 av_freep(&s->b_base);
1505 av_freep(&s->block_base);
1506
1507 return 0;
1508 }
1509
1510 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1511 {
1512 VP9Context *s = avctx->priv_data;
1513 int i;
1514
1515 memset(s, 0, sizeof(*s));
1516
1517 avctx->internal->allocate_progress = 1;
1518
1519 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1520
1521 ff_vp9dsp_init(&s->dsp);
1522 ff_videodsp_init(&s->vdsp, 8);
1523
1524 s->frames[0].tf.f = av_frame_alloc();
1525 s->frames[1].tf.f = av_frame_alloc();
1526 if (!s->frames[0].tf.f || !s->frames[1].tf.f)
1527 goto fail;
1528
1529 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1530 s->refs[i].f = av_frame_alloc();
1531 if (!s->refs[i].f)
1532 goto fail;
1533 }
1534
1535 s->filter.sharpness = -1;
1536
1537 return 0;
1538 fail:
1539 vp9_decode_free(avctx);
1540 return AVERROR(ENOMEM);
1541 }
1542
/**
 * Frame-threading callback: copy into dst everything the next worker
 * thread needs from src in order to start decoding the following frame.
 */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
    int i, ret;

    // make sure dst's size-dependent buffers match src's geometry
    ret = update_size(dst, ssrc->alloc_width, ssrc->alloc_height);
    if (ret < 0)
        return ret;

    // adopt src's internal frame slots (CUR/LAST)
    for (i = 0; i < 2; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_frame_unref(dst, &s->frames[i]);
        if (ssrc->frames[i].tf.f->data[0]) {
            if ((ret = vp9_frame_ref(&s->frames[i], &ssrc->frames[i])) < 0)
                return ret;
        }
    }
    // adopt src's reference slots
    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
        ff_thread_release_buffer(dst, &s->refs[i]);
        if (ssrc->refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->refs[i], &ssrc->refs[i]);
            if (ret < 0)
                return ret;
        }
    }

    // src skips update_refs() once it has called ff_thread_finish_setup()
    // (see vp9_decode_frame), so replay its pending reference refresh here
    s->refreshrefmask = ssrc->refreshrefmask;
    ret = update_refs(dst);
    if (ret < 0)
        return ret;

    s->invisible = ssrc->invisible;
    s->keyframe = ssrc->keyframe;
    s->last_uses_2pass = ssrc->uses_2pass;

    // persistent inter-frame state: probability contexts, loopfilter
    // deltas and segmentation features
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
           sizeof(s->segmentation.feat));

    return 0;
}
1585
/* Frame-threaded VP9 decoder.  vp9_decode_init doubles as the
 * init_thread_copy callback: each worker thread runs a full init and
 * then receives the shared decoding state via
 * vp9_decode_update_thread_context(). */
AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .decode                = vp9_decode_packet,
    .flush                 = vp9_decode_flush,
    .close                 = vp9_decode_free,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
    .init_thread_copy      = vp9_decode_init,
    .update_thread_context = vp9_decode_update_thread_context,
};