vp9: allocate 'b', 'block/uvblock' and 'eob/uveob' dynamically.
[libav.git] / libavcodec / vp9.c
1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "libavutil/avassert.h"
25
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33
34 #define VP9_SYNCCODE 0x498342
35 #define MAX_PROB 255
36
37 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
38 {
39 ff_thread_release_buffer(avctx, &f->tf);
40 av_buffer_unref(&f->segmentation_map_buf);
41 av_buffer_unref(&f->mv_buf);
42 f->segmentation_map = NULL;
43 f->mv = NULL;
44 }
45
46 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
47 {
48 VP9Context *s = avctx->priv_data;
49 int ret, sz;
50
51 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
52 if (ret < 0)
53 return ret;
54
55 sz = 64 * s->sb_cols * s->sb_rows;
56 f->segmentation_map_buf = av_buffer_allocz(sz * sizeof(*f->segmentation_map));
57 f->mv_buf = av_buffer_allocz(sz * sizeof(*f->mv));
58 if (!f->segmentation_map_buf || !f->mv_buf) {
59 vp9_frame_unref(avctx, f);
60 return AVERROR(ENOMEM);
61 }
62
63 f->segmentation_map = f->segmentation_map_buf->data;
64 f->mv = (VP9MVRefPair*)f->mv_buf->data;
65
66 if (s->segmentation.enabled && !s->segmentation.update_map &&
67 !s->keyframe && !s->intraonly)
68 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
69
70 return 0;
71 }
72
73 static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
74 {
75 int ret;
76
77 dst->segmentation_map_buf = av_buffer_ref(src->segmentation_map_buf);
78 dst->mv_buf = av_buffer_ref(src->mv_buf);
79 if (!dst->segmentation_map_buf || !dst->mv_buf) {
80 ret = AVERROR(ENOMEM);
81 goto fail;
82 }
83
84 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
85 if (ret < 0)
86 goto fail;
87
88 dst->segmentation_map = src->segmentation_map;
89 dst->mv = src->mv;
90
91 return 0;
92 fail:
93 av_buffer_unref(&dst->segmentation_map_buf);
94 av_buffer_unref(&dst->mv_buf);
95 return ret;
96 }
97
98 static void vp9_decode_flush(AVCodecContext *avctx)
99 {
100 VP9Context *s = avctx->priv_data;
101 int i;
102
103 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
104 vp9_frame_unref(avctx, &s->frames[i]);
105
106 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
107 ff_thread_release_buffer(avctx, &s->refs[i]);
108 }
109
110 static int update_size(AVCodecContext *avctx, int w, int h)
111 {
112 VP9Context *s = avctx->priv_data;
113 uint8_t *p;
114
115 if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
116 return 0;
117
118 vp9_decode_flush(avctx);
119
120 if (w <= 0 || h <= 0)
121 return AVERROR_INVALIDDATA;
122
123 avctx->width = w;
124 avctx->height = h;
125 s->sb_cols = (w + 63) >> 6;
126 s->sb_rows = (h + 63) >> 6;
127 s->cols = (w + 7) >> 3;
128 s->rows = (h + 7) >> 3;
129
130 #define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
131 av_free(s->above_partition_ctx);
132 p = av_malloc(s->sb_cols *
133 (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
134 if (!p)
135 return AVERROR(ENOMEM);
136 assign(s->above_partition_ctx, uint8_t *, 8);
137 assign(s->above_skip_ctx, uint8_t *, 8);
138 assign(s->above_txfm_ctx, uint8_t *, 8);
139 assign(s->above_mode_ctx, uint8_t *, 16);
140 assign(s->above_y_nnz_ctx, uint8_t *, 16);
141 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
142 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
143 assign(s->intra_pred_data[0], uint8_t *, 64);
144 assign(s->intra_pred_data[1], uint8_t *, 32);
145 assign(s->intra_pred_data[2], uint8_t *, 32);
146 assign(s->above_segpred_ctx, uint8_t *, 8);
147 assign(s->above_intra_ctx, uint8_t *, 8);
148 assign(s->above_comp_ctx, uint8_t *, 8);
149 assign(s->above_ref_ctx, uint8_t *, 8);
150 assign(s->above_filter_ctx, uint8_t *, 8);
151 assign(s->lflvl, VP9Filter *, 1);
152 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
153 #undef assign
154
155 av_freep(&s->b_base);
156 av_freep(&s->block_base);
157 s->b_base = av_malloc(sizeof(*s->b_base));
158 s->block_base = av_mallocz((64 * 64 + 128) * 3);
159 if (!s->b_base || !s->block_base)
160 return AVERROR(ENOMEM);
161
162 s->uvblock_base[0] = s->block_base + 64 * 64;
163 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
164 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
165 s->uveob_base[0] = s->eob_base + 256;
166 s->uveob_base[1] = s->uveob_base[0] + 64;
167
168 return 0;
169 }
170
171 // The sign bit is at the end, not the start, of a bit sequence
172 static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
173 {
174 int v = get_bits(gb, n);
175 return get_bits1(gb) ? -v : v;
176 }
177
/* Undo the "recentering" used by the subexponential probability coder:
 * values near the reference m are coded as small v (alternating below/above
 * m), while v > 2*m passes through unchanged. */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    /* odd v lies below m, even v at or above it */
    return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1);
}
186
187 // differential forward probability updates
188 static int update_prob(VP56RangeCoder *c, int p)
189 {
190 static const int inv_map_table[MAX_PROB - 1] = {
191 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
192 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
193 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
194 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
195 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
196 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
197 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
198 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
199 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
200 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
201 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
202 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
203 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
204 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
205 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
206 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
207 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
208 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
209 252, 253,
210 };
211 int d;
212
213 /* This code is trying to do a differential probability update. For a
214 * current probability A in the range [1, 255], the difference to a new
215 * probability of any value can be expressed differentially as 1-A, 255-A
216 * where some part of this (absolute range) exists both in positive as
217 * well as the negative part, whereas another part only exists in one
218 * half. We're trying to code this shared part differentially, i.e.
219 * times two where the value of the lowest bit specifies the sign, and
220 * the single part is then coded on top of this. This absolute difference
221 * then again has a value of [0, 254], but a bigger value in this range
222 * indicates that we're further away from the original value A, so we
223 * can code this as a VLC code, since higher values are increasingly
224 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
225 * updates vs. the 'fine, exact' updates further down the range, which
226 * adds one extra dimension to this differential update model. */
227
228 if (!vp8_rac_get(c)) {
229 d = vp8_rac_get_uint(c, 4) + 0;
230 } else if (!vp8_rac_get(c)) {
231 d = vp8_rac_get_uint(c, 4) + 16;
232 } else if (!vp8_rac_get(c)) {
233 d = vp8_rac_get_uint(c, 5) + 32;
234 } else {
235 d = vp8_rac_get_uint(c, 7);
236 if (d >= 65) {
237 d = (d << 1) - 65 + vp8_rac_get(c);
238 d = av_clip(d, 0, MAX_PROB - 65 - 1);
239 }
240 d += 64;
241 }
242
243 return p <= 128
244 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
245 : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
246 }
247
/*
 * Parse a complete VP9 frame header: the uncompressed part (frame type,
 * reference setup, size, loopfilter, quantizers, segmentation, tiling)
 * followed by the arith-coded 'compressed header' carrying the forward
 * probability updates.
 *
 * Returns the total header size in bytes (i.e. the offset of the tile
 * data) on success, or a negative AVERROR code. For a show-existing-frame
 * packet, *ref is set to the reference slot to display and 0 is returned.
 */
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    /* NOTE(review): only the low profile bit is read here; the second
     * profile bit is treated as a reserved-zero bit below, so only
     * profiles 0/1 bitstream layouts are handled by this parser. */
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) { // show_existing_frame: no data to decode
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);

    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    // FIXME disable this upon resolution change
    s->use_last_frame_mvs = !s->errorres && !last_invisible;

    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);

        // subsampling bits
        if (s->profile == 1 || s->profile == 3) {
            s->sub_x = get_bits1(&s->gb);
            s->sub_y = get_bits1(&s->gb);
            if (s->sub_x && s->sub_y) {
                av_log(avctx, AV_LOG_ERROR,
                       "4:2:0 color not supported in profile 1 or 3\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) { // reserved bit
                av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            /* profile 0 is implicitly 4:2:0 */
            s->sub_x = s->sub_y = 1;
        }
        /* only 4:2:0 is implemented by this decoder */
        if (!s->sub_x || !s->sub_y) {
            avpriv_report_missing_feature(avctx, "Subsampling %d:%d",
                                          s->sub_x, s->sub_y);
            return AVERROR_PATCHWELCOME;
        }

        s->refreshrefmask = 0xff; // keyframes refresh all reference slots
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            /* regular inter frame: three active references with per-ref
             * sign bias, frame size either inherited from a ref or coded */
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->buf[0] ||
                !s->refs[s->refidx[1]].f->buf[0] ||
                !s->refs[s->refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR,
                       "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            /* compound prediction needs refs pointing in opposite
             * temporal directions (differing sign bias) */
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
     * keep the old cache values since they are still valid. */
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) { // update the deltas this frame?
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    /* all-zero quantizer indices signal lossless (WebP-style) coding */
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) { // update segmentation feature data
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        /* segmentation off: only feature slot 0 is consulted below */
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];

        /* high filter levels apply the ref/mode deltas doubled */
        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to initialize decoder for %dx%d\n", w, h);
        return ret;
    }
    /* minimum tile count so each tile is at most 64 superblocks wide */
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    /* maximum: tiles must stay at least 4 superblocks wide */
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        /* one range coder per tile column */
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) *
                                 s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(avctx, AV_LOG_ERROR,
                   "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    /* keyframes, error-resilient and intra-only frames reset all four
     * stored probability contexts to the spec defaults */
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p =
        s->prob_ctx[1].p =
        s->prob_ctx[2].p =
        s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    /* NOTE(review): the first branch clears coef and eob together in one
     * memset, relying on eob immediately following coef in s->counts —
     * confirm the struct layout in vp9.h before reordering its members. */
    if (s->keyframe || s->intraonly)
        memset(s->counts.coef, 0,
               sizeof(s->counts.coef) + sizeof(s->counts.eob));
    else
        memset(&s->counts, 0, sizeof(s->counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * fw update)? */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        /* note: this 'ref' (reference coef probs) shadows the 'ref'
         * output parameter, which is unused past this point */
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    /* total header bytes consumed: uncompressed part + compressed part */
    return (data2 - data) + size2;
}
749
/*
 * Recursively decode one partition subtree rooted at (row, col), measured in
 * 8x8-block units. bl is the current split level (0 = 64x64 down to
 * BL_8X8); hbs is half the block size at this level in 8x8 units. The
 * partition symbol is read from the range coder with a context derived from
 * the above/left partition history; for blocks that extend past the right
 * or bottom picture edge only the split/non-split choice is coded, the rest
 * is implied. yoff/uvoff are byte offsets into the luma/chroma planes.
 * Returns 0 on success or a negative AVERROR code from ff_vp9_decode_block.
 */
static int decode_subblock(AVCodecContext *avctx, int row, int col,
                           VP9Filter *lflvl,
                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    /* partition context: bit 0 from the row above, bit 1 from the column
     * to the left, each at the granularity of the current level */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    int ret;
    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
                                   : s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;

    if (bl == BL_8X8) {
        /* leaf level: partition only selects the sub-8x8 block layout */
        bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            /* block fully inside the picture: full partition tree coded */
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                break;
            case PARTITION_H:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    yoff += hbs * 8 * f->linesize[0];
                    uvoff += hbs * 4 * f->linesize[1];
                    ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    yoff += hbs * 8;
                    uvoff += hbs * 4;
                    ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                /* recurse into the four quadrants in raster order */
                ret = decode_subblock(avctx, row, col, lflvl,
                                      yoff, uvoff, bl + 1);
                if (!ret) {
                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs,
                                          bl + 1);
                    if (!ret) {
                        yoff += hbs * 8 * f->linesize[0];
                        uvoff += hbs * 4 * f->linesize[1];
                        ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1);
                        if (!ret) {
                            ret = decode_subblock(avctx, row + hbs, col + hbs,
                                                  lflvl, yoff + 8 * hbs,
                                                  uvoff + 4 * hbs, bl + 1);
                        }
                    }
                }
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
                return AVERROR_INVALIDDATA;
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            /* bottom edge: only split vs. horizontal is possible */
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret)
                ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else if (row + hbs < s->rows) {
        /* right edge: only split vs. vertical is possible */
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret) {
                yoff += hbs * 8 * f->linesize[0];
                uvoff += hbs * 4 * f->linesize[1];
                ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                      yoff, uvoff, bl + 1);
            }
        } else {
            bp = PARTITION_V;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else {
        /* bottom-right corner: split is implied, nothing is coded */
        bp = PARTITION_SPLIT;
        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return ret;
}
853
854 static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
855 int row, int col,
856 ptrdiff_t yoff, ptrdiff_t uvoff)
857 {
858 VP9Context *s = avctx->priv_data;
859 AVFrame *f = s->frames[CUR_FRAME].tf.f;
860 uint8_t *dst = f->data[0] + yoff;
861 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
862 uint8_t *lvl = lflvl->level;
863 int y, x, p;
864
865 /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
866 * if you think of them as acting on a 8x8 block max, we can interleave
867 * each v/h within the single x loop, but that only works if we work on
868 * 8 pixel blocks, and we won't always do that (we want at least 16px
869 * to use SSE2 optimizations, perhaps 32 for AVX2). */
870
871 // filter edges between columns, Y plane (e.g. block1 | block2)
872 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
873 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
874 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
875 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
876 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
877 unsigned hm = hm1 | hm2 | hm13 | hm23;
878
879 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
880 if (hm1 & x) {
881 int L = *l, H = L >> 4;
882 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
883
884 if (col || x > 1) {
885 if (hmask1[0] & x) {
886 if (hmask2[0] & x) {
887 av_assert2(l[8] == L);
888 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
889 } else {
890 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
891 }
892 } else if (hm2 & x) {
893 L = l[8];
894 H |= (L >> 4) << 8;
895 E |= s->filter.mblim_lut[L] << 8;
896 I |= s->filter.lim_lut[L] << 8;
897 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
898 [!!(hmask2[1] & x)]
899 [0](ptr, ls_y, E, I, H);
900 } else {
901 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
902 [0](ptr, ls_y, E, I, H);
903 }
904 }
905 } else if (hm2 & x) {
906 int L = l[8], H = L >> 4;
907 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
908
909 if (col || x > 1) {
910 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
911 [0](ptr + 8 * ls_y, ls_y, E, I, H);
912 }
913 }
914 if (hm13 & x) {
915 int L = *l, H = L >> 4;
916 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
917
918 if (hm23 & x) {
919 L = l[8];
920 H |= (L >> 4) << 8;
921 E |= s->filter.mblim_lut[L] << 8;
922 I |= s->filter.lim_lut[L] << 8;
923 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
924 } else {
925 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
926 }
927 } else if (hm23 & x) {
928 int L = l[8], H = L >> 4;
929 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
930
931 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
932 }
933 }
934 }
935
936 // block1
937 // filter edges between rows, Y plane (e.g. ------)
938 // block2
939 dst = f->data[0] + yoff;
940 lvl = lflvl->level;
941 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
942 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
943 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
944
945 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
946 if (row || y) {
947 if (vm & x) {
948 int L = *l, H = L >> 4;
949 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
950
951 if (vmask[0] & x) {
952 if (vmask[0] & (x << 1)) {
953 av_assert2(l[1] == L);
954 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
955 } else {
956 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
957 }
958 } else if (vm & (x << 1)) {
959 L = l[1];
960 H |= (L >> 4) << 8;
961 E |= s->filter.mblim_lut[L] << 8;
962 I |= s->filter.lim_lut[L] << 8;
963 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
964 [!!(vmask[1] & (x << 1))]
965 [1](ptr, ls_y, E, I, H);
966 } else {
967 s->dsp.loop_filter_8[!!(vmask[1] & x)]
968 [1](ptr, ls_y, E, I, H);
969 }
970 } else if (vm & (x << 1)) {
971 int L = l[1], H = L >> 4;
972 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
973
974 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
975 [1](ptr + 8, ls_y, E, I, H);
976 }
977 }
978 if (vm3 & x) {
979 int L = *l, H = L >> 4;
980 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
981
982 if (vm3 & (x << 1)) {
983 L = l[1];
984 H |= (L >> 4) << 8;
985 E |= s->filter.mblim_lut[L] << 8;
986 I |= s->filter.lim_lut[L] << 8;
987 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
988 } else {
989 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
990 }
991 } else if (vm3 & (x << 1)) {
992 int L = l[1], H = L >> 4;
993 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
994
995 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
996 }
997 }
998 }
999
1000 // same principle but for U/V planes
1001 for (p = 0; p < 2; p++) {
1002 lvl = lflvl->level;
1003 dst = f->data[1 + p] + uvoff;
1004 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
1005 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
1006 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
1007 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
1008 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
1009
1010 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
1011 if (col || x > 1) {
1012 if (hm1 & x) {
1013 int L = *l, H = L >> 4;
1014 int E = s->filter.mblim_lut[L];
1015 int I = s->filter.lim_lut[L];
1016
1017 if (hmask1[0] & x) {
1018 if (hmask2[0] & x) {
1019 av_assert2(l[16] == L);
1020 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
1021 } else {
1022 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
1023 }
1024 } else if (hm2 & x) {
1025 L = l[16];
1026 H |= (L >> 4) << 8;
1027 E |= s->filter.mblim_lut[L] << 8;
1028 I |= s->filter.lim_lut[L] << 8;
1029 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
1030 [!!(hmask2[1] & x)]
1031 [0](ptr, ls_uv, E, I, H);
1032 } else {
1033 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
1034 [0](ptr, ls_uv, E, I, H);
1035 }
1036 } else if (hm2 & x) {
1037 int L = l[16], H = L >> 4;
1038 int E = s->filter.mblim_lut[L];
1039 int I = s->filter.lim_lut[L];
1040
1041 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
1042 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
1043 }
1044 }
1045 if (x & 0xAA)
1046 l += 2;
1047 }
1048 }
1049 lvl = lflvl->level;
1050 dst = f->data[1 + p] + uvoff;
1051 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
1052 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
1053 unsigned vm = vmask[0] | vmask[1] | vmask[2];
1054
1055 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
1056 if (row || y) {
1057 if (vm & x) {
1058 int L = *l, H = L >> 4;
1059 int E = s->filter.mblim_lut[L];
1060 int I = s->filter.lim_lut[L];
1061
1062 if (vmask[0] & x) {
1063 if (vmask[0] & (x << 2)) {
1064 av_assert2(l[2] == L);
1065 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
1066 } else {
1067 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
1068 }
1069 } else if (vm & (x << 2)) {
1070 L = l[2];
1071 H |= (L >> 4) << 8;
1072 E |= s->filter.mblim_lut[L] << 8;
1073 I |= s->filter.lim_lut[L] << 8;
1074 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
1075 [!!(vmask[1] & (x << 2))]
1076 [1](ptr, ls_uv, E, I, H);
1077 } else {
1078 s->dsp.loop_filter_8[!!(vmask[1] & x)]
1079 [1](ptr, ls_uv, E, I, H);
1080 }
1081 } else if (vm & (x << 2)) {
1082 int L = l[2], H = L >> 4;
1083 int E = s->filter.mblim_lut[L];
1084 int I = s->filter.lim_lut[L];
1085
1086 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
1087 [1](ptr + 8, ls_uv, E, I, H);
1088 }
1089 }
1090 }
1091 if (y & 1)
1092 lvl += 16;
1093 }
1094 }
1095 }
1096
/**
 * Compute the start/end superblock-row (or column) boundaries of one tile,
 * in units of 8 pixels.
 *
 * @param start  receives the first 8px unit covered by tile 'idx'
 * @param end    receives one past the last 8px unit covered by tile 'idx'
 * @param idx    tile index
 * @param log2_n log2 of the number of tiles in this dimension
 * @param n      total number of superblocks in this dimension
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int last  = ((idx + 1) * n) >> log2_n;

    /* Clamp to the frame size, then scale from superblock units (8px). */
    if (first > n)
        first = n;
    if (last > n)
        last = n;
    *start = first << 3;
    *end   = last << 3;
}
1104
/**
 * Decode one coded VP9 frame from 'data'.
 *
 * Returns 0 on success (with *got_frame set if the frame is visible),
 * or a negative AVERROR code. When the frame header signals
 * "show existing frame", the referenced frame is output directly and
 * no coefficient data is decoded.
 */
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f;
    int ret, tile_row, tile_col, i, ref = -1, row, col;
    ptrdiff_t yoff = 0, uvoff = 0;

    // ret == 0 means "show existing frame": output reference 'ref' as-is.
    // ret > 0 is the number of header bytes consumed.
    ret = decode_frame_header(avctx, data, size, &ref);
    if (ret < 0) {
        return ret;
    } else if (!ret) {
        if (!s->refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR,
                   "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }

        ret = av_frame_ref(frame, s->refs[ref].f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
        return 0;
    }
    data += ret;
    size -= ret;

    // Rotate frame slots: the previous CUR_FRAME becomes LAST_FRAME
    // (inter frames predict from it), then allocate a fresh CUR_FRAME.
    vp9_frame_unref(avctx, &s->frames[LAST_FRAME]);
    if (!s->keyframe && s->frames[CUR_FRAME].tf.f->buf[0]) {
        ret = vp9_frame_ref(&s->frames[LAST_FRAME], &s->frames[CUR_FRAME]);
        if (ret < 0)
            return ret;
    }

    vp9_frame_unref(avctx, &s->frames[CUR_FRAME]);
    ret = vp9_frame_alloc(avctx, &s->frames[CUR_FRAME]);
    if (ret < 0)
        return ret;

    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    // Map the bitstream colorspace code to lavc's enum; code 0 (unknown)
    // and codes > 4 leave avctx->colorspace untouched.
    switch (s->colorspace) {
    case 1: avctx->colorspace = AVCOL_SPC_BT470BG; break;
    case 2: avctx->colorspace = AVCOL_SPC_BT709; break;
    case 3: avctx->colorspace = AVCOL_SPC_SMPTE170M; break;
    case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
    }

    // main tile decode loop
    // Reset the per-column ("above") contexts for the whole frame width.
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly)
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    else
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);

    // Rewind the dynamically allocated block/coefficient/eob cursors to
    // the start of their backing buffers for this frame.
    s->b = s->b_base;
    s->block = s->block_base;
    s->uvblock[0] = s->uvblock_base[0];
    s->uvblock[1] = s->uvblock_base[1];
    s->eob = s->eob_base;
    s->uveob[0] = s->uveob_base[0];
    s->uveob[1] = s->uveob_base[1];

    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);
        // Set up one range decoder per tile column. Every tile except the
        // last is preceded by a 32-bit big-endian size field.
        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->tiling.tile_cols - 1 &&
                tile_row == s->tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size)
                return AVERROR_INVALIDDATA;
            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
                return AVERROR_INVALIDDATA;
            data += tile_size;
            size -= tile_size;
        }

        // Decode superblock rows; tiles in a row are interleaved per sb row
        // so the loopfilter below can run one full row at a time.
        for (row = s->tiling.tile_row_start;
             row < s->tiling.tile_row_end;
             row += 8, yoff += f->linesize[0] * 64,
             uvoff += f->linesize[1] * 32) {
            VP9Filter *lflvl = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                set_tile_offset(&s->tiling.tile_col_start,
                                &s->tiling.tile_col_end,
                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                // "Left" contexts only span one 64x64 superblock row and
                // are reset at each tile-column boundary.
                memset(s->left_partition_ctx, 0, 8);
                memset(s->left_skip_ctx, 0, 8);
                if (s->keyframe || s->intraonly)
                    memset(s->left_mode_ctx, DC_PRED, 16);
                else
                    memset(s->left_mode_ctx, NEARESTMV, 8);
                memset(s->left_y_nnz_ctx, 0, 16);
                memset(s->left_uv_nnz_ctx, 0, 16);
                memset(s->left_segpred_ctx, 0, 8);

                // Swap in this tile's range decoder state, decode one row
                // of 64x64 superblocks, then save the state back.
                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                for (col = s->tiling.tile_col_start;
                     col < s->tiling.tile_col_end;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    memset(lflvl->mask, 0, sizeof(lflvl->mask));

                    if ((ret = decode_subblock(avctx, row, col, lflvl,
                                               yoff2, uvoff2, BL_64X64)) < 0)
                        return ret;
                }
                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff +
                       63 * f->linesize[0],
                       8 * s->cols);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff +
                       31 * f->linesize[1],
                       4 * s->cols);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff +
                       31 * f->linesize[2],
                       4 * s->cols);
            }

            // loopfilter one row
            if (s->filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
                    loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
            }
        }
    }

    // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
    // probability maintenance between frames)
    if (s->refreshctx) {
        if (s->parallelmode) {
            int j, k, l, m;
            // Only the coef probs for tx sizes up to s->txfmmode were
            // updated by the header, so stop copying there.
            for (i = 0; i < 4; i++) {
                for (j = 0; j < 2; j++)
                    for (k = 0; k < 2; k++)
                        for (l = 0; l < 6; l++)
                            for (m = 0; m < 6; m++)
                                memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                       s->prob.coef[i][j][k][l][m], 3);
                if (s->txfmmode == i)
                    break;
            }
            s->prob_ctx[s->framectxid].p = s->prob.p;
        } else {
            ff_vp9_adapt_probs(s);
        }
    }

    // ref frame setup
    // Each set bit in refreshrefmask replaces that reference slot with
    // the newly decoded frame.
    for (i = 0; i < 8; i++)
        if (s->refreshrefmask & (1 << i)) {
            ff_thread_release_buffer(avctx, &s->refs[i]);
            ret = ff_thread_ref_frame(&s->refs[i], &s->frames[CUR_FRAME].tf);
            if (ret < 0)
                return ret;
        }

    // Invisible (alt-ref) frames update references but are never output.
    if (!s->invisible) {
        av_frame_unref(frame);
        ret = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
    }

    return 0;
}
1309
1310 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1311 int *got_frame, AVPacket *avpkt)
1312 {
1313 const uint8_t *data = avpkt->data;
1314 int size = avpkt->size;
1315 int marker, ret;
1316
1317 /* Read superframe index - this is a collection of individual frames
1318 * that together lead to one visible frame */
1319 marker = data[size - 1];
1320 if ((marker & 0xe0) == 0xc0) {
1321 int nbytes = 1 + ((marker >> 3) & 0x3);
1322 int n_frames = 1 + (marker & 0x7);
1323 int idx_sz = 2 + n_frames * nbytes;
1324
1325 if (size >= idx_sz && data[size - idx_sz] == marker) {
1326 const uint8_t *idx = data + size + 1 - idx_sz;
1327
1328 while (n_frames--) {
1329 unsigned sz = AV_RL32(idx);
1330
1331 if (nbytes < 4)
1332 sz &= (1 << (8 * nbytes)) - 1;
1333 idx += nbytes;
1334
1335 if (sz > size) {
1336 av_log(avctx, AV_LOG_ERROR,
1337 "Superframe packet size too big: %u > %d\n",
1338 sz, size);
1339 return AVERROR_INVALIDDATA;
1340 }
1341
1342 ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
1343 if (ret < 0)
1344 return ret;
1345 data += sz;
1346 size -= sz;
1347 }
1348 return avpkt->size;
1349 }
1350 }
1351
1352 /* If we get here, there was no valid superframe index, i.e. this is just
1353 * one whole single frame. Decode it as such from the complete input buf. */
1354 if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
1355 return ret;
1356 return size;
1357 }
1358
1359 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1360 {
1361 VP9Context *s = avctx->priv_data;
1362 int i;
1363
1364 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
1365 vp9_frame_unref(avctx, &s->frames[i]);
1366 av_frame_free(&s->frames[i].tf.f);
1367 }
1368
1369 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1370 ff_thread_release_buffer(avctx, &s->refs[i]);
1371 av_frame_free(&s->refs[i].f);
1372 }
1373
1374 av_freep(&s->c_b);
1375 av_freep(&s->above_partition_ctx);
1376 av_freep(&s->b_base);
1377 av_freep(&s->block_base);
1378
1379 return 0;
1380 }
1381
1382 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1383 {
1384 VP9Context *s = avctx->priv_data;
1385 int i;
1386
1387 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1388
1389 ff_vp9dsp_init(&s->dsp);
1390 ff_videodsp_init(&s->vdsp, 8);
1391
1392 s->frames[0].tf.f = av_frame_alloc();
1393 s->frames[1].tf.f = av_frame_alloc();
1394 if (!s->frames[0].tf.f || !s->frames[1].tf.f)
1395 goto fail;
1396
1397 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1398 s->refs[i].f = av_frame_alloc();
1399 if (!s->refs[i].f)
1400 goto fail;
1401 }
1402
1403 s->filter.sharpness = -1;
1404
1405 return 0;
1406 fail:
1407 vp9_decode_free(avctx);
1408 return AVERROR(ENOMEM);
1409 }
1410
/* Public codec descriptor. Only AV_CODEC_CAP_DR1 is advertised; frame
 * buffers are obtained through the thread-safe get_buffer path (see
 * vp9_frame_alloc). */
AVCodec ff_vp9_decoder = {
    .name           = "vp9",
    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP9,
    .priv_data_size = sizeof(VP9Context),
    .init           = vp9_decode_init,
    .decode         = vp9_decode_packet,
    .flush          = vp9_decode_flush,
    .close          = vp9_decode_free,
    .capabilities   = AV_CODEC_CAP_DR1,
};