doc: update api changes with the right commit hashes
[libav.git] / libavcodec / vp8.c
CommitLineData
32f3c541 1/*
3b636f21
DC
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
13a1304b 6 * Copyright (C) 2010 Jason Garrett-Glaser
3b636f21 7 *
2912e87a 8 * This file is part of Libav.
3b636f21 9 *
2912e87a 10 * Libav is free software; you can redistribute it and/or
3b636f21
DC
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
2912e87a 15 * Libav is distributed in the hope that it will be useful,
3b636f21
DC
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
2912e87a 21 * License along with Libav; if not, write to the Free Software
3b636f21
DC
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
737eb597 25#include "libavutil/imgutils.h"
3b636f21 26#include "avcodec.h"
f3a29b75 27#include "internal.h"
bcf4568f 28#include "vp8.h"
3b636f21 29#include "vp8data.h"
3b636f21 30#include "rectangle.h"
4773d904 31#include "thread.h"
3b636f21 32
a7878c9f
MR
33#if ARCH_ARM
34# include "arm/vp8.h"
35#endif
36
56535793
RB
/**
 * Free the per-dimension scratch buffers allocated by update_dimensions()
 * (macroblock array, filter strengths, prediction context rows, edge
 * emulation buffer, top border).
 */
static void free_buffers(VP8Context *s)
{
    av_freep(&s->macroblocks_base);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);

    /* s->macroblocks points into macroblocks_base (offset by one entry),
     * so it is dangling once the base allocation is freed. */
    s->macroblocks = NULL;
}
48
ce42a048
RB
/**
 * Get a frame buffer via the frame-threading allocator and attach a
 * segmentation map (one byte per macroblock) to it.
 *
 * A map queued by vp8_release_frame() is reused when available and still
 * valid for the current dimensions; otherwise a new zeroed map is allocated.
 *
 * @return 0 on success, negative AVERROR on failure
 */
static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
        return ret;
    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
        /* reuse a cached segmentation map instead of reallocating */
        f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
    } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }
    return 0;
}
62
/**
 * Release a frame buffer and dispose of its segmentation map.
 *
 * @param prefer_delayed_free queue the map for later freeing/reuse instead
 *                            of freeing immediately (other threads may still
 *                            be reading it)
 * @param can_direct_free     immediate freeing is safe (no worker threads
 *                            active); used when the queue is full
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
{
    if (f->ref_index[0]) {
        if (prefer_delayed_free) {
            /* Upon a size change, we want to free the maps but other threads may still
             * be using them, so queue them. Upon a seek, all threads are inactive so
             * we want to cache one to prevent re-allocation in the next decoding
             * iteration, but the rest we can free directly. */
            int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
            if (s->num_maps_to_be_freed < max_queued_maps) {
                s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
            } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
                av_free(f->ref_index[0]);
            } /* else: MEMLEAK (should never happen, but better that than crash) */
            f->ref_index[0] = NULL;
        } else /* vp8_decode_free() */ {
            av_free(f->ref_index[0]);
        }
    }
    ff_thread_release_buffer(s->avctx, f);
}
84
bfa0f965
RB
/**
 * Release all reference frames and optionally the per-dimension buffers.
 *
 * @param prefer_delayed_free passed through to vp8_release_frame()
 * @param can_direct_free     passed through to vp8_release_frame()
 * @param free_mem            also free the scratch buffers and invalidate
 *                            queued segmentation maps (used on size change)
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx,
                                  int prefer_delayed_free, int can_direct_free, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    /* frame-thread copies share the buffers with the main context, so only
     * the original context releases them */
    if (!avctx->internal->is_copy) {
        for (i = 0; i < 5; i++)
            if (s->frames[i].data[0])
                vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem) {
        free_buffers(s);
        /* queued maps were sized for the old dimensions; do not reuse them */
        s->maps_are_invalid = 1;
    }
}
103
/* Codec flush callback (e.g. on seek): delayed-free preferred, direct free allowed. */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 1, 1, 0);
}
108
/**
 * (Re)allocate all per-dimension buffers for the given frame size.
 * On an actual size change the old frames and buffers are flushed first.
 *
 * @return 0 on success, negative AVERROR on invalid size or alloc failure
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        /* delayed-free the old maps: worker threads may still read them */
        vp8_decode_flush_impl(s->avctx, 1, 0, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    /* extra macroblock entries provide top/left prediction context */
    s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
    s->filter_strength        = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    s->top_nnz                = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border             = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));

    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
        !s->top_nnz || !s->top_border)
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
138
/* Parse the segmentation header: optional per-segment quantizer and filter
 * deltas, and (if the map is refreshed) the segment-id tree probabilities. */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
159
/* Parse loop-filter delta updates: per-reference-frame and per-mode
 * adjustments, each coded as a 6-bit magnitude plus a separate sign bit. */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
183
/**
 * Set up the DCT coefficient partitions: read the partition count and
 * initialize one range decoder per partition.
 *
 * @return 0 on success, -1 if the coded partition sizes exceed the buffer
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    /* sizes of all partitions except the last are coded as 24-bit LE values */
    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    /* the last partition occupies the remainder of the buffer */
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
209
/* Parse the quantizer header and build the per-segment dequant factors:
 * a base AC index plus signed deltas for the other coefficient classes. */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;   // deltas are relative to the frame AC index
        } else
            base_qi = yac_qi;

        /* quant indices are clamped to 0..127 before table lookup */
        s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
        s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        /* lower/upper bounds on the Y2 AC and chroma DC factors */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1],  8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
241
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 *
 * @param update flag: reference is refreshed with the current frame
 * @param ref    the reference being resolved (GOLDEN or GOLDEN2/altref)
 * @return the frame this reference should be updated from
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
270
/* Read the golden/altref refresh flags and resolve what each reference
 * will be updated from (see ref_to_update()). */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
281
/**
 * Parse the VP8 frame header: the uncompressed part (keyframe flag,
 * profile, visibility, dimensions) followed by the first compressed
 * partition (segmentation, loop filter, token partitions, quantizers,
 * reference updates and probability updates).
 *
 * @return 0 on success, negative AVERROR on invalid bitstream
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* 3-byte uncompressed frame tag */
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        /* keyframes carry a start code plus 14-bit width/height with
         * 2-bit upscaling factors */
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        /* keyframes reset all references and probabilities to defaults */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    // token probability updates; each update applies to every band
    // position that shares the same probability entry
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
418
7634771e 419static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
3b636f21 420{
7634771e
JGG
421 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
422 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
3b636f21
DC
423}
424
3b636f21
DC
/**
 * Motion vector coding, 17.1.
 * Reads one MV component (magnitude plus optional sign) from the range coder.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        // large MV: 10 magnitude bits, low 3 coded LSB-first, the rest MSB-first
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        // bit 3 is implicit or explicitly coded depending on the upper bits
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    // sign bit is only present for nonzero magnitudes
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
455
414ac27d
JGG
456static av_always_inline
457const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
3b636f21 458{
7bf254c4
JGG
459 if (left == top)
460 return vp8_submv_prob[4-!!left];
461 if (!top)
3b636f21 462 return vp8_submv_prob[2];
7bf254c4 463 return vp8_submv_prob[1-!!left];
3b636f21
DC
464}
465
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb  = &mb[2];
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top  = vp8_mbsplits[top_mb->partitioning],
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv  = top_mb->bmv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    // partition type tree: 4x4 / 8x8 / 16x8-or-8x16
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        // fetch the neighbouring sub-MVs as raw 32-bit words for fast compare
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    // new MV, coded relative to the macroblock MV
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    // zero MV
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                // copy from the above sub-partition
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            // copy from the left sub-partition
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
533
/**
 * Inter macroblock MV decoding: survey the top, left and top-left
 * neighbours to build the nearest/near candidate MVs and their weights,
 * then read the MV mode and, for MV/SPLIT modes, the vector(s).
 */
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
{
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                /* only keep distinct candidates; weight top/left double */\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx] += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* split-MV context: how many neighbours are themselves split */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
623
/**
 * Read the 16 intra 4x4 (B) prediction modes of a MODE_I4x4 macroblock.
 * On keyframes the modes are coded contextually from the left/top
 * neighbouring modes; on inter frames a single fixed table is used.
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
                           int mb_x, int keyframe)
{
    uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
    if (keyframe) {
        int x, y;
        uint8_t* const top  = s->intra4x4_pred_mode_top + 4 * mb_x;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                // update the left/top context with the decoded mode
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
648
/**
 * Decode per-macroblock mode information: segment id, skip flag, the
 * intra/inter decision, prediction modes and (for inter MBs) reference
 * frame and motion vectors.
 *
 * @param segment in/out segment id for this MB position; updated when the
 *                segmentation map is refreshed this frame
 * @param ref     optional previously stored segment id for this position
 *                (NULL if unavailable) — presumably the co-located entry of
 *                the prior map; confirm against the caller
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    s->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb_x, 1);
        } else {
            // replicate the 16x16 mode into the 4x4 context for neighbours
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb_x, 0);

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
699
#ifndef decode_block_coeffs_internal
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob probability row for the first token
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    /* work on a local copy of the range coder, written back once at the end */
    VP56RangeCoder c = *r;
    /* the caller already consumed the first (non-EOB) token's EOB branch */
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        // read sign, dequantize (qmul[0] for DC, qmul[1] for AC) and store
        // at the zigzag position
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif
3b636f21 765
3c432e11
DB
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    // fast path: the very first token is EOB, i.e. the block is empty
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}
787
/**
 * Decode all coefficient blocks of one macroblock: the luma DC (Y2) block
 * with its inverse WHT when applicable, 16 luma blocks and 8 chroma
 * blocks, updating the left/top non-zero-count contexts as it goes.
 *
 * @param t_nnz top non-zero-count context (per column, [8] = Y2)
 * @param l_nnz left non-zero-count context (per row, [8] = Y2)
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = s->segment;
    int block_dc = 0;

    // modes other than I4x4/SPLIT code luma DC in a separate Y2 block
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
        }
        // luma blocks then start at coeff 1 and use the "no DC" context
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            s->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                s->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
848
9ac831c2
DC
/**
 * Save the bottom pixel row of the reconstructed macroblock into the
 * top-border buffer (for use by the row below); chroma rows are skipped
 * when simple != 0.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);
    if (!simple) {
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
    }
}
859
/**
 * Exchange (xchg != 0) or copy (xchg == 0) 8-byte groups between the saved
 * top-border buffer and the pixel rows just above the current macroblock;
 * some groups are always swapped (constant 1) regardless of the argument.
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}
890
414ac27d 891static av_always_inline
ee555de7
RB
892int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
893{
894 if (!mb_x) {
895 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
896 } else {
897 return mb_y ? mode : LEFT_DC_PRED8x8;
898 }
899}
900
901static av_always_inline
902int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
903{
904 if (!mb_x) {
905 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
906 } else {
907 return mb_y ? mode : HOR_PRED8x8;
908 }
909}
910
911static av_always_inline
912int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
3b636f21
DC
913{
914 if (mode == DC_PRED8x8) {
ee555de7
RB
915 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
916 } else {
917 return mode;
918 }
919}
920
921static av_always_inline
922int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
923{
924 switch (mode) {
925 case DC_PRED8x8:
926 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
927 case VERT_PRED8x8:
928 return !mb_y ? DC_127_PRED8x8 : mode;
929 case HOR_PRED8x8:
930 return !mb_x ? DC_129_PRED8x8 : mode;
931 case PLANE_PRED8x8 /*TM*/:
932 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
933 }
934 return mode;
935}
936
937static av_always_inline
938int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
939{
940 if (!mb_x) {
941 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
942 } else {
943 return mb_y ? mode : HOR_VP8_PRED;
944 }
945}
946
947static av_always_inline
948int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
949{
950 switch (mode) {
951 case VERT_PRED:
952 if (!mb_x && mb_y) {
953 *copy_buf = 1;
954 return mode;
955 }
956 /* fall-through */
957 case DIAG_DOWN_LEFT_PRED:
958 case VERT_LEFT_PRED:
959 return !mb_y ? DC_127_PRED : mode;
960 case HOR_PRED:
961 if (!mb_y) {
962 *copy_buf = 1;
963 return mode;
a71abb71 964 }
ee555de7
RB
965 /* fall-through */
966 case HOR_UP_PRED:
967 return !mb_x ? DC_129_PRED : mode;
968 case TM_VP8_PRED:
969 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
970 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
971 case DIAG_DOWN_RIGHT_PRED:
972 case VERT_RIGHT_PRED:
973 case HOR_DOWN_PRED:
974 if (!mb_y || !mb_x)
975 *copy_buf = 1;
976 return mode;
3b636f21
DC
977 }
978 return mode;
979}
980
414ac27d
JGG
981static av_always_inline
982void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
d2840fa4 983 int mb_x, int mb_y)
3b636f21 984{
ee555de7 985 AVCodecContext *avctx = s->avctx;
bb591566
MR
986 int x, y, mode, nnz;
987 uint32_t tr;
3b636f21 988
9ac831c2
DC
989 // for the first row, we need to run xchg_mb_border to init the top edge to 127
990 // otherwise, skip it if we aren't going to deblock
ee555de7 991 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
9ac831c2
DC
992 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
993 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
994 s->filter.simple, 1);
995
3b636f21 996 if (mb->mode < MODE_I4x4) {
ee555de7
RB
997 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
998 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
999 } else {
1000 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1001 }
3b636f21
DC
1002 s->hpc.pred16x16[mode](dst[0], s->linesize);
1003 } else {
1004 uint8_t *ptr = dst[0];
d2840fa4 1005 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
ee555de7 1006 uint8_t tr_top[4] = { 127, 127, 127, 127 };
3b636f21
DC
1007
1008 // all blocks on the right edge of the macroblock use bottom edge
1009 // the top macroblock for their topright edge
1010 uint8_t *tr_right = ptr - s->linesize + 16;
1011
1012 // if we're on the right edge of the frame, said edge is extended
1013 // from the top macroblock
7148da48
RB
1014 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1015 mb_x == s->mb_width-1) {
bb591566 1016 tr = tr_right[-1]*0x01010101u;
3b636f21
DC
1017 tr_right = (uint8_t *)&tr;
1018 }
1019
b74f70d6
JGG
1020 if (mb->skip)
1021 AV_ZERO128(s->non_zero_count_cache);
1022
3b636f21
DC
1023 for (y = 0; y < 4; y++) {
1024 uint8_t *topright = ptr + 4 - s->linesize;
1025 for (x = 0; x < 4; x++) {
ee555de7
RB
1026 int copy = 0, linesize = s->linesize;
1027 uint8_t *dst = ptr+4*x;
1028 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1029
1030 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1031 topright = tr_top;
1032 } else if (x == 3)
3b636f21
DC
1033 topright = tr_right;
1034
ee555de7
RB
1035 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1036 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1037 if (copy) {
1038 dst = copy_dst + 12;
1039 linesize = 8;
1040 if (!(mb_y + y)) {
1041 copy_dst[3] = 127U;
9d4bdcb7 1042 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
ee555de7 1043 } else {
9d4bdcb7 1044 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
ee555de7
RB
1045 if (!(mb_x + x)) {
1046 copy_dst[3] = 129U;
1047 } else {
1048 copy_dst[3] = ptr[4*x-s->linesize-1];
1049 }
1050 }
1051 if (!(mb_x + x)) {
1052 copy_dst[11] =
1053 copy_dst[19] =
1054 copy_dst[27] =
1055 copy_dst[35] = 129U;
1056 } else {
1057 copy_dst[11] = ptr[4*x -1];
1058 copy_dst[19] = ptr[4*x+s->linesize -1];
1059 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1060 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1061 }
1062 }
1063 } else {
1064 mode = intra4x4[x];
1065 }
1066 s->hpc.pred4x4[mode](dst, topright, linesize);
1067 if (copy) {
9d4bdcb7
RB
1068 AV_COPY32(ptr+4*x , copy_dst+12);
1069 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1070 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1071 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
ee555de7 1072 }
3b636f21
DC
1073
1074 nnz = s->non_zero_count_cache[y][x];
1075 if (nnz) {
1076 if (nnz == 1)
1077 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1078 else
1079 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1080 }
1081 topright += 4;
1082 }
1083
1084 ptr += 4*s->linesize;
d2840fa4 1085 intra4x4 += 4;
3b636f21
DC
1086 }
1087 }
1088
ee555de7
RB
1089 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1090 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1091 } else {
1092 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1093 }
3b636f21
DC
1094 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1095 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
9ac831c2 1096
ee555de7 1097 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
9ac831c2
DC
1098 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1099 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1100 s->filter.simple, 0);
3b636f21
DC
1101}
1102
/* Per-subpel-position MC metadata, indexed by the fractional part (0-7) of
 * a motion vector component:
 *   row 0: nr. of left/top extra pixels, doubles as the MC function index
 *   row 1: total nr. of extra source pixels required by the filter
 *   row 2: nr. of right/bottom extra pixels */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1109
3b636f21 1110/**
3c432e11 1111 * luma MC function
3b636f21
DC
1112 *
1113 * @param s VP8 decoding context
3b636f21 1114 * @param dst target buffer for block data at block position
24c9baba 1115 * @param ref reference picture buffer at origin (0, 0)
3b636f21
DC
1116 * @param mv motion vector (relative to block position) to get pixel data from
1117 * @param x_off horizontal position of block from origin (0, 0)
1118 * @param y_off vertical position of block from origin (0, 0)
1119 * @param block_w width of block (16, 8 or 4)
1120 * @param block_h height of block (always same as block_w)
1121 * @param width width of src/dst plane data
1122 * @param height height of src/dst plane data
1123 * @param linesize size of a single line of plane data, including padding
e394953e 1124 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
3b636f21 1125 */
414ac27d 1126static av_always_inline
4773d904 1127void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
64233e70
JGG
1128 int x_off, int y_off, int block_w, int block_h,
1129 int width, int height, int linesize,
1130 vp8_mc_func mc_func[3][3])
3b636f21 1131{
4773d904
RB
1132 uint8_t *src = ref->data[0];
1133
c0498b30 1134 if (AV_RN32A(mv)) {
64233e70
JGG
1135
1136 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1137 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1138
1139 x_off += mv->x >> 2;
1140 y_off += mv->y >> 2;
c0498b30
JGG
1141
1142 // edge emulation
4773d904 1143 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
c0498b30 1144 src += y_off * linesize + x_off;
64233e70
JGG
1145 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1146 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
2e279598 1147 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
64233e70
JGG
1148 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1149 x_off - mx_idx, y_off - my_idx, width, height);
44002d83 1150 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
c0498b30
JGG
1151 }
1152 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
4773d904
RB
1153 } else {
1154 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
c0498b30 1155 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
4773d904 1156 }
3b636f21
DC
1157}
1158
3c432e11
DB
1159/**
1160 * chroma MC function
1161 *
1162 * @param s VP8 decoding context
1163 * @param dst1 target buffer for block data at block position (U plane)
1164 * @param dst2 target buffer for block data at block position (V plane)
1165 * @param ref reference picture buffer at origin (0, 0)
1166 * @param mv motion vector (relative to block position) to get pixel data from
1167 * @param x_off horizontal position of block from origin (0, 0)
1168 * @param y_off vertical position of block from origin (0, 0)
1169 * @param block_w width of block (16, 8 or 4)
1170 * @param block_h height of block (always same as block_w)
1171 * @param width width of src/dst plane data
1172 * @param height height of src/dst plane data
1173 * @param linesize size of a single line of plane data, including padding
1174 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1175 */
414ac27d 1176static av_always_inline
4773d904
RB
1177void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1178 const VP56mv *mv, int x_off, int y_off,
64233e70
JGG
1179 int block_w, int block_h, int width, int height, int linesize,
1180 vp8_mc_func mc_func[3][3])
1181{
4773d904
RB
1182 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1183
64233e70
JGG
1184 if (AV_RN32A(mv)) {
1185 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1186 int my = mv->y&7, my_idx = subpel_idx[0][my];
1187
1188 x_off += mv->x >> 3;
1189 y_off += mv->y >> 3;
1190
1191 // edge emulation
1192 src1 += y_off * linesize + x_off;
1193 src2 += y_off * linesize + x_off;
4773d904 1194 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
64233e70
JGG
1195 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1196 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1197 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1198 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1199 x_off - mx_idx, y_off - my_idx, width, height);
1200 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1201 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1202
1203 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1204 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1205 x_off - mx_idx, y_off - my_idx, width, height);
1206 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1207 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1208 } else {
1209 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1210 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1211 }
1212 } else {
4773d904 1213 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
64233e70
JGG
1214 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1215 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1216 }
1217}
1218
1219static av_always_inline
414ac27d
JGG
1220void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1221 AVFrame *ref_frame, int x_off, int y_off,
1222 int bx_off, int by_off,
1223 int block_w, int block_h,
1224 int width, int height, VP56mv *mv)
7c4dcf81
RB
1225{
1226 VP56mv uvmv = *mv;
1227
1228 /* Y */
64233e70 1229 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
4773d904 1230 ref_frame, mv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1231 block_w, block_h, width, height, s->linesize,
1232 s->put_pixels_tab[block_w == 8]);
7c4dcf81
RB
1233
1234 /* U/V */
1235 if (s->profile == 3) {
1236 uvmv.x &= ~7;
1237 uvmv.y &= ~7;
1238 }
1239 x_off >>= 1; y_off >>= 1;
1240 bx_off >>= 1; by_off >>= 1;
1241 width >>= 1; height >>= 1;
1242 block_w >>= 1; block_h >>= 1;
64233e70 1243 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
4773d904
RB
1244 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1245 &uvmv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1246 block_w, block_h, width, height, s->uvlinesize,
1247 s->put_pixels_tab[1 + (block_w == 4)]);
7c4dcf81
RB
1248}
1249
d864dee8
JGG
1250/* Fetch pixels for estimated mv 4 macroblocks ahead.
1251 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
414ac27d 1252static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
d864dee8 1253{
ef38842f
JGG
1254 /* Don't prefetch refs that haven't been used very often this frame. */
1255 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
c4211046 1256 int x_off = mb_x << 4, y_off = mb_y << 4;
7e13022a
JGG
1257 int mx = (mb->mv.x>>2) + x_off + 8;
1258 int my = (mb->mv.y>>2) + y_off;
c4211046
JGG
1259 uint8_t **src= s->framep[ref]->data;
1260 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
4773d904
RB
1261 /* For threading, a ff_thread_await_progress here might be useful, but
1262 * it actually slows down the decoder. Since a bad prefetch doesn't
1263 * generate bad decoder output, we don't run it here. */
c4211046
JGG
1264 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1265 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1266 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1267 }
d864dee8
JGG
1268}
1269
3b636f21
DC
1270/**
1271 * Apply motion vectors to prediction buffer, chapter 18.
1272 */
414ac27d
JGG
1273static av_always_inline
1274void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1275 int mb_x, int mb_y)
3b636f21
DC
1276{
1277 int x_off = mb_x << 4, y_off = mb_y << 4;
1278 int width = 16*s->mb_width, height = 16*s->mb_height;
d292c345
JGG
1279 AVFrame *ref = s->framep[mb->ref_frame];
1280 VP56mv *bmv = mb->bmv;
3b636f21 1281
73be29b0
JGG
1282 switch (mb->partitioning) {
1283 case VP8_SPLITMVMODE_NONE:
d292c345 1284 vp8_mc_part(s, dst, ref, x_off, y_off,
7c4dcf81 1285 0, 0, 16, 16, width, height, &mb->mv);
73be29b0 1286 break;
7c4dcf81 1287 case VP8_SPLITMVMODE_4x4: {
3b636f21 1288 int x, y;
7c4dcf81 1289 VP56mv uvmv;
3b636f21
DC
1290
1291 /* Y */
1292 for (y = 0; y < 4; y++) {
1293 for (x = 0; x < 4; x++) {
64233e70 1294 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
4773d904 1295 ref, &bmv[4*y + x],
64233e70
JGG
1296 4*x + x_off, 4*y + y_off, 4, 4,
1297 width, height, s->linesize,
1298 s->put_pixels_tab[2]);
3b636f21
DC
1299 }
1300 }
1301
1302 /* U/V */
1303 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1304 for (y = 0; y < 2; y++) {
1305 for (x = 0; x < 2; x++) {
1306 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1307 mb->bmv[ 2*y * 4 + 2*x+1].x +
1308 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1309 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1310 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1311 mb->bmv[ 2*y * 4 + 2*x+1].y +
1312 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1313 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
8f910a56
SG
1314 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1315 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
3b636f21
DC
1316 if (s->profile == 3) {
1317 uvmv.x &= ~7;
1318 uvmv.y &= ~7;
1319 }
64233e70 1320 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
4773d904 1321 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
64233e70
JGG
1322 4*x + x_off, 4*y + y_off, 4, 4,
1323 width, height, s->uvlinesize,
1324 s->put_pixels_tab[2]);
3b636f21
DC
1325 }
1326 }
7c4dcf81
RB
1327 break;
1328 }
1329 case VP8_SPLITMVMODE_16x8:
d292c345
JGG
1330 vp8_mc_part(s, dst, ref, x_off, y_off,
1331 0, 0, 16, 8, width, height, &bmv[0]);
1332 vp8_mc_part(s, dst, ref, x_off, y_off,
1333 0, 8, 16, 8, width, height, &bmv[1]);
7c4dcf81
RB
1334 break;
1335 case VP8_SPLITMVMODE_8x16:
d292c345
JGG
1336 vp8_mc_part(s, dst, ref, x_off, y_off,
1337 0, 0, 8, 16, width, height, &bmv[0]);
1338 vp8_mc_part(s, dst, ref, x_off, y_off,
1339 8, 0, 8, 16, width, height, &bmv[1]);
7c4dcf81
RB
1340 break;
1341 case VP8_SPLITMVMODE_8x8:
d292c345
JGG
1342 vp8_mc_part(s, dst, ref, x_off, y_off,
1343 0, 0, 8, 8, width, height, &bmv[0]);
1344 vp8_mc_part(s, dst, ref, x_off, y_off,
1345 8, 0, 8, 8, width, height, &bmv[1]);
1346 vp8_mc_part(s, dst, ref, x_off, y_off,
1347 0, 8, 8, 8, width, height, &bmv[2]);
1348 vp8_mc_part(s, dst, ref, x_off, y_off,
1349 8, 8, 8, 8, width, height, &bmv[3]);
7c4dcf81 1350 break;
3b636f21
DC
1351 }
1352}
1353
414ac27d 1354static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
3b636f21 1355{
3df56f41 1356 int x, y, ch;
3b636f21 1357
8a467b2d
JGG
1358 if (mb->mode != MODE_I4x4) {
1359 uint8_t *y_dst = dst[0];
3b636f21 1360 for (y = 0; y < 4; y++) {
62457f90 1361 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
3df56f41
JGG
1362 if (nnz4) {
1363 if (nnz4&~0x01010101) {
8a467b2d 1364 for (x = 0; x < 4; x++) {
62457f90
JGG
1365 if ((uint8_t)nnz4 == 1)
1366 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1367 else if((uint8_t)nnz4 > 1)
1368 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1369 nnz4 >>= 8;
1370 if (!nnz4)
1371 break;
8a467b2d
JGG
1372 }
1373 } else {
3ae079a3 1374 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
3b636f21
DC
1375 }
1376 }
1377 y_dst += 4*s->linesize;
1378 }
8a467b2d 1379 }
3b636f21 1380
8a467b2d 1381 for (ch = 0; ch < 2; ch++) {
62457f90 1382 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
3ae079a3 1383 if (nnz4) {
8a467b2d 1384 uint8_t *ch_dst = dst[1+ch];
3ae079a3
JGG
1385 if (nnz4&~0x01010101) {
1386 for (y = 0; y < 2; y++) {
1387 for (x = 0; x < 2; x++) {
62457f90
JGG
1388 if ((uint8_t)nnz4 == 1)
1389 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1390 else if((uint8_t)nnz4 > 1)
1391 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1392 nnz4 >>= 8;
1393 if (!nnz4)
628b48db 1394 goto chroma_idct_end;
8a467b2d 1395 }
3ae079a3 1396 ch_dst += 4*s->uvlinesize;
8a467b2d 1397 }
3ae079a3
JGG
1398 } else {
1399 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
3b636f21
DC
1400 }
1401 }
628b48db 1402chroma_idct_end: ;
3b636f21
DC
1403 }
1404}
1405
414ac27d 1406static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
3b636f21
DC
1407{
1408 int interior_limit, filter_level;
1409
1410 if (s->segmentation.enabled) {
b9a7186b 1411 filter_level = s->segmentation.filter_level[s->segment];
3b636f21
DC
1412 if (!s->segmentation.absolute_vals)
1413 filter_level += s->filter.level;
1414 } else
1415 filter_level = s->filter.level;
1416
1417 if (s->lf_delta.enabled) {
1418 filter_level += s->lf_delta.ref[mb->ref_frame];
dd18c9a0 1419 filter_level += s->lf_delta.mode[mb->mode];
3b636f21 1420 }
a1b227bb 1421
1550f45a 1422 filter_level = av_clip_uintp2(filter_level, 6);
3b636f21
DC
1423
1424 interior_limit = filter_level;
1425 if (s->filter.sharpness) {
8a2c99b4 1426 interior_limit >>= (s->filter.sharpness + 3) >> 2;
3b636f21
DC
1427 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1428 }
1429 interior_limit = FFMAX(interior_limit, 1);
1430
968570d6
JGG
1431 f->filter_level = filter_level;
1432 f->inner_limit = interior_limit;
c55e0d34 1433 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
3b636f21
DC
1434}
1435
414ac27d 1436static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1437{
968570d6
JGG
1438 int mbedge_lim, bedge_lim, hev_thresh;
1439 int filter_level = f->filter_level;
1440 int inner_limit = f->inner_limit;
c55e0d34 1441 int inner_filter = f->inner_filter;
145d3186
JGG
1442 int linesize = s->linesize;
1443 int uvlinesize = s->uvlinesize;
79dec154
JGG
1444 static const uint8_t hev_thresh_lut[2][64] = {
1445 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1446 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1447 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1448 3, 3, 3, 3 },
1449 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1450 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1451 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1452 2, 2, 2, 2 }
1453 };
3b636f21 1454
3b636f21
DC
1455 if (!filter_level)
1456 return;
1457
79dec154
JGG
1458 bedge_lim = 2*filter_level + inner_limit;
1459 mbedge_lim = bedge_lim + 4;
968570d6 1460
79dec154 1461 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
5245c04d 1462
3b636f21 1463 if (mb_x) {
145d3186 1464 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
3facfc99 1465 mbedge_lim, inner_limit, hev_thresh);
145d3186 1466 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1467 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1468 }
1469
c55e0d34 1470 if (inner_filter) {
145d3186
JGG
1471 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1472 inner_limit, hev_thresh);
1473 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1474 inner_limit, hev_thresh);
1475 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1476 inner_limit, hev_thresh);
1477 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1478 uvlinesize, bedge_lim,
1479 inner_limit, hev_thresh);
3b636f21
DC
1480 }
1481
1482 if (mb_y) {
145d3186 1483 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
3facfc99 1484 mbedge_lim, inner_limit, hev_thresh);
145d3186 1485 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1486 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1487 }
1488
c55e0d34 1489 if (inner_filter) {
145d3186
JGG
1490 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1491 linesize, bedge_lim,
1492 inner_limit, hev_thresh);
1493 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1494 linesize, bedge_lim,
1495 inner_limit, hev_thresh);
1496 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1497 linesize, bedge_lim,
1498 inner_limit, hev_thresh);
1499 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1500 dst[2] + 4 * uvlinesize,
1501 uvlinesize, bedge_lim,
3facfc99 1502 inner_limit, hev_thresh);
3b636f21
DC
1503 }
1504}
1505
414ac27d 1506static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1507{
968570d6
JGG
1508 int mbedge_lim, bedge_lim;
1509 int filter_level = f->filter_level;
1510 int inner_limit = f->inner_limit;
c55e0d34 1511 int inner_filter = f->inner_filter;
145d3186 1512 int linesize = s->linesize;
3b636f21 1513
3b636f21
DC
1514 if (!filter_level)
1515 return;
1516
79dec154
JGG
1517 bedge_lim = 2*filter_level + inner_limit;
1518 mbedge_lim = bedge_lim + 4;
3b636f21
DC
1519
1520 if (mb_x)
145d3186 1521 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1522 if (inner_filter) {
145d3186
JGG
1523 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1524 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1525 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
3b636f21
DC
1526 }
1527
1528 if (mb_y)
145d3186 1529 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1530 if (inner_filter) {
145d3186
JGG
1531 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1532 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1533 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
3b636f21
DC
1534 }
1535}
1536
4773d904 1537static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
3b636f21 1538{
968570d6 1539 VP8FilterStrength *f = s->filter_strength;
3b636f21 1540 uint8_t *dst[3] = {
4773d904
RB
1541 curframe->data[0] + 16*mb_y*s->linesize,
1542 curframe->data[1] + 8*mb_y*s->uvlinesize,
1543 curframe->data[2] + 8*mb_y*s->uvlinesize
3b636f21
DC
1544 };
1545 int mb_x;
1546
1547 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1548 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
c55e0d34 1549 filter_mb(s, dst, f++, mb_x, mb_y);
3b636f21
DC
1550 dst[0] += 16;
1551 dst[1] += 8;
1552 dst[2] += 8;
1553 }
1554}
1555
4773d904 1556static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
3b636f21 1557{
968570d6 1558 VP8FilterStrength *f = s->filter_strength;
4773d904 1559 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
3b636f21
DC
1560 int mb_x;
1561
1562 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1563 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
c55e0d34 1564 filter_mb_simple(s, dst, f++, mb_x, mb_y);
3b636f21
DC
1565 dst += 16;
1566 }
1567}
1568
ce42a048
RB
1569static void release_queued_segmaps(VP8Context *s, int is_close)
1570{
1571 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1572 while (s->num_maps_to_be_freed > leave_behind)
1573 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1574 s->maps_are_invalid = 0;
1575}
1576
3b636f21
DC
1577static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1578 AVPacket *avpkt)
1579{
1580 VP8Context *s = avctx->priv_data;
1581 int ret, mb_x, mb_y, i, y, referenced;
1582 enum AVDiscard skip_thresh;
e02dec25 1583 AVFrame *av_uninit(curframe), *prev_frame;
3b636f21 1584
ce42a048
RB
1585 release_queued_segmaps(s, 0);
1586
3b636f21 1587 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
fb90785e 1588 goto err;
3b636f21 1589
e02dec25
AC
1590 prev_frame = s->framep[VP56_FRAME_CURRENT];
1591
3b636f21
DC
1592 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1593 || s->update_altref == VP56_FRAME_CURRENT;
1594
1595 skip_thresh = !referenced ? AVDISCARD_NONREF :
1596 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1597
1598 if (avctx->skip_frame >= skip_thresh) {
1599 s->invisible = 1;
fb90785e 1600 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
3b636f21
DC
1601 goto skip_decode;
1602 }
9ac831c2 1603 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
3b636f21 1604
4773d904
RB
1605 // release no longer referenced frames
1606 for (i = 0; i < 5; i++)
1607 if (s->frames[i].data[0] &&
1608 &s->frames[i] != prev_frame &&
1609 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1610 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1611 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
bfa0f965 1612 vp8_release_frame(s, &s->frames[i], 1, 0);
4773d904
RB
1613
1614 // find a free buffer
1615 for (i = 0; i < 5; i++)
1616 if (&s->frames[i] != prev_frame &&
1617 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
3b636f21
DC
1618 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1619 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1620 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1621 break;
1622 }
4773d904
RB
1623 if (i == 5) {
1624 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1625 abort();
1626 }
3b636f21 1627 if (curframe->data[0])
bfa0f965 1628 vp8_release_frame(s, curframe, 1, 0);
3b636f21 1629
fb90785e
RB
1630 // Given that arithmetic probabilities are updated every frame, it's quite likely
1631 // that the values we have on a random interframe are complete junk if we didn't
1632 // start decode on a keyframe. So just don't display anything rather than junk.
1633 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1634 !s->framep[VP56_FRAME_GOLDEN] ||
1635 !s->framep[VP56_FRAME_GOLDEN2])) {
1636 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1637 ret = AVERROR_INVALIDDATA;
1638 goto err;
1639 }
1640
3b636f21 1641 curframe->key_frame = s->keyframe;
975a1447 1642 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3b636f21 1643 curframe->reference = referenced ? 3 : 0;
ce42a048 1644 if ((ret = vp8_alloc_frame(s, curframe))) {
3b636f21 1645 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
fb90785e 1646 goto err;
3b636f21
DC
1647 }
1648
4773d904
RB
1649 // check if golden and altref are swapped
1650 if (s->update_altref != VP56_FRAME_NONE) {
1651 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1652 } else {
1653 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1654 }
1655 if (s->update_golden != VP56_FRAME_NONE) {
1656 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1657 } else {
1658 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1659 }
1660 if (s->update_last) {
1661 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1662 } else {
1663 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1664 }
1665 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1666
1667 ff_thread_finish_setup(avctx);
1668
3b636f21
DC
1669 s->linesize = curframe->linesize[0];
1670 s->uvlinesize = curframe->linesize[1];
1671
1672 if (!s->edge_emu_buffer)
1673 s->edge_emu_buffer = av_malloc(21*s->linesize);
1674
1675 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1676
aa93c52c
PM
1677 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1678 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
c55e0d34 1679
3b636f21 1680 // top edge of 127 for intra prediction
ee555de7
RB
1681 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1682 s->top_border[0][15] = s->top_border[0][23] = 127;
1683 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1684 }
c4211046 1685 memset(s->ref_count, 0, sizeof(s->ref_count));
d2840fa4 1686 if (s->keyframe)
ccf13f9e 1687 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
3b636f21 1688
4773d904 1689#define MARGIN (16 << 2)
7634771e
JGG
1690 s->mv_min.y = -MARGIN;
1691 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1692
3b636f21
DC
1693 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1694 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
c55e0d34 1695 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
aa93c52c 1696 int mb_xy = mb_y*s->mb_width;
3b636f21
DC
1697 uint8_t *dst[3] = {
1698 curframe->data[0] + 16*mb_y*s->linesize,
1699 curframe->data[1] + 8*mb_y*s->uvlinesize,
1700 curframe->data[2] + 8*mb_y*s->uvlinesize
1701 };
1702
aa93c52c 1703 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
3b636f21 1704 memset(s->left_nnz, 0, sizeof(s->left_nnz));
d2840fa4 1705 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
3b636f21
DC
1706
1707 // left edge of 129 for intra prediction
ee555de7 1708 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
3b636f21
DC
1709 for (i = 0; i < 3; i++)
1710 for (y = 0; y < 16>>!!i; y++)
1711 dst[i][y*curframe->linesize[i]-1] = 129;
ee555de7
RB
1712 if (mb_y == 1) // top left edge is also 129
1713 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1714 }
3b636f21 1715
7634771e
JGG
1716 s->mv_min.x = -MARGIN;
1717 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
9ebcf769 1718 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
4773d904 1719 ff_thread_await_progress(prev_frame, mb_y, 0);
7634771e 1720
ef38842f 1721 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
d864dee8
JGG
1722 /* Prefetch the current frame, 4 MBs ahead */
1723 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1724 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1725
ce42a048
RB
1726 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1727 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
3b636f21 1728
ef38842f 1729 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
c4211046 1730
3b636f21
DC
1731 if (!mb->skip)
1732 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
3b636f21 1733
b946111f 1734 if (mb->mode <= MODE_I4x4)
d2840fa4 1735 intra_predict(s, dst, mb, mb_x, mb_y);
b946111f 1736 else
3b636f21 1737 inter_predict(s, dst, mb, mb_x, mb_y);
3b636f21 1738
ef38842f 1739 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
c4211046 1740
3b636f21 1741 if (!mb->skip) {
8a467b2d 1742 idct_mb(s, dst, mb);
3b636f21
DC
1743 } else {
1744 AV_ZERO64(s->left_nnz);
1745 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1746
1747 // Reset DC block predictors if they would exist if the mb had coefficients
1748 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1749 s->left_nnz[8] = 0;
1750 s->top_nnz[mb_x][8] = 0;
1751 }
1752 }
1753
968570d6
JGG
1754 if (s->deblock_filter)
1755 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1756
ef38842f 1757 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
c4211046 1758
3b636f21
DC
1759 dst[0] += 16;
1760 dst[1] += 8;
1761 dst[2] += 8;
7634771e
JGG
1762 s->mv_min.x -= 64;
1763 s->mv_max.x -= 64;
3b636f21 1764 }
9ac831c2 1765 if (s->deblock_filter) {
3b636f21 1766 if (s->filter.simple)
4773d904 1767 filter_mb_row_simple(s, curframe, mb_y);
3b636f21 1768 else
4773d904 1769 filter_mb_row(s, curframe, mb_y);
3b636f21 1770 }
7634771e
JGG
1771 s->mv_min.y -= 64;
1772 s->mv_max.y -= 64;
4773d904
RB
1773
1774 ff_thread_report_progress(curframe, mb_y, 0);
3b636f21 1775 }
3b636f21 1776
4773d904 1777 ff_thread_report_progress(curframe, INT_MAX, 0);
fb90785e
RB
1778 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1779
3b636f21
DC
1780skip_decode:
1781 // if future frames don't use the updated probabilities,
1782 // reset them to the values we saved
1783 if (!s->update_probabilities)
1784 s->prob[0] = s->prob[1];
1785
3b636f21 1786 if (!s->invisible) {
4773d904 1787 *(AVFrame*)data = *curframe;
3b636f21
DC
1788 *data_size = sizeof(AVFrame);
1789 }
1790
1791 return avpkt->size;
fb90785e
RB
1792err:
1793 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1794 return ret;
3b636f21
DC
1795}
1796
1797static av_cold int vp8_decode_init(AVCodecContext *avctx)
1798{
1799 VP8Context *s = avctx->priv_data;
1800
1801 s->avctx = avctx;
1802 avctx->pix_fmt = PIX_FMT_YUV420P;
1803
9cf0841e 1804 ff_dsputil_init(&s->dsp, avctx);
76741b0e 1805 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
3b636f21
DC
1806 ff_vp8dsp_init(&s->vp8dsp);
1807
3b636f21
DC
1808 return 0;
1809}
1810
/**
 * Decoder teardown: release all frame buffers and per-context
 * allocations, then return success.
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    /* NOTE(review): flag arguments (0, 1, 1) are defined elsewhere in this
     * file — presumably (free_mem, prefer_delayed?, ...); confirm against
     * vp8_decode_flush_impl()'s declaration before relying on them. */
    vp8_decode_flush_impl(avctx, 0, 1, 1);
    /* Drop any segmentation maps still queued for deferred freeing;
     * the '1' presumably forces an unconditional release — verify. */
    release_queued_segmaps(avctx->priv_data, 1);
    return 0;
}
1817
4773d904
RB
/**
 * Per-thread init for frame multithreading: each worker thread gets its
 * own VP8Context copy, so only the back-pointer to that thread's
 * AVCodecContext needs fixing up here; all decoding state is filled in
 * later by vp8_decode_update_thread_context().
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;

    return 0;
}
1826
/**
 * Translate a VP8Frame pointer from the source thread's context into the
 * corresponding slot of this context's frames[] array (NULL maps to NULL).
 * Relies on 's' and 's_src' being in scope at the expansion site
 * (vp8_decode_update_thread_context()).
 *
 * Fix: parenthesize the macro argument and the whole expansion so the
 * ternary cannot misbind if the macro is ever combined with operators of
 * higher precedence at a call site (CERT PRE01-C / PRE02-C). 'pic' is
 * still evaluated up to two times — keep arguments side-effect free.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
1829
1830static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1831{
1832 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
1833
56535793
RB
1834 if (s->macroblocks_base &&
1835 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
1836 free_buffers(s);
e02dec25 1837 s->maps_are_invalid = 1;
82a0497c
RB
1838 s->mb_width = s_src->mb_width;
1839 s->mb_height = s_src->mb_height;
56535793
RB
1840 }
1841
4773d904
RB
1842 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1843 s->segmentation = s_src->segmentation;
1844 s->lf_delta = s_src->lf_delta;
1845 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1846
1847 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
1848 s->framep[0] = REBASE(s_src->next_framep[0]);
1849 s->framep[1] = REBASE(s_src->next_framep[1]);
1850 s->framep[2] = REBASE(s_src->next_framep[2]);
1851 s->framep[3] = REBASE(s_src->next_framep[3]);
1852
1853 return 0;
1854}
1855
d36beb3f 1856AVCodec ff_vp8_decoder = {
00c3b67b
MS
1857 .name = "vp8",
1858 .type = AVMEDIA_TYPE_VIDEO,
1859 .id = CODEC_ID_VP8,
1860 .priv_data_size = sizeof(VP8Context),
1861 .init = vp8_decode_init,
1862 .close = vp8_decode_free,
1863 .decode = vp8_decode_frame,
1864 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1865 .flush = vp8_decode_flush,
1866 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
4773d904
RB
1867 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1868 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
3b636f21 1869};