vp8: move data from VP8Context->VP8Macroblock
[libav.git] / libavcodec / vp8.c
CommitLineData
32f3c541 1/*
3b636f21
DC
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
13a1304b 6 * Copyright (C) 2010 Jason Garrett-Glaser
3b636f21 7 *
2912e87a 8 * This file is part of Libav.
3b636f21 9 *
2912e87a 10 * Libav is free software; you can redistribute it and/or
3b636f21
DC
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
2912e87a 15 * Libav is distributed in the hope that it will be useful,
3b636f21
DC
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
2912e87a 21 * License along with Libav; if not, write to the Free Software
3b636f21
DC
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
737eb597 25#include "libavutil/imgutils.h"
3b636f21 26#include "avcodec.h"
f3a29b75 27#include "internal.h"
bcf4568f 28#include "vp8.h"
3b636f21 29#include "vp8data.h"
3b636f21 30#include "rectangle.h"
4773d904 31#include "thread.h"
3b636f21 32
a7878c9f
MR
33#if ARCH_ARM
34# include "arm/vp8.h"
35#endif
36
56535793
RB
37static void free_buffers(VP8Context *s)
38{
39 av_freep(&s->macroblocks_base);
40 av_freep(&s->filter_strength);
41 av_freep(&s->intra4x4_pred_mode_top);
42 av_freep(&s->top_nnz);
43 av_freep(&s->edge_emu_buffer);
44 av_freep(&s->top_border);
56535793
RB
45
46 s->macroblocks = NULL;
47}
48
ce42a048
RB
49static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
50{
51 int ret;
52 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
53 return ret;
e02dec25 54 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
ce42a048
RB
55 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
56 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
57 ff_thread_release_buffer(s->avctx, f);
58 return AVERROR(ENOMEM);
59 }
60 return 0;
61}
62
bfa0f965 63static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
ce42a048 64{
bfa0f965
RB
65 if (f->ref_index[0]) {
66 if (prefer_delayed_free) {
67 /* Upon a size change, we want to free the maps but other threads may still
68 * be using them, so queue them. Upon a seek, all threads are inactive so
69 * we want to cache one to prevent re-allocation in the next decoding
70 * iteration, but the rest we can free directly. */
71 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
72 if (s->num_maps_to_be_freed < max_queued_maps) {
73 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
74 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
75 av_free(f->ref_index[0]);
76 } /* else: MEMLEAK (should never happen, but better that than crash) */
ce42a048 77 f->ref_index[0] = NULL;
bfa0f965
RB
78 } else /* vp8_decode_free() */ {
79 av_free(f->ref_index[0]);
ce42a048 80 }
ce42a048
RB
81 }
82 ff_thread_release_buffer(s->avctx, f);
83}
84
bfa0f965
RB
85static void vp8_decode_flush_impl(AVCodecContext *avctx,
86 int prefer_delayed_free, int can_direct_free, int free_mem)
3b636f21
DC
87{
88 VP8Context *s = avctx->priv_data;
89 int i;
90
f3a29b75 91 if (!avctx->internal->is_copy) {
4773d904
RB
92 for (i = 0; i < 5; i++)
93 if (s->frames[i].data[0])
bfa0f965 94 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
4773d904 95 }
3b636f21
DC
96 memset(s->framep, 0, sizeof(s->framep));
97
bfa0f965
RB
98 if (free_mem) {
99 free_buffers(s);
100 s->maps_are_invalid = 1;
101 }
ce42a048
RB
102}
103
104static void vp8_decode_flush(AVCodecContext *avctx)
105{
bfa0f965 106 vp8_decode_flush_impl(avctx, 1, 1, 0);
3b636f21
DC
107}
108
109static int update_dimensions(VP8Context *s, int width, int height)
110{
4773d904
RB
111 if (width != s->avctx->width ||
112 height != s->avctx->height) {
113 if (av_image_check_size(width, height, 0, s->avctx))
114 return AVERROR_INVALIDDATA;
3b636f21 115
bfa0f965 116 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
3b636f21 117
4773d904
RB
118 avcodec_set_dimensions(s->avctx, width, height);
119 }
3b636f21
DC
120
121 s->mb_width = (s->avctx->coded_width +15) / 16;
122 s->mb_height = (s->avctx->coded_height+15) / 16;
123
aa93c52c
PM
124 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
125 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
ccf13f9e 126 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
3b636f21 127 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
9ac831c2 128 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
3b636f21 129
d2840fa4 130 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
ce42a048 131 !s->top_nnz || !s->top_border)
b6c420ce
DC
132 return AVERROR(ENOMEM);
133
c55e0d34 134 s->macroblocks = s->macroblocks_base + 1;
3b636f21
DC
135
136 return 0;
137}
138
139static void parse_segment_info(VP8Context *s)
140{
141 VP56RangeCoder *c = &s->c;
142 int i;
143
144 s->segmentation.update_map = vp8_rac_get(c);
145
146 if (vp8_rac_get(c)) { // update segment feature data
147 s->segmentation.absolute_vals = vp8_rac_get(c);
148
149 for (i = 0; i < 4; i++)
150 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
151
152 for (i = 0; i < 4; i++)
153 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
154 }
155 if (s->segmentation.update_map)
156 for (i = 0; i < 3; i++)
157 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
158}
159
160static void update_lf_deltas(VP8Context *s)
161{
162 VP56RangeCoder *c = &s->c;
163 int i;
164
14ba7472
JS
165 for (i = 0; i < 4; i++) {
166 if (vp8_rac_get(c)) {
167 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
3b636f21 168
14ba7472
JS
169 if (vp8_rac_get(c))
170 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
171 }
172 }
173
174 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
175 if (vp8_rac_get(c)) {
176 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
177
178 if (vp8_rac_get(c))
179 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
180 }
181 }
3b636f21
DC
182}
183
184static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
185{
186 const uint8_t *sizes = buf;
187 int i;
188
189 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
190
191 buf += 3*(s->num_coeff_partitions-1);
192 buf_size -= 3*(s->num_coeff_partitions-1);
193 if (buf_size < 0)
194 return -1;
195
196 for (i = 0; i < s->num_coeff_partitions-1; i++) {
06d50ca8 197 int size = AV_RL24(sizes + 3*i);
3b636f21
DC
198 if (buf_size - size < 0)
199 return -1;
200
905ef0d0 201 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
3b636f21
DC
202 buf += size;
203 buf_size -= size;
204 }
905ef0d0 205 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
3b636f21
DC
206
207 return 0;
208}
209
210static void get_quants(VP8Context *s)
211{
212 VP56RangeCoder *c = &s->c;
213 int i, base_qi;
214
215 int yac_qi = vp8_rac_get_uint(c, 7);
216 int ydc_delta = vp8_rac_get_sint(c, 4);
217 int y2dc_delta = vp8_rac_get_sint(c, 4);
218 int y2ac_delta = vp8_rac_get_sint(c, 4);
219 int uvdc_delta = vp8_rac_get_sint(c, 4);
220 int uvac_delta = vp8_rac_get_sint(c, 4);
221
222 for (i = 0; i < 4; i++) {
223 if (s->segmentation.enabled) {
224 base_qi = s->segmentation.base_quant[i];
225 if (!s->segmentation.absolute_vals)
226 base_qi += yac_qi;
227 } else
228 base_qi = yac_qi;
229
42761122
MR
230 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
231 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
232 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
233 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
234 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
235 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
a8ab0ccc
PM
236
237 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
238 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
3b636f21
DC
239 }
240}
241
242/**
243 * Determine which buffers golden and altref should be updated with after this frame.
244 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
245 *
246 * Intra frames update all 3 references
247 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
248 * If the update (golden|altref) flag is set, it's updated with the current frame
249 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
250 * If the flag is not set, the number read means:
251 * 0: no update
252 * 1: VP56_FRAME_PREVIOUS
253 * 2: update golden with altref, or update altref with golden
254 */
255static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
256{
257 VP56RangeCoder *c = &s->c;
258
259 if (update)
260 return VP56_FRAME_CURRENT;
261
262 switch (vp8_rac_get_uint(c, 2)) {
263 case 1:
264 return VP56_FRAME_PREVIOUS;
265 case 2:
266 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
267 }
268 return VP56_FRAME_NONE;
269}
270
271static void update_refs(VP8Context *s)
272{
273 VP56RangeCoder *c = &s->c;
274
275 int update_golden = vp8_rac_get(c);
276 int update_altref = vp8_rac_get(c);
277
278 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
279 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
280}
281
282static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
283{
284 VP56RangeCoder *c = &s->c;
370b622a 285 int header_size, hscale, vscale, i, j, k, l, m, ret;
3b636f21
DC
286 int width = s->avctx->width;
287 int height = s->avctx->height;
288
289 s->keyframe = !(buf[0] & 1);
290 s->profile = (buf[0]>>1) & 7;
291 s->invisible = !(buf[0] & 0x10);
06d50ca8 292 header_size = AV_RL24(buf) >> 5;
3b636f21
DC
293 buf += 3;
294 buf_size -= 3;
295
0ef1dbed
DC
296 if (s->profile > 3)
297 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
298
299 if (!s->profile)
300 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
301 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
302 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
3b636f21
DC
303
304 if (header_size > buf_size - 7*s->keyframe) {
305 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
306 return AVERROR_INVALIDDATA;
307 }
308
309 if (s->keyframe) {
06d50ca8
JGG
310 if (AV_RL24(buf) != 0x2a019d) {
311 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
3b636f21
DC
312 return AVERROR_INVALIDDATA;
313 }
314 width = AV_RL16(buf+3) & 0x3fff;
315 height = AV_RL16(buf+5) & 0x3fff;
316 hscale = buf[4] >> 6;
317 vscale = buf[6] >> 6;
318 buf += 7;
319 buf_size -= 7;
320
92a54426
MR
321 if (hscale || vscale)
322 av_log_missing_feature(s->avctx, "Upscaling", 1);
323
3b636f21 324 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
370b622a
JGG
325 for (i = 0; i < 4; i++)
326 for (j = 0; j < 16; j++)
327 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
328 sizeof(s->prob->token[i][j]));
3b636f21
DC
329 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
330 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
331 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
332 memset(&s->segmentation, 0, sizeof(s->segmentation));
333 }
334
335 if (!s->macroblocks_base || /* first frame */
336 width != s->avctx->width || height != s->avctx->height) {
f05c2fb6 337 if ((ret = update_dimensions(s, width, height)) < 0)
3b636f21
DC
338 return ret;
339 }
340
905ef0d0 341 ff_vp56_init_range_decoder(c, buf, header_size);
3b636f21
DC
342 buf += header_size;
343 buf_size -= header_size;
344
345 if (s->keyframe) {
346 if (vp8_rac_get(c))
347 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
348 vp8_rac_get(c); // whether we can skip clamping in dsp functions
349 }
350
351 if ((s->segmentation.enabled = vp8_rac_get(c)))
352 parse_segment_info(s);
353 else
354 s->segmentation.update_map = 0; // FIXME: move this to some init function?
355
356 s->filter.simple = vp8_rac_get(c);
357 s->filter.level = vp8_rac_get_uint(c, 6);
358 s->filter.sharpness = vp8_rac_get_uint(c, 3);
359
360 if ((s->lf_delta.enabled = vp8_rac_get(c)))
361 if (vp8_rac_get(c))
362 update_lf_deltas(s);
363
364 if (setup_partitions(s, buf, buf_size)) {
365 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
366 return AVERROR_INVALIDDATA;
367 }
368
369 get_quants(s);
370
371 if (!s->keyframe) {
372 update_refs(s);
373 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
374 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
375 }
376
377 // if we aren't saving this frame's probabilities for future frames,
378 // make a copy of the current probabilities
379 if (!(s->update_probabilities = vp8_rac_get(c)))
380 s->prob[1] = s->prob[0];
381
382 s->update_last = s->keyframe || vp8_rac_get(c);
383
384 for (i = 0; i < 4; i++)
385 for (j = 0; j < 8; j++)
386 for (k = 0; k < 3; k++)
387 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
370b622a
JGG
388 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
389 int prob = vp8_rac_get_uint(c, 8);
b0d58795
JGG
390 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
391 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
370b622a 392 }
3b636f21
DC
393
394 if ((s->mbskip_enabled = vp8_rac_get(c)))
a8ab0ccc 395 s->prob->mbskip = vp8_rac_get_uint(c, 8);
3b636f21
DC
396
397 if (!s->keyframe) {
a8ab0ccc
PM
398 s->prob->intra = vp8_rac_get_uint(c, 8);
399 s->prob->last = vp8_rac_get_uint(c, 8);
400 s->prob->golden = vp8_rac_get_uint(c, 8);
3b636f21
DC
401
402 if (vp8_rac_get(c))
403 for (i = 0; i < 4; i++)
404 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
405 if (vp8_rac_get(c))
406 for (i = 0; i < 3; i++)
407 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
408
409 // 17.2 MV probability update
410 for (i = 0; i < 2; i++)
411 for (j = 0; j < 19; j++)
7697cdcf 412 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
3b636f21
DC
413 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
414 }
415
416 return 0;
417}
418
7634771e 419static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
3b636f21 420{
7634771e
JGG
421 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
422 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
3b636f21
DC
423}
424
3b636f21
DC
425/**
426 * Motion vector coding, 17.1.
427 */
428static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
429{
ca18a478 430 int bit, x = 0;
3b636f21 431
7697cdcf 432 if (vp56_rac_get_prob_branchy(c, p[0])) {
3b636f21
DC
433 int i;
434
435 for (i = 0; i < 3; i++)
436 x += vp56_rac_get_prob(c, p[9 + i]) << i;
437 for (i = 9; i > 3; i--)
438 x += vp56_rac_get_prob(c, p[9 + i]) << i;
439 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
440 x += 8;
ca18a478
DC
441 } else {
442 // small_mvtree
443 const uint8_t *ps = p+2;
444 bit = vp56_rac_get_prob(c, *ps);
445 ps += 1 + 3*bit;
446 x += 4*bit;
447 bit = vp56_rac_get_prob(c, *ps);
448 ps += 1 + bit;
449 x += 2*bit;
450 x += vp56_rac_get_prob(c, *ps);
451 }
3b636f21
DC
452
453 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
454}
455
414ac27d
JGG
456static av_always_inline
457const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
3b636f21 458{
7bf254c4
JGG
459 if (left == top)
460 return vp8_submv_prob[4-!!left];
461 if (!top)
3b636f21 462 return vp8_submv_prob[2];
7bf254c4 463 return vp8_submv_prob[1-!!left];
3b636f21
DC
464}
465
466/**
467 * Split motion vector prediction, 16.4.
7ed06b2b 468 * @returns the number of motion vectors parsed (2, 4 or 16)
3b636f21 469 */
414ac27d
JGG
470static av_always_inline
471int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
3b636f21 472{
0908f1b9
JGG
473 int part_idx;
474 int n, num;
c55e0d34 475 VP8Macroblock *top_mb = &mb[2];
7bf254c4
JGG
476 VP8Macroblock *left_mb = &mb[-1];
477 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
478 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
0908f1b9 479 *mbsplits_cur, *firstidx;
c55e0d34
JGG
480 VP56mv *top_mv = top_mb->bmv;
481 VP56mv *left_mv = left_mb->bmv;
482 VP56mv *cur_mv = mb->bmv;
3b636f21 483
0908f1b9
JGG
484 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
485 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
486 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
487 } else {
488 part_idx = VP8_SPLITMVMODE_8x8;
489 }
490 } else {
491 part_idx = VP8_SPLITMVMODE_4x4;
492 }
493
494 num = vp8_mbsplit_count[part_idx];
495 mbsplits_cur = vp8_mbsplits[part_idx],
496 firstidx = vp8_mbfirstidx[part_idx];
497 mb->partitioning = part_idx;
498
3b636f21 499 for (n = 0; n < num; n++) {
7ed06b2b 500 int k = firstidx[n];
7bf254c4 501 uint32_t left, above;
7ed06b2b
RB
502 const uint8_t *submv_prob;
503
7bf254c4
JGG
504 if (!(k & 3))
505 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
506 else
507 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
508 if (k <= 3)
509 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
510 else
511 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
7ed06b2b
RB
512
513 submv_prob = get_submv_prob(left, above);
3b636f21 514
c5dec7f1
JGG
515 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
516 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
517 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
518 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
519 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
520 } else {
521 AV_ZERO32(&mb->bmv[n]);
522 }
523 } else {
524 AV_WN32A(&mb->bmv[n], above);
525 }
526 } else {
7bf254c4 527 AV_WN32A(&mb->bmv[n], left);
3b636f21 528 }
3b636f21 529 }
7ed06b2b
RB
530
531 return num;
3b636f21
DC
532}
533
414ac27d 534static av_always_inline
f3d09d44
JGG
535void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
536{
537 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
538 mb - 1 /* left */,
539 mb + 1 /* top-left */ };
540 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
66f608a6 541 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
f3d09d44
JGG
542 int idx = CNT_ZERO;
543 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1eeca886 544 int8_t *sign_bias = s->sign_bias;
f3d09d44
JGG
545 VP56mv near_mv[4];
546 uint8_t cnt[4] = { 0 };
547 VP56RangeCoder *c = &s->c;
548
549 AV_ZERO32(&near_mv[0]);
550 AV_ZERO32(&near_mv[1]);
0f0b5d64 551 AV_ZERO32(&near_mv[2]);
f3d09d44
JGG
552
553 /* Process MB on top, left and top-left */
554 #define MV_EDGE_CHECK(n)\
555 {\
556 VP8Macroblock *edge = mb_edge[n];\
557 int edge_ref = edge->ref_frame;\
558 if (edge_ref != VP56_FRAME_CURRENT) {\
559 uint32_t mv = AV_RN32A(&edge->mv);\
560 if (mv) {\
561 if (cur_sign_bias != sign_bias[edge_ref]) {\
562 /* SWAR negate of the values in mv. */\
563 mv = ~mv;\
564 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
565 }\
566 if (!n || mv != AV_RN32A(&near_mv[idx]))\
567 AV_WN32A(&near_mv[++idx], mv);\
568 cnt[idx] += 1 + (n != 2);\
569 } else\
570 cnt[CNT_ZERO] += 1 + (n != 2);\
571 }\
572 }
573
574 MV_EDGE_CHECK(0)
575 MV_EDGE_CHECK(1)
576 MV_EDGE_CHECK(2)
577
578 mb->partitioning = VP8_SPLITMVMODE_NONE;
579 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
580 mb->mode = VP8_MVMODE_MV;
581
582 /* If we have three distinct MVs, merge first and last if they're the same */
66f608a6 583 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
f3d09d44
JGG
584 cnt[CNT_NEAREST] += 1;
585
586 /* Swap near and nearest if necessary */
587 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
588 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
589 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
590 }
591
592 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
593 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
594
595 /* Choose the best mv out of 0,0 and the nearest mv */
7634771e 596 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
66f608a6
AS
597 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
598 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
599 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
f3d09d44
JGG
600
601 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
602 mb->mode = VP8_MVMODE_SPLIT;
603 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
604 } else {
605 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
606 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
607 mb->bmv[0] = mb->mv;
608 }
609 } else {
7634771e 610 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
f3d09d44
JGG
611 mb->bmv[0] = mb->mv;
612 }
613 } else {
7634771e 614 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
f3d09d44
JGG
615 mb->bmv[0] = mb->mv;
616 }
617 } else {
618 mb->mode = VP8_MVMODE_ZERO;
619 AV_ZERO32(&mb->mv);
620 mb->bmv[0] = mb->mv;
621 }
622}
623
624static av_always_inline
17343e39 625void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
d2840fa4 626 int mb_x, int keyframe)
3b636f21 627{
17343e39
DK
628 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
629
d1c58fce 630 if (keyframe) {
d2840fa4
PM
631 int x, y;
632 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
633 uint8_t* const left = s->intra4x4_pred_mode_left;
d1c58fce
JGG
634 for (y = 0; y < 4; y++) {
635 for (x = 0; x < 4; x++) {
d2840fa4
PM
636 const uint8_t *ctx;
637 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
638 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
639 left[y] = top[x] = *intra4x4;
640 intra4x4++;
3b636f21 641 }
3b636f21 642 }
d1c58fce 643 } else {
d2840fa4 644 int i;
d1c58fce
JGG
645 for (i = 0; i < 16; i++)
646 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
3b636f21
DC
647 }
648}
649
414ac27d 650static av_always_inline
4773d904 651void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
3b636f21
DC
652{
653 VP56RangeCoder *c = &s->c;
3b636f21
DC
654
655 if (s->segmentation.update_map)
c55e0d34 656 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
30011bf2 657 else if (s->segmentation.enabled)
4773d904 658 *segment = ref ? *ref : *segment;
17343e39 659 mb->segment = *segment;
3b636f21 660
a8ab0ccc 661 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
3b636f21
DC
662
663 if (s->keyframe) {
664 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
665
666 if (mb->mode == MODE_I4x4) {
17343e39 667 decode_intra4x4_modes(s, c, mb, mb_x, 1);
d2840fa4
PM
668 } else {
669 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
670 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
671 AV_WN32A(s->intra4x4_pred_mode_left, modes);
672 }
3b636f21 673
17343e39 674 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
3b636f21 675 mb->ref_frame = VP56_FRAME_CURRENT;
a8ab0ccc 676 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
3b636f21 677 // inter MB, 16.2
a8ab0ccc
PM
678 if (vp56_rac_get_prob_branchy(c, s->prob->last))
679 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
3b636f21
DC
680 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
681 else
682 mb->ref_frame = VP56_FRAME_PREVIOUS;
c4211046 683 s->ref_count[mb->ref_frame-1]++;
3b636f21
DC
684
685 // motion vectors, 16.3
f3d09d44 686 decode_mvs(s, mb, mb_x, mb_y);
3b636f21
DC
687 } else {
688 // intra MB, 16.1
689 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
690
158e062c 691 if (mb->mode == MODE_I4x4)
17343e39 692 decode_intra4x4_modes(s, c, mb, mb_x, 0);
3b636f21 693
17343e39 694 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
3b636f21 695 mb->ref_frame = VP56_FRAME_CURRENT;
b946111f 696 mb->partitioning = VP8_SPLITMVMODE_NONE;
14767f35 697 AV_ZERO32(&mb->bmv[0]);
3b636f21
DC
698 }
699}
700
a7878c9f 701#ifndef decode_block_coeffs_internal
3b636f21 702/**
e394953e
RB
703 * @param c arithmetic bitstream reader context
704 * @param block destination for block coefficients
705 * @param probs probabilities to use when reading trees from the bitstream
3b636f21 706 * @param i initial coeff index, 0 unless a separate DC block is coded
3fa76268 707 * @param qmul array holding the dc/ac dequant factor at position 0/1
3b636f21
DC
708 * @return 0 if no coeffs were decoded
709 * otherwise, the index of the last coeff decoded plus one
710 */
6163d880 711static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
3efbe137 712 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1e739679 713 int i, uint8_t *token_prob, int16_t qmul[2])
3b636f21 714{
6163d880 715 VP56RangeCoder c = *r;
afb54a85 716 goto skip_eob;
fe1b5d97 717 do {
1e739679 718 int coeff;
6163d880
RB
719 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
720 break;
3b636f21 721
fe1b5d97 722skip_eob:
6163d880 723 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
c22b4468 724 if (++i == 16)
6163d880 725 break; // invalid input; blocks should end with EOB
370b622a 726 token_prob = probs[i][0];
c22b4468 727 goto skip_eob;
fe1b5d97
DC
728 }
729
6163d880 730 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
fe1b5d97 731 coeff = 1;
370b622a 732 token_prob = probs[i+1][1];
fe1b5d97 733 } else {
6163d880
RB
734 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
735 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
fe1b5d97 736 if (coeff)
6163d880 737 coeff += vp56_rac_get_prob(&c, token_prob[5]);
fe1b5d97
DC
738 coeff += 2;
739 } else {
740 // DCT_CAT*
6163d880
RB
741 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
742 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
743 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
fe1b5d97
DC
744 } else { // DCT_CAT2
745 coeff = 7;
6163d880
RB
746 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
747 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
fe1b5d97
DC
748 }
749 } else { // DCT_CAT3 and up
6163d880
RB
750 int a = vp56_rac_get_prob(&c, token_prob[8]);
751 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
fe1b5d97
DC
752 int cat = (a<<1) + b;
753 coeff = 3 + (8<<cat);
6163d880 754 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
fe1b5d97
DC
755 }
756 }
370b622a 757 token_prob = probs[i+1][2];
fe1b5d97 758 }
6163d880 759 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
afb54a85 760 } while (++i < 16);
fe1b5d97 761
6163d880 762 *r = c;
afb54a85 763 return i;
3b636f21 764}
a7878c9f 765#endif
3b636f21 766
3c432e11
DB
767/**
768 * @param c arithmetic bitstream reader context
769 * @param block destination for block coefficients
770 * @param probs probabilities to use when reading trees from the bitstream
771 * @param i initial coeff index, 0 unless a separate DC block is coded
772 * @param zero_nhood the initial prediction context for number of surrounding
773 * all-zero blocks (only left/top, so 0-2)
774 * @param qmul array holding the dc/ac dequant factor at position 0/1
775 * @return 0 if no coeffs were decoded
776 * otherwise, the index of the last coeff decoded plus one
777 */
414ac27d 778static av_always_inline
1e739679 779int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
81a13131 780 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1e739679
JGG
781 int i, int zero_nhood, int16_t qmul[2])
782{
783 uint8_t *token_prob = probs[i][zero_nhood];
784 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
785 return 0;
786 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
787}
788
789static av_always_inline
414ac27d
JGG
790void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
791 uint8_t t_nnz[9], uint8_t l_nnz[9])
3b636f21 792{
3b636f21
DC
793 int i, x, y, luma_start = 0, luma_ctx = 3;
794 int nnz_pred, nnz, nnz_total = 0;
17343e39 795 int segment = mb->segment;
f311208c 796 int block_dc = 0;
3b636f21 797
3b636f21 798 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
3b636f21
DC
799 nnz_pred = t_nnz[8] + l_nnz[8];
800
801 // decode DC values and do hadamard
827d43bb 802 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
3b636f21
DC
803 s->qmat[segment].luma_dc_qmul);
804 l_nnz[8] = t_nnz[8] = !!nnz;
f311208c
JGG
805 if (nnz) {
806 nnz_total += nnz;
807 block_dc = 1;
808 if (nnz == 1)
809 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
810 else
811 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
812 }
3b636f21
DC
813 luma_start = 1;
814 luma_ctx = 0;
815 }
816
817 // luma blocks
818 for (y = 0; y < 4; y++)
819 for (x = 0; x < 4; x++) {
ffbf0794 820 nnz_pred = l_nnz[y] + t_nnz[x];
3b636f21 821 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
ffbf0794 822 nnz_pred, s->qmat[segment].luma_qmul);
f311208c
JGG
823 // nnz+block_dc may be one more than the actual last index, but we don't care
824 s->non_zero_count_cache[y][x] = nnz + block_dc;
3b636f21
DC
825 t_nnz[x] = l_nnz[y] = !!nnz;
826 nnz_total += nnz;
827 }
828
829 // chroma blocks
830 // TODO: what to do about dimensions? 2nd dim for luma is x,
831 // but for chroma it's (y<<1)|x
832 for (i = 4; i < 6; i++)
833 for (y = 0; y < 2; y++)
834 for (x = 0; x < 2; x++) {
835 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
836 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
837 nnz_pred, s->qmat[segment].chroma_qmul);
838 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
839 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
840 nnz_total += nnz;
841 }
842
843 // if there were no coded coeffs despite the macroblock not being marked skip,
844 // we MUST not do the inner loop filter and should not do IDCT
845 // Since skip isn't used for bitstream prediction, just manually set it.
846 if (!nnz_total)
847 mb->skip = 1;
848}
849
9ac831c2
DC
850static av_always_inline
851void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
852 int linesize, int uvlinesize, int simple)
853{
854 AV_COPY128(top_border, src_y + 15*linesize);
855 if (!simple) {
856 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
857 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
858 }
859}
860
861static av_always_inline
862void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
863 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
864 int simple, int xchg)
865{
866 uint8_t *top_border_m1 = top_border-32; // for TL prediction
867 src_y -= linesize;
868 src_cb -= uvlinesize;
869 src_cr -= uvlinesize;
870
096971e8
MR
871#define XCHG(a,b,xchg) do { \
872 if (xchg) AV_SWAP64(b,a); \
873 else AV_COPY64(b,a); \
874 } while (0)
9ac831c2
DC
875
876 XCHG(top_border_m1+8, src_y-8, xchg);
877 XCHG(top_border, src_y, xchg);
878 XCHG(top_border+8, src_y+8, 1);
070ce7ef 879 if (mb_x < mb_width-1)
9ac831c2 880 XCHG(top_border+32, src_y+16, 1);
070ce7ef 881
9ac831c2
DC
882 // only copy chroma for normal loop filter
883 // or to initialize the top row to 127
884 if (!simple || !mb_y) {
885 XCHG(top_border_m1+16, src_cb-8, xchg);
886 XCHG(top_border_m1+24, src_cr-8, xchg);
887 XCHG(top_border+16, src_cb, 1);
888 XCHG(top_border+24, src_cr, 1);
889 }
890}
891
414ac27d 892static av_always_inline
ee555de7
RB
893int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
894{
895 if (!mb_x) {
896 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
897 } else {
898 return mb_y ? mode : LEFT_DC_PRED8x8;
899 }
900}
901
902static av_always_inline
903int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
904{
905 if (!mb_x) {
906 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
907 } else {
908 return mb_y ? mode : HOR_PRED8x8;
909 }
910}
911
912static av_always_inline
913int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
3b636f21
DC
914{
915 if (mode == DC_PRED8x8) {
ee555de7
RB
916 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
917 } else {
918 return mode;
919 }
920}
921
922static av_always_inline
923int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
924{
925 switch (mode) {
926 case DC_PRED8x8:
927 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
928 case VERT_PRED8x8:
929 return !mb_y ? DC_127_PRED8x8 : mode;
930 case HOR_PRED8x8:
931 return !mb_x ? DC_129_PRED8x8 : mode;
932 case PLANE_PRED8x8 /*TM*/:
933 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
934 }
935 return mode;
936}
937
938static av_always_inline
939int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
940{
941 if (!mb_x) {
942 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
943 } else {
944 return mb_y ? mode : HOR_VP8_PRED;
945 }
946}
947
948static av_always_inline
949int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
950{
951 switch (mode) {
952 case VERT_PRED:
953 if (!mb_x && mb_y) {
954 *copy_buf = 1;
955 return mode;
956 }
957 /* fall-through */
958 case DIAG_DOWN_LEFT_PRED:
959 case VERT_LEFT_PRED:
960 return !mb_y ? DC_127_PRED : mode;
961 case HOR_PRED:
962 if (!mb_y) {
963 *copy_buf = 1;
964 return mode;
a71abb71 965 }
ee555de7
RB
966 /* fall-through */
967 case HOR_UP_PRED:
968 return !mb_x ? DC_129_PRED : mode;
969 case TM_VP8_PRED:
970 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
971 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
972 case DIAG_DOWN_RIGHT_PRED:
973 case VERT_RIGHT_PRED:
974 case HOR_DOWN_PRED:
975 if (!mb_y || !mb_x)
976 *copy_buf = 1;
977 return mode;
3b636f21
DC
978 }
979 return mode;
980}
981
414ac27d
JGG
982static av_always_inline
983void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
d2840fa4 984 int mb_x, int mb_y)
3b636f21 985{
ee555de7 986 AVCodecContext *avctx = s->avctx;
bb591566
MR
987 int x, y, mode, nnz;
988 uint32_t tr;
3b636f21 989
9ac831c2
DC
990 // for the first row, we need to run xchg_mb_border to init the top edge to 127
991 // otherwise, skip it if we aren't going to deblock
ee555de7 992 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
9ac831c2
DC
993 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
994 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
995 s->filter.simple, 1);
996
3b636f21 997 if (mb->mode < MODE_I4x4) {
ee555de7
RB
998 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
999 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1000 } else {
1001 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1002 }
3b636f21
DC
1003 s->hpc.pred16x16[mode](dst[0], s->linesize);
1004 } else {
1005 uint8_t *ptr = dst[0];
17343e39 1006 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
ee555de7 1007 uint8_t tr_top[4] = { 127, 127, 127, 127 };
3b636f21
DC
1008
1009 // all blocks on the right edge of the macroblock use bottom edge
1010 // the top macroblock for their topright edge
1011 uint8_t *tr_right = ptr - s->linesize + 16;
1012
1013 // if we're on the right edge of the frame, said edge is extended
1014 // from the top macroblock
7148da48
RB
1015 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1016 mb_x == s->mb_width-1) {
bb591566 1017 tr = tr_right[-1]*0x01010101u;
3b636f21
DC
1018 tr_right = (uint8_t *)&tr;
1019 }
1020
b74f70d6
JGG
1021 if (mb->skip)
1022 AV_ZERO128(s->non_zero_count_cache);
1023
3b636f21
DC
1024 for (y = 0; y < 4; y++) {
1025 uint8_t *topright = ptr + 4 - s->linesize;
1026 for (x = 0; x < 4; x++) {
ee555de7
RB
1027 int copy = 0, linesize = s->linesize;
1028 uint8_t *dst = ptr+4*x;
1029 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1030
1031 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1032 topright = tr_top;
1033 } else if (x == 3)
3b636f21
DC
1034 topright = tr_right;
1035
ee555de7
RB
1036 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1037 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1038 if (copy) {
1039 dst = copy_dst + 12;
1040 linesize = 8;
1041 if (!(mb_y + y)) {
1042 copy_dst[3] = 127U;
9d4bdcb7 1043 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
ee555de7 1044 } else {
9d4bdcb7 1045 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
ee555de7
RB
1046 if (!(mb_x + x)) {
1047 copy_dst[3] = 129U;
1048 } else {
1049 copy_dst[3] = ptr[4*x-s->linesize-1];
1050 }
1051 }
1052 if (!(mb_x + x)) {
1053 copy_dst[11] =
1054 copy_dst[19] =
1055 copy_dst[27] =
1056 copy_dst[35] = 129U;
1057 } else {
1058 copy_dst[11] = ptr[4*x -1];
1059 copy_dst[19] = ptr[4*x+s->linesize -1];
1060 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1061 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1062 }
1063 }
1064 } else {
1065 mode = intra4x4[x];
1066 }
1067 s->hpc.pred4x4[mode](dst, topright, linesize);
1068 if (copy) {
9d4bdcb7
RB
1069 AV_COPY32(ptr+4*x , copy_dst+12);
1070 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1071 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1072 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
ee555de7 1073 }
3b636f21
DC
1074
1075 nnz = s->non_zero_count_cache[y][x];
1076 if (nnz) {
1077 if (nnz == 1)
1078 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1079 else
1080 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1081 }
1082 topright += 4;
1083 }
1084
1085 ptr += 4*s->linesize;
d2840fa4 1086 intra4x4 += 4;
3b636f21
DC
1087 }
1088 }
1089
ee555de7 1090 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
17343e39 1091 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
ee555de7 1092 } else {
17343e39 1093 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
ee555de7 1094 }
3b636f21
DC
1095 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1096 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
9ac831c2 1097
ee555de7 1098 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
9ac831c2
DC
1099 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1100 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1101 s->filter.simple, 0);
3b636f21
DC
1102}
1103
64233e70
JGG
/* Per sub-pel phase (0..7) filter footprint; row 1 equals row 0 + row 2. */
static const uint8_t subpel_idx[3][8] = {
    /* [0] nr. of left extra pixels, also function pointer index */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1] nr. of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2] nr. of right extra pixels */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1110
3b636f21 1111/**
3c432e11 1112 * luma MC function
3b636f21
DC
1113 *
1114 * @param s VP8 decoding context
3b636f21 1115 * @param dst target buffer for block data at block position
24c9baba 1116 * @param ref reference picture buffer at origin (0, 0)
3b636f21
DC
1117 * @param mv motion vector (relative to block position) to get pixel data from
1118 * @param x_off horizontal position of block from origin (0, 0)
1119 * @param y_off vertical position of block from origin (0, 0)
1120 * @param block_w width of block (16, 8 or 4)
1121 * @param block_h height of block (always same as block_w)
1122 * @param width width of src/dst plane data
1123 * @param height height of src/dst plane data
1124 * @param linesize size of a single line of plane data, including padding
e394953e 1125 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
3b636f21 1126 */
414ac27d 1127static av_always_inline
4773d904 1128void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
64233e70
JGG
1129 int x_off, int y_off, int block_w, int block_h,
1130 int width, int height, int linesize,
1131 vp8_mc_func mc_func[3][3])
3b636f21 1132{
4773d904
RB
1133 uint8_t *src = ref->data[0];
1134
c0498b30 1135 if (AV_RN32A(mv)) {
64233e70
JGG
1136
1137 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1138 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1139
1140 x_off += mv->x >> 2;
1141 y_off += mv->y >> 2;
c0498b30
JGG
1142
1143 // edge emulation
4773d904 1144 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
c0498b30 1145 src += y_off * linesize + x_off;
64233e70
JGG
1146 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1147 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
2e279598 1148 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
64233e70
JGG
1149 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1150 x_off - mx_idx, y_off - my_idx, width, height);
44002d83 1151 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
c0498b30
JGG
1152 }
1153 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
4773d904
RB
1154 } else {
1155 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
c0498b30 1156 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
4773d904 1157 }
3b636f21
DC
1158}
1159
3c432e11
DB
1160/**
1161 * chroma MC function
1162 *
1163 * @param s VP8 decoding context
1164 * @param dst1 target buffer for block data at block position (U plane)
1165 * @param dst2 target buffer for block data at block position (V plane)
1166 * @param ref reference picture buffer at origin (0, 0)
1167 * @param mv motion vector (relative to block position) to get pixel data from
1168 * @param x_off horizontal position of block from origin (0, 0)
1169 * @param y_off vertical position of block from origin (0, 0)
1170 * @param block_w width of block (16, 8 or 4)
1171 * @param block_h height of block (always same as block_w)
1172 * @param width width of src/dst plane data
1173 * @param height height of src/dst plane data
1174 * @param linesize size of a single line of plane data, including padding
1175 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1176 */
414ac27d 1177static av_always_inline
4773d904
RB
1178void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1179 const VP56mv *mv, int x_off, int y_off,
64233e70
JGG
1180 int block_w, int block_h, int width, int height, int linesize,
1181 vp8_mc_func mc_func[3][3])
1182{
4773d904
RB
1183 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1184
64233e70
JGG
1185 if (AV_RN32A(mv)) {
1186 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1187 int my = mv->y&7, my_idx = subpel_idx[0][my];
1188
1189 x_off += mv->x >> 3;
1190 y_off += mv->y >> 3;
1191
1192 // edge emulation
1193 src1 += y_off * linesize + x_off;
1194 src2 += y_off * linesize + x_off;
4773d904 1195 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
64233e70
JGG
1196 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1197 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1198 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1199 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1200 x_off - mx_idx, y_off - my_idx, width, height);
1201 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1202 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1203
1204 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1205 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1206 x_off - mx_idx, y_off - my_idx, width, height);
1207 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1208 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1209 } else {
1210 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1211 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1212 }
1213 } else {
4773d904 1214 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
64233e70
JGG
1215 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1216 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1217 }
1218}
1219
1220static av_always_inline
414ac27d
JGG
1221void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1222 AVFrame *ref_frame, int x_off, int y_off,
1223 int bx_off, int by_off,
1224 int block_w, int block_h,
1225 int width, int height, VP56mv *mv)
7c4dcf81
RB
1226{
1227 VP56mv uvmv = *mv;
1228
1229 /* Y */
64233e70 1230 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
4773d904 1231 ref_frame, mv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1232 block_w, block_h, width, height, s->linesize,
1233 s->put_pixels_tab[block_w == 8]);
7c4dcf81
RB
1234
1235 /* U/V */
1236 if (s->profile == 3) {
1237 uvmv.x &= ~7;
1238 uvmv.y &= ~7;
1239 }
1240 x_off >>= 1; y_off >>= 1;
1241 bx_off >>= 1; by_off >>= 1;
1242 width >>= 1; height >>= 1;
1243 block_w >>= 1; block_h >>= 1;
64233e70 1244 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
4773d904
RB
1245 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1246 &uvmv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1247 block_w, block_h, width, height, s->uvlinesize,
1248 s->put_pixels_tab[1 + (block_w == 4)]);
7c4dcf81
RB
1249}
1250
d864dee8
JGG
1251/* Fetch pixels for estimated mv 4 macroblocks ahead.
1252 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
414ac27d 1253static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
d864dee8 1254{
ef38842f
JGG
1255 /* Don't prefetch refs that haven't been used very often this frame. */
1256 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
c4211046 1257 int x_off = mb_x << 4, y_off = mb_y << 4;
7e13022a
JGG
1258 int mx = (mb->mv.x>>2) + x_off + 8;
1259 int my = (mb->mv.y>>2) + y_off;
c4211046
JGG
1260 uint8_t **src= s->framep[ref]->data;
1261 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
4773d904
RB
1262 /* For threading, a ff_thread_await_progress here might be useful, but
1263 * it actually slows down the decoder. Since a bad prefetch doesn't
1264 * generate bad decoder output, we don't run it here. */
c4211046
JGG
1265 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1266 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1267 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1268 }
d864dee8
JGG
1269}
1270
3b636f21
DC
1271/**
1272 * Apply motion vectors to prediction buffer, chapter 18.
1273 */
414ac27d
JGG
1274static av_always_inline
1275void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1276 int mb_x, int mb_y)
3b636f21
DC
1277{
1278 int x_off = mb_x << 4, y_off = mb_y << 4;
1279 int width = 16*s->mb_width, height = 16*s->mb_height;
d292c345
JGG
1280 AVFrame *ref = s->framep[mb->ref_frame];
1281 VP56mv *bmv = mb->bmv;
3b636f21 1282
73be29b0
JGG
1283 switch (mb->partitioning) {
1284 case VP8_SPLITMVMODE_NONE:
d292c345 1285 vp8_mc_part(s, dst, ref, x_off, y_off,
7c4dcf81 1286 0, 0, 16, 16, width, height, &mb->mv);
73be29b0 1287 break;
7c4dcf81 1288 case VP8_SPLITMVMODE_4x4: {
3b636f21 1289 int x, y;
7c4dcf81 1290 VP56mv uvmv;
3b636f21
DC
1291
1292 /* Y */
1293 for (y = 0; y < 4; y++) {
1294 for (x = 0; x < 4; x++) {
64233e70 1295 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
4773d904 1296 ref, &bmv[4*y + x],
64233e70
JGG
1297 4*x + x_off, 4*y + y_off, 4, 4,
1298 width, height, s->linesize,
1299 s->put_pixels_tab[2]);
3b636f21
DC
1300 }
1301 }
1302
1303 /* U/V */
1304 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1305 for (y = 0; y < 2; y++) {
1306 for (x = 0; x < 2; x++) {
1307 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1308 mb->bmv[ 2*y * 4 + 2*x+1].x +
1309 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1310 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1311 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1312 mb->bmv[ 2*y * 4 + 2*x+1].y +
1313 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1314 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
8f910a56
SG
1315 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1316 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
3b636f21
DC
1317 if (s->profile == 3) {
1318 uvmv.x &= ~7;
1319 uvmv.y &= ~7;
1320 }
64233e70 1321 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
4773d904 1322 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
64233e70
JGG
1323 4*x + x_off, 4*y + y_off, 4, 4,
1324 width, height, s->uvlinesize,
1325 s->put_pixels_tab[2]);
3b636f21
DC
1326 }
1327 }
7c4dcf81
RB
1328 break;
1329 }
1330 case VP8_SPLITMVMODE_16x8:
d292c345
JGG
1331 vp8_mc_part(s, dst, ref, x_off, y_off,
1332 0, 0, 16, 8, width, height, &bmv[0]);
1333 vp8_mc_part(s, dst, ref, x_off, y_off,
1334 0, 8, 16, 8, width, height, &bmv[1]);
7c4dcf81
RB
1335 break;
1336 case VP8_SPLITMVMODE_8x16:
d292c345
JGG
1337 vp8_mc_part(s, dst, ref, x_off, y_off,
1338 0, 0, 8, 16, width, height, &bmv[0]);
1339 vp8_mc_part(s, dst, ref, x_off, y_off,
1340 8, 0, 8, 16, width, height, &bmv[1]);
7c4dcf81
RB
1341 break;
1342 case VP8_SPLITMVMODE_8x8:
d292c345
JGG
1343 vp8_mc_part(s, dst, ref, x_off, y_off,
1344 0, 0, 8, 8, width, height, &bmv[0]);
1345 vp8_mc_part(s, dst, ref, x_off, y_off,
1346 8, 0, 8, 8, width, height, &bmv[1]);
1347 vp8_mc_part(s, dst, ref, x_off, y_off,
1348 0, 8, 8, 8, width, height, &bmv[2]);
1349 vp8_mc_part(s, dst, ref, x_off, y_off,
1350 8, 8, 8, 8, width, height, &bmv[3]);
7c4dcf81 1351 break;
3b636f21
DC
1352 }
1353}
1354
414ac27d 1355static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
3b636f21 1356{
3df56f41 1357 int x, y, ch;
3b636f21 1358
8a467b2d
JGG
1359 if (mb->mode != MODE_I4x4) {
1360 uint8_t *y_dst = dst[0];
3b636f21 1361 for (y = 0; y < 4; y++) {
62457f90 1362 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
3df56f41
JGG
1363 if (nnz4) {
1364 if (nnz4&~0x01010101) {
8a467b2d 1365 for (x = 0; x < 4; x++) {
62457f90
JGG
1366 if ((uint8_t)nnz4 == 1)
1367 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1368 else if((uint8_t)nnz4 > 1)
1369 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1370 nnz4 >>= 8;
1371 if (!nnz4)
1372 break;
8a467b2d
JGG
1373 }
1374 } else {
3ae079a3 1375 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
3b636f21
DC
1376 }
1377 }
1378 y_dst += 4*s->linesize;
1379 }
8a467b2d 1380 }
3b636f21 1381
8a467b2d 1382 for (ch = 0; ch < 2; ch++) {
62457f90 1383 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
3ae079a3 1384 if (nnz4) {
8a467b2d 1385 uint8_t *ch_dst = dst[1+ch];
3ae079a3
JGG
1386 if (nnz4&~0x01010101) {
1387 for (y = 0; y < 2; y++) {
1388 for (x = 0; x < 2; x++) {
62457f90
JGG
1389 if ((uint8_t)nnz4 == 1)
1390 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1391 else if((uint8_t)nnz4 > 1)
1392 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1393 nnz4 >>= 8;
1394 if (!nnz4)
628b48db 1395 goto chroma_idct_end;
8a467b2d 1396 }
3ae079a3 1397 ch_dst += 4*s->uvlinesize;
8a467b2d 1398 }
3ae079a3
JGG
1399 } else {
1400 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
3b636f21
DC
1401 }
1402 }
628b48db 1403chroma_idct_end: ;
3b636f21
DC
1404 }
1405}
1406
414ac27d 1407static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
3b636f21
DC
1408{
1409 int interior_limit, filter_level;
1410
1411 if (s->segmentation.enabled) {
17343e39 1412 filter_level = s->segmentation.filter_level[mb->segment];
3b636f21
DC
1413 if (!s->segmentation.absolute_vals)
1414 filter_level += s->filter.level;
1415 } else
1416 filter_level = s->filter.level;
1417
1418 if (s->lf_delta.enabled) {
1419 filter_level += s->lf_delta.ref[mb->ref_frame];
dd18c9a0 1420 filter_level += s->lf_delta.mode[mb->mode];
3b636f21 1421 }
a1b227bb 1422
1550f45a 1423 filter_level = av_clip_uintp2(filter_level, 6);
3b636f21
DC
1424
1425 interior_limit = filter_level;
1426 if (s->filter.sharpness) {
8a2c99b4 1427 interior_limit >>= (s->filter.sharpness + 3) >> 2;
3b636f21
DC
1428 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1429 }
1430 interior_limit = FFMAX(interior_limit, 1);
1431
968570d6
JGG
1432 f->filter_level = filter_level;
1433 f->inner_limit = interior_limit;
c55e0d34 1434 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
3b636f21
DC
1435}
1436
414ac27d 1437static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1438{
968570d6
JGG
1439 int mbedge_lim, bedge_lim, hev_thresh;
1440 int filter_level = f->filter_level;
1441 int inner_limit = f->inner_limit;
c55e0d34 1442 int inner_filter = f->inner_filter;
145d3186
JGG
1443 int linesize = s->linesize;
1444 int uvlinesize = s->uvlinesize;
79dec154
JGG
1445 static const uint8_t hev_thresh_lut[2][64] = {
1446 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1447 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1448 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1449 3, 3, 3, 3 },
1450 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1451 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1452 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1453 2, 2, 2, 2 }
1454 };
3b636f21 1455
3b636f21
DC
1456 if (!filter_level)
1457 return;
1458
79dec154
JGG
1459 bedge_lim = 2*filter_level + inner_limit;
1460 mbedge_lim = bedge_lim + 4;
968570d6 1461
79dec154 1462 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
5245c04d 1463
3b636f21 1464 if (mb_x) {
145d3186 1465 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
3facfc99 1466 mbedge_lim, inner_limit, hev_thresh);
145d3186 1467 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1468 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1469 }
1470
c55e0d34 1471 if (inner_filter) {
145d3186
JGG
1472 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1473 inner_limit, hev_thresh);
1474 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1475 inner_limit, hev_thresh);
1476 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1477 inner_limit, hev_thresh);
1478 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1479 uvlinesize, bedge_lim,
1480 inner_limit, hev_thresh);
3b636f21
DC
1481 }
1482
1483 if (mb_y) {
145d3186 1484 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
3facfc99 1485 mbedge_lim, inner_limit, hev_thresh);
145d3186 1486 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1487 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1488 }
1489
c55e0d34 1490 if (inner_filter) {
145d3186
JGG
1491 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1492 linesize, bedge_lim,
1493 inner_limit, hev_thresh);
1494 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1495 linesize, bedge_lim,
1496 inner_limit, hev_thresh);
1497 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1498 linesize, bedge_lim,
1499 inner_limit, hev_thresh);
1500 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1501 dst[2] + 4 * uvlinesize,
1502 uvlinesize, bedge_lim,
3facfc99 1503 inner_limit, hev_thresh);
3b636f21
DC
1504 }
1505}
1506
414ac27d 1507static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1508{
968570d6
JGG
1509 int mbedge_lim, bedge_lim;
1510 int filter_level = f->filter_level;
1511 int inner_limit = f->inner_limit;
c55e0d34 1512 int inner_filter = f->inner_filter;
145d3186 1513 int linesize = s->linesize;
3b636f21 1514
3b636f21
DC
1515 if (!filter_level)
1516 return;
1517
79dec154
JGG
1518 bedge_lim = 2*filter_level + inner_limit;
1519 mbedge_lim = bedge_lim + 4;
3b636f21
DC
1520
1521 if (mb_x)
145d3186 1522 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1523 if (inner_filter) {
145d3186
JGG
1524 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1525 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1526 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
3b636f21
DC
1527 }
1528
1529 if (mb_y)
145d3186 1530 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1531 if (inner_filter) {
145d3186
JGG
1532 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1533 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1534 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
3b636f21
DC
1535 }
1536}
1537
4773d904 1538static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
3b636f21 1539{
968570d6 1540 VP8FilterStrength *f = s->filter_strength;
3b636f21 1541 uint8_t *dst[3] = {
4773d904
RB
1542 curframe->data[0] + 16*mb_y*s->linesize,
1543 curframe->data[1] + 8*mb_y*s->uvlinesize,
1544 curframe->data[2] + 8*mb_y*s->uvlinesize
3b636f21
DC
1545 };
1546 int mb_x;
1547
1548 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1549 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
c55e0d34 1550 filter_mb(s, dst, f++, mb_x, mb_y);
3b636f21
DC
1551 dst[0] += 16;
1552 dst[1] += 8;
1553 dst[2] += 8;
1554 }
1555}
1556
4773d904 1557static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
3b636f21 1558{
968570d6 1559 VP8FilterStrength *f = s->filter_strength;
4773d904 1560 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
3b636f21
DC
1561 int mb_x;
1562
1563 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1564 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
c55e0d34 1565 filter_mb_simple(s, dst, f++, mb_x, mb_y);
3b636f21
DC
1566 dst += 16;
1567 }
1568}
1569
ce42a048
RB
1570static void release_queued_segmaps(VP8Context *s, int is_close)
1571{
1572 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1573 while (s->num_maps_to_be_freed > leave_behind)
1574 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1575 s->maps_are_invalid = 0;
1576}
1577
337ade52
DK
1578#define MARGIN (16 << 2)
1579static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe,
1580 AVFrame *prev_frame, int mb_y)
1581{
1582 VP8Context *s = avctx->priv_data;
1583 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1584 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1585 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1586 uint8_t *dst[3] = {
1587 curframe->data[0] + 16*mb_y*s->linesize,
1588 curframe->data[1] + 8*mb_y*s->uvlinesize,
1589 curframe->data[2] + 8*mb_y*s->uvlinesize
1590 };
1591
1592 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1593 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1594 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1595
1596 // left edge of 129 for intra prediction
1597 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1598 for (i = 0; i < 3; i++)
1599 for (y = 0; y < 16>>!!i; y++)
1600 dst[i][y*curframe->linesize[i]-1] = 129;
1601 if (mb_y == 1) // top left edge is also 129
1602 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1603 }
1604
1605 s->mv_min.x = -MARGIN;
1606 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1607
1608 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1609 /* Prefetch the current frame, 4 MBs ahead */
1610 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1611 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1612
1613 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1614 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
1615
1616 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1617
1618 if (!mb->skip)
1619 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1620
1621 if (mb->mode <= MODE_I4x4)
1622 intra_predict(s, dst, mb, mb_x, mb_y);
1623 else
1624 inter_predict(s, dst, mb, mb_x, mb_y);
1625
1626 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1627
1628 if (!mb->skip) {
1629 idct_mb(s, dst, mb);
1630 } else {
1631 AV_ZERO64(s->left_nnz);
1632 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1633
1634 // Reset DC block predictors if they would exist if the mb had coefficients
1635 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1636 s->left_nnz[8] = 0;
1637 s->top_nnz[mb_x][8] = 0;
1638 }
1639 }
1640
1641 if (s->deblock_filter)
1642 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1643
1644 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1645
1646 dst[0] += 16;
1647 dst[1] += 8;
1648 dst[2] += 8;
1649 s->mv_min.x -= 64;
1650 s->mv_max.x -= 64;
1651 }
1652 if (s->deblock_filter) {
1653 if (s->filter.simple)
1654 filter_mb_row_simple(s, curframe, mb_y);
1655 else
1656 filter_mb_row(s, curframe, mb_y);
1657 }
1658 s->mv_min.y -= 64;
1659 s->mv_max.y -= 64;
1660}
1661
3b636f21
DC
1662static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1663 AVPacket *avpkt)
1664{
1665 VP8Context *s = avctx->priv_data;
337ade52 1666 int ret, mb_y, i, referenced;
3b636f21 1667 enum AVDiscard skip_thresh;
e02dec25 1668 AVFrame *av_uninit(curframe), *prev_frame;
3b636f21 1669
ce42a048
RB
1670 release_queued_segmaps(s, 0);
1671
3b636f21 1672 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
fb90785e 1673 goto err;
3b636f21 1674
e02dec25
AC
1675 prev_frame = s->framep[VP56_FRAME_CURRENT];
1676
3b636f21
DC
1677 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1678 || s->update_altref == VP56_FRAME_CURRENT;
1679
1680 skip_thresh = !referenced ? AVDISCARD_NONREF :
1681 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1682
1683 if (avctx->skip_frame >= skip_thresh) {
1684 s->invisible = 1;
fb90785e 1685 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
3b636f21
DC
1686 goto skip_decode;
1687 }
9ac831c2 1688 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
3b636f21 1689
4773d904
RB
1690 // release no longer referenced frames
1691 for (i = 0; i < 5; i++)
1692 if (s->frames[i].data[0] &&
1693 &s->frames[i] != prev_frame &&
1694 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1695 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1696 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
bfa0f965 1697 vp8_release_frame(s, &s->frames[i], 1, 0);
4773d904
RB
1698
1699 // find a free buffer
1700 for (i = 0; i < 5; i++)
1701 if (&s->frames[i] != prev_frame &&
1702 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
3b636f21
DC
1703 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1704 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1705 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1706 break;
1707 }
4773d904
RB
1708 if (i == 5) {
1709 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1710 abort();
1711 }
3b636f21 1712 if (curframe->data[0])
bfa0f965 1713 vp8_release_frame(s, curframe, 1, 0);
3b636f21 1714
fb90785e
RB
1715 // Given that arithmetic probabilities are updated every frame, it's quite likely
1716 // that the values we have on a random interframe are complete junk if we didn't
1717 // start decode on a keyframe. So just don't display anything rather than junk.
1718 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1719 !s->framep[VP56_FRAME_GOLDEN] ||
1720 !s->framep[VP56_FRAME_GOLDEN2])) {
1721 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1722 ret = AVERROR_INVALIDDATA;
1723 goto err;
1724 }
1725
3b636f21 1726 curframe->key_frame = s->keyframe;
975a1447 1727 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3b636f21 1728 curframe->reference = referenced ? 3 : 0;
ce42a048 1729 if ((ret = vp8_alloc_frame(s, curframe))) {
3b636f21 1730 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
fb90785e 1731 goto err;
3b636f21
DC
1732 }
1733
4773d904
RB
1734 // check if golden and altref are swapped
1735 if (s->update_altref != VP56_FRAME_NONE) {
1736 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1737 } else {
1738 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1739 }
1740 if (s->update_golden != VP56_FRAME_NONE) {
1741 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1742 } else {
1743 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1744 }
1745 if (s->update_last) {
1746 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1747 } else {
1748 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1749 }
1750 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1751
1752 ff_thread_finish_setup(avctx);
1753
3b636f21
DC
1754 s->linesize = curframe->linesize[0];
1755 s->uvlinesize = curframe->linesize[1];
1756
1757 if (!s->edge_emu_buffer)
1758 s->edge_emu_buffer = av_malloc(21*s->linesize);
1759
1760 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1761
aa93c52c
PM
1762 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1763 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
c55e0d34 1764
3b636f21 1765 // top edge of 127 for intra prediction
ee555de7
RB
1766 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1767 s->top_border[0][15] = s->top_border[0][23] = 127;
1768 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1769 }
c4211046 1770 memset(s->ref_count, 0, sizeof(s->ref_count));
d2840fa4 1771 if (s->keyframe)
ccf13f9e 1772 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
3b636f21 1773
7634771e
JGG
1774 s->mv_min.y = -MARGIN;
1775 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1776
3b636f21 1777 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
9ebcf769 1778 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
4773d904 1779 ff_thread_await_progress(prev_frame, mb_y, 0);
7634771e 1780
337ade52 1781 vp8_decode_mb_row(avctx, curframe, prev_frame, mb_y);
4773d904
RB
1782
1783 ff_thread_report_progress(curframe, mb_y, 0);
3b636f21 1784 }
3b636f21 1785
4773d904 1786 ff_thread_report_progress(curframe, INT_MAX, 0);
fb90785e
RB
1787 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1788
3b636f21
DC
1789skip_decode:
1790 // if future frames don't use the updated probabilities,
1791 // reset them to the values we saved
1792 if (!s->update_probabilities)
1793 s->prob[0] = s->prob[1];
1794
3b636f21 1795 if (!s->invisible) {
4773d904 1796 *(AVFrame*)data = *curframe;
3b636f21
DC
1797 *data_size = sizeof(AVFrame);
1798 }
1799
1800 return avpkt->size;
fb90785e
RB
1801err:
1802 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1803 return ret;
3b636f21
DC
1804}
1805
1806static av_cold int vp8_decode_init(AVCodecContext *avctx)
1807{
1808 VP8Context *s = avctx->priv_data;
1809
1810 s->avctx = avctx;
1811 avctx->pix_fmt = PIX_FMT_YUV420P;
1812
9cf0841e 1813 ff_dsputil_init(&s->dsp, avctx);
76741b0e 1814 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
3b636f21
DC
1815 ff_vp8dsp_init(&s->vp8dsp);
1816
3b636f21
DC
1817 return 0;
1818}
1819
1820static av_cold int vp8_decode_free(AVCodecContext *avctx)
1821{
bfa0f965 1822 vp8_decode_flush_impl(avctx, 0, 1, 1);
ce42a048 1823 release_queued_segmaps(avctx->priv_data, 1);
3b636f21
DC
1824 return 0;
1825}
1826
4773d904
RB
1827static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1828{
1829 VP8Context *s = avctx->priv_data;
1830
1831 s->avctx = avctx;
1832
1833 return 0;
1834}
1835
/*
 * Map a pointer into s_src->frames[] onto the element with the same index
 * in s->frames[]; a NULL pointer stays NULL.
 *
 * Relies on variables named `s` and `s_src` being in scope where the macro
 * is expanded. The argument and the whole expansion are parenthesized so
 * that compound arguments (e.g. a ?: expression) and surrounding operators
 * cannot regroup the expression. Note that `pic` is evaluated twice, so it
 * must not have side effects.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
1838
1839static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1840{
1841 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
1842
56535793
RB
1843 if (s->macroblocks_base &&
1844 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
1845 free_buffers(s);
e02dec25 1846 s->maps_are_invalid = 1;
82a0497c
RB
1847 s->mb_width = s_src->mb_width;
1848 s->mb_height = s_src->mb_height;
56535793
RB
1849 }
1850
4773d904
RB
1851 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1852 s->segmentation = s_src->segmentation;
1853 s->lf_delta = s_src->lf_delta;
1854 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1855
1856 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
1857 s->framep[0] = REBASE(s_src->next_framep[0]);
1858 s->framep[1] = REBASE(s_src->next_framep[1]);
1859 s->framep[2] = REBASE(s_src->next_framep[2]);
1860 s->framep[3] = REBASE(s_src->next_framep[3]);
1861
1862 return 0;
1863}
1864
d36beb3f 1865AVCodec ff_vp8_decoder = {
00c3b67b
MS
1866 .name = "vp8",
1867 .type = AVMEDIA_TYPE_VIDEO,
1868 .id = CODEC_ID_VP8,
1869 .priv_data_size = sizeof(VP8Context),
1870 .init = vp8_decode_init,
1871 .close = vp8_decode_free,
1872 .decode = vp8_decode_frame,
1873 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1874 .flush = vp8_decode_flush,
1875 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
4773d904
RB
1876 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1877 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
3b636f21 1878};