ac3enc: Remove unneeded clipping of shift amount.
[libav.git] / libavcodec / vp8.c
CommitLineData
3b636f21
DC
1/**
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
13a1304b 6 * Copyright (C) 2010 Jason Garrett-Glaser
3b636f21
DC
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
6ce9b431 25#include "libavcore/imgutils.h"
3b636f21
DC
26#include "avcodec.h"
27#include "vp56.h"
28#include "vp8data.h"
29#include "vp8dsp.h"
30#include "h264pred.h"
31#include "rectangle.h"
32
/**
 * Per-macroblock loop filter parameters.
 * One entry per macroblock column is allocated in update_dimensions().
 * NOTE(review): the exact meaning of each field is defined by the loop
 * filter code, which is outside this part of the file - confirm there.
 */
typedef struct {
    uint8_t filter_level;
    uint8_t inner_limit;
    uint8_t inner_filter;
} VP8FilterStrength;
38
39typedef struct {
3b636f21
DC
40 uint8_t skip;
41 // todo: make it possible to check for at least (i4x4 or split_mv)
42 // in one op. are others needed?
43 uint8_t mode;
44 uint8_t ref_frame;
45 uint8_t partitioning;
46 VP56mv mv;
47 VP56mv bmv[16];
48} VP8Macroblock;
49
50typedef struct {
51 AVCodecContext *avctx;
52 DSPContext dsp;
53 VP8DSPContext vp8dsp;
54 H264PredContext hpc;
0ef1dbed 55 vp8_mc_func put_pixels_tab[3][3][3];
3b636f21
DC
56 AVFrame frames[4];
57 AVFrame *framep[4];
58 uint8_t *edge_emu_buffer;
59 VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
60 int profile;
61
62 int mb_width; /* number of horizontal MB */
63 int mb_height; /* number of vertical MB */
64 int linesize;
65 int uvlinesize;
66
67 int keyframe;
68 int invisible;
69 int update_last; ///< update VP56_FRAME_PREVIOUS with the current one
70 int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
71 int update_altref;
9ac831c2 72 int deblock_filter;
3b636f21
DC
73
74 /**
75 * If this flag is not set, all the probability updates
76 * are discarded after this frame is decoded.
77 */
78 int update_probabilities;
79
80 /**
81 * All coefficients are contained in separate arith coding contexts.
82 * There can be 1, 2, 4, or 8 of these after the header context.
83 */
84 int num_coeff_partitions;
85 VP56RangeCoder coeff_partition[8];
86
87 VP8Macroblock *macroblocks;
88 VP8Macroblock *macroblocks_base;
968570d6 89 VP8FilterStrength *filter_strength;
3b636f21 90
d2840fa4
PM
91 uint8_t *intra4x4_pred_mode_top;
92 uint8_t intra4x4_pred_mode_left[4];
c55e0d34 93 uint8_t *segmentation_map;
3b636f21
DC
94
95 /**
9ac831c2
DC
96 * Cache of the top row needed for intra prediction
97 * 16 for luma, 8 for each chroma plane
98 */
99 uint8_t (*top_border)[16+8+8];
100
101 /**
3b636f21
DC
102 * For coeff decode, we need to know whether the above block had non-zero
103 * coefficients. This means for each macroblock, we need data for 4 luma
104 * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
105 * per macroblock. We keep the last row in top_nnz.
106 */
107 uint8_t (*top_nnz)[9];
108 DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
109
110 /**
111 * This is the index plus one of the last non-zero coeff
112 * for each of the blocks in the current macroblock.
113 * So, 0 -> no coeffs
114 * 1 -> dc-only (special transform)
115 * 2+-> full transform
116 */
117 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
118 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
827d43bb 119 DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
d1c58fce 120 uint8_t intra4x4_pred_mode_mb[16];
3b636f21
DC
121
122 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock
b9a7186b 123 int segment; ///< segment of the current macroblock
3b636f21
DC
124
125 int mbskip_enabled;
126 int sign_bias[4]; ///< one state [0, 1] per ref frame type
c4211046 127 int ref_count[3];
3b636f21
DC
128
129 /**
130 * Base parameters for segmentation, i.e. per-macroblock parameters.
131 * These must be kept unchanged even if segmentation is not used for
132 * a frame, since the values persist between interframes.
133 */
134 struct {
135 int enabled;
136 int absolute_vals;
137 int update_map;
138 int8_t base_quant[4];
139 int8_t filter_level[4]; ///< base loop filter level
140 } segmentation;
141
142 /**
143 * Macroblocks can have one of 4 different quants in a frame when
144 * segmentation is enabled.
145 * If segmentation is disabled, only the first segment's values are used.
146 */
147 struct {
148 // [0] - DC qmul [1] - AC qmul
149 int16_t luma_qmul[2];
150 int16_t luma_dc_qmul[2]; ///< luma dc-only block quant
151 int16_t chroma_qmul[2];
152 } qmat[4];
153
154 struct {
155 int simple;
156 int level;
157 int sharpness;
158 } filter;
159
160 struct {
161 int enabled; ///< whether each mb can have a different strength based on mode/ref
162
163 /**
164 * filter strength adjustment for the following macroblock modes:
dd18c9a0
JGG
165 * [0-3] - i16x16 (always zero)
166 * [4] - i4x4
167 * [5] - zero mv
168 * [6] - inter modes except for zero or split mv
169 * [7] - split mv
3b636f21
DC
170 * i16x16 modes never have any adjustment
171 */
dd18c9a0 172 int8_t mode[VP8_MVMODE_SPLIT+1];
3b636f21
DC
173
174 /**
175 * filter strength adjustment for macroblocks that reference:
176 * [0] - intra / VP56_FRAME_CURRENT
177 * [1] - VP56_FRAME_PREVIOUS
178 * [2] - VP56_FRAME_GOLDEN
179 * [3] - altref / VP56_FRAME_GOLDEN2
180 */
181 int8_t ref[4];
182 } lf_delta;
183
184 /**
185 * These are all of the updatable probabilities for binary decisions.
186 * They are only implictly reset on keyframes, making it quite likely
187 * for an interframe to desync if a prior frame's header was corrupt
188 * or missing outright!
189 */
190 struct {
191 uint8_t segmentid[3];
a8ab0ccc
PM
192 uint8_t mbskip;
193 uint8_t intra;
194 uint8_t last;
195 uint8_t golden;
3b636f21
DC
196 uint8_t pred16x16[4];
197 uint8_t pred8x8c[3];
370b622a
JGG
198 /* Padded to allow overreads */
199 uint8_t token[4][17][3][NUM_DCT_TOKENS-1];
3b636f21
DC
200 uint8_t mvc[2][19];
201 } prob[2];
202} VP8Context;
203
3b636f21
DC
204static void vp8_decode_flush(AVCodecContext *avctx)
205{
206 VP8Context *s = avctx->priv_data;
207 int i;
208
209 for (i = 0; i < 4; i++)
210 if (s->frames[i].data[0])
211 avctx->release_buffer(avctx, &s->frames[i]);
212 memset(s->framep, 0, sizeof(s->framep));
213
214 av_freep(&s->macroblocks_base);
fca05ea8 215 av_freep(&s->filter_strength);
d2840fa4 216 av_freep(&s->intra4x4_pred_mode_top);
3b636f21
DC
217 av_freep(&s->top_nnz);
218 av_freep(&s->edge_emu_buffer);
9ac831c2 219 av_freep(&s->top_border);
c55e0d34 220 av_freep(&s->segmentation_map);
3b636f21
DC
221
222 s->macroblocks = NULL;
3b636f21
DC
223}
224
225static int update_dimensions(VP8Context *s, int width, int height)
226{
e16f217c 227 if (av_image_check_size(width, height, 0, s->avctx))
3b636f21
DC
228 return AVERROR_INVALIDDATA;
229
230 vp8_decode_flush(s->avctx);
231
232 avcodec_set_dimensions(s->avctx, width, height);
233
234 s->mb_width = (s->avctx->coded_width +15) / 16;
235 s->mb_height = (s->avctx->coded_height+15) / 16;
236
aa93c52c
PM
237 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
238 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
ccf13f9e 239 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
3b636f21 240 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
9ac831c2 241 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
aa93c52c 242 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
3b636f21 243
d2840fa4 244 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
c55e0d34 245 !s->top_nnz || !s->top_border || !s->segmentation_map)
b6c420ce
DC
246 return AVERROR(ENOMEM);
247
c55e0d34 248 s->macroblocks = s->macroblocks_base + 1;
3b636f21
DC
249
250 return 0;
251}
252
253static void parse_segment_info(VP8Context *s)
254{
255 VP56RangeCoder *c = &s->c;
256 int i;
257
258 s->segmentation.update_map = vp8_rac_get(c);
259
260 if (vp8_rac_get(c)) { // update segment feature data
261 s->segmentation.absolute_vals = vp8_rac_get(c);
262
263 for (i = 0; i < 4; i++)
264 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
265
266 for (i = 0; i < 4; i++)
267 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
268 }
269 if (s->segmentation.update_map)
270 for (i = 0; i < 3; i++)
271 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
272}
273
274static void update_lf_deltas(VP8Context *s)
275{
276 VP56RangeCoder *c = &s->c;
277 int i;
278
279 for (i = 0; i < 4; i++)
280 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
281
dd18c9a0 282 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
3b636f21
DC
283 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
284}
285
286static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
287{
288 const uint8_t *sizes = buf;
289 int i;
290
291 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
292
293 buf += 3*(s->num_coeff_partitions-1);
294 buf_size -= 3*(s->num_coeff_partitions-1);
295 if (buf_size < 0)
296 return -1;
297
298 for (i = 0; i < s->num_coeff_partitions-1; i++) {
06d50ca8 299 int size = AV_RL24(sizes + 3*i);
3b636f21
DC
300 if (buf_size - size < 0)
301 return -1;
302
905ef0d0 303 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
3b636f21
DC
304 buf += size;
305 buf_size -= size;
306 }
905ef0d0 307 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
3b636f21
DC
308
309 return 0;
310}
311
312static void get_quants(VP8Context *s)
313{
314 VP56RangeCoder *c = &s->c;
315 int i, base_qi;
316
317 int yac_qi = vp8_rac_get_uint(c, 7);
318 int ydc_delta = vp8_rac_get_sint(c, 4);
319 int y2dc_delta = vp8_rac_get_sint(c, 4);
320 int y2ac_delta = vp8_rac_get_sint(c, 4);
321 int uvdc_delta = vp8_rac_get_sint(c, 4);
322 int uvac_delta = vp8_rac_get_sint(c, 4);
323
324 for (i = 0; i < 4; i++) {
325 if (s->segmentation.enabled) {
326 base_qi = s->segmentation.base_quant[i];
327 if (!s->segmentation.absolute_vals)
328 base_qi += yac_qi;
329 } else
330 base_qi = yac_qi;
331
332 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
333 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)];
334 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
a8ab0ccc
PM
335 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
336 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
3b636f21 337 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];
a8ab0ccc
PM
338
339 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
340 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
3b636f21
DC
341 }
342}
343
344/**
345 * Determine which buffers golden and altref should be updated with after this frame.
346 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
347 *
348 * Intra frames update all 3 references
349 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
350 * If the update (golden|altref) flag is set, it's updated with the current frame
351 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
352 * If the flag is not set, the number read means:
353 * 0: no update
354 * 1: VP56_FRAME_PREVIOUS
355 * 2: update golden with altref, or update altref with golden
356 */
357static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
358{
359 VP56RangeCoder *c = &s->c;
360
361 if (update)
362 return VP56_FRAME_CURRENT;
363
364 switch (vp8_rac_get_uint(c, 2)) {
365 case 1:
366 return VP56_FRAME_PREVIOUS;
367 case 2:
368 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
369 }
370 return VP56_FRAME_NONE;
371}
372
373static void update_refs(VP8Context *s)
374{
375 VP56RangeCoder *c = &s->c;
376
377 int update_golden = vp8_rac_get(c);
378 int update_altref = vp8_rac_get(c);
379
380 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
381 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
382}
383
384static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
385{
386 VP56RangeCoder *c = &s->c;
370b622a 387 int header_size, hscale, vscale, i, j, k, l, m, ret;
3b636f21
DC
388 int width = s->avctx->width;
389 int height = s->avctx->height;
390
391 s->keyframe = !(buf[0] & 1);
392 s->profile = (buf[0]>>1) & 7;
393 s->invisible = !(buf[0] & 0x10);
06d50ca8 394 header_size = AV_RL24(buf) >> 5;
3b636f21
DC
395 buf += 3;
396 buf_size -= 3;
397
0ef1dbed
DC
398 if (s->profile > 3)
399 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
400
401 if (!s->profile)
402 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
403 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
404 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
3b636f21
DC
405
406 if (header_size > buf_size - 7*s->keyframe) {
407 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
408 return AVERROR_INVALIDDATA;
409 }
410
411 if (s->keyframe) {
06d50ca8
JGG
412 if (AV_RL24(buf) != 0x2a019d) {
413 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
3b636f21
DC
414 return AVERROR_INVALIDDATA;
415 }
416 width = AV_RL16(buf+3) & 0x3fff;
417 height = AV_RL16(buf+5) & 0x3fff;
418 hscale = buf[4] >> 6;
419 vscale = buf[6] >> 6;
420 buf += 7;
421 buf_size -= 7;
422
92a54426
MR
423 if (hscale || vscale)
424 av_log_missing_feature(s->avctx, "Upscaling", 1);
425
3b636f21 426 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
370b622a
JGG
427 for (i = 0; i < 4; i++)
428 for (j = 0; j < 16; j++)
429 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
430 sizeof(s->prob->token[i][j]));
3b636f21
DC
431 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
432 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
433 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
434 memset(&s->segmentation, 0, sizeof(s->segmentation));
435 }
436
437 if (!s->macroblocks_base || /* first frame */
438 width != s->avctx->width || height != s->avctx->height) {
439 if ((ret = update_dimensions(s, width, height) < 0))
440 return ret;
441 }
442
905ef0d0 443 ff_vp56_init_range_decoder(c, buf, header_size);
3b636f21
DC
444 buf += header_size;
445 buf_size -= header_size;
446
447 if (s->keyframe) {
448 if (vp8_rac_get(c))
449 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
450 vp8_rac_get(c); // whether we can skip clamping in dsp functions
451 }
452
453 if ((s->segmentation.enabled = vp8_rac_get(c)))
454 parse_segment_info(s);
455 else
456 s->segmentation.update_map = 0; // FIXME: move this to some init function?
457
458 s->filter.simple = vp8_rac_get(c);
459 s->filter.level = vp8_rac_get_uint(c, 6);
460 s->filter.sharpness = vp8_rac_get_uint(c, 3);
461
462 if ((s->lf_delta.enabled = vp8_rac_get(c)))
463 if (vp8_rac_get(c))
464 update_lf_deltas(s);
465
466 if (setup_partitions(s, buf, buf_size)) {
467 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
468 return AVERROR_INVALIDDATA;
469 }
470
471 get_quants(s);
472
473 if (!s->keyframe) {
474 update_refs(s);
475 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
476 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
477 }
478
479 // if we aren't saving this frame's probabilities for future frames,
480 // make a copy of the current probabilities
481 if (!(s->update_probabilities = vp8_rac_get(c)))
482 s->prob[1] = s->prob[0];
483
484 s->update_last = s->keyframe || vp8_rac_get(c);
485
486 for (i = 0; i < 4; i++)
487 for (j = 0; j < 8; j++)
488 for (k = 0; k < 3; k++)
489 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
370b622a
JGG
490 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
491 int prob = vp8_rac_get_uint(c, 8);
b0d58795
JGG
492 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
493 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
370b622a 494 }
3b636f21
DC
495
496 if ((s->mbskip_enabled = vp8_rac_get(c)))
a8ab0ccc 497 s->prob->mbskip = vp8_rac_get_uint(c, 8);
3b636f21
DC
498
499 if (!s->keyframe) {
a8ab0ccc
PM
500 s->prob->intra = vp8_rac_get_uint(c, 8);
501 s->prob->last = vp8_rac_get_uint(c, 8);
502 s->prob->golden = vp8_rac_get_uint(c, 8);
3b636f21
DC
503
504 if (vp8_rac_get(c))
505 for (i = 0; i < 4; i++)
506 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
507 if (vp8_rac_get(c))
508 for (i = 0; i < 3; i++)
509 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
510
511 // 17.2 MV probability update
512 for (i = 0; i < 2; i++)
513 for (j = 0; j < 19; j++)
7697cdcf 514 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
3b636f21
DC
515 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
516 }
517
518 return 0;
519}
520
414ac27d
JGG
521static av_always_inline
522void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y)
3b636f21
DC
523{
524#define MARGIN (16 << 2)
525 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN),
526 ((s->mb_width - 1 - mb_x) << 6) + MARGIN);
527 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN),
528 ((s->mb_height - 1 - mb_y) << 6) + MARGIN);
529}
530
414ac27d 531static av_always_inline
aa93c52c 532void find_near_mvs(VP8Context *s, VP8Macroblock *mb,
414ac27d 533 VP56mv near[2], VP56mv *best, uint8_t cnt[4])
3b636f21 534{
c55e0d34
JGG
535 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
536 mb - 1 /* left */,
537 mb + 1 /* top-left */ };
3b636f21
DC
538 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
539 VP56mv near_mv[4] = {{ 0 }};
540 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
702e8d33 541 int idx = CNT_ZERO;
3b636f21 542 int best_idx = CNT_ZERO;
702e8d33
JGG
543 int cur_sign_bias = s->sign_bias[mb->ref_frame];
544 int *sign_bias = s->sign_bias;
3b636f21
DC
545
546 /* Process MB on top, left and top-left */
702e8d33
JGG
547 #define MV_EDGE_CHECK(n)\
548 {\
549 VP8Macroblock *edge = mb_edge[n];\
550 int edge_ref = edge->ref_frame;\
551 if (edge_ref != VP56_FRAME_CURRENT) {\
552 uint32_t mv = AV_RN32A(&edge->mv);\
553 if (mv) {\
554 if (cur_sign_bias != sign_bias[edge_ref]) {\
555 /* SWAR negate of the values in mv. */\
0087aa47
JGG
556 mv = ~mv;\
557 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
702e8d33
JGG
558 }\
559 if (!n || mv != AV_RN32A(&near_mv[idx]))\
560 AV_WN32A(&near_mv[++idx], mv);\
561 cnt[idx] += 1 + (n != 2);\
562 } else\
563 cnt[CNT_ZERO] += 1 + (n != 2);\
564 }\
3b636f21 565 }
702e8d33
JGG
566 MV_EDGE_CHECK(0)
567 MV_EDGE_CHECK(1)
568 MV_EDGE_CHECK(2)
3b636f21 569
702e8d33
JGG
570 /* If we have three distinct MVs, merge first and last if they're the same */
571 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT]))
3b636f21
DC
572 cnt[CNT_NEAREST] += 1;
573
574 cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
575 (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
576 (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
577
578 /* Swap near and nearest if necessary */
579 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
702e8d33
JGG
580 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
581 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
3b636f21
DC
582 }
583
584 /* Choose the best mv out of 0,0 and the nearest mv */
585 if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])
586 best_idx = CNT_NEAREST;
587
9fddd14a 588 mb->mv = near_mv[best_idx];
3b636f21
DC
589 near[0] = near_mv[CNT_NEAREST];
590 near[1] = near_mv[CNT_NEAR];
591}
592
593/**
594 * Motion vector coding, 17.1.
595 */
596static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
597{
ca18a478 598 int bit, x = 0;
3b636f21 599
7697cdcf 600 if (vp56_rac_get_prob_branchy(c, p[0])) {
3b636f21
DC
601 int i;
602
603 for (i = 0; i < 3; i++)
604 x += vp56_rac_get_prob(c, p[9 + i]) << i;
605 for (i = 9; i > 3; i--)
606 x += vp56_rac_get_prob(c, p[9 + i]) << i;
607 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
608 x += 8;
ca18a478
DC
609 } else {
610 // small_mvtree
611 const uint8_t *ps = p+2;
612 bit = vp56_rac_get_prob(c, *ps);
613 ps += 1 + 3*bit;
614 x += 4*bit;
615 bit = vp56_rac_get_prob(c, *ps);
616 ps += 1 + bit;
617 x += 2*bit;
618 x += vp56_rac_get_prob(c, *ps);
619 }
3b636f21
DC
620
621 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
622}
623
414ac27d
JGG
624static av_always_inline
625const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
3b636f21 626{
7bf254c4
JGG
627 if (left == top)
628 return vp8_submv_prob[4-!!left];
629 if (!top)
3b636f21 630 return vp8_submv_prob[2];
7bf254c4 631 return vp8_submv_prob[1-!!left];
3b636f21
DC
632}
633
634/**
635 * Split motion vector prediction, 16.4.
7ed06b2b 636 * @returns the number of motion vectors parsed (2, 4 or 16)
3b636f21 637 */
414ac27d
JGG
638static av_always_inline
639int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
3b636f21 640{
0908f1b9
JGG
641 int part_idx;
642 int n, num;
c55e0d34 643 VP8Macroblock *top_mb = &mb[2];
7bf254c4
JGG
644 VP8Macroblock *left_mb = &mb[-1];
645 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
646 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
0908f1b9 647 *mbsplits_cur, *firstidx;
c55e0d34
JGG
648 VP56mv *top_mv = top_mb->bmv;
649 VP56mv *left_mv = left_mb->bmv;
650 VP56mv *cur_mv = mb->bmv;
3b636f21 651
0908f1b9
JGG
652 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
653 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
654 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
655 } else {
656 part_idx = VP8_SPLITMVMODE_8x8;
657 }
658 } else {
659 part_idx = VP8_SPLITMVMODE_4x4;
660 }
661
662 num = vp8_mbsplit_count[part_idx];
663 mbsplits_cur = vp8_mbsplits[part_idx],
664 firstidx = vp8_mbfirstidx[part_idx];
665 mb->partitioning = part_idx;
666
3b636f21 667 for (n = 0; n < num; n++) {
7ed06b2b 668 int k = firstidx[n];
7bf254c4 669 uint32_t left, above;
7ed06b2b
RB
670 const uint8_t *submv_prob;
671
7bf254c4
JGG
672 if (!(k & 3))
673 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
674 else
675 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
676 if (k <= 3)
677 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
678 else
679 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
7ed06b2b
RB
680
681 submv_prob = get_submv_prob(left, above);
3b636f21 682
c5dec7f1
JGG
683 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
684 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
685 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
686 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
687 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
688 } else {
689 AV_ZERO32(&mb->bmv[n]);
690 }
691 } else {
692 AV_WN32A(&mb->bmv[n], above);
693 }
694 } else {
7bf254c4 695 AV_WN32A(&mb->bmv[n], left);
3b636f21 696 }
3b636f21 697 }
7ed06b2b
RB
698
699 return num;
3b636f21
DC
700}
701
414ac27d 702static av_always_inline
d2840fa4
PM
703void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
704 int mb_x, int keyframe)
3b636f21 705{
d2840fa4 706 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
d1c58fce 707 if (keyframe) {
d2840fa4
PM
708 int x, y;
709 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
710 uint8_t* const left = s->intra4x4_pred_mode_left;
d1c58fce
JGG
711 for (y = 0; y < 4; y++) {
712 for (x = 0; x < 4; x++) {
d2840fa4
PM
713 const uint8_t *ctx;
714 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
715 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
716 left[y] = top[x] = *intra4x4;
717 intra4x4++;
3b636f21 718 }
3b636f21 719 }
d1c58fce 720 } else {
d2840fa4 721 int i;
d1c58fce
JGG
722 for (i = 0; i < 16; i++)
723 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
3b636f21
DC
724 }
725}
726
414ac27d 727static av_always_inline
d2840fa4 728void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
3b636f21
DC
729{
730 VP56RangeCoder *c = &s->c;
3b636f21
DC
731
732 if (s->segmentation.update_map)
c55e0d34 733 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
b9a7186b 734 s->segment = *segment;
3b636f21 735
a8ab0ccc 736 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
3b636f21
DC
737
738 if (s->keyframe) {
739 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
740
741 if (mb->mode == MODE_I4x4) {
d2840fa4
PM
742 decode_intra4x4_modes(s, c, mb_x, 1);
743 } else {
744 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
745 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
746 AV_WN32A(s->intra4x4_pred_mode_left, modes);
747 }
3b636f21
DC
748
749 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
750 mb->ref_frame = VP56_FRAME_CURRENT;
a8ab0ccc 751 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
3b636f21 752 VP56mv near[2], best;
702e8d33 753 uint8_t cnt[4] = { 0 };
3b636f21
DC
754
755 // inter MB, 16.2
a8ab0ccc
PM
756 if (vp56_rac_get_prob_branchy(c, s->prob->last))
757 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
3b636f21
DC
758 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
759 else
760 mb->ref_frame = VP56_FRAME_PREVIOUS;
c4211046 761 s->ref_count[mb->ref_frame-1]++;
3b636f21
DC
762
763 // motion vectors, 16.3
aa93c52c 764 find_near_mvs(s, mb, near, &best, cnt);
23117d69 765 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[0]][0])) {
dd18c9a0 766 mb->mode = VP8_MVMODE_MV;
23117d69
JGG
767 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[1]][1])) {
768 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[2]][2])) {
769 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[3]][3])) {
770 mb->mode = VP8_MVMODE_SPLIT;
771 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y);
772 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
773 } else {
23117d69 774 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y);
2b476e02
JGG
775 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
776 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
23117d69 777 }
dd18c9a0 778 } else
23117d69 779 clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y);
dd18c9a0 780 } else
23117d69 781 clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y);
23117d69
JGG
782 } else {
783 mb->mode = VP8_MVMODE_ZERO;
14767f35 784 AV_ZERO32(&mb->mv);
3b636f21
DC
785 }
786 if (mb->mode != VP8_MVMODE_SPLIT) {
7ed06b2b
RB
787 mb->partitioning = VP8_SPLITMVMODE_NONE;
788 mb->bmv[0] = mb->mv;
3b636f21
DC
789 }
790 } else {
791 // intra MB, 16.1
792 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
793
158e062c 794 if (mb->mode == MODE_I4x4)
d2840fa4 795 decode_intra4x4_modes(s, c, mb_x, 0);
3b636f21
DC
796
797 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
798 mb->ref_frame = VP56_FRAME_CURRENT;
b946111f 799 mb->partitioning = VP8_SPLITMVMODE_NONE;
14767f35 800 AV_ZERO32(&mb->bmv[0]);
3b636f21
DC
801 }
802}
803
804/**
e394953e
RB
805 * @param c arithmetic bitstream reader context
806 * @param block destination for block coefficients
807 * @param probs probabilities to use when reading trees from the bitstream
3b636f21
DC
808 * @param i initial coeff index, 0 unless a separate DC block is coded
809 * @param zero_nhood the initial prediction context for number of surrounding
810 * all-zero blocks (only left/top, so 0-2)
3fa76268 811 * @param qmul array holding the dc/ac dequant factor at position 0/1
3b636f21
DC
812 * @return 0 if no coeffs were decoded
813 * otherwise, the index of the last coeff decoded plus one
814 */
1e739679
JGG
815static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
816 uint8_t probs[8][3][NUM_DCT_TOKENS-1],
817 int i, uint8_t *token_prob, int16_t qmul[2])
3b636f21 818{
afb54a85 819 goto skip_eob;
fe1b5d97 820 do {
1e739679 821 int coeff;
fe1b5d97 822 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
afb54a85 823 return i;
3b636f21 824
fe1b5d97
DC
825skip_eob:
826 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
c22b4468 827 if (++i == 16)
afb54a85 828 return i; // invalid input; blocks should end with EOB
370b622a 829 token_prob = probs[i][0];
c22b4468 830 goto skip_eob;
fe1b5d97
DC
831 }
832
833 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
834 coeff = 1;
370b622a 835 token_prob = probs[i+1][1];
fe1b5d97 836 } else {
fe1b5d97 837 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
476be414 838 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
fe1b5d97
DC
839 if (coeff)
840 coeff += vp56_rac_get_prob(c, token_prob[5]);
841 coeff += 2;
842 } else {
843 // DCT_CAT*
844 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
845 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
846 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
847 } else { // DCT_CAT2
848 coeff = 7;
849 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
850 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
851 }
852 } else { // DCT_CAT3 and up
853 int a = vp56_rac_get_prob(c, token_prob[8]);
854 int b = vp56_rac_get_prob(c, token_prob[9+a]);
855 int cat = (a<<1) + b;
856 coeff = 3 + (8<<cat);
857 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
858 }
859 }
370b622a 860 token_prob = probs[i+1][2];
fe1b5d97 861 }
fe1b5d97 862 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
afb54a85 863 } while (++i < 16);
fe1b5d97 864
afb54a85 865 return i;
3b636f21
DC
866}
867
414ac27d 868static av_always_inline
1e739679
JGG
869int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
870 uint8_t probs[8][3][NUM_DCT_TOKENS-1],
871 int i, int zero_nhood, int16_t qmul[2])
872{
873 uint8_t *token_prob = probs[i][zero_nhood];
874 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
875 return 0;
876 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
877}
878
879static av_always_inline
414ac27d
JGG
880void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
881 uint8_t t_nnz[9], uint8_t l_nnz[9])
3b636f21 882{
3b636f21
DC
883 int i, x, y, luma_start = 0, luma_ctx = 3;
884 int nnz_pred, nnz, nnz_total = 0;
b9a7186b 885 int segment = s->segment;
f311208c 886 int block_dc = 0;
3b636f21 887
3b636f21 888 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
3b636f21
DC
889 nnz_pred = t_nnz[8] + l_nnz[8];
890
891 // decode DC values and do hadamard
827d43bb 892 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
3b636f21
DC
893 s->qmat[segment].luma_dc_qmul);
894 l_nnz[8] = t_nnz[8] = !!nnz;
f311208c
JGG
895 if (nnz) {
896 nnz_total += nnz;
897 block_dc = 1;
898 if (nnz == 1)
899 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
900 else
901 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
902 }
3b636f21
DC
903 luma_start = 1;
904 luma_ctx = 0;
905 }
906
907 // luma blocks
908 for (y = 0; y < 4; y++)
909 for (x = 0; x < 4; x++) {
ffbf0794 910 nnz_pred = l_nnz[y] + t_nnz[x];
3b636f21 911 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
ffbf0794 912 nnz_pred, s->qmat[segment].luma_qmul);
f311208c
JGG
913 // nnz+block_dc may be one more than the actual last index, but we don't care
914 s->non_zero_count_cache[y][x] = nnz + block_dc;
3b636f21
DC
915 t_nnz[x] = l_nnz[y] = !!nnz;
916 nnz_total += nnz;
917 }
918
919 // chroma blocks
920 // TODO: what to do about dimensions? 2nd dim for luma is x,
921 // but for chroma it's (y<<1)|x
922 for (i = 4; i < 6; i++)
923 for (y = 0; y < 2; y++)
924 for (x = 0; x < 2; x++) {
925 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
926 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
927 nnz_pred, s->qmat[segment].chroma_qmul);
928 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
929 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
930 nnz_total += nnz;
931 }
932
933 // if there were no coded coeffs despite the macroblock not being marked skip,
934 // we MUST not do the inner loop filter and should not do IDCT
935 // Since skip isn't used for bitstream prediction, just manually set it.
936 if (!nnz_total)
937 mb->skip = 1;
938}
939
9ac831c2
DC
940static av_always_inline
941void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
942 int linesize, int uvlinesize, int simple)
943{
944 AV_COPY128(top_border, src_y + 15*linesize);
945 if (!simple) {
946 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
947 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
948 }
949}
950
951static av_always_inline
952void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
953 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
954 int simple, int xchg)
955{
956 uint8_t *top_border_m1 = top_border-32; // for TL prediction
957 src_y -= linesize;
958 src_cb -= uvlinesize;
959 src_cr -= uvlinesize;
960
096971e8
MR
961#define XCHG(a,b,xchg) do { \
962 if (xchg) AV_SWAP64(b,a); \
963 else AV_COPY64(b,a); \
964 } while (0)
9ac831c2
DC
965
966 XCHG(top_border_m1+8, src_y-8, xchg);
967 XCHG(top_border, src_y, xchg);
968 XCHG(top_border+8, src_y+8, 1);
070ce7ef 969 if (mb_x < mb_width-1)
9ac831c2 970 XCHG(top_border+32, src_y+16, 1);
070ce7ef 971
9ac831c2
DC
972 // only copy chroma for normal loop filter
973 // or to initialize the top row to 127
974 if (!simple || !mb_y) {
975 XCHG(top_border_m1+16, src_cb-8, xchg);
976 XCHG(top_border_m1+24, src_cr-8, xchg);
977 XCHG(top_border+16, src_cb, 1);
978 XCHG(top_border+24, src_cr, 1);
979 }
980}
981
414ac27d 982static av_always_inline
ee555de7
RB
983int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
984{
985 if (!mb_x) {
986 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
987 } else {
988 return mb_y ? mode : LEFT_DC_PRED8x8;
989 }
990}
991
992static av_always_inline
993int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
994{
995 if (!mb_x) {
996 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
997 } else {
998 return mb_y ? mode : HOR_PRED8x8;
999 }
1000}
1001
1002static av_always_inline
1003int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
3b636f21
DC
1004{
1005 if (mode == DC_PRED8x8) {
ee555de7
RB
1006 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1007 } else {
1008 return mode;
1009 }
1010}
1011
1012static av_always_inline
1013int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
1014{
1015 switch (mode) {
1016 case DC_PRED8x8:
1017 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1018 case VERT_PRED8x8:
1019 return !mb_y ? DC_127_PRED8x8 : mode;
1020 case HOR_PRED8x8:
1021 return !mb_x ? DC_129_PRED8x8 : mode;
1022 case PLANE_PRED8x8 /*TM*/:
1023 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
1024 }
1025 return mode;
1026}
1027
1028static av_always_inline
1029int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
1030{
1031 if (!mb_x) {
1032 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
1033 } else {
1034 return mb_y ? mode : HOR_VP8_PRED;
1035 }
1036}
1037
1038static av_always_inline
1039int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1040{
1041 switch (mode) {
1042 case VERT_PRED:
1043 if (!mb_x && mb_y) {
1044 *copy_buf = 1;
1045 return mode;
1046 }
1047 /* fall-through */
1048 case DIAG_DOWN_LEFT_PRED:
1049 case VERT_LEFT_PRED:
1050 return !mb_y ? DC_127_PRED : mode;
1051 case HOR_PRED:
1052 if (!mb_y) {
1053 *copy_buf = 1;
1054 return mode;
a71abb71 1055 }
ee555de7
RB
1056 /* fall-through */
1057 case HOR_UP_PRED:
1058 return !mb_x ? DC_129_PRED : mode;
1059 case TM_VP8_PRED:
1060 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1061 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1062 case DIAG_DOWN_RIGHT_PRED:
1063 case VERT_RIGHT_PRED:
1064 case HOR_DOWN_PRED:
1065 if (!mb_y || !mb_x)
1066 *copy_buf = 1;
1067 return mode;
3b636f21
DC
1068 }
1069 return mode;
1070}
1071
414ac27d
JGG
1072static av_always_inline
1073void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
d2840fa4 1074 int mb_x, int mb_y)
3b636f21 1075{
ee555de7 1076 AVCodecContext *avctx = s->avctx;
3b636f21
DC
1077 int x, y, mode, nnz, tr;
1078
9ac831c2
DC
1079 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1080 // otherwise, skip it if we aren't going to deblock
ee555de7 1081 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
9ac831c2
DC
1082 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1083 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1084 s->filter.simple, 1);
1085
3b636f21 1086 if (mb->mode < MODE_I4x4) {
ee555de7
RB
1087 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1088 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1089 } else {
1090 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1091 }
3b636f21
DC
1092 s->hpc.pred16x16[mode](dst[0], s->linesize);
1093 } else {
1094 uint8_t *ptr = dst[0];
d2840fa4 1095 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
ee555de7 1096 uint8_t tr_top[4] = { 127, 127, 127, 127 };
3b636f21
DC
1097
1098 // all blocks on the right edge of the macroblock use bottom edge
1099 // the top macroblock for their topright edge
1100 uint8_t *tr_right = ptr - s->linesize + 16;
1101
1102 // if we're on the right edge of the frame, said edge is extended
1103 // from the top macroblock
7148da48
RB
1104 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1105 mb_x == s->mb_width-1) {
3b636f21
DC
1106 tr = tr_right[-1]*0x01010101;
1107 tr_right = (uint8_t *)&tr;
1108 }
1109
b74f70d6
JGG
1110 if (mb->skip)
1111 AV_ZERO128(s->non_zero_count_cache);
1112
3b636f21
DC
1113 for (y = 0; y < 4; y++) {
1114 uint8_t *topright = ptr + 4 - s->linesize;
1115 for (x = 0; x < 4; x++) {
ee555de7
RB
1116 int copy = 0, linesize = s->linesize;
1117 uint8_t *dst = ptr+4*x;
1118 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1119
1120 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1121 topright = tr_top;
1122 } else if (x == 3)
3b636f21
DC
1123 topright = tr_right;
1124
ee555de7
RB
1125 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1126 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1127 if (copy) {
1128 dst = copy_dst + 12;
1129 linesize = 8;
1130 if (!(mb_y + y)) {
1131 copy_dst[3] = 127U;
9d4bdcb7 1132 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
ee555de7 1133 } else {
9d4bdcb7 1134 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
ee555de7
RB
1135 if (!(mb_x + x)) {
1136 copy_dst[3] = 129U;
1137 } else {
1138 copy_dst[3] = ptr[4*x-s->linesize-1];
1139 }
1140 }
1141 if (!(mb_x + x)) {
1142 copy_dst[11] =
1143 copy_dst[19] =
1144 copy_dst[27] =
1145 copy_dst[35] = 129U;
1146 } else {
1147 copy_dst[11] = ptr[4*x -1];
1148 copy_dst[19] = ptr[4*x+s->linesize -1];
1149 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1150 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1151 }
1152 }
1153 } else {
1154 mode = intra4x4[x];
1155 }
1156 s->hpc.pred4x4[mode](dst, topright, linesize);
1157 if (copy) {
9d4bdcb7
RB
1158 AV_COPY32(ptr+4*x , copy_dst+12);
1159 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1160 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1161 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
ee555de7 1162 }
3b636f21
DC
1163
1164 nnz = s->non_zero_count_cache[y][x];
1165 if (nnz) {
1166 if (nnz == 1)
1167 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1168 else
1169 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1170 }
1171 topright += 4;
1172 }
1173
1174 ptr += 4*s->linesize;
d2840fa4 1175 intra4x4 += 4;
3b636f21
DC
1176 }
1177 }
1178
ee555de7
RB
1179 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1180 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1181 } else {
1182 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1183 }
3b636f21
DC
1184 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1185 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
9ac831c2 1186
ee555de7 1187 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
9ac831c2
DC
1188 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1189 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1190 s->filter.simple, 0);
3b636f21
DC
1191}
1192
64233e70
JGG
/* Per-subpel-position lookup used by the motion compensation functions;
 * the second index is the fractional part of the motion vector (0..7). */
static const uint8_t subpel_idx[3][8] = {
    /* [0]: number of extra pixels needed on the left/top,
     *      also used as function pointer index */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1]: total number of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2]: number of extra pixels needed on the right/bottom */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1199
3b636f21
DC
1200/**
1201 * Generic MC function.
1202 *
1203 * @param s VP8 decoding context
1204 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
1205 * @param dst target buffer for block data at block position
1206 * @param src reference picture buffer at origin (0, 0)
1207 * @param mv motion vector (relative to block position) to get pixel data from
1208 * @param x_off horizontal position of block from origin (0, 0)
1209 * @param y_off vertical position of block from origin (0, 0)
1210 * @param block_w width of block (16, 8 or 4)
1211 * @param block_h height of block (always same as block_w)
1212 * @param width width of src/dst plane data
1213 * @param height height of src/dst plane data
1214 * @param linesize size of a single line of plane data, including padding
e394953e 1215 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
3b636f21 1216 */
414ac27d 1217static av_always_inline
64233e70
JGG
1218void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv,
1219 int x_off, int y_off, int block_w, int block_h,
1220 int width, int height, int linesize,
1221 vp8_mc_func mc_func[3][3])
3b636f21 1222{
c0498b30 1223 if (AV_RN32A(mv)) {
64233e70
JGG
1224
1225 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1226 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1227
1228 x_off += mv->x >> 2;
1229 y_off += mv->y >> 2;
c0498b30
JGG
1230
1231 // edge emulation
1232 src += y_off * linesize + x_off;
64233e70
JGG
1233 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1234 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
2e279598 1235 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
64233e70
JGG
1236 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1237 x_off - mx_idx, y_off - my_idx, width, height);
44002d83 1238 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
c0498b30
JGG
1239 }
1240 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1241 } else
1242 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
3b636f21
DC
1243}
1244
414ac27d 1245static av_always_inline
64233e70
JGG
1246void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1,
1247 uint8_t *src2, const VP56mv *mv, int x_off, int y_off,
1248 int block_w, int block_h, int width, int height, int linesize,
1249 vp8_mc_func mc_func[3][3])
1250{
1251 if (AV_RN32A(mv)) {
1252 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1253 int my = mv->y&7, my_idx = subpel_idx[0][my];
1254
1255 x_off += mv->x >> 3;
1256 y_off += mv->y >> 3;
1257
1258 // edge emulation
1259 src1 += y_off * linesize + x_off;
1260 src2 += y_off * linesize + x_off;
1261 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1262 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1263 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1264 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1265 x_off - mx_idx, y_off - my_idx, width, height);
1266 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1267 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1268
1269 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1270 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1271 x_off - mx_idx, y_off - my_idx, width, height);
1272 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1273 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1274 } else {
1275 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1276 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1277 }
1278 } else {
1279 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1280 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1281 }
1282}
1283
1284static av_always_inline
414ac27d
JGG
1285void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1286 AVFrame *ref_frame, int x_off, int y_off,
1287 int bx_off, int by_off,
1288 int block_w, int block_h,
1289 int width, int height, VP56mv *mv)
7c4dcf81
RB
1290{
1291 VP56mv uvmv = *mv;
1292
1293 /* Y */
64233e70
JGG
1294 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1295 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
1296 block_w, block_h, width, height, s->linesize,
1297 s->put_pixels_tab[block_w == 8]);
7c4dcf81
RB
1298
1299 /* U/V */
1300 if (s->profile == 3) {
1301 uvmv.x &= ~7;
1302 uvmv.y &= ~7;
1303 }
1304 x_off >>= 1; y_off >>= 1;
1305 bx_off >>= 1; by_off >>= 1;
1306 width >>= 1; height >>= 1;
1307 block_w >>= 1; block_h >>= 1;
64233e70
JGG
1308 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1309 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1],
1310 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
1311 block_w, block_h, width, height, s->uvlinesize,
1312 s->put_pixels_tab[1 + (block_w == 4)]);
7c4dcf81
RB
1313}
1314
d864dee8
JGG
1315/* Fetch pixels for estimated mv 4 macroblocks ahead.
1316 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
414ac27d 1317static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
d864dee8 1318{
ef38842f
JGG
1319 /* Don't prefetch refs that haven't been used very often this frame. */
1320 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
c4211046 1321 int x_off = mb_x << 4, y_off = mb_y << 4;
7e13022a
JGG
1322 int mx = (mb->mv.x>>2) + x_off + 8;
1323 int my = (mb->mv.y>>2) + y_off;
c4211046
JGG
1324 uint8_t **src= s->framep[ref]->data;
1325 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1326 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1327 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1328 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1329 }
d864dee8
JGG
1330}
1331
3b636f21
DC
1332/**
1333 * Apply motion vectors to prediction buffer, chapter 18.
1334 */
414ac27d
JGG
1335static av_always_inline
1336void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1337 int mb_x, int mb_y)
3b636f21
DC
1338{
1339 int x_off = mb_x << 4, y_off = mb_y << 4;
1340 int width = 16*s->mb_width, height = 16*s->mb_height;
d292c345
JGG
1341 AVFrame *ref = s->framep[mb->ref_frame];
1342 VP56mv *bmv = mb->bmv;
3b636f21 1343
73be29b0
JGG
1344 switch (mb->partitioning) {
1345 case VP8_SPLITMVMODE_NONE:
d292c345 1346 vp8_mc_part(s, dst, ref, x_off, y_off,
7c4dcf81 1347 0, 0, 16, 16, width, height, &mb->mv);
73be29b0 1348 break;
7c4dcf81 1349 case VP8_SPLITMVMODE_4x4: {
3b636f21 1350 int x, y;
7c4dcf81 1351 VP56mv uvmv;
3b636f21
DC
1352
1353 /* Y */
1354 for (y = 0; y < 4; y++) {
1355 for (x = 0; x < 4; x++) {
64233e70
JGG
1356 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1357 ref->data[0], &bmv[4*y + x],
1358 4*x + x_off, 4*y + y_off, 4, 4,
1359 width, height, s->linesize,
1360 s->put_pixels_tab[2]);
3b636f21
DC
1361 }
1362 }
1363
1364 /* U/V */
1365 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1366 for (y = 0; y < 2; y++) {
1367 for (x = 0; x < 2; x++) {
1368 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1369 mb->bmv[ 2*y * 4 + 2*x+1].x +
1370 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1371 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1372 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1373 mb->bmv[ 2*y * 4 + 2*x+1].y +
1374 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1375 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
8f910a56
SG
1376 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1377 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
3b636f21
DC
1378 if (s->profile == 3) {
1379 uvmv.x &= ~7;
1380 uvmv.y &= ~7;
1381 }
64233e70
JGG
1382 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1383 dst[2] + 4*y*s->uvlinesize + x*4,
1384 ref->data[1], ref->data[2], &uvmv,
1385 4*x + x_off, 4*y + y_off, 4, 4,
1386 width, height, s->uvlinesize,
1387 s->put_pixels_tab[2]);
3b636f21
DC
1388 }
1389 }
7c4dcf81
RB
1390 break;
1391 }
1392 case VP8_SPLITMVMODE_16x8:
d292c345
JGG
1393 vp8_mc_part(s, dst, ref, x_off, y_off,
1394 0, 0, 16, 8, width, height, &bmv[0]);
1395 vp8_mc_part(s, dst, ref, x_off, y_off,
1396 0, 8, 16, 8, width, height, &bmv[1]);
7c4dcf81
RB
1397 break;
1398 case VP8_SPLITMVMODE_8x16:
d292c345
JGG
1399 vp8_mc_part(s, dst, ref, x_off, y_off,
1400 0, 0, 8, 16, width, height, &bmv[0]);
1401 vp8_mc_part(s, dst, ref, x_off, y_off,
1402 8, 0, 8, 16, width, height, &bmv[1]);
7c4dcf81
RB
1403 break;
1404 case VP8_SPLITMVMODE_8x8:
d292c345
JGG
1405 vp8_mc_part(s, dst, ref, x_off, y_off,
1406 0, 0, 8, 8, width, height, &bmv[0]);
1407 vp8_mc_part(s, dst, ref, x_off, y_off,
1408 8, 0, 8, 8, width, height, &bmv[1]);
1409 vp8_mc_part(s, dst, ref, x_off, y_off,
1410 0, 8, 8, 8, width, height, &bmv[2]);
1411 vp8_mc_part(s, dst, ref, x_off, y_off,
1412 8, 8, 8, 8, width, height, &bmv[3]);
7c4dcf81 1413 break;
3b636f21
DC
1414 }
1415}
1416
414ac27d 1417static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
3b636f21 1418{
3df56f41 1419 int x, y, ch;
3b636f21 1420
8a467b2d
JGG
1421 if (mb->mode != MODE_I4x4) {
1422 uint8_t *y_dst = dst[0];
3b636f21 1423 for (y = 0; y < 4; y++) {
62457f90 1424 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
3df56f41
JGG
1425 if (nnz4) {
1426 if (nnz4&~0x01010101) {
8a467b2d 1427 for (x = 0; x < 4; x++) {
62457f90
JGG
1428 if ((uint8_t)nnz4 == 1)
1429 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1430 else if((uint8_t)nnz4 > 1)
1431 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1432 nnz4 >>= 8;
1433 if (!nnz4)
1434 break;
8a467b2d
JGG
1435 }
1436 } else {
3ae079a3 1437 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
3b636f21
DC
1438 }
1439 }
1440 y_dst += 4*s->linesize;
1441 }
8a467b2d 1442 }
3b636f21 1443
8a467b2d 1444 for (ch = 0; ch < 2; ch++) {
62457f90 1445 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
3ae079a3 1446 if (nnz4) {
8a467b2d 1447 uint8_t *ch_dst = dst[1+ch];
3ae079a3
JGG
1448 if (nnz4&~0x01010101) {
1449 for (y = 0; y < 2; y++) {
1450 for (x = 0; x < 2; x++) {
62457f90
JGG
1451 if ((uint8_t)nnz4 == 1)
1452 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1453 else if((uint8_t)nnz4 > 1)
1454 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1455 nnz4 >>= 8;
1456 if (!nnz4)
1457 break;
8a467b2d 1458 }
3ae079a3 1459 ch_dst += 4*s->uvlinesize;
8a467b2d 1460 }
3ae079a3
JGG
1461 } else {
1462 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
3b636f21
DC
1463 }
1464 }
3b636f21
DC
1465 }
1466}
1467
414ac27d 1468static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
3b636f21
DC
1469{
1470 int interior_limit, filter_level;
1471
1472 if (s->segmentation.enabled) {
b9a7186b 1473 filter_level = s->segmentation.filter_level[s->segment];
3b636f21
DC
1474 if (!s->segmentation.absolute_vals)
1475 filter_level += s->filter.level;
1476 } else
1477 filter_level = s->filter.level;
1478
1479 if (s->lf_delta.enabled) {
1480 filter_level += s->lf_delta.ref[mb->ref_frame];
dd18c9a0 1481 filter_level += s->lf_delta.mode[mb->mode];
3b636f21 1482 }
a1b227bb
JGG
1483
1484/* Like av_clip for inputs 0 and max, where max is equal to (2^n-1) */
1485#define POW2CLIP(x,max) (((x) & ~max) ? (-(x))>>31 & max : (x));
1486 filter_level = POW2CLIP(filter_level, 63);
3b636f21
DC
1487
1488 interior_limit = filter_level;
1489 if (s->filter.sharpness) {
8a2c99b4 1490 interior_limit >>= (s->filter.sharpness + 3) >> 2;
3b636f21
DC
1491 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1492 }
1493 interior_limit = FFMAX(interior_limit, 1);
1494
968570d6
JGG
1495 f->filter_level = filter_level;
1496 f->inner_limit = interior_limit;
c55e0d34 1497 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
3b636f21
DC
1498}
1499
414ac27d 1500static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1501{
968570d6
JGG
1502 int mbedge_lim, bedge_lim, hev_thresh;
1503 int filter_level = f->filter_level;
1504 int inner_limit = f->inner_limit;
c55e0d34 1505 int inner_filter = f->inner_filter;
145d3186
JGG
1506 int linesize = s->linesize;
1507 int uvlinesize = s->uvlinesize;
79dec154
JGG
1508 static const uint8_t hev_thresh_lut[2][64] = {
1509 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1510 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1511 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1512 3, 3, 3, 3 },
1513 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1514 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1515 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1516 2, 2, 2, 2 }
1517 };
3b636f21 1518
3b636f21
DC
1519 if (!filter_level)
1520 return;
1521
79dec154
JGG
1522 bedge_lim = 2*filter_level + inner_limit;
1523 mbedge_lim = bedge_lim + 4;
968570d6 1524
79dec154 1525 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
5245c04d 1526
3b636f21 1527 if (mb_x) {
145d3186 1528 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
3facfc99 1529 mbedge_lim, inner_limit, hev_thresh);
145d3186 1530 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1531 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1532 }
1533
c55e0d34 1534 if (inner_filter) {
145d3186
JGG
1535 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1536 inner_limit, hev_thresh);
1537 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1538 inner_limit, hev_thresh);
1539 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1540 inner_limit, hev_thresh);
1541 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1542 uvlinesize, bedge_lim,
1543 inner_limit, hev_thresh);
3b636f21
DC
1544 }
1545
1546 if (mb_y) {
145d3186 1547 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
3facfc99 1548 mbedge_lim, inner_limit, hev_thresh);
145d3186 1549 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1550 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1551 }
1552
c55e0d34 1553 if (inner_filter) {
145d3186
JGG
1554 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1555 linesize, bedge_lim,
1556 inner_limit, hev_thresh);
1557 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1558 linesize, bedge_lim,
1559 inner_limit, hev_thresh);
1560 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1561 linesize, bedge_lim,
1562 inner_limit, hev_thresh);
1563 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1564 dst[2] + 4 * uvlinesize,
1565 uvlinesize, bedge_lim,
3facfc99 1566 inner_limit, hev_thresh);
3b636f21
DC
1567 }
1568}
1569
414ac27d 1570static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1571{
968570d6
JGG
1572 int mbedge_lim, bedge_lim;
1573 int filter_level = f->filter_level;
1574 int inner_limit = f->inner_limit;
c55e0d34 1575 int inner_filter = f->inner_filter;
145d3186 1576 int linesize = s->linesize;
3b636f21 1577
3b636f21
DC
1578 if (!filter_level)
1579 return;
1580
79dec154
JGG
1581 bedge_lim = 2*filter_level + inner_limit;
1582 mbedge_lim = bedge_lim + 4;
3b636f21
DC
1583
1584 if (mb_x)
145d3186 1585 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1586 if (inner_filter) {
145d3186
JGG
1587 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1588 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1589 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
3b636f21
DC
1590 }
1591
1592 if (mb_y)
145d3186 1593 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1594 if (inner_filter) {
145d3186
JGG
1595 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1596 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1597 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
3b636f21
DC
1598 }
1599}
1600
1601static void filter_mb_row(VP8Context *s, int mb_y)
1602{
968570d6 1603 VP8FilterStrength *f = s->filter_strength;
3b636f21
DC
1604 uint8_t *dst[3] = {
1605 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
1606 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize,
1607 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize
1608 };
1609 int mb_x;
1610
1611 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1612 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
c55e0d34 1613 filter_mb(s, dst, f++, mb_x, mb_y);
3b636f21
DC
1614 dst[0] += 16;
1615 dst[1] += 8;
1616 dst[2] += 8;
1617 }
1618}
1619
1620static void filter_mb_row_simple(VP8Context *s, int mb_y)
1621{
968570d6 1622 VP8FilterStrength *f = s->filter_strength;
968570d6 1623 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
3b636f21
DC
1624 int mb_x;
1625
1626 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
9ac831c2 1627 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
c55e0d34 1628 filter_mb_simple(s, dst, f++, mb_x, mb_y);
3b636f21
DC
1629 dst += 16;
1630 }
1631}
1632
1633static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1634 AVPacket *avpkt)
1635{
1636 VP8Context *s = avctx->priv_data;
1637 int ret, mb_x, mb_y, i, y, referenced;
1638 enum AVDiscard skip_thresh;
28e241de 1639 AVFrame *av_uninit(curframe);
3b636f21
DC
1640
1641 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1642 return ret;
1643
1644 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1645 || s->update_altref == VP56_FRAME_CURRENT;
1646
1647 skip_thresh = !referenced ? AVDISCARD_NONREF :
1648 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1649
1650 if (avctx->skip_frame >= skip_thresh) {
1651 s->invisible = 1;
1652 goto skip_decode;
1653 }
9ac831c2 1654 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
3b636f21
DC
1655
1656 for (i = 0; i < 4; i++)
1657 if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1658 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1659 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1660 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1661 break;
1662 }
1663 if (curframe->data[0])
1664 avctx->release_buffer(avctx, curframe);
1665
1666 curframe->key_frame = s->keyframe;
1667 curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE;
1668 curframe->reference = referenced ? 3 : 0;
1669 if ((ret = avctx->get_buffer(avctx, curframe))) {
1670 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1671 return ret;
1672 }
1673
1674 // Given that arithmetic probabilities are updated every frame, it's quite likely
1675 // that the values we have on a random interframe are complete junk if we didn't
1676 // start decode on a keyframe. So just don't display anything rather than junk.
1677 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1678 !s->framep[VP56_FRAME_GOLDEN] ||
1679 !s->framep[VP56_FRAME_GOLDEN2])) {
1680 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1681 return AVERROR_INVALIDDATA;
1682 }
1683
1684 s->linesize = curframe->linesize[0];
1685 s->uvlinesize = curframe->linesize[1];
1686
1687 if (!s->edge_emu_buffer)
1688 s->edge_emu_buffer = av_malloc(21*s->linesize);
1689
1690 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1691
aa93c52c
PM
1692 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1693 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
c55e0d34 1694
3b636f21 1695 // top edge of 127 for intra prediction
ee555de7
RB
1696 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1697 s->top_border[0][15] = s->top_border[0][23] = 127;
1698 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1699 }
c4211046 1700 memset(s->ref_count, 0, sizeof(s->ref_count));
d2840fa4 1701 if (s->keyframe)
ccf13f9e 1702 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
3b636f21
DC
1703
1704 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1705 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
c55e0d34 1706 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
aa93c52c 1707 int mb_xy = mb_y*s->mb_width;
3b636f21
DC
1708 uint8_t *dst[3] = {
1709 curframe->data[0] + 16*mb_y*s->linesize,
1710 curframe->data[1] + 8*mb_y*s->uvlinesize,
1711 curframe->data[2] + 8*mb_y*s->uvlinesize
1712 };
1713
aa93c52c 1714 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
3b636f21 1715 memset(s->left_nnz, 0, sizeof(s->left_nnz));
d2840fa4 1716 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
3b636f21
DC
1717
1718 // left edge of 129 for intra prediction
ee555de7 1719 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
3b636f21
DC
1720 for (i = 0; i < 3; i++)
1721 for (y = 0; y < 16>>!!i; y++)
1722 dst[i][y*curframe->linesize[i]-1] = 129;
ee555de7
RB
1723 if (mb_y == 1) // top left edge is also 129
1724 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1725 }
3b636f21 1726
ef38842f 1727 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
d864dee8
JGG
1728 /* Prefetch the current frame, 4 MBs ahead */
1729 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1730 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1731
aa93c52c 1732 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);
3b636f21 1733
ef38842f 1734 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
c4211046 1735
3b636f21
DC
1736 if (!mb->skip)
1737 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
3b636f21 1738
b946111f 1739 if (mb->mode <= MODE_I4x4)
d2840fa4 1740 intra_predict(s, dst, mb, mb_x, mb_y);
b946111f 1741 else
3b636f21 1742 inter_predict(s, dst, mb, mb_x, mb_y);
3b636f21 1743
ef38842f 1744 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
c4211046 1745
3b636f21 1746 if (!mb->skip) {
8a467b2d 1747 idct_mb(s, dst, mb);
3b636f21
DC
1748 } else {
1749 AV_ZERO64(s->left_nnz);
1750 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1751
1752 // Reset DC block predictors if they would exist if the mb had coefficients
1753 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1754 s->left_nnz[8] = 0;
1755 s->top_nnz[mb_x][8] = 0;
1756 }
1757 }
1758
968570d6
JGG
1759 if (s->deblock_filter)
1760 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1761
ef38842f 1762 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
c4211046 1763
3b636f21
DC
1764 dst[0] += 16;
1765 dst[1] += 8;
1766 dst[2] += 8;
3b636f21 1767 }
9ac831c2 1768 if (s->deblock_filter) {
3b636f21 1769 if (s->filter.simple)
9ac831c2 1770 filter_mb_row_simple(s, mb_y);
3b636f21 1771 else
9ac831c2 1772 filter_mb_row(s, mb_y);
3b636f21
DC
1773 }
1774 }
3b636f21
DC
1775
1776skip_decode:
1777 // if future frames don't use the updated probabilities,
1778 // reset them to the values we saved
1779 if (!s->update_probabilities)
1780 s->prob[0] = s->prob[1];
1781
1782 // check if golden and altref are swapped
1783 if (s->update_altref == VP56_FRAME_GOLDEN &&
1784 s->update_golden == VP56_FRAME_GOLDEN2)
1785 FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
1786 else {
1787 if (s->update_altref != VP56_FRAME_NONE)
1788 s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1789
1790 if (s->update_golden != VP56_FRAME_NONE)
1791 s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1792 }
1793
1794 if (s->update_last) // move cur->prev
1795 s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];
1796
1797 // release no longer referenced frames
1798 for (i = 0; i < 4; i++)
1799 if (s->frames[i].data[0] &&
1800 &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
1801 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1802 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1803 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1804 avctx->release_buffer(avctx, &s->frames[i]);
1805
1806 if (!s->invisible) {
1807 *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
1808 *data_size = sizeof(AVFrame);
1809 }
1810
1811 return avpkt->size;
1812}
1813
1814static av_cold int vp8_decode_init(AVCodecContext *avctx)
1815{
1816 VP8Context *s = avctx->priv_data;
1817
1818 s->avctx = avctx;
1819 avctx->pix_fmt = PIX_FMT_YUV420P;
1820
1821 dsputil_init(&s->dsp, avctx);
1822 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
1823 ff_vp8dsp_init(&s->vp8dsp);
1824
3b636f21
DC
1825 return 0;
1826}
1827
1828static av_cold int vp8_decode_free(AVCodecContext *avctx)
1829{
1830 vp8_decode_flush(avctx);
1831 return 0;
1832}
1833
d36beb3f 1834AVCodec ff_vp8_decoder = {
3b636f21
DC
1835 "vp8",
1836 AVMEDIA_TYPE_VIDEO,
1837 CODEC_ID_VP8,
1838 sizeof(VP8Context),
1839 vp8_decode_init,
1840 NULL,
1841 vp8_decode_free,
1842 vp8_decode_frame,
1843 CODEC_CAP_DR1,
1844 .flush = vp8_decode_flush,
1845 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1846};