vp8: implement sliced threading
[libav.git] / libavcodec / vp8.c
CommitLineData
32f3c541 1/*
3b636f21
DC
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
13a1304b 6 * Copyright (C) 2010 Jason Garrett-Glaser
951455c1 7 * Copyright (C) 2012 Daniel Kang
3b636f21 8 *
2912e87a 9 * This file is part of Libav.
3b636f21 10 *
2912e87a 11 * Libav is free software; you can redistribute it and/or
3b636f21
DC
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
2912e87a 16 * Libav is distributed in the hope that it will be useful,
3b636f21
DC
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
2912e87a 22 * License along with Libav; if not, write to the Free Software
3b636f21
DC
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
737eb597 26#include "libavutil/imgutils.h"
3b636f21 27#include "avcodec.h"
f3a29b75 28#include "internal.h"
bcf4568f 29#include "vp8.h"
3b636f21 30#include "vp8data.h"
3b636f21 31#include "rectangle.h"
4773d904 32#include "thread.h"
3b636f21 33
951455c1
DK
34#if HAVE_PTHREADS
35#include <pthread.h>
36#elif HAVE_W32THREADS
37#include "w32pthreads.h"
38#endif
39
a7878c9f
MR
40#if ARCH_ARM
41# include "arm/vp8.h"
42#endif
43
56535793
RB
44static void free_buffers(VP8Context *s)
45{
951455c1
DK
46 int i;
47 if (s->thread_data)
48 for (i = 0; i < MAX_THREADS; i++) {
49 av_freep(&s->thread_data[i].filter_strength);
50 av_freep(&s->thread_data[i].edge_emu_buffer);
51 }
52 av_freep(&s->thread_data);
56535793 53 av_freep(&s->macroblocks_base);
56535793
RB
54 av_freep(&s->intra4x4_pred_mode_top);
55 av_freep(&s->top_nnz);
56535793 56 av_freep(&s->top_border);
56535793
RB
57
58 s->macroblocks = NULL;
59}
60
ce42a048
RB
61static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
62{
63 int ret;
64 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
65 return ret;
e02dec25 66 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
ce42a048
RB
67 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
68 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
69 ff_thread_release_buffer(s->avctx, f);
70 return AVERROR(ENOMEM);
71 }
72 return 0;
73}
74
bfa0f965 75static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
ce42a048 76{
bfa0f965
RB
77 if (f->ref_index[0]) {
78 if (prefer_delayed_free) {
79 /* Upon a size change, we want to free the maps but other threads may still
80 * be using them, so queue them. Upon a seek, all threads are inactive so
81 * we want to cache one to prevent re-allocation in the next decoding
82 * iteration, but the rest we can free directly. */
83 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
84 if (s->num_maps_to_be_freed < max_queued_maps) {
85 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
86 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
87 av_free(f->ref_index[0]);
88 } /* else: MEMLEAK (should never happen, but better that than crash) */
ce42a048 89 f->ref_index[0] = NULL;
bfa0f965
RB
90 } else /* vp8_decode_free() */ {
91 av_free(f->ref_index[0]);
ce42a048 92 }
ce42a048
RB
93 }
94 ff_thread_release_buffer(s->avctx, f);
95}
96
bfa0f965
RB
97static void vp8_decode_flush_impl(AVCodecContext *avctx,
98 int prefer_delayed_free, int can_direct_free, int free_mem)
3b636f21
DC
99{
100 VP8Context *s = avctx->priv_data;
101 int i;
102
f3a29b75 103 if (!avctx->internal->is_copy) {
4773d904
RB
104 for (i = 0; i < 5; i++)
105 if (s->frames[i].data[0])
bfa0f965 106 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
4773d904 107 }
3b636f21
DC
108 memset(s->framep, 0, sizeof(s->framep));
109
bfa0f965
RB
110 if (free_mem) {
111 free_buffers(s);
112 s->maps_are_invalid = 1;
113 }
ce42a048
RB
114}
115
116static void vp8_decode_flush(AVCodecContext *avctx)
117{
bfa0f965 118 vp8_decode_flush_impl(avctx, 1, 1, 0);
3b636f21
DC
119}
120
121static int update_dimensions(VP8Context *s, int width, int height)
122{
951455c1
DK
123 AVCodecContext *avctx = s->avctx;
124 int i;
125
4773d904
RB
126 if (width != s->avctx->width ||
127 height != s->avctx->height) {
128 if (av_image_check_size(width, height, 0, s->avctx))
129 return AVERROR_INVALIDDATA;
3b636f21 130
bfa0f965 131 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
3b636f21 132
4773d904
RB
133 avcodec_set_dimensions(s->avctx, width, height);
134 }
3b636f21
DC
135
136 s->mb_width = (s->avctx->coded_width +15) / 16;
137 s->mb_height = (s->avctx->coded_height+15) / 16;
138
951455c1
DK
139 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
140 if (!s->mb_layout) { // Frame threading and one thread
141 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
142 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
143 }
144 else // Sliced threading
145 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
146 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
147 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
148 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
3b636f21 149
951455c1
DK
150 for (i = 0; i < MAX_THREADS; i++) {
151 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
152 pthread_mutex_init(&s->thread_data[i].lock, NULL);
153 pthread_cond_init(&s->thread_data[i].cond, NULL);
154 }
155
156 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
157 (!s->intra4x4_pred_mode_top && !s->mb_layout))
b6c420ce
DC
158 return AVERROR(ENOMEM);
159
c55e0d34 160 s->macroblocks = s->macroblocks_base + 1;
3b636f21
DC
161
162 return 0;
163}
164
165static void parse_segment_info(VP8Context *s)
166{
167 VP56RangeCoder *c = &s->c;
168 int i;
169
170 s->segmentation.update_map = vp8_rac_get(c);
171
172 if (vp8_rac_get(c)) { // update segment feature data
173 s->segmentation.absolute_vals = vp8_rac_get(c);
174
175 for (i = 0; i < 4; i++)
176 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
177
178 for (i = 0; i < 4; i++)
179 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
180 }
181 if (s->segmentation.update_map)
182 for (i = 0; i < 3; i++)
183 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
184}
185
186static void update_lf_deltas(VP8Context *s)
187{
188 VP56RangeCoder *c = &s->c;
189 int i;
190
14ba7472
JS
191 for (i = 0; i < 4; i++) {
192 if (vp8_rac_get(c)) {
193 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
3b636f21 194
14ba7472
JS
195 if (vp8_rac_get(c))
196 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
197 }
198 }
199
200 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
201 if (vp8_rac_get(c)) {
202 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
203
204 if (vp8_rac_get(c))
205 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
206 }
207 }
3b636f21
DC
208}
209
210static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
211{
212 const uint8_t *sizes = buf;
213 int i;
214
215 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
216
217 buf += 3*(s->num_coeff_partitions-1);
218 buf_size -= 3*(s->num_coeff_partitions-1);
219 if (buf_size < 0)
220 return -1;
221
222 for (i = 0; i < s->num_coeff_partitions-1; i++) {
06d50ca8 223 int size = AV_RL24(sizes + 3*i);
3b636f21
DC
224 if (buf_size - size < 0)
225 return -1;
226
905ef0d0 227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
3b636f21
DC
228 buf += size;
229 buf_size -= size;
230 }
905ef0d0 231 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
3b636f21
DC
232
233 return 0;
234}
235
236static void get_quants(VP8Context *s)
237{
238 VP56RangeCoder *c = &s->c;
239 int i, base_qi;
240
241 int yac_qi = vp8_rac_get_uint(c, 7);
242 int ydc_delta = vp8_rac_get_sint(c, 4);
243 int y2dc_delta = vp8_rac_get_sint(c, 4);
244 int y2ac_delta = vp8_rac_get_sint(c, 4);
245 int uvdc_delta = vp8_rac_get_sint(c, 4);
246 int uvac_delta = vp8_rac_get_sint(c, 4);
247
248 for (i = 0; i < 4; i++) {
249 if (s->segmentation.enabled) {
250 base_qi = s->segmentation.base_quant[i];
251 if (!s->segmentation.absolute_vals)
252 base_qi += yac_qi;
253 } else
254 base_qi = yac_qi;
255
42761122
MR
256 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
257 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
258 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
259 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
260 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
261 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
a8ab0ccc
PM
262
263 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
264 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
3b636f21
DC
265 }
266}
267
268/**
269 * Determine which buffers golden and altref should be updated with after this frame.
270 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
271 *
272 * Intra frames update all 3 references
273 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
274 * If the update (golden|altref) flag is set, it's updated with the current frame
275 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
276 * If the flag is not set, the number read means:
277 * 0: no update
278 * 1: VP56_FRAME_PREVIOUS
279 * 2: update golden with altref, or update altref with golden
280 */
281static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
282{
283 VP56RangeCoder *c = &s->c;
284
285 if (update)
286 return VP56_FRAME_CURRENT;
287
288 switch (vp8_rac_get_uint(c, 2)) {
289 case 1:
290 return VP56_FRAME_PREVIOUS;
291 case 2:
292 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
293 }
294 return VP56_FRAME_NONE;
295}
296
297static void update_refs(VP8Context *s)
298{
299 VP56RangeCoder *c = &s->c;
300
301 int update_golden = vp8_rac_get(c);
302 int update_altref = vp8_rac_get(c);
303
304 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
305 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
306}
307
308static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
309{
310 VP56RangeCoder *c = &s->c;
370b622a 311 int header_size, hscale, vscale, i, j, k, l, m, ret;
3b636f21
DC
312 int width = s->avctx->width;
313 int height = s->avctx->height;
314
315 s->keyframe = !(buf[0] & 1);
316 s->profile = (buf[0]>>1) & 7;
317 s->invisible = !(buf[0] & 0x10);
06d50ca8 318 header_size = AV_RL24(buf) >> 5;
3b636f21
DC
319 buf += 3;
320 buf_size -= 3;
321
0ef1dbed
DC
322 if (s->profile > 3)
323 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
324
325 if (!s->profile)
326 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
327 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
328 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
3b636f21
DC
329
330 if (header_size > buf_size - 7*s->keyframe) {
331 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
332 return AVERROR_INVALIDDATA;
333 }
334
335 if (s->keyframe) {
06d50ca8
JGG
336 if (AV_RL24(buf) != 0x2a019d) {
337 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
3b636f21
DC
338 return AVERROR_INVALIDDATA;
339 }
340 width = AV_RL16(buf+3) & 0x3fff;
341 height = AV_RL16(buf+5) & 0x3fff;
342 hscale = buf[4] >> 6;
343 vscale = buf[6] >> 6;
344 buf += 7;
345 buf_size -= 7;
346
92a54426
MR
347 if (hscale || vscale)
348 av_log_missing_feature(s->avctx, "Upscaling", 1);
349
3b636f21 350 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
370b622a
JGG
351 for (i = 0; i < 4; i++)
352 for (j = 0; j < 16; j++)
353 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
354 sizeof(s->prob->token[i][j]));
3b636f21
DC
355 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
356 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
357 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
358 memset(&s->segmentation, 0, sizeof(s->segmentation));
359 }
360
905ef0d0 361 ff_vp56_init_range_decoder(c, buf, header_size);
3b636f21
DC
362 buf += header_size;
363 buf_size -= header_size;
364
365 if (s->keyframe) {
366 if (vp8_rac_get(c))
367 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
368 vp8_rac_get(c); // whether we can skip clamping in dsp functions
369 }
370
371 if ((s->segmentation.enabled = vp8_rac_get(c)))
372 parse_segment_info(s);
373 else
374 s->segmentation.update_map = 0; // FIXME: move this to some init function?
375
376 s->filter.simple = vp8_rac_get(c);
377 s->filter.level = vp8_rac_get_uint(c, 6);
378 s->filter.sharpness = vp8_rac_get_uint(c, 3);
379
380 if ((s->lf_delta.enabled = vp8_rac_get(c)))
381 if (vp8_rac_get(c))
382 update_lf_deltas(s);
383
384 if (setup_partitions(s, buf, buf_size)) {
385 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
386 return AVERROR_INVALIDDATA;
387 }
388
951455c1
DK
389 if (!s->macroblocks_base || /* first frame */
390 width != s->avctx->width || height != s->avctx->height) {
391 if ((ret = update_dimensions(s, width, height)) < 0)
392 return ret;
393 }
394
3b636f21
DC
395 get_quants(s);
396
397 if (!s->keyframe) {
398 update_refs(s);
399 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
400 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
401 }
402
403 // if we aren't saving this frame's probabilities for future frames,
404 // make a copy of the current probabilities
405 if (!(s->update_probabilities = vp8_rac_get(c)))
406 s->prob[1] = s->prob[0];
407
408 s->update_last = s->keyframe || vp8_rac_get(c);
409
410 for (i = 0; i < 4; i++)
411 for (j = 0; j < 8; j++)
412 for (k = 0; k < 3; k++)
413 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
370b622a
JGG
414 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
415 int prob = vp8_rac_get_uint(c, 8);
b0d58795
JGG
416 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
417 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
370b622a 418 }
3b636f21
DC
419
420 if ((s->mbskip_enabled = vp8_rac_get(c)))
a8ab0ccc 421 s->prob->mbskip = vp8_rac_get_uint(c, 8);
3b636f21
DC
422
423 if (!s->keyframe) {
a8ab0ccc
PM
424 s->prob->intra = vp8_rac_get_uint(c, 8);
425 s->prob->last = vp8_rac_get_uint(c, 8);
426 s->prob->golden = vp8_rac_get_uint(c, 8);
3b636f21
DC
427
428 if (vp8_rac_get(c))
429 for (i = 0; i < 4; i++)
430 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
431 if (vp8_rac_get(c))
432 for (i = 0; i < 3; i++)
433 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
434
435 // 17.2 MV probability update
436 for (i = 0; i < 2; i++)
437 for (j = 0; j < 19; j++)
7697cdcf 438 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
3b636f21
DC
439 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
440 }
441
442 return 0;
443}
444
7634771e 445static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
3b636f21 446{
7634771e
JGG
447 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
448 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
3b636f21
DC
449}
450
3b636f21
DC
451/**
452 * Motion vector coding, 17.1.
453 */
454static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
455{
ca18a478 456 int bit, x = 0;
3b636f21 457
7697cdcf 458 if (vp56_rac_get_prob_branchy(c, p[0])) {
3b636f21
DC
459 int i;
460
461 for (i = 0; i < 3; i++)
462 x += vp56_rac_get_prob(c, p[9 + i]) << i;
463 for (i = 9; i > 3; i--)
464 x += vp56_rac_get_prob(c, p[9 + i]) << i;
465 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
466 x += 8;
ca18a478
DC
467 } else {
468 // small_mvtree
469 const uint8_t *ps = p+2;
470 bit = vp56_rac_get_prob(c, *ps);
471 ps += 1 + 3*bit;
472 x += 4*bit;
473 bit = vp56_rac_get_prob(c, *ps);
474 ps += 1 + bit;
475 x += 2*bit;
476 x += vp56_rac_get_prob(c, *ps);
477 }
3b636f21
DC
478
479 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
480}
481
414ac27d
JGG
482static av_always_inline
483const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
3b636f21 484{
7bf254c4
JGG
485 if (left == top)
486 return vp8_submv_prob[4-!!left];
487 if (!top)
3b636f21 488 return vp8_submv_prob[2];
7bf254c4 489 return vp8_submv_prob[1-!!left];
3b636f21
DC
490}
491
492/**
493 * Split motion vector prediction, 16.4.
7ed06b2b 494 * @returns the number of motion vectors parsed (2, 4 or 16)
3b636f21 495 */
414ac27d 496static av_always_inline
951455c1 497int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
3b636f21 498{
0908f1b9
JGG
499 int part_idx;
500 int n, num;
951455c1 501 VP8Macroblock *top_mb;
7bf254c4
JGG
502 VP8Macroblock *left_mb = &mb[-1];
503 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
951455c1 504 *mbsplits_top,
0908f1b9 505 *mbsplits_cur, *firstidx;
951455c1 506 VP56mv *top_mv;
c55e0d34
JGG
507 VP56mv *left_mv = left_mb->bmv;
508 VP56mv *cur_mv = mb->bmv;
3b636f21 509
951455c1
DK
510 if (!layout) // layout is inlined, s->mb_layout is not
511 top_mb = &mb[2];
512 else
513 top_mb = &mb[-s->mb_width-1];
514 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
515 top_mv = top_mb->bmv;
516
0908f1b9
JGG
517 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
518 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
519 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
520 } else {
521 part_idx = VP8_SPLITMVMODE_8x8;
522 }
523 } else {
524 part_idx = VP8_SPLITMVMODE_4x4;
525 }
526
527 num = vp8_mbsplit_count[part_idx];
528 mbsplits_cur = vp8_mbsplits[part_idx],
529 firstidx = vp8_mbfirstidx[part_idx];
530 mb->partitioning = part_idx;
531
3b636f21 532 for (n = 0; n < num; n++) {
7ed06b2b 533 int k = firstidx[n];
7bf254c4 534 uint32_t left, above;
7ed06b2b
RB
535 const uint8_t *submv_prob;
536
7bf254c4
JGG
537 if (!(k & 3))
538 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
539 else
540 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
541 if (k <= 3)
542 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
543 else
544 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
7ed06b2b
RB
545
546 submv_prob = get_submv_prob(left, above);
3b636f21 547
c5dec7f1
JGG
548 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
549 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
550 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
551 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
552 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
553 } else {
554 AV_ZERO32(&mb->bmv[n]);
555 }
556 } else {
557 AV_WN32A(&mb->bmv[n], above);
558 }
559 } else {
7bf254c4 560 AV_WN32A(&mb->bmv[n], left);
3b636f21 561 }
3b636f21 562 }
7ed06b2b
RB
563
564 return num;
3b636f21
DC
565}
566
414ac27d 567static av_always_inline
951455c1 568void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
f3d09d44 569{
951455c1 570 VP8Macroblock *mb_edge[3] = { 0 /* top */,
f3d09d44 571 mb - 1 /* left */,
951455c1 572 0 /* top-left */ };
f3d09d44 573 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
66f608a6 574 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
f3d09d44
JGG
575 int idx = CNT_ZERO;
576 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1eeca886 577 int8_t *sign_bias = s->sign_bias;
f3d09d44
JGG
578 VP56mv near_mv[4];
579 uint8_t cnt[4] = { 0 };
580 VP56RangeCoder *c = &s->c;
581
951455c1
DK
582 if (!layout) { // layout is inlined (s->mb_layout is not)
583 mb_edge[0] = mb + 2;
584 mb_edge[2] = mb + 1;
585 }
586 else {
587 mb_edge[0] = mb - s->mb_width-1;
588 mb_edge[2] = mb - s->mb_width-2;
589 }
590
f3d09d44
JGG
591 AV_ZERO32(&near_mv[0]);
592 AV_ZERO32(&near_mv[1]);
0f0b5d64 593 AV_ZERO32(&near_mv[2]);
f3d09d44
JGG
594
595 /* Process MB on top, left and top-left */
596 #define MV_EDGE_CHECK(n)\
597 {\
598 VP8Macroblock *edge = mb_edge[n];\
599 int edge_ref = edge->ref_frame;\
600 if (edge_ref != VP56_FRAME_CURRENT) {\
601 uint32_t mv = AV_RN32A(&edge->mv);\
602 if (mv) {\
603 if (cur_sign_bias != sign_bias[edge_ref]) {\
604 /* SWAR negate of the values in mv. */\
605 mv = ~mv;\
606 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
607 }\
608 if (!n || mv != AV_RN32A(&near_mv[idx]))\
609 AV_WN32A(&near_mv[++idx], mv);\
610 cnt[idx] += 1 + (n != 2);\
611 } else\
612 cnt[CNT_ZERO] += 1 + (n != 2);\
613 }\
614 }
615
616 MV_EDGE_CHECK(0)
617 MV_EDGE_CHECK(1)
618 MV_EDGE_CHECK(2)
619
620 mb->partitioning = VP8_SPLITMVMODE_NONE;
621 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
622 mb->mode = VP8_MVMODE_MV;
623
624 /* If we have three distinct MVs, merge first and last if they're the same */
66f608a6 625 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
f3d09d44
JGG
626 cnt[CNT_NEAREST] += 1;
627
628 /* Swap near and nearest if necessary */
629 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
630 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
631 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
632 }
633
634 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
635 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
636
637 /* Choose the best mv out of 0,0 and the nearest mv */
7634771e 638 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
66f608a6
AS
639 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
640 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
641 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
f3d09d44
JGG
642
643 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
644 mb->mode = VP8_MVMODE_SPLIT;
951455c1 645 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
f3d09d44
JGG
646 } else {
647 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
648 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
649 mb->bmv[0] = mb->mv;
650 }
651 } else {
7634771e 652 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
f3d09d44
JGG
653 mb->bmv[0] = mb->mv;
654 }
655 } else {
7634771e 656 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
f3d09d44
JGG
657 mb->bmv[0] = mb->mv;
658 }
659 } else {
660 mb->mode = VP8_MVMODE_ZERO;
661 AV_ZERO32(&mb->mv);
662 mb->bmv[0] = mb->mv;
663 }
664}
665
666static av_always_inline
17343e39 667void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
951455c1 668 int mb_x, int keyframe, int layout)
3b636f21 669{
17343e39
DK
670 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
671
951455c1
DK
672 if (layout == 1) {
673 VP8Macroblock *mb_top = mb - s->mb_width - 1;
674 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
675 }
d1c58fce 676 if (keyframe) {
d2840fa4 677 int x, y;
951455c1 678 uint8_t* top;
d2840fa4 679 uint8_t* const left = s->intra4x4_pred_mode_left;
951455c1
DK
680 if (layout == 1)
681 top = mb->intra4x4_pred_mode_top;
682 else
683 top = s->intra4x4_pred_mode_top + 4 * mb_x;
d1c58fce
JGG
684 for (y = 0; y < 4; y++) {
685 for (x = 0; x < 4; x++) {
d2840fa4
PM
686 const uint8_t *ctx;
687 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
688 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
689 left[y] = top[x] = *intra4x4;
690 intra4x4++;
3b636f21 691 }
3b636f21 692 }
d1c58fce 693 } else {
d2840fa4 694 int i;
d1c58fce
JGG
695 for (i = 0; i < 16; i++)
696 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
3b636f21
DC
697 }
698}
699
414ac27d 700static av_always_inline
951455c1
DK
701void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
702 uint8_t *segment, uint8_t *ref, int layout)
3b636f21
DC
703{
704 VP56RangeCoder *c = &s->c;
3b636f21
DC
705
706 if (s->segmentation.update_map)
c55e0d34 707 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
30011bf2 708 else if (s->segmentation.enabled)
4773d904 709 *segment = ref ? *ref : *segment;
17343e39 710 mb->segment = *segment;
3b636f21 711
a8ab0ccc 712 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
3b636f21
DC
713
714 if (s->keyframe) {
715 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
716
717 if (mb->mode == MODE_I4x4) {
951455c1 718 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
d2840fa4
PM
719 } else {
720 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
951455c1
DK
721 if (s->mb_layout == 1)
722 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
723 else
724 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
725 AV_WN32A( s->intra4x4_pred_mode_left, modes);
d2840fa4 726 }
3b636f21 727
17343e39 728 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
3b636f21 729 mb->ref_frame = VP56_FRAME_CURRENT;
a8ab0ccc 730 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
3b636f21 731 // inter MB, 16.2
a8ab0ccc
PM
732 if (vp56_rac_get_prob_branchy(c, s->prob->last))
733 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
3b636f21
DC
734 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
735 else
736 mb->ref_frame = VP56_FRAME_PREVIOUS;
c4211046 737 s->ref_count[mb->ref_frame-1]++;
3b636f21
DC
738
739 // motion vectors, 16.3
951455c1 740 decode_mvs(s, mb, mb_x, mb_y, layout);
3b636f21
DC
741 } else {
742 // intra MB, 16.1
743 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
744
158e062c 745 if (mb->mode == MODE_I4x4)
951455c1 746 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
3b636f21 747
17343e39 748 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
3b636f21 749 mb->ref_frame = VP56_FRAME_CURRENT;
b946111f 750 mb->partitioning = VP8_SPLITMVMODE_NONE;
14767f35 751 AV_ZERO32(&mb->bmv[0]);
3b636f21
DC
752 }
753}
754
a7878c9f 755#ifndef decode_block_coeffs_internal
3b636f21 756/**
e394953e
RB
757 * @param c arithmetic bitstream reader context
758 * @param block destination for block coefficients
759 * @param probs probabilities to use when reading trees from the bitstream
3b636f21 760 * @param i initial coeff index, 0 unless a separate DC block is coded
3fa76268 761 * @param qmul array holding the dc/ac dequant factor at position 0/1
3b636f21
DC
762 * @return 0 if no coeffs were decoded
763 * otherwise, the index of the last coeff decoded plus one
764 */
6163d880 765static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
3efbe137 766 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1e739679 767 int i, uint8_t *token_prob, int16_t qmul[2])
3b636f21 768{
6163d880 769 VP56RangeCoder c = *r;
afb54a85 770 goto skip_eob;
fe1b5d97 771 do {
1e739679 772 int coeff;
6163d880
RB
773 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
774 break;
3b636f21 775
fe1b5d97 776skip_eob:
6163d880 777 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
c22b4468 778 if (++i == 16)
6163d880 779 break; // invalid input; blocks should end with EOB
370b622a 780 token_prob = probs[i][0];
c22b4468 781 goto skip_eob;
fe1b5d97
DC
782 }
783
6163d880 784 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
fe1b5d97 785 coeff = 1;
370b622a 786 token_prob = probs[i+1][1];
fe1b5d97 787 } else {
6163d880
RB
788 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
789 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
fe1b5d97 790 if (coeff)
6163d880 791 coeff += vp56_rac_get_prob(&c, token_prob[5]);
fe1b5d97
DC
792 coeff += 2;
793 } else {
794 // DCT_CAT*
6163d880
RB
795 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
796 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
797 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
fe1b5d97
DC
798 } else { // DCT_CAT2
799 coeff = 7;
6163d880
RB
800 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
801 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
fe1b5d97
DC
802 }
803 } else { // DCT_CAT3 and up
6163d880
RB
804 int a = vp56_rac_get_prob(&c, token_prob[8]);
805 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
fe1b5d97
DC
806 int cat = (a<<1) + b;
807 coeff = 3 + (8<<cat);
6163d880 808 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
fe1b5d97
DC
809 }
810 }
370b622a 811 token_prob = probs[i+1][2];
fe1b5d97 812 }
6163d880 813 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
afb54a85 814 } while (++i < 16);
fe1b5d97 815
6163d880 816 *r = c;
afb54a85 817 return i;
3b636f21 818}
a7878c9f 819#endif
3b636f21 820
3c432e11
DB
821/**
822 * @param c arithmetic bitstream reader context
823 * @param block destination for block coefficients
824 * @param probs probabilities to use when reading trees from the bitstream
825 * @param i initial coeff index, 0 unless a separate DC block is coded
826 * @param zero_nhood the initial prediction context for number of surrounding
827 * all-zero blocks (only left/top, so 0-2)
828 * @param qmul array holding the dc/ac dequant factor at position 0/1
829 * @return 0 if no coeffs were decoded
830 * otherwise, the index of the last coeff decoded plus one
831 */
414ac27d 832static av_always_inline
1e739679 833int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
81a13131 834 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1e739679
JGG
835 int i, int zero_nhood, int16_t qmul[2])
836{
837 uint8_t *token_prob = probs[i][zero_nhood];
838 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
839 return 0;
840 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
841}
842
/**
 * Decode all DCT coefficients of one macroblock and update the non-zero
 * context used for coefficient probability selection.
 *
 * @param s     VP8 decoding context
 * @param td    per-thread data: destination coefficient blocks and the
 *              non_zero_count_cache consumed by the IDCT/prediction stages
 * @param c     range coder of the coefficient partition for this MB row
 * @param mb    macroblock being decoded; mb->skip is forced to 1 when no
 *              coefficient at all turned out to be coded
 * @param t_nnz nnz flags of the MB above: [0..3] luma columns, [4..7]
 *              chroma, [8] the luma DC (Y2) block
 * @param l_nnz nnz flags of the MB to the left, same layout
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    // the separate luma DC (Y2/WHT) block exists for every mode except
    // I4x4 and split-MV; when present, luma AC blocks start at coeff 1
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
903
9ac831c2
DC
904static av_always_inline
905void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
906 int linesize, int uvlinesize, int simple)
907{
908 AV_COPY128(top_border, src_y + 15*linesize);
909 if (!simple) {
910 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
911 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
912 }
913}
914
/**
 * Exchange (xchg=1) or write back (xchg=0) the pixels bordering the current
 * macroblock with the saved top-border buffer, so intra prediction reads
 * neighbour pixels that the loop filter has not yet modified.
 *
 * top_border layout per macroblock entry (32 bytes): 16 bytes Y,
 * 8 bytes U, 8 bytes V; top_border-32 is the previous MB's entry and is
 * used for top-left prediction.
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

// swap when xchg is set, otherwise a plain copy into the border buffer
#define XCHG(a,b,xchg) do {                \
        if (xchg) AV_SWAP64(b,a);          \
        else AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border+8, src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}
945
414ac27d 946static av_always_inline
ee555de7
RB
947int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
948{
949 if (!mb_x) {
950 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
951 } else {
952 return mb_y ? mode : LEFT_DC_PRED8x8;
953 }
954}
955
956static av_always_inline
957int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
958{
959 if (!mb_x) {
960 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
961 } else {
962 return mb_y ? mode : HOR_PRED8x8;
963 }
964}
965
966static av_always_inline
967int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
3b636f21
DC
968{
969 if (mode == DC_PRED8x8) {
ee555de7
RB
970 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
971 } else {
972 return mode;
973 }
974}
975
976static av_always_inline
977int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
978{
979 switch (mode) {
980 case DC_PRED8x8:
981 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
982 case VERT_PRED8x8:
983 return !mb_y ? DC_127_PRED8x8 : mode;
984 case HOR_PRED8x8:
985 return !mb_x ? DC_129_PRED8x8 : mode;
986 case PLANE_PRED8x8 /*TM*/:
987 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
988 }
989 return mode;
990}
991
992static av_always_inline
993int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
994{
995 if (!mb_x) {
996 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
997 } else {
998 return mb_y ? mode : HOR_VP8_PRED;
999 }
1000}
1001
/**
 * Edge-fix a 4x4 intra prediction mode when edge emulation is in use.
 * Either substitutes a DC_127/DC_129 fallback for a completely missing
 * edge, or sets *copy_buf = 1 to request prediction into a bounce buffer
 * with hand-constructed edge pixels (see intra_predict()).
 */
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        // left frame edge with a valid top row: top-left pixel must be
        // faked, so predict via the copy buffer
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;   // no top row at the frame top
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;   // no left column at the frame left
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
1035
/**
 * Intra-predict one macroblock: either a whole-MB 16x16 prediction or 16
 * individual 4x4 sub-block predictions (with per-sub-block IDCT add),
 * followed by the 8x8 chroma prediction for both planes.
 *
 * Border pixels are swapped in/out via xchg_mb_border() so prediction reads
 * unfiltered neighbour pixels; only thread 0 touches the shared top border
 * (td->thread_nr == 0).
 *
 * @param s    VP8 decoding context
 * @param td   per-thread data (coefficient blocks, nnz cache)
 * @param dst  Y/U/V pointers to the top-left pixel of this macroblock
 * @param mb   macroblock being predicted
 * @param mb_x,mb_y macroblock position, used for edge-mode fixups
 */
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                // 5 rows x 8 bytes bounce buffer: row 0 holds the faked
                // top edge, column 3 of each row the faked left edge
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        // predict into the bounce buffer with hand-built
                        // edges, then copy the 4x4 result back below
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                // nnz cache: 0 = skip, 1 = DC-only, >1 = full IDCT
                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    // write the (still unfiltered) bottom/right pixels back into the
    // top-border buffer for the next macroblock row
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1157
64233e70
JGG
/* Per sub-pixel phase (mv & 7): how much extra context the interpolation
 * filter needs around the block; row 0 doubles as the mc_func index. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1164
/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param td per-thread data, provides the edge emulation buffer
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 AVFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->data[0];

    if (AV_RN32A(mv)) {   // any non-zero mv component -> sub/whole-pel MC

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        // wait (frame threading) until the reference rows this block plus
        // its filter context needs have been decoded
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        // zero mv: plain copy, no filter context needed
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1214
3c432e11
DB
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param td per-thread data, provides the edge emulation buffer
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        // wait (frame threading) for the needed reference rows; >>3 since
        // chroma planes are half-height
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // U and V share the emu buffer, so each plane is filtered
            // immediately after its edge copy
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1274
/**
 * Motion-compensate one rectangular partition of a macroblock: the luma
 * block plus the corresponding (half-sized) U and V blocks, deriving the
 * chroma mv from the luma mv (full-pel forced for profile 3).
 *
 * @param s      VP8 decoding context
 * @param td     per-thread data, forwarded to the MC helpers
 * @param dst    Y/U/V pointers to the top-left pixel of this macroblock
 * @param ref_frame reference picture
 * @param x_off,y_off   macroblock position in pixels
 * @param bx_off,by_off partition offset within the macroblock (luma pixels)
 * @param block_w,block_h partition size in luma pixels
 * @param width,height  luma plane dimensions
 * @param mv     luma motion vector for this partition
 */
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        // profile 3 uses full-pel chroma motion
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    // halve all coordinates/dimensions for the chroma planes
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
1305
d864dee8
JGG
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        // stagger by MB position so consecutive MBs touch different lines
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
1325
3b636f21
DC
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 *
 * Dispatches on the split mode: whole MB, 2 or 4 rectangular partitions
 * (via vp8_mc_part()), or 16 individual 4x4 blocks, where each chroma mv
 * is the rounded average of the four covering luma mvs.
 *
 * @param s    VP8 decoding context
 * @param td   per-thread data, forwarded to the MC helpers
 * @param dst  Y/U/V pointers to the top-left pixel of this macroblock
 * @param mb   macroblock (reference frame, partitioning, mvs)
 * @param mb_x,mb_y macroblock position
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                // sum of the 4 luma mvs covering this chroma block...
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                // ...rounded-to-nearest average (sign term rounds
                // negative sums symmetrically)
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
1409
951455c1
DK
/**
 * Add the inverse-transformed residual of one macroblock to the prediction.
 *
 * Uses the per-block values cached in td->non_zero_count_cache by
 * decode_mb_coeffs(): 0 = nothing coded, 1 = DC-only add, >1 = full IDCT.
 * Luma is skipped entirely for I4x4 (already added in intra_predict()).
 * Reading 4 cache bytes at once allows a fast all-DC path per row/plane.
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // mixed DC/AC row: handle each block individually
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    // every coded block in this row is DC-only
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
1462
/**
 * Compute the loop-filter strength for one macroblock: base level from
 * segmentation or the frame-level filter, plus reference-frame and mode
 * deltas, clamped to [0, 63]; then derive the interior (inner-edge) limit
 * from the sharpness setting. Results are stored in *f for filter_mb().
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    // clamp to the 6-bit range [0, 63]
    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    // inner edges are filtered unless the MB is a coefficient-less skip
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1492
/**
 * Apply the normal (non-simple) in-loop deblocking filter to one macroblock:
 * the left/top macroblock edges with the stronger mbedge limit, then the
 * three inner edges in each direction, on both luma and chroma.
 *
 * @param s    VP8 decoding context
 * @param dst  Y/U/V pointers to the top-left pixel of this macroblock
 * @param f    precomputed strength, see filter_level_for_mb()
 * @param mb_x horizontal MB position; 0 means no left edge to filter
 * @param mb_y vertical MB position; 0 means no top edge to filter
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    // high-edge-variance threshold, indexed [s->keyframe][filter_level]
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1562
414ac27d 1563static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1564{
968570d6
JGG
1565 int mbedge_lim, bedge_lim;
1566 int filter_level = f->filter_level;
1567 int inner_limit = f->inner_limit;
c55e0d34 1568 int inner_filter = f->inner_filter;
145d3186 1569 int linesize = s->linesize;
3b636f21 1570
3b636f21
DC
1571 if (!filter_level)
1572 return;
1573
79dec154
JGG
1574 bedge_lim = 2*filter_level + inner_limit;
1575 mbedge_lim = bedge_lim + 4;
3b636f21
DC
1576
1577 if (mb_x)
145d3186 1578 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1579 if (inner_filter) {
145d3186
JGG
1580 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1581 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1582 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
3b636f21
DC
1583 }
1584
1585 if (mb_y)
145d3186 1586 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1587 if (inner_filter) {
145d3186
JGG
1588 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1589 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1590 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
3b636f21
DC
1591 }
1592}
1593
ce42a048
RB
1594static void release_queued_segmaps(VP8Context *s, int is_close)
1595{
1596 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1597 while (s->num_maps_to_be_freed > leave_behind)
1598 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1599 s->maps_are_invalid = 0;
1600}
1601
337ade52 1602#define MARGIN (16 << 2)
951455c1
DK
/**
 * Decode macroblock modes and motion vectors for the whole frame in a
 * single up-front pass, writing reference indices into curframe and
 * reusing prev_frame's indices for temporal mv prediction.
 * NOTE(review): presumably only used when MBs are stored per-frame
 * (s->mb_layout == 1, cf. the addressing in vp8_decode_mb_row_no_filter);
 * confirm against the frame-header setup outside this chunk.
 */
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
                                   AVFrame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    // mv clamping window, shrunk by 64 (1 MB in 1/4-pel units) per MB
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}
1631
/**
 * Block until the other slice thread (otd) has decoded at least up to
 * macroblock position (mb_x_check, mb_y_check). Positions are packed as
 * (mb_y << 16) | mb_x so a single integer compare orders them; otd's
 * condition variable is signalled by update_pos().
 *
 * Fixed: arguments are now parenthesized in the expansion, and the
 * trailing semicolon after while(0) — which defeated the do/while(0)
 * single-statement idiom in unbraced if/else bodies — is removed
 * (call sites already supply their own ';').
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)
1647
/**
 * Publish this thread's decoding progress as (mb_y << 16) | mb_x and, when
 * sliced threading is active and a neighbouring thread may be blocked
 * waiting at or before this position, wake it via td's condition variable.
 * NOTE: relies on avctx, num_jobs, prev_td and next_td being in scope at
 * the expansion site.
 *
 * Fixed: arguments are now parenthesized in the expansion, and the
 * trailing semicolon after while(0) — which defeated the do/while(0)
 * single-statement idiom in unbraced if/else bodies — is removed
 * (call sites already supply their own ';').
 */
#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null = (next_td == NULL) || (prev_td == NULL);\
        int pos_check = (is_null) ? 1 :\
                        (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                        (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
1663
/**
 * Decode one macroblock row (per-MB-row job for sliced threading):
 * entropy-decode coefficients, intra/inter predict and add the residuals.
 * Loop filtering runs in a separate pass (vp8_filter_mb_row). Progress is
 * synchronized with the neighbouring row's thread via check_thread_pos()
 * and update_pos().
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
    // coefficient partitions are assigned to rows round-robin
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };
    // rows are distributed round-robin over the job threads, so the thread
    // handling the row above/below is the previous/next job index
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        // per-frame MB layout: modes/mvs were decoded up front
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                // thread 0's predecessor is the filter pass; its positions
                // are offset by mb_width+3 — presumably to distinguish the
                // two passes in one counter; confirm against
                // vp8_filter_mb_row's update_pos calls
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8] = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        // the last job thread saves the top border here, since by the time
        // its filter pass runs, the row below may already be decoding
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        // NOTE(review): mb_x < s->mb_width inside this loop, so the
        // mb_width+1 branch looks unreachable here — confirm whether a
        // caller-side loop bound change was intended
        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
1773
1774static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1775 int jobnr, int threadnr)
1776{
1777 VP8Context *s = avctx->priv_data;
1778 VP8ThreadData *td = &s->thread_data[threadnr];
1779 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1780 AVFrame *curframe = s->curframe;
1781 VP8Macroblock *mb;
1782 VP8ThreadData *prev_td, *next_td;
1783 uint8_t *dst[3] = {
1784 curframe->data[0] + 16*mb_y*s->linesize,
1785 curframe->data[1] + 8*mb_y*s->uvlinesize,
1786 curframe->data[2] + 8*mb_y*s->uvlinesize
1787 };
1788
1789 if (s->mb_layout == 1)
1790 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1791 else
1792 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1793
1794 if (mb_y == 0) prev_td = td;
1795 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1796 if (mb_y == s->mb_height-1) next_td = td;
1797 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1798
1799 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1800 VP8FilterStrength *f = &td->filter_strength[mb_x];
1801 if (prev_td != td) {
1802 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1803 }
1804 if (next_td != td)
1805 if (next_td != &s->thread_data[0]) {
1806 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1807 }
1808
1809 if (num_jobs == 1) {
1810 if (s->filter.simple)
1811 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1812 else
1813 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1814 }
1815
337ade52 1816 if (s->filter.simple)
951455c1 1817 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
337ade52 1818 else
951455c1
DK
1819 filter_mb(s, dst, f, mb_x, mb_y);
1820 dst[0] += 16;
1821 dst[1] += 8;
1822 dst[2] += 8;
1823
1824 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1825 }
1826}
1827
1828static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1829 int jobnr, int threadnr)
1830{
1831 VP8Context *s = avctx->priv_data;
1832 VP8ThreadData *td = &s->thread_data[jobnr];
1833 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1834 AVFrame *curframe = s->curframe;
1835 int mb_y, num_jobs = s->num_jobs;
1836 td->thread_nr = threadnr;
1837 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1838 if (mb_y >= s->mb_height) break;
1839 td->thread_mb_pos = mb_y<<16;
1840 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1841 if (s->deblock_filter)
1842 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1843 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1844
1845 s->mv_min.y -= 64;
1846 s->mv_max.y -= 64;
1847
1848 if (avctx->active_thread_type == FF_THREAD_FRAME)
1849 ff_thread_report_progress(curframe, mb_y, 0);
337ade52 1850 }
951455c1
DK
1851
1852 return 0;
337ade52
DK
1853}
1854
3b636f21
DC
1855static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1856 AVPacket *avpkt)
1857{
1858 VP8Context *s = avctx->priv_data;
951455c1 1859 int ret, i, referenced, num_jobs;
3b636f21 1860 enum AVDiscard skip_thresh;
e02dec25 1861 AVFrame *av_uninit(curframe), *prev_frame;
3b636f21 1862
ce42a048
RB
1863 release_queued_segmaps(s, 0);
1864
3b636f21 1865 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
fb90785e 1866 goto err;
3b636f21 1867
e02dec25
AC
1868 prev_frame = s->framep[VP56_FRAME_CURRENT];
1869
3b636f21
DC
1870 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1871 || s->update_altref == VP56_FRAME_CURRENT;
1872
1873 skip_thresh = !referenced ? AVDISCARD_NONREF :
1874 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1875
1876 if (avctx->skip_frame >= skip_thresh) {
1877 s->invisible = 1;
fb90785e 1878 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
3b636f21
DC
1879 goto skip_decode;
1880 }
9ac831c2 1881 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
3b636f21 1882
4773d904
RB
1883 // release no longer referenced frames
1884 for (i = 0; i < 5; i++)
1885 if (s->frames[i].data[0] &&
1886 &s->frames[i] != prev_frame &&
1887 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1888 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1889 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
bfa0f965 1890 vp8_release_frame(s, &s->frames[i], 1, 0);
4773d904
RB
1891
1892 // find a free buffer
1893 for (i = 0; i < 5; i++)
1894 if (&s->frames[i] != prev_frame &&
1895 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
3b636f21
DC
1896 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1897 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1898 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1899 break;
1900 }
4773d904
RB
1901 if (i == 5) {
1902 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1903 abort();
1904 }
3b636f21 1905 if (curframe->data[0])
bfa0f965 1906 vp8_release_frame(s, curframe, 1, 0);
3b636f21 1907
fb90785e
RB
1908 // Given that arithmetic probabilities are updated every frame, it's quite likely
1909 // that the values we have on a random interframe are complete junk if we didn't
1910 // start decode on a keyframe. So just don't display anything rather than junk.
1911 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1912 !s->framep[VP56_FRAME_GOLDEN] ||
1913 !s->framep[VP56_FRAME_GOLDEN2])) {
1914 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1915 ret = AVERROR_INVALIDDATA;
1916 goto err;
1917 }
1918
3b636f21 1919 curframe->key_frame = s->keyframe;
975a1447 1920 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3b636f21 1921 curframe->reference = referenced ? 3 : 0;
ce42a048 1922 if ((ret = vp8_alloc_frame(s, curframe))) {
3b636f21 1923 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
fb90785e 1924 goto err;
3b636f21
DC
1925 }
1926
4773d904
RB
1927 // check if golden and altref are swapped
1928 if (s->update_altref != VP56_FRAME_NONE) {
1929 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1930 } else {
1931 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1932 }
1933 if (s->update_golden != VP56_FRAME_NONE) {
1934 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1935 } else {
1936 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1937 }
1938 if (s->update_last) {
1939 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1940 } else {
1941 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1942 }
1943 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1944
1945 ff_thread_finish_setup(avctx);
1946
3b636f21
DC
1947 s->linesize = curframe->linesize[0];
1948 s->uvlinesize = curframe->linesize[1];
1949
951455c1
DK
1950 if (!s->thread_data[0].edge_emu_buffer)
1951 for (i = 0; i < MAX_THREADS; i++)
1952 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
3b636f21
DC
1953
1954 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
aa93c52c 1955 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
951455c1
DK
1956 if (!s->mb_layout)
1957 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1958 if (!s->mb_layout && s->keyframe)
1959 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
c55e0d34 1960
3b636f21 1961 // top edge of 127 for intra prediction
ee555de7
RB
1962 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1963 s->top_border[0][15] = s->top_border[0][23] = 127;
1964 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1965 }
c4211046 1966 memset(s->ref_count, 0, sizeof(s->ref_count));
3b636f21 1967
7634771e 1968
951455c1
DK
1969 // Make sure the previous frame has read its segmentation map,
1970 // if we re-use the same map.
1971 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1972 ff_thread_await_progress(prev_frame, 1, 0);
7634771e 1973
951455c1
DK
1974 if (s->mb_layout == 1)
1975 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
4773d904 1976
951455c1
DK
1977 if (avctx->active_thread_type == FF_THREAD_FRAME)
1978 num_jobs = 1;
1979 else
1980 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1981 s->num_jobs = num_jobs;
1982 s->curframe = curframe;
1983 s->prev_frame = prev_frame;
1984 s->mv_min.y = -MARGIN;
1985 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1986 for (i = 0; i < MAX_THREADS; i++) {
1987 s->thread_data[i].thread_mb_pos = 0;
1988 s->thread_data[i].wait_mb_pos = INT_MAX;
1989 }
1990 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
3b636f21 1991
4773d904 1992 ff_thread_report_progress(curframe, INT_MAX, 0);
fb90785e
RB
1993 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1994
3b636f21
DC
1995skip_decode:
1996 // if future frames don't use the updated probabilities,
1997 // reset them to the values we saved
1998 if (!s->update_probabilities)
1999 s->prob[0] = s->prob[1];
2000
3b636f21 2001 if (!s->invisible) {
4773d904 2002 *(AVFrame*)data = *curframe;
3b636f21
DC
2003 *data_size = sizeof(AVFrame);
2004 }
2005
2006 return avpkt->size;
fb90785e
RB
2007err:
2008 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2009 return ret;
3b636f21
DC
2010}
2011
2012static av_cold int vp8_decode_init(AVCodecContext *avctx)
2013{
2014 VP8Context *s = avctx->priv_data;
2015
2016 s->avctx = avctx;
2017 avctx->pix_fmt = PIX_FMT_YUV420P;
2018
9cf0841e 2019 ff_dsputil_init(&s->dsp, avctx);
76741b0e 2020 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
3b636f21
DC
2021 ff_vp8dsp_init(&s->vp8dsp);
2022
3b636f21
DC
2023 return 0;
2024}
2025
2026static av_cold int vp8_decode_free(AVCodecContext *avctx)
2027{
bfa0f965 2028 vp8_decode_flush_impl(avctx, 0, 1, 1);
ce42a048 2029 release_queued_segmaps(avctx->priv_data, 1);
3b636f21
DC
2030 return 0;
2031}
2032
4773d904
RB
2033static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2034{
2035 VP8Context *s = avctx->priv_data;
2036
2037 s->avctx = avctx;
2038
2039 return 0;
2040}
2041
2042#define REBASE(pic) \
2043 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
2044
2045static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2046{
2047 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2048
56535793
RB
2049 if (s->macroblocks_base &&
2050 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2051 free_buffers(s);
e02dec25 2052 s->maps_are_invalid = 1;
82a0497c
RB
2053 s->mb_width = s_src->mb_width;
2054 s->mb_height = s_src->mb_height;
56535793
RB
2055 }
2056
4773d904
RB
2057 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2058 s->segmentation = s_src->segmentation;
2059 s->lf_delta = s_src->lf_delta;
2060 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2061
2062 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2063 s->framep[0] = REBASE(s_src->next_framep[0]);
2064 s->framep[1] = REBASE(s_src->next_framep[1]);
2065 s->framep[2] = REBASE(s_src->next_framep[2]);
2066 s->framep[3] = REBASE(s_src->next_framep[3]);
2067
2068 return 0;
2069}
2070
d36beb3f 2071AVCodec ff_vp8_decoder = {
00c3b67b
MS
2072 .name = "vp8",
2073 .type = AVMEDIA_TYPE_VIDEO,
2074 .id = CODEC_ID_VP8,
2075 .priv_data_size = sizeof(VP8Context),
2076 .init = vp8_decode_init,
2077 .close = vp8_decode_free,
2078 .decode = vp8_decode_frame,
951455c1 2079 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
00c3b67b
MS
2080 .flush = vp8_decode_flush,
2081 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
4773d904
RB
2082 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2083 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
3b636f21 2084};