vp8: Enclose pthread function calls in ifdefs
[libav.git] / libavcodec / vp8.c
CommitLineData
32f3c541 1/*
3b636f21
DC
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
13a1304b 6 * Copyright (C) 2010 Jason Garrett-Glaser
951455c1 7 * Copyright (C) 2012 Daniel Kang
3b636f21 8 *
2912e87a 9 * This file is part of Libav.
3b636f21 10 *
2912e87a 11 * Libav is free software; you can redistribute it and/or
3b636f21
DC
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
2912e87a 16 * Libav is distributed in the hope that it will be useful,
3b636f21
DC
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
2912e87a 22 * License along with Libav; if not, write to the Free Software
3b636f21
DC
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
737eb597 26#include "libavutil/imgutils.h"
3b636f21 27#include "avcodec.h"
f3a29b75 28#include "internal.h"
bcf4568f 29#include "vp8.h"
3b636f21 30#include "vp8data.h"
3b636f21 31#include "rectangle.h"
4773d904 32#include "thread.h"
3b636f21 33
a7878c9f
MR
34#if ARCH_ARM
35# include "arm/vp8.h"
36#endif
37
56535793
RB
38static void free_buffers(VP8Context *s)
39{
951455c1
DK
40 int i;
41 if (s->thread_data)
42 for (i = 0; i < MAX_THREADS; i++) {
43 av_freep(&s->thread_data[i].filter_strength);
44 av_freep(&s->thread_data[i].edge_emu_buffer);
45 }
46 av_freep(&s->thread_data);
56535793 47 av_freep(&s->macroblocks_base);
56535793
RB
48 av_freep(&s->intra4x4_pred_mode_top);
49 av_freep(&s->top_nnz);
56535793 50 av_freep(&s->top_border);
56535793
RB
51
52 s->macroblocks = NULL;
53}
54
ce42a048
RB
55static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
56{
57 int ret;
58 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
59 return ret;
e02dec25 60 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
ce42a048
RB
61 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
62 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
63 ff_thread_release_buffer(s->avctx, f);
64 return AVERROR(ENOMEM);
65 }
66 return 0;
67}
68
bfa0f965 69static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
ce42a048 70{
bfa0f965
RB
71 if (f->ref_index[0]) {
72 if (prefer_delayed_free) {
73 /* Upon a size change, we want to free the maps but other threads may still
74 * be using them, so queue them. Upon a seek, all threads are inactive so
75 * we want to cache one to prevent re-allocation in the next decoding
76 * iteration, but the rest we can free directly. */
77 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
78 if (s->num_maps_to_be_freed < max_queued_maps) {
79 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
80 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
81 av_free(f->ref_index[0]);
82 } /* else: MEMLEAK (should never happen, but better that than crash) */
ce42a048 83 f->ref_index[0] = NULL;
bfa0f965
RB
84 } else /* vp8_decode_free() */ {
85 av_free(f->ref_index[0]);
ce42a048 86 }
ce42a048
RB
87 }
88 ff_thread_release_buffer(s->avctx, f);
89}
90
bfa0f965
RB
91static void vp8_decode_flush_impl(AVCodecContext *avctx,
92 int prefer_delayed_free, int can_direct_free, int free_mem)
3b636f21
DC
93{
94 VP8Context *s = avctx->priv_data;
95 int i;
96
f3a29b75 97 if (!avctx->internal->is_copy) {
4773d904
RB
98 for (i = 0; i < 5; i++)
99 if (s->frames[i].data[0])
bfa0f965 100 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
4773d904 101 }
3b636f21
DC
102 memset(s->framep, 0, sizeof(s->framep));
103
bfa0f965
RB
104 if (free_mem) {
105 free_buffers(s);
106 s->maps_are_invalid = 1;
107 }
ce42a048
RB
108}
109
110static void vp8_decode_flush(AVCodecContext *avctx)
111{
bfa0f965 112 vp8_decode_flush_impl(avctx, 1, 1, 0);
3b636f21
DC
113}
114
115static int update_dimensions(VP8Context *s, int width, int height)
116{
951455c1
DK
117 AVCodecContext *avctx = s->avctx;
118 int i;
119
4773d904
RB
120 if (width != s->avctx->width ||
121 height != s->avctx->height) {
122 if (av_image_check_size(width, height, 0, s->avctx))
123 return AVERROR_INVALIDDATA;
3b636f21 124
bfa0f965 125 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
3b636f21 126
4773d904
RB
127 avcodec_set_dimensions(s->avctx, width, height);
128 }
3b636f21
DC
129
130 s->mb_width = (s->avctx->coded_width +15) / 16;
131 s->mb_height = (s->avctx->coded_height+15) / 16;
132
951455c1
DK
133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
134 if (!s->mb_layout) { // Frame threading and one thread
135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
137 }
138 else // Sliced threading
139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
3b636f21 143
951455c1
DK
144 for (i = 0; i < MAX_THREADS; i++) {
145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
25f056e6 146#if HAVE_THREADS
951455c1
DK
147 pthread_mutex_init(&s->thread_data[i].lock, NULL);
148 pthread_cond_init(&s->thread_data[i].cond, NULL);
25f056e6 149#endif
951455c1
DK
150 }
151
152 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
153 (!s->intra4x4_pred_mode_top && !s->mb_layout))
b6c420ce
DC
154 return AVERROR(ENOMEM);
155
c55e0d34 156 s->macroblocks = s->macroblocks_base + 1;
3b636f21
DC
157
158 return 0;
159}
160
161static void parse_segment_info(VP8Context *s)
162{
163 VP56RangeCoder *c = &s->c;
164 int i;
165
166 s->segmentation.update_map = vp8_rac_get(c);
167
168 if (vp8_rac_get(c)) { // update segment feature data
169 s->segmentation.absolute_vals = vp8_rac_get(c);
170
171 for (i = 0; i < 4; i++)
172 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
173
174 for (i = 0; i < 4; i++)
175 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
176 }
177 if (s->segmentation.update_map)
178 for (i = 0; i < 3; i++)
179 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
180}
181
182static void update_lf_deltas(VP8Context *s)
183{
184 VP56RangeCoder *c = &s->c;
185 int i;
186
14ba7472
JS
187 for (i = 0; i < 4; i++) {
188 if (vp8_rac_get(c)) {
189 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
3b636f21 190
14ba7472
JS
191 if (vp8_rac_get(c))
192 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
193 }
194 }
195
196 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
197 if (vp8_rac_get(c)) {
198 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
199
200 if (vp8_rac_get(c))
201 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
202 }
203 }
3b636f21
DC
204}
205
206static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
207{
208 const uint8_t *sizes = buf;
209 int i;
210
211 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
212
213 buf += 3*(s->num_coeff_partitions-1);
214 buf_size -= 3*(s->num_coeff_partitions-1);
215 if (buf_size < 0)
216 return -1;
217
218 for (i = 0; i < s->num_coeff_partitions-1; i++) {
06d50ca8 219 int size = AV_RL24(sizes + 3*i);
3b636f21
DC
220 if (buf_size - size < 0)
221 return -1;
222
905ef0d0 223 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
3b636f21
DC
224 buf += size;
225 buf_size -= size;
226 }
905ef0d0 227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
3b636f21
DC
228
229 return 0;
230}
231
232static void get_quants(VP8Context *s)
233{
234 VP56RangeCoder *c = &s->c;
235 int i, base_qi;
236
237 int yac_qi = vp8_rac_get_uint(c, 7);
238 int ydc_delta = vp8_rac_get_sint(c, 4);
239 int y2dc_delta = vp8_rac_get_sint(c, 4);
240 int y2ac_delta = vp8_rac_get_sint(c, 4);
241 int uvdc_delta = vp8_rac_get_sint(c, 4);
242 int uvac_delta = vp8_rac_get_sint(c, 4);
243
244 for (i = 0; i < 4; i++) {
245 if (s->segmentation.enabled) {
246 base_qi = s->segmentation.base_quant[i];
247 if (!s->segmentation.absolute_vals)
248 base_qi += yac_qi;
249 } else
250 base_qi = yac_qi;
251
42761122
MR
252 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
253 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
254 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
255 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
256 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
257 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
a8ab0ccc
PM
258
259 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
260 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
3b636f21
DC
261 }
262}
263
264/**
265 * Determine which buffers golden and altref should be updated with after this frame.
266 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
267 *
268 * Intra frames update all 3 references
269 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
270 * If the update (golden|altref) flag is set, it's updated with the current frame
271 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
272 * If the flag is not set, the number read means:
273 * 0: no update
274 * 1: VP56_FRAME_PREVIOUS
275 * 2: update golden with altref, or update altref with golden
276 */
277static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
278{
279 VP56RangeCoder *c = &s->c;
280
281 if (update)
282 return VP56_FRAME_CURRENT;
283
284 switch (vp8_rac_get_uint(c, 2)) {
285 case 1:
286 return VP56_FRAME_PREVIOUS;
287 case 2:
288 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
289 }
290 return VP56_FRAME_NONE;
291}
292
293static void update_refs(VP8Context *s)
294{
295 VP56RangeCoder *c = &s->c;
296
297 int update_golden = vp8_rac_get(c);
298 int update_altref = vp8_rac_get(c);
299
300 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
301 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
302}
303
304static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
305{
306 VP56RangeCoder *c = &s->c;
370b622a 307 int header_size, hscale, vscale, i, j, k, l, m, ret;
3b636f21
DC
308 int width = s->avctx->width;
309 int height = s->avctx->height;
310
311 s->keyframe = !(buf[0] & 1);
312 s->profile = (buf[0]>>1) & 7;
313 s->invisible = !(buf[0] & 0x10);
06d50ca8 314 header_size = AV_RL24(buf) >> 5;
3b636f21
DC
315 buf += 3;
316 buf_size -= 3;
317
0ef1dbed
DC
318 if (s->profile > 3)
319 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
320
321 if (!s->profile)
322 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
323 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
324 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
3b636f21
DC
325
326 if (header_size > buf_size - 7*s->keyframe) {
327 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
328 return AVERROR_INVALIDDATA;
329 }
330
331 if (s->keyframe) {
06d50ca8
JGG
332 if (AV_RL24(buf) != 0x2a019d) {
333 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
3b636f21
DC
334 return AVERROR_INVALIDDATA;
335 }
336 width = AV_RL16(buf+3) & 0x3fff;
337 height = AV_RL16(buf+5) & 0x3fff;
338 hscale = buf[4] >> 6;
339 vscale = buf[6] >> 6;
340 buf += 7;
341 buf_size -= 7;
342
92a54426
MR
343 if (hscale || vscale)
344 av_log_missing_feature(s->avctx, "Upscaling", 1);
345
3b636f21 346 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
370b622a
JGG
347 for (i = 0; i < 4; i++)
348 for (j = 0; j < 16; j++)
349 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
350 sizeof(s->prob->token[i][j]));
3b636f21
DC
351 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
352 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
353 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
354 memset(&s->segmentation, 0, sizeof(s->segmentation));
355 }
356
905ef0d0 357 ff_vp56_init_range_decoder(c, buf, header_size);
3b636f21
DC
358 buf += header_size;
359 buf_size -= header_size;
360
361 if (s->keyframe) {
362 if (vp8_rac_get(c))
363 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
364 vp8_rac_get(c); // whether we can skip clamping in dsp functions
365 }
366
367 if ((s->segmentation.enabled = vp8_rac_get(c)))
368 parse_segment_info(s);
369 else
370 s->segmentation.update_map = 0; // FIXME: move this to some init function?
371
372 s->filter.simple = vp8_rac_get(c);
373 s->filter.level = vp8_rac_get_uint(c, 6);
374 s->filter.sharpness = vp8_rac_get_uint(c, 3);
375
376 if ((s->lf_delta.enabled = vp8_rac_get(c)))
377 if (vp8_rac_get(c))
378 update_lf_deltas(s);
379
380 if (setup_partitions(s, buf, buf_size)) {
381 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
382 return AVERROR_INVALIDDATA;
383 }
384
951455c1
DK
385 if (!s->macroblocks_base || /* first frame */
386 width != s->avctx->width || height != s->avctx->height) {
387 if ((ret = update_dimensions(s, width, height)) < 0)
388 return ret;
389 }
390
3b636f21
DC
391 get_quants(s);
392
393 if (!s->keyframe) {
394 update_refs(s);
395 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
396 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
397 }
398
399 // if we aren't saving this frame's probabilities for future frames,
400 // make a copy of the current probabilities
401 if (!(s->update_probabilities = vp8_rac_get(c)))
402 s->prob[1] = s->prob[0];
403
404 s->update_last = s->keyframe || vp8_rac_get(c);
405
406 for (i = 0; i < 4; i++)
407 for (j = 0; j < 8; j++)
408 for (k = 0; k < 3; k++)
409 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
370b622a
JGG
410 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
411 int prob = vp8_rac_get_uint(c, 8);
b0d58795
JGG
412 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
413 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
370b622a 414 }
3b636f21
DC
415
416 if ((s->mbskip_enabled = vp8_rac_get(c)))
a8ab0ccc 417 s->prob->mbskip = vp8_rac_get_uint(c, 8);
3b636f21
DC
418
419 if (!s->keyframe) {
a8ab0ccc
PM
420 s->prob->intra = vp8_rac_get_uint(c, 8);
421 s->prob->last = vp8_rac_get_uint(c, 8);
422 s->prob->golden = vp8_rac_get_uint(c, 8);
3b636f21
DC
423
424 if (vp8_rac_get(c))
425 for (i = 0; i < 4; i++)
426 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
427 if (vp8_rac_get(c))
428 for (i = 0; i < 3; i++)
429 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
430
431 // 17.2 MV probability update
432 for (i = 0; i < 2; i++)
433 for (j = 0; j < 19; j++)
7697cdcf 434 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
3b636f21
DC
435 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
436 }
437
438 return 0;
439}
440
7634771e 441static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
3b636f21 442{
7634771e
JGG
443 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
444 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
3b636f21
DC
445}
446
3b636f21
DC
447/**
448 * Motion vector coding, 17.1.
449 */
450static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
451{
ca18a478 452 int bit, x = 0;
3b636f21 453
7697cdcf 454 if (vp56_rac_get_prob_branchy(c, p[0])) {
3b636f21
DC
455 int i;
456
457 for (i = 0; i < 3; i++)
458 x += vp56_rac_get_prob(c, p[9 + i]) << i;
459 for (i = 9; i > 3; i--)
460 x += vp56_rac_get_prob(c, p[9 + i]) << i;
461 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
462 x += 8;
ca18a478
DC
463 } else {
464 // small_mvtree
465 const uint8_t *ps = p+2;
466 bit = vp56_rac_get_prob(c, *ps);
467 ps += 1 + 3*bit;
468 x += 4*bit;
469 bit = vp56_rac_get_prob(c, *ps);
470 ps += 1 + bit;
471 x += 2*bit;
472 x += vp56_rac_get_prob(c, *ps);
473 }
3b636f21
DC
474
475 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
476}
477
414ac27d
JGG
478static av_always_inline
479const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
3b636f21 480{
7bf254c4
JGG
481 if (left == top)
482 return vp8_submv_prob[4-!!left];
483 if (!top)
3b636f21 484 return vp8_submv_prob[2];
7bf254c4 485 return vp8_submv_prob[1-!!left];
3b636f21
DC
486}
487
488/**
489 * Split motion vector prediction, 16.4.
7ed06b2b 490 * @returns the number of motion vectors parsed (2, 4 or 16)
3b636f21 491 */
414ac27d 492static av_always_inline
951455c1 493int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
3b636f21 494{
0908f1b9
JGG
495 int part_idx;
496 int n, num;
951455c1 497 VP8Macroblock *top_mb;
7bf254c4
JGG
498 VP8Macroblock *left_mb = &mb[-1];
499 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
951455c1 500 *mbsplits_top,
0908f1b9 501 *mbsplits_cur, *firstidx;
951455c1 502 VP56mv *top_mv;
c55e0d34
JGG
503 VP56mv *left_mv = left_mb->bmv;
504 VP56mv *cur_mv = mb->bmv;
3b636f21 505
951455c1
DK
506 if (!layout) // layout is inlined, s->mb_layout is not
507 top_mb = &mb[2];
508 else
509 top_mb = &mb[-s->mb_width-1];
510 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
511 top_mv = top_mb->bmv;
512
0908f1b9
JGG
513 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
514 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
515 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
516 } else {
517 part_idx = VP8_SPLITMVMODE_8x8;
518 }
519 } else {
520 part_idx = VP8_SPLITMVMODE_4x4;
521 }
522
523 num = vp8_mbsplit_count[part_idx];
524 mbsplits_cur = vp8_mbsplits[part_idx],
525 firstidx = vp8_mbfirstidx[part_idx];
526 mb->partitioning = part_idx;
527
3b636f21 528 for (n = 0; n < num; n++) {
7ed06b2b 529 int k = firstidx[n];
7bf254c4 530 uint32_t left, above;
7ed06b2b
RB
531 const uint8_t *submv_prob;
532
7bf254c4
JGG
533 if (!(k & 3))
534 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
535 else
536 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
537 if (k <= 3)
538 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
539 else
540 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
7ed06b2b
RB
541
542 submv_prob = get_submv_prob(left, above);
3b636f21 543
c5dec7f1
JGG
544 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
545 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
546 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
547 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
548 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
549 } else {
550 AV_ZERO32(&mb->bmv[n]);
551 }
552 } else {
553 AV_WN32A(&mb->bmv[n], above);
554 }
555 } else {
7bf254c4 556 AV_WN32A(&mb->bmv[n], left);
3b636f21 557 }
3b636f21 558 }
7ed06b2b
RB
559
560 return num;
3b636f21
DC
561}
562
414ac27d 563static av_always_inline
951455c1 564void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
f3d09d44 565{
951455c1 566 VP8Macroblock *mb_edge[3] = { 0 /* top */,
f3d09d44 567 mb - 1 /* left */,
951455c1 568 0 /* top-left */ };
f3d09d44 569 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
66f608a6 570 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
f3d09d44
JGG
571 int idx = CNT_ZERO;
572 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1eeca886 573 int8_t *sign_bias = s->sign_bias;
f3d09d44
JGG
574 VP56mv near_mv[4];
575 uint8_t cnt[4] = { 0 };
576 VP56RangeCoder *c = &s->c;
577
951455c1
DK
578 if (!layout) { // layout is inlined (s->mb_layout is not)
579 mb_edge[0] = mb + 2;
580 mb_edge[2] = mb + 1;
581 }
582 else {
583 mb_edge[0] = mb - s->mb_width-1;
584 mb_edge[2] = mb - s->mb_width-2;
585 }
586
f3d09d44
JGG
587 AV_ZERO32(&near_mv[0]);
588 AV_ZERO32(&near_mv[1]);
0f0b5d64 589 AV_ZERO32(&near_mv[2]);
f3d09d44
JGG
590
591 /* Process MB on top, left and top-left */
592 #define MV_EDGE_CHECK(n)\
593 {\
594 VP8Macroblock *edge = mb_edge[n];\
595 int edge_ref = edge->ref_frame;\
596 if (edge_ref != VP56_FRAME_CURRENT) {\
597 uint32_t mv = AV_RN32A(&edge->mv);\
598 if (mv) {\
599 if (cur_sign_bias != sign_bias[edge_ref]) {\
600 /* SWAR negate of the values in mv. */\
601 mv = ~mv;\
602 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
603 }\
604 if (!n || mv != AV_RN32A(&near_mv[idx]))\
605 AV_WN32A(&near_mv[++idx], mv);\
606 cnt[idx] += 1 + (n != 2);\
607 } else\
608 cnt[CNT_ZERO] += 1 + (n != 2);\
609 }\
610 }
611
612 MV_EDGE_CHECK(0)
613 MV_EDGE_CHECK(1)
614 MV_EDGE_CHECK(2)
615
616 mb->partitioning = VP8_SPLITMVMODE_NONE;
617 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
618 mb->mode = VP8_MVMODE_MV;
619
620 /* If we have three distinct MVs, merge first and last if they're the same */
66f608a6 621 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
f3d09d44
JGG
622 cnt[CNT_NEAREST] += 1;
623
624 /* Swap near and nearest if necessary */
625 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
626 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
627 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
628 }
629
630 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
631 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
632
633 /* Choose the best mv out of 0,0 and the nearest mv */
7634771e 634 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
66f608a6
AS
635 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
636 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
637 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
f3d09d44
JGG
638
639 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
640 mb->mode = VP8_MVMODE_SPLIT;
951455c1 641 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
f3d09d44
JGG
642 } else {
643 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
644 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
645 mb->bmv[0] = mb->mv;
646 }
647 } else {
7634771e 648 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
f3d09d44
JGG
649 mb->bmv[0] = mb->mv;
650 }
651 } else {
7634771e 652 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
f3d09d44
JGG
653 mb->bmv[0] = mb->mv;
654 }
655 } else {
656 mb->mode = VP8_MVMODE_ZERO;
657 AV_ZERO32(&mb->mv);
658 mb->bmv[0] = mb->mv;
659 }
660}
661
662static av_always_inline
17343e39 663void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
951455c1 664 int mb_x, int keyframe, int layout)
3b636f21 665{
17343e39
DK
666 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
667
951455c1
DK
668 if (layout == 1) {
669 VP8Macroblock *mb_top = mb - s->mb_width - 1;
670 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
671 }
d1c58fce 672 if (keyframe) {
d2840fa4 673 int x, y;
951455c1 674 uint8_t* top;
d2840fa4 675 uint8_t* const left = s->intra4x4_pred_mode_left;
951455c1
DK
676 if (layout == 1)
677 top = mb->intra4x4_pred_mode_top;
678 else
679 top = s->intra4x4_pred_mode_top + 4 * mb_x;
d1c58fce
JGG
680 for (y = 0; y < 4; y++) {
681 for (x = 0; x < 4; x++) {
d2840fa4
PM
682 const uint8_t *ctx;
683 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
684 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
685 left[y] = top[x] = *intra4x4;
686 intra4x4++;
3b636f21 687 }
3b636f21 688 }
d1c58fce 689 } else {
d2840fa4 690 int i;
d1c58fce
JGG
691 for (i = 0; i < 16; i++)
692 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
3b636f21
DC
693 }
694}
695
414ac27d 696static av_always_inline
951455c1
DK
697void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
698 uint8_t *segment, uint8_t *ref, int layout)
3b636f21
DC
699{
700 VP56RangeCoder *c = &s->c;
3b636f21
DC
701
702 if (s->segmentation.update_map)
c55e0d34 703 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
30011bf2 704 else if (s->segmentation.enabled)
4773d904 705 *segment = ref ? *ref : *segment;
17343e39 706 mb->segment = *segment;
3b636f21 707
a8ab0ccc 708 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
3b636f21
DC
709
710 if (s->keyframe) {
711 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
712
713 if (mb->mode == MODE_I4x4) {
951455c1 714 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
d2840fa4
PM
715 } else {
716 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
951455c1
DK
717 if (s->mb_layout == 1)
718 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
719 else
720 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
721 AV_WN32A( s->intra4x4_pred_mode_left, modes);
d2840fa4 722 }
3b636f21 723
17343e39 724 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
3b636f21 725 mb->ref_frame = VP56_FRAME_CURRENT;
a8ab0ccc 726 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
3b636f21 727 // inter MB, 16.2
a8ab0ccc
PM
728 if (vp56_rac_get_prob_branchy(c, s->prob->last))
729 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
3b636f21
DC
730 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
731 else
732 mb->ref_frame = VP56_FRAME_PREVIOUS;
c4211046 733 s->ref_count[mb->ref_frame-1]++;
3b636f21
DC
734
735 // motion vectors, 16.3
951455c1 736 decode_mvs(s, mb, mb_x, mb_y, layout);
3b636f21
DC
737 } else {
738 // intra MB, 16.1
739 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
740
158e062c 741 if (mb->mode == MODE_I4x4)
951455c1 742 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
3b636f21 743
17343e39 744 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
3b636f21 745 mb->ref_frame = VP56_FRAME_CURRENT;
b946111f 746 mb->partitioning = VP8_SPLITMVMODE_NONE;
14767f35 747 AV_ZERO32(&mb->bmv[0]);
3b636f21
DC
748 }
749}
750
a7878c9f 751#ifndef decode_block_coeffs_internal
3b636f21 752/**
e394953e
RB
753 * @param c arithmetic bitstream reader context
754 * @param block destination for block coefficients
755 * @param probs probabilities to use when reading trees from the bitstream
3b636f21 756 * @param i initial coeff index, 0 unless a separate DC block is coded
3fa76268 757 * @param qmul array holding the dc/ac dequant factor at position 0/1
3b636f21
DC
758 * @return 0 if no coeffs were decoded
759 * otherwise, the index of the last coeff decoded plus one
760 */
6163d880 761static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
3efbe137 762 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1e739679 763 int i, uint8_t *token_prob, int16_t qmul[2])
3b636f21 764{
6163d880 765 VP56RangeCoder c = *r;
afb54a85 766 goto skip_eob;
fe1b5d97 767 do {
1e739679 768 int coeff;
6163d880
RB
769 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
770 break;
3b636f21 771
fe1b5d97 772skip_eob:
6163d880 773 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
c22b4468 774 if (++i == 16)
6163d880 775 break; // invalid input; blocks should end with EOB
370b622a 776 token_prob = probs[i][0];
c22b4468 777 goto skip_eob;
fe1b5d97
DC
778 }
779
6163d880 780 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
fe1b5d97 781 coeff = 1;
370b622a 782 token_prob = probs[i+1][1];
fe1b5d97 783 } else {
6163d880
RB
784 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
785 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
fe1b5d97 786 if (coeff)
6163d880 787 coeff += vp56_rac_get_prob(&c, token_prob[5]);
fe1b5d97
DC
788 coeff += 2;
789 } else {
790 // DCT_CAT*
6163d880
RB
791 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
792 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
793 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
fe1b5d97
DC
794 } else { // DCT_CAT2
795 coeff = 7;
6163d880
RB
796 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
797 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
fe1b5d97
DC
798 }
799 } else { // DCT_CAT3 and up
6163d880
RB
800 int a = vp56_rac_get_prob(&c, token_prob[8]);
801 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
fe1b5d97
DC
802 int cat = (a<<1) + b;
803 coeff = 3 + (8<<cat);
6163d880 804 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
fe1b5d97
DC
805 }
806 }
370b622a 807 token_prob = probs[i+1][2];
fe1b5d97 808 }
6163d880 809 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
afb54a85 810 } while (++i < 16);
fe1b5d97 811
6163d880 812 *r = c;
afb54a85 813 return i;
3b636f21 814}
a7878c9f 815#endif
3b636f21 816
3c432e11
DB
817/**
818 * @param c arithmetic bitstream reader context
819 * @param block destination for block coefficients
820 * @param probs probabilities to use when reading trees from the bitstream
821 * @param i initial coeff index, 0 unless a separate DC block is coded
822 * @param zero_nhood the initial prediction context for number of surrounding
823 * all-zero blocks (only left/top, so 0-2)
824 * @param qmul array holding the dc/ac dequant factor at position 0/1
825 * @return 0 if no coeffs were decoded
826 * otherwise, the index of the last coeff decoded plus one
827 */
414ac27d 828static av_always_inline
1e739679 829int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
81a13131 830 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1e739679
JGG
831 int i, int zero_nhood, int16_t qmul[2])
832{
833 uint8_t *token_prob = probs[i][zero_nhood];
834 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
835 return 0;
836 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
837}
838
839static av_always_inline
951455c1 840void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
414ac27d 841 uint8_t t_nnz[9], uint8_t l_nnz[9])
3b636f21 842{
3b636f21
DC
843 int i, x, y, luma_start = 0, luma_ctx = 3;
844 int nnz_pred, nnz, nnz_total = 0;
17343e39 845 int segment = mb->segment;
f311208c 846 int block_dc = 0;
3b636f21 847
3b636f21 848 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
3b636f21
DC
849 nnz_pred = t_nnz[8] + l_nnz[8];
850
851 // decode DC values and do hadamard
951455c1 852 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
3b636f21
DC
853 s->qmat[segment].luma_dc_qmul);
854 l_nnz[8] = t_nnz[8] = !!nnz;
f311208c
JGG
855 if (nnz) {
856 nnz_total += nnz;
857 block_dc = 1;
858 if (nnz == 1)
951455c1 859 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
f311208c 860 else
951455c1 861 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
f311208c 862 }
3b636f21
DC
863 luma_start = 1;
864 luma_ctx = 0;
865 }
866
867 // luma blocks
868 for (y = 0; y < 4; y++)
869 for (x = 0; x < 4; x++) {
ffbf0794 870 nnz_pred = l_nnz[y] + t_nnz[x];
951455c1 871 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
ffbf0794 872 nnz_pred, s->qmat[segment].luma_qmul);
f311208c 873 // nnz+block_dc may be one more than the actual last index, but we don't care
951455c1 874 td->non_zero_count_cache[y][x] = nnz + block_dc;
3b636f21
DC
875 t_nnz[x] = l_nnz[y] = !!nnz;
876 nnz_total += nnz;
877 }
878
879 // chroma blocks
880 // TODO: what to do about dimensions? 2nd dim for luma is x,
881 // but for chroma it's (y<<1)|x
882 for (i = 4; i < 6; i++)
883 for (y = 0; y < 2; y++)
884 for (x = 0; x < 2; x++) {
885 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
951455c1 886 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
3b636f21 887 nnz_pred, s->qmat[segment].chroma_qmul);
951455c1 888 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
3b636f21
DC
889 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
890 nnz_total += nnz;
891 }
892
893 // if there were no coded coeffs despite the macroblock not being marked skip,
894 // we MUST not do the inner loop filter and should not do IDCT
895 // Since skip isn't used for bitstream prediction, just manually set it.
896 if (!nnz_total)
897 mb->skip = 1;
898}
899
9ac831c2
DC
900static av_always_inline
901void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
902 int linesize, int uvlinesize, int simple)
903{
904 AV_COPY128(top_border, src_y + 15*linesize);
905 if (!simple) {
906 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
907 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
908 }
909}
910
911static av_always_inline
912void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
913 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
914 int simple, int xchg)
915{
916 uint8_t *top_border_m1 = top_border-32; // for TL prediction
917 src_y -= linesize;
918 src_cb -= uvlinesize;
919 src_cr -= uvlinesize;
920
096971e8
MR
921#define XCHG(a,b,xchg) do { \
922 if (xchg) AV_SWAP64(b,a); \
923 else AV_COPY64(b,a); \
924 } while (0)
9ac831c2
DC
925
926 XCHG(top_border_m1+8, src_y-8, xchg);
927 XCHG(top_border, src_y, xchg);
928 XCHG(top_border+8, src_y+8, 1);
070ce7ef 929 if (mb_x < mb_width-1)
9ac831c2 930 XCHG(top_border+32, src_y+16, 1);
070ce7ef 931
9ac831c2
DC
932 // only copy chroma for normal loop filter
933 // or to initialize the top row to 127
934 if (!simple || !mb_y) {
935 XCHG(top_border_m1+16, src_cb-8, xchg);
936 XCHG(top_border_m1+24, src_cr-8, xchg);
937 XCHG(top_border+16, src_cb, 1);
938 XCHG(top_border+24, src_cr, 1);
939 }
940}
941
414ac27d 942static av_always_inline
ee555de7
RB
943int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
944{
945 if (!mb_x) {
946 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
947 } else {
948 return mb_y ? mode : LEFT_DC_PRED8x8;
949 }
950}
951
952static av_always_inline
953int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
954{
955 if (!mb_x) {
956 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
957 } else {
958 return mb_y ? mode : HOR_PRED8x8;
959 }
960}
961
962static av_always_inline
963int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
3b636f21
DC
964{
965 if (mode == DC_PRED8x8) {
ee555de7
RB
966 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
967 } else {
968 return mode;
969 }
970}
971
972static av_always_inline
973int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
974{
975 switch (mode) {
976 case DC_PRED8x8:
977 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
978 case VERT_PRED8x8:
979 return !mb_y ? DC_127_PRED8x8 : mode;
980 case HOR_PRED8x8:
981 return !mb_x ? DC_129_PRED8x8 : mode;
982 case PLANE_PRED8x8 /*TM*/:
983 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
984 }
985 return mode;
986}
987
988static av_always_inline
989int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
990{
991 if (!mb_x) {
992 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
993 } else {
994 return mb_y ? mode : HOR_VP8_PRED;
995 }
996}
997
998static av_always_inline
999int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1000{
1001 switch (mode) {
1002 case VERT_PRED:
1003 if (!mb_x && mb_y) {
1004 *copy_buf = 1;
1005 return mode;
1006 }
1007 /* fall-through */
1008 case DIAG_DOWN_LEFT_PRED:
1009 case VERT_LEFT_PRED:
1010 return !mb_y ? DC_127_PRED : mode;
1011 case HOR_PRED:
1012 if (!mb_y) {
1013 *copy_buf = 1;
1014 return mode;
a71abb71 1015 }
ee555de7
RB
1016 /* fall-through */
1017 case HOR_UP_PRED:
1018 return !mb_x ? DC_129_PRED : mode;
1019 case TM_VP8_PRED:
1020 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1021 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1022 case DIAG_DOWN_RIGHT_PRED:
1023 case VERT_RIGHT_PRED:
1024 case HOR_DOWN_PRED:
1025 if (!mb_y || !mb_x)
1026 *copy_buf = 1;
1027 return mode;
3b636f21
DC
1028 }
1029 return mode;
1030}
1031
414ac27d 1032static av_always_inline
951455c1
DK
1033void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1034 VP8Macroblock *mb, int mb_x, int mb_y)
3b636f21 1035{
ee555de7 1036 AVCodecContext *avctx = s->avctx;
bb591566
MR
1037 int x, y, mode, nnz;
1038 uint32_t tr;
3b636f21 1039
9ac831c2
DC
1040 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1041 // otherwise, skip it if we aren't going to deblock
951455c1 1042 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
9ac831c2
DC
1043 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1044 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1045 s->filter.simple, 1);
1046
3b636f21 1047 if (mb->mode < MODE_I4x4) {
ee555de7
RB
1048 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1049 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1050 } else {
1051 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1052 }
3b636f21
DC
1053 s->hpc.pred16x16[mode](dst[0], s->linesize);
1054 } else {
1055 uint8_t *ptr = dst[0];
17343e39 1056 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
ee555de7 1057 uint8_t tr_top[4] = { 127, 127, 127, 127 };
3b636f21
DC
1058
1059 // all blocks on the right edge of the macroblock use bottom edge
1060 // the top macroblock for their topright edge
1061 uint8_t *tr_right = ptr - s->linesize + 16;
1062
1063 // if we're on the right edge of the frame, said edge is extended
1064 // from the top macroblock
7148da48
RB
1065 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1066 mb_x == s->mb_width-1) {
bb591566 1067 tr = tr_right[-1]*0x01010101u;
3b636f21
DC
1068 tr_right = (uint8_t *)&tr;
1069 }
1070
b74f70d6 1071 if (mb->skip)
951455c1 1072 AV_ZERO128(td->non_zero_count_cache);
b74f70d6 1073
3b636f21
DC
1074 for (y = 0; y < 4; y++) {
1075 uint8_t *topright = ptr + 4 - s->linesize;
1076 for (x = 0; x < 4; x++) {
ee555de7
RB
1077 int copy = 0, linesize = s->linesize;
1078 uint8_t *dst = ptr+4*x;
1079 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1080
1081 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1082 topright = tr_top;
1083 } else if (x == 3)
3b636f21
DC
1084 topright = tr_right;
1085
ee555de7
RB
1086 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1087 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1088 if (copy) {
1089 dst = copy_dst + 12;
1090 linesize = 8;
1091 if (!(mb_y + y)) {
1092 copy_dst[3] = 127U;
9d4bdcb7 1093 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
ee555de7 1094 } else {
9d4bdcb7 1095 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
ee555de7
RB
1096 if (!(mb_x + x)) {
1097 copy_dst[3] = 129U;
1098 } else {
1099 copy_dst[3] = ptr[4*x-s->linesize-1];
1100 }
1101 }
1102 if (!(mb_x + x)) {
1103 copy_dst[11] =
1104 copy_dst[19] =
1105 copy_dst[27] =
1106 copy_dst[35] = 129U;
1107 } else {
1108 copy_dst[11] = ptr[4*x -1];
1109 copy_dst[19] = ptr[4*x+s->linesize -1];
1110 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1111 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1112 }
1113 }
1114 } else {
1115 mode = intra4x4[x];
1116 }
1117 s->hpc.pred4x4[mode](dst, topright, linesize);
1118 if (copy) {
9d4bdcb7
RB
1119 AV_COPY32(ptr+4*x , copy_dst+12);
1120 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1121 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1122 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
ee555de7 1123 }
3b636f21 1124
951455c1 1125 nnz = td->non_zero_count_cache[y][x];
3b636f21
DC
1126 if (nnz) {
1127 if (nnz == 1)
951455c1 1128 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
3b636f21 1129 else
951455c1 1130 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
3b636f21
DC
1131 }
1132 topright += 4;
1133 }
1134
1135 ptr += 4*s->linesize;
d2840fa4 1136 intra4x4 += 4;
3b636f21
DC
1137 }
1138 }
1139
ee555de7 1140 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
17343e39 1141 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
ee555de7 1142 } else {
17343e39 1143 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
ee555de7 1144 }
3b636f21
DC
1145 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1146 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
9ac831c2 1147
951455c1 1148 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
9ac831c2
DC
1149 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1150 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1151 s->filter.simple, 0);
3b636f21
DC
1152}
1153
64233e70
JGG
/* Per-fraction lookup used by the MC functions, indexed by the 3-bit
 * subpel fraction of a motion vector component. */
static const uint8_t subpel_idx[3][8] = {
    // [0]: nr. of left extra pixels, also function pointer index
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    // [1]: nr. of extra pixels required
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    // [2]: nr. of right extra pixels
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1160
3b636f21 1161/**
3c432e11 1162 * luma MC function
3b636f21
DC
1163 *
1164 * @param s VP8 decoding context
3b636f21 1165 * @param dst target buffer for block data at block position
24c9baba 1166 * @param ref reference picture buffer at origin (0, 0)
3b636f21
DC
1167 * @param mv motion vector (relative to block position) to get pixel data from
1168 * @param x_off horizontal position of block from origin (0, 0)
1169 * @param y_off vertical position of block from origin (0, 0)
1170 * @param block_w width of block (16, 8 or 4)
1171 * @param block_h height of block (always same as block_w)
1172 * @param width width of src/dst plane data
1173 * @param height height of src/dst plane data
1174 * @param linesize size of a single line of plane data, including padding
e394953e 1175 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
3b636f21 1176 */
414ac27d 1177static av_always_inline
951455c1
DK
1178void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1179 AVFrame *ref, const VP56mv *mv,
64233e70
JGG
1180 int x_off, int y_off, int block_w, int block_h,
1181 int width, int height, int linesize,
1182 vp8_mc_func mc_func[3][3])
3b636f21 1183{
4773d904
RB
1184 uint8_t *src = ref->data[0];
1185
c0498b30 1186 if (AV_RN32A(mv)) {
64233e70
JGG
1187
1188 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1189 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1190
1191 x_off += mv->x >> 2;
1192 y_off += mv->y >> 2;
c0498b30
JGG
1193
1194 // edge emulation
4773d904 1195 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
c0498b30 1196 src += y_off * linesize + x_off;
64233e70
JGG
1197 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1198 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
951455c1 1199 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
64233e70
JGG
1200 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1201 x_off - mx_idx, y_off - my_idx, width, height);
951455c1 1202 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
c0498b30
JGG
1203 }
1204 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
4773d904
RB
1205 } else {
1206 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
c0498b30 1207 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
4773d904 1208 }
3b636f21
DC
1209}
1210
3c432e11
DB
1211/**
1212 * chroma MC function
1213 *
1214 * @param s VP8 decoding context
1215 * @param dst1 target buffer for block data at block position (U plane)
1216 * @param dst2 target buffer for block data at block position (V plane)
1217 * @param ref reference picture buffer at origin (0, 0)
1218 * @param mv motion vector (relative to block position) to get pixel data from
1219 * @param x_off horizontal position of block from origin (0, 0)
1220 * @param y_off vertical position of block from origin (0, 0)
1221 * @param block_w width of block (16, 8 or 4)
1222 * @param block_h height of block (always same as block_w)
1223 * @param width width of src/dst plane data
1224 * @param height height of src/dst plane data
1225 * @param linesize size of a single line of plane data, including padding
1226 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1227 */
414ac27d 1228static av_always_inline
951455c1
DK
1229void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1230 AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
64233e70
JGG
1231 int block_w, int block_h, int width, int height, int linesize,
1232 vp8_mc_func mc_func[3][3])
1233{
4773d904
RB
1234 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1235
64233e70
JGG
1236 if (AV_RN32A(mv)) {
1237 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1238 int my = mv->y&7, my_idx = subpel_idx[0][my];
1239
1240 x_off += mv->x >> 3;
1241 y_off += mv->y >> 3;
1242
1243 // edge emulation
1244 src1 += y_off * linesize + x_off;
1245 src2 += y_off * linesize + x_off;
4773d904 1246 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
64233e70
JGG
1247 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1248 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
951455c1 1249 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
64233e70
JGG
1250 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1251 x_off - mx_idx, y_off - my_idx, width, height);
951455c1 1252 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
64233e70
JGG
1253 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1254
951455c1 1255 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
64233e70
JGG
1256 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1257 x_off - mx_idx, y_off - my_idx, width, height);
951455c1 1258 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
64233e70
JGG
1259 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1260 } else {
1261 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1262 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1263 }
1264 } else {
4773d904 1265 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
64233e70
JGG
1266 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1267 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1268 }
1269}
1270
1271static av_always_inline
951455c1 1272void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
414ac27d
JGG
1273 AVFrame *ref_frame, int x_off, int y_off,
1274 int bx_off, int by_off,
1275 int block_w, int block_h,
1276 int width, int height, VP56mv *mv)
7c4dcf81
RB
1277{
1278 VP56mv uvmv = *mv;
1279
1280 /* Y */
951455c1 1281 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
4773d904 1282 ref_frame, mv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1283 block_w, block_h, width, height, s->linesize,
1284 s->put_pixels_tab[block_w == 8]);
7c4dcf81
RB
1285
1286 /* U/V */
1287 if (s->profile == 3) {
1288 uvmv.x &= ~7;
1289 uvmv.y &= ~7;
1290 }
1291 x_off >>= 1; y_off >>= 1;
1292 bx_off >>= 1; by_off >>= 1;
1293 width >>= 1; height >>= 1;
1294 block_w >>= 1; block_h >>= 1;
951455c1 1295 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
4773d904
RB
1296 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1297 &uvmv, x_off + bx_off, y_off + by_off,
64233e70
JGG
1298 block_w, block_h, width, height, s->uvlinesize,
1299 s->put_pixels_tab[1 + (block_w == 4)]);
7c4dcf81
RB
1300}
1301
d864dee8
JGG
1302/* Fetch pixels for estimated mv 4 macroblocks ahead.
1303 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
414ac27d 1304static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
d864dee8 1305{
ef38842f
JGG
1306 /* Don't prefetch refs that haven't been used very often this frame. */
1307 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
c4211046 1308 int x_off = mb_x << 4, y_off = mb_y << 4;
7e13022a
JGG
1309 int mx = (mb->mv.x>>2) + x_off + 8;
1310 int my = (mb->mv.y>>2) + y_off;
c4211046
JGG
1311 uint8_t **src= s->framep[ref]->data;
1312 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
4773d904
RB
1313 /* For threading, a ff_thread_await_progress here might be useful, but
1314 * it actually slows down the decoder. Since a bad prefetch doesn't
1315 * generate bad decoder output, we don't run it here. */
c4211046
JGG
1316 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1317 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1318 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1319 }
d864dee8
JGG
1320}
1321
3b636f21
DC
1322/**
1323 * Apply motion vectors to prediction buffer, chapter 18.
1324 */
414ac27d 1325static av_always_inline
951455c1
DK
1326void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1327 VP8Macroblock *mb, int mb_x, int mb_y)
3b636f21
DC
1328{
1329 int x_off = mb_x << 4, y_off = mb_y << 4;
1330 int width = 16*s->mb_width, height = 16*s->mb_height;
d292c345
JGG
1331 AVFrame *ref = s->framep[mb->ref_frame];
1332 VP56mv *bmv = mb->bmv;
3b636f21 1333
73be29b0
JGG
1334 switch (mb->partitioning) {
1335 case VP8_SPLITMVMODE_NONE:
951455c1 1336 vp8_mc_part(s, td, dst, ref, x_off, y_off,
7c4dcf81 1337 0, 0, 16, 16, width, height, &mb->mv);
73be29b0 1338 break;
7c4dcf81 1339 case VP8_SPLITMVMODE_4x4: {
3b636f21 1340 int x, y;
7c4dcf81 1341 VP56mv uvmv;
3b636f21
DC
1342
1343 /* Y */
1344 for (y = 0; y < 4; y++) {
1345 for (x = 0; x < 4; x++) {
951455c1 1346 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
4773d904 1347 ref, &bmv[4*y + x],
64233e70
JGG
1348 4*x + x_off, 4*y + y_off, 4, 4,
1349 width, height, s->linesize,
1350 s->put_pixels_tab[2]);
3b636f21
DC
1351 }
1352 }
1353
1354 /* U/V */
1355 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1356 for (y = 0; y < 2; y++) {
1357 for (x = 0; x < 2; x++) {
1358 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1359 mb->bmv[ 2*y * 4 + 2*x+1].x +
1360 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1361 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1362 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1363 mb->bmv[ 2*y * 4 + 2*x+1].y +
1364 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1365 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
8f910a56
SG
1366 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1367 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
3b636f21
DC
1368 if (s->profile == 3) {
1369 uvmv.x &= ~7;
1370 uvmv.y &= ~7;
1371 }
951455c1 1372 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
4773d904 1373 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
64233e70
JGG
1374 4*x + x_off, 4*y + y_off, 4, 4,
1375 width, height, s->uvlinesize,
1376 s->put_pixels_tab[2]);
3b636f21
DC
1377 }
1378 }
7c4dcf81
RB
1379 break;
1380 }
1381 case VP8_SPLITMVMODE_16x8:
951455c1 1382 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1383 0, 0, 16, 8, width, height, &bmv[0]);
951455c1 1384 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1385 0, 8, 16, 8, width, height, &bmv[1]);
7c4dcf81
RB
1386 break;
1387 case VP8_SPLITMVMODE_8x16:
951455c1 1388 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1389 0, 0, 8, 16, width, height, &bmv[0]);
951455c1 1390 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1391 8, 0, 8, 16, width, height, &bmv[1]);
7c4dcf81
RB
1392 break;
1393 case VP8_SPLITMVMODE_8x8:
951455c1 1394 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1395 0, 0, 8, 8, width, height, &bmv[0]);
951455c1 1396 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1397 8, 0, 8, 8, width, height, &bmv[1]);
951455c1 1398 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1399 0, 8, 8, 8, width, height, &bmv[2]);
951455c1 1400 vp8_mc_part(s, td, dst, ref, x_off, y_off,
d292c345 1401 8, 8, 8, 8, width, height, &bmv[3]);
7c4dcf81 1402 break;
3b636f21
DC
1403 }
1404}
1405
951455c1
DK
1406static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1407 uint8_t *dst[3], VP8Macroblock *mb)
3b636f21 1408{
3df56f41 1409 int x, y, ch;
3b636f21 1410
8a467b2d
JGG
1411 if (mb->mode != MODE_I4x4) {
1412 uint8_t *y_dst = dst[0];
3b636f21 1413 for (y = 0; y < 4; y++) {
951455c1 1414 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
3df56f41
JGG
1415 if (nnz4) {
1416 if (nnz4&~0x01010101) {
8a467b2d 1417 for (x = 0; x < 4; x++) {
62457f90 1418 if ((uint8_t)nnz4 == 1)
951455c1 1419 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
62457f90 1420 else if((uint8_t)nnz4 > 1)
951455c1 1421 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
62457f90
JGG
1422 nnz4 >>= 8;
1423 if (!nnz4)
1424 break;
8a467b2d
JGG
1425 }
1426 } else {
951455c1 1427 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
3b636f21
DC
1428 }
1429 }
1430 y_dst += 4*s->linesize;
1431 }
8a467b2d 1432 }
3b636f21 1433
8a467b2d 1434 for (ch = 0; ch < 2; ch++) {
951455c1 1435 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
3ae079a3 1436 if (nnz4) {
8a467b2d 1437 uint8_t *ch_dst = dst[1+ch];
3ae079a3
JGG
1438 if (nnz4&~0x01010101) {
1439 for (y = 0; y < 2; y++) {
1440 for (x = 0; x < 2; x++) {
62457f90 1441 if ((uint8_t)nnz4 == 1)
951455c1 1442 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
62457f90 1443 else if((uint8_t)nnz4 > 1)
951455c1 1444 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
62457f90
JGG
1445 nnz4 >>= 8;
1446 if (!nnz4)
628b48db 1447 goto chroma_idct_end;
8a467b2d 1448 }
3ae079a3 1449 ch_dst += 4*s->uvlinesize;
8a467b2d 1450 }
3ae079a3 1451 } else {
951455c1 1452 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
3b636f21
DC
1453 }
1454 }
628b48db 1455chroma_idct_end: ;
3b636f21
DC
1456 }
1457}
1458
414ac27d 1459static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
3b636f21
DC
1460{
1461 int interior_limit, filter_level;
1462
1463 if (s->segmentation.enabled) {
17343e39 1464 filter_level = s->segmentation.filter_level[mb->segment];
3b636f21
DC
1465 if (!s->segmentation.absolute_vals)
1466 filter_level += s->filter.level;
1467 } else
1468 filter_level = s->filter.level;
1469
1470 if (s->lf_delta.enabled) {
1471 filter_level += s->lf_delta.ref[mb->ref_frame];
dd18c9a0 1472 filter_level += s->lf_delta.mode[mb->mode];
3b636f21 1473 }
a1b227bb 1474
1550f45a 1475 filter_level = av_clip_uintp2(filter_level, 6);
3b636f21
DC
1476
1477 interior_limit = filter_level;
1478 if (s->filter.sharpness) {
8a2c99b4 1479 interior_limit >>= (s->filter.sharpness + 3) >> 2;
3b636f21
DC
1480 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1481 }
1482 interior_limit = FFMAX(interior_limit, 1);
1483
968570d6
JGG
1484 f->filter_level = filter_level;
1485 f->inner_limit = interior_limit;
c55e0d34 1486 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
3b636f21
DC
1487}
1488
414ac27d 1489static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1490{
968570d6
JGG
1491 int mbedge_lim, bedge_lim, hev_thresh;
1492 int filter_level = f->filter_level;
1493 int inner_limit = f->inner_limit;
c55e0d34 1494 int inner_filter = f->inner_filter;
145d3186
JGG
1495 int linesize = s->linesize;
1496 int uvlinesize = s->uvlinesize;
79dec154
JGG
1497 static const uint8_t hev_thresh_lut[2][64] = {
1498 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1499 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1500 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1501 3, 3, 3, 3 },
1502 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1503 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1504 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1505 2, 2, 2, 2 }
1506 };
3b636f21 1507
3b636f21
DC
1508 if (!filter_level)
1509 return;
1510
79dec154
JGG
1511 bedge_lim = 2*filter_level + inner_limit;
1512 mbedge_lim = bedge_lim + 4;
968570d6 1513
79dec154 1514 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
5245c04d 1515
3b636f21 1516 if (mb_x) {
145d3186 1517 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
3facfc99 1518 mbedge_lim, inner_limit, hev_thresh);
145d3186 1519 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1520 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1521 }
1522
c55e0d34 1523 if (inner_filter) {
145d3186
JGG
1524 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1525 inner_limit, hev_thresh);
1526 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1527 inner_limit, hev_thresh);
1528 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1529 inner_limit, hev_thresh);
1530 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1531 uvlinesize, bedge_lim,
1532 inner_limit, hev_thresh);
3b636f21
DC
1533 }
1534
1535 if (mb_y) {
145d3186 1536 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
3facfc99 1537 mbedge_lim, inner_limit, hev_thresh);
145d3186 1538 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
3facfc99 1539 mbedge_lim, inner_limit, hev_thresh);
3b636f21
DC
1540 }
1541
c55e0d34 1542 if (inner_filter) {
145d3186
JGG
1543 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1544 linesize, bedge_lim,
1545 inner_limit, hev_thresh);
1546 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1547 linesize, bedge_lim,
1548 inner_limit, hev_thresh);
1549 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1550 linesize, bedge_lim,
1551 inner_limit, hev_thresh);
1552 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1553 dst[2] + 4 * uvlinesize,
1554 uvlinesize, bedge_lim,
3facfc99 1555 inner_limit, hev_thresh);
3b636f21
DC
1556 }
1557}
1558
414ac27d 1559static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
3b636f21 1560{
968570d6
JGG
1561 int mbedge_lim, bedge_lim;
1562 int filter_level = f->filter_level;
1563 int inner_limit = f->inner_limit;
c55e0d34 1564 int inner_filter = f->inner_filter;
145d3186 1565 int linesize = s->linesize;
3b636f21 1566
3b636f21
DC
1567 if (!filter_level)
1568 return;
1569
79dec154
JGG
1570 bedge_lim = 2*filter_level + inner_limit;
1571 mbedge_lim = bedge_lim + 4;
3b636f21
DC
1572
1573 if (mb_x)
145d3186 1574 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1575 if (inner_filter) {
145d3186
JGG
1576 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1577 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1578 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
3b636f21
DC
1579 }
1580
1581 if (mb_y)
145d3186 1582 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
c55e0d34 1583 if (inner_filter) {
145d3186
JGG
1584 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1585 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1586 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
3b636f21
DC
1587 }
1588}
1589
ce42a048
RB
1590static void release_queued_segmaps(VP8Context *s, int is_close)
1591{
1592 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1593 while (s->num_maps_to_be_freed > leave_behind)
1594 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1595 s->maps_are_invalid = 0;
1596}
1597
337ade52 1598#define MARGIN (16 << 2)
951455c1
DK
1599static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
1600 AVFrame *prev_frame)
337ade52
DK
1601{
1602 VP8Context *s = avctx->priv_data;
951455c1
DK
1603 int mb_x, mb_y;
1604
1605 s->mv_min.y = -MARGIN;
1606 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1607 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1608 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1609 int mb_xy = mb_y*s->mb_width;
1610
1611 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1612
1613 s->mv_min.x = -MARGIN;
1614 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1615 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1616 if (mb_y == 0)
1617 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1618 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1619 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
1620 s->mv_min.x -= 64;
1621 s->mv_max.x -= 64;
1622 }
1623 s->mv_min.y -= 64;
1624 s->mv_max.y -= 64;
1625 }
1626}
1627
#if HAVE_THREADS
/**
 * Block until thread otd has reached at least the given position.
 *
 * A position is packed as (mb_y << 16) | (mb_x & 0xFFFF). While waiting,
 * td->wait_mb_pos holds the awaited position; it is reset to INT_MAX
 * afterwards. Expands to a single statement; the caller supplies the
 * terminating semicolon.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            while ((otd)->thread_mb_pos < tmp)\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

/**
 * Publish the position reached by td and wake up waiters if needed.
 *
 * Besides its arguments this macro reads avctx, num_jobs, prev_td and
 * next_td from the enclosing scope. The condition variable is only
 * broadcast with slice threading and more than one job, and only when a
 * neighbouring thread is waiting for a position that has been reached
 * (or when a neighbour pointer is NULL). Expands to a single statement;
 * the caller supplies the terminating semicolon.
 */
#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
#else
/* Without thread support both macros are no-ops that still behave like a
 * single statement. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
951455c1
DK
1664
1665static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1666 int jobnr, int threadnr)
1667{
1668 VP8Context *s = avctx->priv_data;
1669 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1670 int mb_y = td->thread_mb_pos>>16;
337ade52 1671 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
951455c1
DK
1672 int num_jobs = s->num_jobs;
1673 AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
1674 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1675 VP8Macroblock *mb;
337ade52
DK
1676 uint8_t *dst[3] = {
1677 curframe->data[0] + 16*mb_y*s->linesize,
1678 curframe->data[1] + 8*mb_y*s->uvlinesize,
1679 curframe->data[2] + 8*mb_y*s->uvlinesize
1680 };
951455c1
DK
1681 if (mb_y == 0) prev_td = td;
1682 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1683 if (mb_y == s->mb_height-1) next_td = td;
1684 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1685 if (s->mb_layout == 1)
1686 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1687 else {
1688 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1689 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1690 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1691 }
1692
1693 memset(td->left_nnz, 0, sizeof(td->left_nnz));
337ade52
DK
1694 // left edge of 129 for intra prediction
1695 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1696 for (i = 0; i < 3; i++)
1697 for (y = 0; y < 16>>!!i; y++)
1698 dst[i][y*curframe->linesize[i]-1] = 129;
951455c1 1699 if (mb_y == 1) {
337ade52 1700 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
951455c1 1701 }
337ade52
DK
1702 }
1703
1704 s->mv_min.x = -MARGIN;
1705 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1706
1707 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
951455c1
DK
1708 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1709 if (prev_td != td) {
1710 if (threadnr != 0) {
1711 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1712 } else {
1713 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1714 }
1715 }
1716
337ade52
DK
1717 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1718 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1719
951455c1
DK
1720 if (!s->mb_layout)
1721 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1722 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
337ade52
DK
1723
1724 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1725
1726 if (!mb->skip)
951455c1 1727 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
337ade52
DK
1728
1729 if (mb->mode <= MODE_I4x4)
951455c1 1730 intra_predict(s, td, dst, mb, mb_x, mb_y);
337ade52 1731 else
951455c1 1732 inter_predict(s, td, dst, mb, mb_x, mb_y);
337ade52
DK
1733
1734 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1735
1736 if (!mb->skip) {
951455c1 1737 idct_mb(s, td, dst, mb);
337ade52 1738 } else {
951455c1 1739 AV_ZERO64(td->left_nnz);
337ade52
DK
1740 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1741
1742 // Reset DC block predictors if they would exist if the mb had coefficients
1743 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
951455c1 1744 td->left_nnz[8] = 0;
337ade52
DK
1745 s->top_nnz[mb_x][8] = 0;
1746 }
1747 }
1748
1749 if (s->deblock_filter)
951455c1
DK
1750 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1751
1752 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1753 if (s->filter.simple)
1754 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1755 else
1756 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1757 }
337ade52
DK
1758
1759 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1760
1761 dst[0] += 16;
1762 dst[1] += 8;
1763 dst[2] += 8;
1764 s->mv_min.x -= 64;
1765 s->mv_max.x -= 64;
951455c1
DK
1766
1767 if (mb_x == s->mb_width+1) {
1768 update_pos(td, mb_y, s->mb_width+3);
1769 } else {
1770 update_pos(td, mb_y, mb_x);
1771 }
337ade52 1772 }
951455c1
DK
1773}
1774
1775static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1776 int jobnr, int threadnr)
1777{
1778 VP8Context *s = avctx->priv_data;
1779 VP8ThreadData *td = &s->thread_data[threadnr];
1780 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1781 AVFrame *curframe = s->curframe;
1782 VP8Macroblock *mb;
1783 VP8ThreadData *prev_td, *next_td;
1784 uint8_t *dst[3] = {
1785 curframe->data[0] + 16*mb_y*s->linesize,
1786 curframe->data[1] + 8*mb_y*s->uvlinesize,
1787 curframe->data[2] + 8*mb_y*s->uvlinesize
1788 };
1789
1790 if (s->mb_layout == 1)
1791 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1792 else
1793 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1794
1795 if (mb_y == 0) prev_td = td;
1796 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1797 if (mb_y == s->mb_height-1) next_td = td;
1798 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1799
1800 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1801 VP8FilterStrength *f = &td->filter_strength[mb_x];
1802 if (prev_td != td) {
1803 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1804 }
1805 if (next_td != td)
1806 if (next_td != &s->thread_data[0]) {
1807 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1808 }
1809
1810 if (num_jobs == 1) {
1811 if (s->filter.simple)
1812 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1813 else
1814 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1815 }
1816
337ade52 1817 if (s->filter.simple)
951455c1 1818 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
337ade52 1819 else
951455c1
DK
1820 filter_mb(s, dst, f, mb_x, mb_y);
1821 dst[0] += 16;
1822 dst[1] += 8;
1823 dst[2] += 8;
1824
1825 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1826 }
1827}
1828
1829static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1830 int jobnr, int threadnr)
1831{
1832 VP8Context *s = avctx->priv_data;
1833 VP8ThreadData *td = &s->thread_data[jobnr];
1834 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1835 AVFrame *curframe = s->curframe;
1836 int mb_y, num_jobs = s->num_jobs;
1837 td->thread_nr = threadnr;
1838 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1839 if (mb_y >= s->mb_height) break;
1840 td->thread_mb_pos = mb_y<<16;
1841 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1842 if (s->deblock_filter)
1843 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1844 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1845
1846 s->mv_min.y -= 64;
1847 s->mv_max.y -= 64;
1848
1849 if (avctx->active_thread_type == FF_THREAD_FRAME)
1850 ff_thread_report_progress(curframe, mb_y, 0);
337ade52 1851 }
951455c1
DK
1852
1853 return 0;
337ade52
DK
1854}
1855
3b636f21
DC
1856static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1857 AVPacket *avpkt)
1858{
1859 VP8Context *s = avctx->priv_data;
951455c1 1860 int ret, i, referenced, num_jobs;
3b636f21 1861 enum AVDiscard skip_thresh;
e02dec25 1862 AVFrame *av_uninit(curframe), *prev_frame;
3b636f21 1863
ce42a048
RB
1864 release_queued_segmaps(s, 0);
1865
3b636f21 1866 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
fb90785e 1867 goto err;
3b636f21 1868
e02dec25
AC
1869 prev_frame = s->framep[VP56_FRAME_CURRENT];
1870
3b636f21
DC
1871 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1872 || s->update_altref == VP56_FRAME_CURRENT;
1873
1874 skip_thresh = !referenced ? AVDISCARD_NONREF :
1875 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1876
1877 if (avctx->skip_frame >= skip_thresh) {
1878 s->invisible = 1;
fb90785e 1879 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
3b636f21
DC
1880 goto skip_decode;
1881 }
9ac831c2 1882 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
3b636f21 1883
4773d904
RB
1884 // release no longer referenced frames
1885 for (i = 0; i < 5; i++)
1886 if (s->frames[i].data[0] &&
1887 &s->frames[i] != prev_frame &&
1888 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1889 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1890 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
bfa0f965 1891 vp8_release_frame(s, &s->frames[i], 1, 0);
4773d904
RB
1892
1893 // find a free buffer
1894 for (i = 0; i < 5; i++)
1895 if (&s->frames[i] != prev_frame &&
1896 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
3b636f21
DC
1897 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1898 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1899 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1900 break;
1901 }
4773d904
RB
1902 if (i == 5) {
1903 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1904 abort();
1905 }
3b636f21 1906 if (curframe->data[0])
bfa0f965 1907 vp8_release_frame(s, curframe, 1, 0);
3b636f21 1908
fb90785e
RB
1909 // Given that arithmetic probabilities are updated every frame, it's quite likely
1910 // that the values we have on a random interframe are complete junk if we didn't
1911 // start decode on a keyframe. So just don't display anything rather than junk.
1912 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1913 !s->framep[VP56_FRAME_GOLDEN] ||
1914 !s->framep[VP56_FRAME_GOLDEN2])) {
1915 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1916 ret = AVERROR_INVALIDDATA;
1917 goto err;
1918 }
1919
3b636f21 1920 curframe->key_frame = s->keyframe;
975a1447 1921 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3b636f21 1922 curframe->reference = referenced ? 3 : 0;
ce42a048 1923 if ((ret = vp8_alloc_frame(s, curframe))) {
3b636f21 1924 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
fb90785e 1925 goto err;
3b636f21
DC
1926 }
1927
4773d904
RB
1928 // check if golden and altref are swapped
1929 if (s->update_altref != VP56_FRAME_NONE) {
1930 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1931 } else {
1932 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1933 }
1934 if (s->update_golden != VP56_FRAME_NONE) {
1935 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1936 } else {
1937 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1938 }
1939 if (s->update_last) {
1940 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1941 } else {
1942 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1943 }
1944 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1945
1946 ff_thread_finish_setup(avctx);
1947
3b636f21
DC
1948 s->linesize = curframe->linesize[0];
1949 s->uvlinesize = curframe->linesize[1];
1950
951455c1
DK
1951 if (!s->thread_data[0].edge_emu_buffer)
1952 for (i = 0; i < MAX_THREADS; i++)
1953 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
3b636f21
DC
1954
1955 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
aa93c52c 1956 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
951455c1
DK
1957 if (!s->mb_layout)
1958 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1959 if (!s->mb_layout && s->keyframe)
1960 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
c55e0d34 1961
3b636f21 1962 // top edge of 127 for intra prediction
ee555de7
RB
1963 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1964 s->top_border[0][15] = s->top_border[0][23] = 127;
1965 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1966 }
c4211046 1967 memset(s->ref_count, 0, sizeof(s->ref_count));
3b636f21 1968
7634771e 1969
951455c1
DK
1970 // Make sure the previous frame has read its segmentation map,
1971 // if we re-use the same map.
1972 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1973 ff_thread_await_progress(prev_frame, 1, 0);
7634771e 1974
951455c1
DK
1975 if (s->mb_layout == 1)
1976 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
4773d904 1977
951455c1
DK
1978 if (avctx->active_thread_type == FF_THREAD_FRAME)
1979 num_jobs = 1;
1980 else
1981 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1982 s->num_jobs = num_jobs;
1983 s->curframe = curframe;
1984 s->prev_frame = prev_frame;
1985 s->mv_min.y = -MARGIN;
1986 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1987 for (i = 0; i < MAX_THREADS; i++) {
1988 s->thread_data[i].thread_mb_pos = 0;
1989 s->thread_data[i].wait_mb_pos = INT_MAX;
1990 }
1991 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
3b636f21 1992
4773d904 1993 ff_thread_report_progress(curframe, INT_MAX, 0);
fb90785e
RB
1994 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1995
3b636f21
DC
1996skip_decode:
1997 // if future frames don't use the updated probabilities,
1998 // reset them to the values we saved
1999 if (!s->update_probabilities)
2000 s->prob[0] = s->prob[1];
2001
3b636f21 2002 if (!s->invisible) {
4773d904 2003 *(AVFrame*)data = *curframe;
3b636f21
DC
2004 *data_size = sizeof(AVFrame);
2005 }
2006
2007 return avpkt->size;
fb90785e
RB
2008err:
2009 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2010 return ret;
3b636f21
DC
2011}
2012
2013static av_cold int vp8_decode_init(AVCodecContext *avctx)
2014{
2015 VP8Context *s = avctx->priv_data;
2016
2017 s->avctx = avctx;
2018 avctx->pix_fmt = PIX_FMT_YUV420P;
2019
9cf0841e 2020 ff_dsputil_init(&s->dsp, avctx);
76741b0e 2021 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
3b636f21
DC
2022 ff_vp8dsp_init(&s->vp8dsp);
2023
3b636f21
DC
2024 return 0;
2025}
2026
2027static av_cold int vp8_decode_free(AVCodecContext *avctx)
2028{
bfa0f965 2029 vp8_decode_flush_impl(avctx, 0, 1, 1);
ce42a048 2030 release_queued_segmaps(avctx->priv_data, 1);
3b636f21
DC
2031 return 0;
2032}
2033
4773d904
RB
2034static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2035{
2036 VP8Context *s = avctx->priv_data;
2037
2038 s->avctx = avctx;
2039
2040 return 0;
2041}
2042
/*
 * Translate a frame pointer into s_src->frames[] to the pointer with the same
 * index in s->frames[]; NULL stays NULL.
 * Relies on `s` and `s_src` being in scope at the point of use.
 * The argument and the whole expansion are parenthesized so the macro can be
 * given any pointer expression and used inside a larger expression.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2045
2046static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2047{
2048 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2049
56535793
RB
2050 if (s->macroblocks_base &&
2051 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2052 free_buffers(s);
e02dec25 2053 s->maps_are_invalid = 1;
82a0497c
RB
2054 s->mb_width = s_src->mb_width;
2055 s->mb_height = s_src->mb_height;
56535793
RB
2056 }
2057
4773d904
RB
2058 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2059 s->segmentation = s_src->segmentation;
2060 s->lf_delta = s_src->lf_delta;
2061 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2062
2063 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2064 s->framep[0] = REBASE(s_src->next_framep[0]);
2065 s->framep[1] = REBASE(s_src->next_framep[1]);
2066 s->framep[2] = REBASE(s_src->next_framep[2]);
2067 s->framep[3] = REBASE(s_src->next_framep[3]);
2068
2069 return 0;
2070}
2071
d36beb3f 2072AVCodec ff_vp8_decoder = {
00c3b67b
MS
2073 .name = "vp8",
2074 .type = AVMEDIA_TYPE_VIDEO,
2075 .id = CODEC_ID_VP8,
2076 .priv_data_size = sizeof(VP8Context),
2077 .init = vp8_decode_init,
2078 .close = vp8_decode_free,
2079 .decode = vp8_decode_frame,
951455c1 2080 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
00c3b67b
MS
2081 .flush = vp8_decode_flush,
2082 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
4773d904
RB
2083 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2084 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
3b636f21 2085};