vp8: Enclose pthread function calls in ifdefs
[libav.git] / libavcodec / vp8.c
1 /*
2 * VP8 compatible video decoder
3 *
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
8 *
9 * This file is part of Libav.
10 *
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 #include "libavutil/imgutils.h"
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "vp8.h"
30 #include "vp8data.h"
31 #include "rectangle.h"
32 #include "thread.h"
33
34 #if ARCH_ARM
35 # include "arm/vp8.h"
36 #endif
37
38 static void free_buffers(VP8Context *s)
39 {
40 int i;
41 if (s->thread_data)
42 for (i = 0; i < MAX_THREADS; i++) {
43 av_freep(&s->thread_data[i].filter_strength);
44 av_freep(&s->thread_data[i].edge_emu_buffer);
45 }
46 av_freep(&s->thread_data);
47 av_freep(&s->macroblocks_base);
48 av_freep(&s->intra4x4_pred_mode_top);
49 av_freep(&s->top_nnz);
50 av_freep(&s->top_border);
51
52 s->macroblocks = NULL;
53 }
54
55 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
56 {
57 int ret;
58 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
59 return ret;
60 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
61 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
62 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
63 ff_thread_release_buffer(s->avctx, f);
64 return AVERROR(ENOMEM);
65 }
66 return 0;
67 }
68
69 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
70 {
71 if (f->ref_index[0]) {
72 if (prefer_delayed_free) {
73 /* Upon a size change, we want to free the maps but other threads may still
74 * be using them, so queue them. Upon a seek, all threads are inactive so
75 * we want to cache one to prevent re-allocation in the next decoding
76 * iteration, but the rest we can free directly. */
77 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
78 if (s->num_maps_to_be_freed < max_queued_maps) {
79 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
80 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
81 av_free(f->ref_index[0]);
82 } /* else: MEMLEAK (should never happen, but better that than crash) */
83 f->ref_index[0] = NULL;
84 } else /* vp8_decode_free() */ {
85 av_free(f->ref_index[0]);
86 }
87 }
88 ff_thread_release_buffer(s->avctx, f);
89 }
90
91 static void vp8_decode_flush_impl(AVCodecContext *avctx,
92 int prefer_delayed_free, int can_direct_free, int free_mem)
93 {
94 VP8Context *s = avctx->priv_data;
95 int i;
96
97 if (!avctx->internal->is_copy) {
98 for (i = 0; i < 5; i++)
99 if (s->frames[i].data[0])
100 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
101 }
102 memset(s->framep, 0, sizeof(s->framep));
103
104 if (free_mem) {
105 free_buffers(s);
106 s->maps_are_invalid = 1;
107 }
108 }
109
110 static void vp8_decode_flush(AVCodecContext *avctx)
111 {
112 vp8_decode_flush_impl(avctx, 1, 1, 0);
113 }
114
115 static int update_dimensions(VP8Context *s, int width, int height)
116 {
117 AVCodecContext *avctx = s->avctx;
118 int i;
119
120 if (width != s->avctx->width ||
121 height != s->avctx->height) {
122 if (av_image_check_size(width, height, 0, s->avctx))
123 return AVERROR_INVALIDDATA;
124
125 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
126
127 avcodec_set_dimensions(s->avctx, width, height);
128 }
129
130 s->mb_width = (s->avctx->coded_width +15) / 16;
131 s->mb_height = (s->avctx->coded_height+15) / 16;
132
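/* Added note: macroblock storage depends on the threading mode (s->mb_layout):
 * layout 0 (frame threading or a single thread) keeps only a small sliding
 * window of rows, while layout 1 (sliced threading) allocates the full
 * (mb_width+2)*(mb_height+2) grid so rows owned by other threads stay
 * addressable. */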
133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
134 if (!s->mb_layout) { // Frame threading and one thread
135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
137 }
138 else // Sliced threading
139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
143
144 for (i = 0; i < MAX_THREADS; i++) {
145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
146 #if HAVE_THREADS
147 pthread_mutex_init(&s->thread_data[i].lock, NULL);
148 pthread_cond_init(&s->thread_data[i].cond, NULL);
149 #endif
150 }
151
152 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
153 (!s->intra4x4_pred_mode_top && !s->mb_layout))
154 return AVERROR(ENOMEM);
155
156 s->macroblocks = s->macroblocks_base + 1;
157
158 return 0;
159 }
160
161 static void parse_segment_info(VP8Context *s)
162 {
163 VP56RangeCoder *c = &s->c;
164 int i;
165
166 s->segmentation.update_map = vp8_rac_get(c);
167
168 if (vp8_rac_get(c)) { // update segment feature data
169 s->segmentation.absolute_vals = vp8_rac_get(c);
170
171 for (i = 0; i < 4; i++)
172 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
173
174 for (i = 0; i < 4; i++)
175 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
176 }
177 if (s->segmentation.update_map)
178 for (i = 0; i < 3; i++)
179 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
180 }
181
182 static void update_lf_deltas(VP8Context *s)
183 {
184 VP56RangeCoder *c = &s->c;
185 int i;
186
187 for (i = 0; i < 4; i++) {
188 if (vp8_rac_get(c)) {
189 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
190
191 if (vp8_rac_get(c))
192 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
193 }
194 }
195
196 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
197 if (vp8_rac_get(c)) {
198 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
199
200 if (vp8_rac_get(c))
201 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
202 }
203 }
204 }
205
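/* Added note: token (DCT coefficient) data is split into 1, 2, 4 or 8
 * partitions; the header codes log2 of the count in 2 bits. The byte sizes
 * of the first N-1 partitions are stored up front as 3-byte little-endian
 * values, and the last partition spans whatever remains of the buffer. */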
206 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
207 {
208 const uint8_t *sizes = buf;
209 int i;
210
211 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
212
213 buf += 3*(s->num_coeff_partitions-1);
214 buf_size -= 3*(s->num_coeff_partitions-1);
215 if (buf_size < 0)
216 return -1;
217
218 for (i = 0; i < s->num_coeff_partitions-1; i++) {
219 int size = AV_RL24(sizes + 3*i);
220 if (buf_size - size < 0)
221 return -1;
222
223 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
224 buf += size;
225 buf_size -= size;
226 }
227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
228
229 return 0;
230 }
231
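/* Added note: quantizer reconstruction (spec 9.6): a 7-bit base index
 * (yac_qi) plus five signed 4-bit deltas index the DC/AC lookup tables;
 * per-segment indices are absolute or relative to yac_qi depending on
 * segmentation.absolute_vals. For example, yac_qi = 60 with ydc_delta = -2
 * selects vp8_dc_qlookup[58] for luma DC. The Y2 (second-order) factors get
 * extra scaling below: DC doubled, AC scaled by 155/100 with a floor of 8;
 * chroma DC is capped at 132. */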
232 static void get_quants(VP8Context *s)
233 {
234 VP56RangeCoder *c = &s->c;
235 int i, base_qi;
236
237 int yac_qi = vp8_rac_get_uint(c, 7);
238 int ydc_delta = vp8_rac_get_sint(c, 4);
239 int y2dc_delta = vp8_rac_get_sint(c, 4);
240 int y2ac_delta = vp8_rac_get_sint(c, 4);
241 int uvdc_delta = vp8_rac_get_sint(c, 4);
242 int uvac_delta = vp8_rac_get_sint(c, 4);
243
244 for (i = 0; i < 4; i++) {
245 if (s->segmentation.enabled) {
246 base_qi = s->segmentation.base_quant[i];
247 if (!s->segmentation.absolute_vals)
248 base_qi += yac_qi;
249 } else
250 base_qi = yac_qi;
251
252 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
253 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
254 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
255 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
256 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
257 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
258
259 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
260 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
261 }
262 }
263
264 /**
265 * Determine which buffers golden and altref should be updated with after this frame.
266 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
267 *
268 * Intra frames update all 3 references
269 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
270 * If the update (golden|altref) flag is set, it's updated with the current frame
271 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
272 * If the flag is not set, the number read means:
273 * 0: no update
274 * 1: VP56_FRAME_PREVIOUS
275 * 2: update golden with altref, or update altref with golden
276 */
277 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
278 {
279 VP56RangeCoder *c = &s->c;
280
281 if (update)
282 return VP56_FRAME_CURRENT;
283
284 switch (vp8_rac_get_uint(c, 2)) {
285 case 1:
286 return VP56_FRAME_PREVIOUS;
287 case 2:
288 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
289 }
290 return VP56_FRAME_NONE;
291 }
292
293 static void update_refs(VP8Context *s)
294 {
295 VP56RangeCoder *c = &s->c;
296
297 int update_golden = vp8_rac_get(c);
298 int update_altref = vp8_rac_get(c);
299
300 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
301 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
302 }
303
304 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
305 {
306 VP56RangeCoder *c = &s->c;
307 int header_size, hscale, vscale, i, j, k, l, m, ret;
308 int width = s->avctx->width;
309 int height = s->avctx->height;
310
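    /* Added note: 3-byte frame tag (spec 9.1): bit 0 is the inverse keyframe
     * flag, bits 1-3 the profile, bit 4 show_frame, bits 5-23 the size of
     * the first (header) partition. Keyframes append 7 more bytes: the start
     * code 0x9d 0x01 0x2a, then 14-bit width and height, each with a 2-bit
     * upscale factor, parsed further below. */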
311 s->keyframe = !(buf[0] & 1);
312 s->profile = (buf[0]>>1) & 7;
313 s->invisible = !(buf[0] & 0x10);
314 header_size = AV_RL24(buf) >> 5;
315 buf += 3;
316 buf_size -= 3;
317
318 if (s->profile > 3)
319 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
320
321 if (!s->profile)
322 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
323 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
324 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
325
326 if (header_size > buf_size - 7*s->keyframe) {
327 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
328 return AVERROR_INVALIDDATA;
329 }
330
331 if (s->keyframe) {
332 if (AV_RL24(buf) != 0x2a019d) {
333 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
334 return AVERROR_INVALIDDATA;
335 }
336 width = AV_RL16(buf+3) & 0x3fff;
337 height = AV_RL16(buf+5) & 0x3fff;
338 hscale = buf[4] >> 6;
339 vscale = buf[6] >> 6;
340 buf += 7;
341 buf_size -= 7;
342
343 if (hscale || vscale)
344 av_log_missing_feature(s->avctx, "Upscaling", 1);
345
346 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
347 for (i = 0; i < 4; i++)
348 for (j = 0; j < 16; j++)
349 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
350 sizeof(s->prob->token[i][j]));
351 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
352 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
353 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
354 memset(&s->segmentation, 0, sizeof(s->segmentation));
355 }
356
357 ff_vp56_init_range_decoder(c, buf, header_size);
358 buf += header_size;
359 buf_size -= header_size;
360
361 if (s->keyframe) {
362 if (vp8_rac_get(c))
363 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
364 vp8_rac_get(c); // whether we can skip clamping in dsp functions
365 }
366
367 if ((s->segmentation.enabled = vp8_rac_get(c)))
368 parse_segment_info(s);
369 else
370 s->segmentation.update_map = 0; // FIXME: move this to some init function?
371
372 s->filter.simple = vp8_rac_get(c);
373 s->filter.level = vp8_rac_get_uint(c, 6);
374 s->filter.sharpness = vp8_rac_get_uint(c, 3);
375
376 if ((s->lf_delta.enabled = vp8_rac_get(c)))
377 if (vp8_rac_get(c))
378 update_lf_deltas(s);
379
380 if (setup_partitions(s, buf, buf_size)) {
381 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
382 return AVERROR_INVALIDDATA;
383 }
384
385 if (!s->macroblocks_base || /* first frame */
386 width != s->avctx->width || height != s->avctx->height) {
387 if ((ret = update_dimensions(s, width, height)) < 0)
388 return ret;
389 }
390
391 get_quants(s);
392
393 if (!s->keyframe) {
394 update_refs(s);
395 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
396 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
397 }
398
399 // if we aren't saving this frame's probabilities for future frames,
400 // make a copy of the current probabilities
401 if (!(s->update_probabilities = vp8_rac_get(c)))
402 s->prob[1] = s->prob[0];
403
404 s->update_last = s->keyframe || vp8_rac_get(c);
405
406 for (i = 0; i < 4; i++)
407 for (j = 0; j < 8; j++)
408 for (k = 0; k < 3; k++)
409 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
410 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
411 int prob = vp8_rac_get_uint(c, 8);
412 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
413 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
414 }
415
416 if ((s->mbskip_enabled = vp8_rac_get(c)))
417 s->prob->mbskip = vp8_rac_get_uint(c, 8);
418
419 if (!s->keyframe) {
420 s->prob->intra = vp8_rac_get_uint(c, 8);
421 s->prob->last = vp8_rac_get_uint(c, 8);
422 s->prob->golden = vp8_rac_get_uint(c, 8);
423
424 if (vp8_rac_get(c))
425 for (i = 0; i < 4; i++)
426 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
427 if (vp8_rac_get(c))
428 for (i = 0; i < 3; i++)
429 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
430
431 // 17.2 MV probability update
432 for (i = 0; i < 2; i++)
433 for (j = 0; j < 19; j++)
434 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
435 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
436 }
437
438 return 0;
439 }
440
441 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
442 {
443 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
444 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
445 }
446
447 /**
448 * Motion vector coding, 17.1.
449 */
450 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
451 {
452 int bit, x = 0;
453
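    /* Added note: magnitudes of 8 or more are coded as raw probability-coded
     * bits: bits 0-2 first, then bits 9-4, and bit 3 last. If bits 4-9 are
     * all zero, bit 3 is implicitly 1, since a magnitude below 8 would have
     * been coded with the small tree instead. */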
454 if (vp56_rac_get_prob_branchy(c, p[0])) {
455 int i;
456
457 for (i = 0; i < 3; i++)
458 x += vp56_rac_get_prob(c, p[9 + i]) << i;
459 for (i = 9; i > 3; i--)
460 x += vp56_rac_get_prob(c, p[9 + i]) << i;
461 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
462 x += 8;
463 } else {
464 // small_mvtree
465 const uint8_t *ps = p+2;
466 bit = vp56_rac_get_prob(c, *ps);
467 ps += 1 + 3*bit;
468 x += 4*bit;
469 bit = vp56_rac_get_prob(c, *ps);
470 ps += 1 + bit;
471 x += 2*bit;
472 x += vp56_rac_get_prob(c, *ps);
473 }
474
475 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
476 }
477
478 static av_always_inline
479 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
480 {
481 if (left == top)
482 return vp8_submv_prob[4-!!left];
483 if (!top)
484 return vp8_submv_prob[2];
485 return vp8_submv_prob[1-!!left];
486 }
487
488 /**
489 * Split motion vector prediction, 16.4.
490 * @returns the number of motion vectors parsed (2, 4 or 16)
491 */
492 static av_always_inline
493 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
494 {
495 int part_idx;
496 int n, num;
497 VP8Macroblock *top_mb;
498 VP8Macroblock *left_mb = &mb[-1];
499 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
500 *mbsplits_top,
501 *mbsplits_cur, *firstidx;
502 VP56mv *top_mv;
503 VP56mv *left_mv = left_mb->bmv;
504 VP56mv *cur_mv = mb->bmv;
505
506 if (!layout) // layout is inlined, s->mb_layout is not
507 top_mb = &mb[2];
508 else
509 top_mb = &mb[-s->mb_width-1];
510 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
511 top_mv = top_mb->bmv;
512
513 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
514 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
515 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
516 } else {
517 part_idx = VP8_SPLITMVMODE_8x8;
518 }
519 } else {
520 part_idx = VP8_SPLITMVMODE_4x4;
521 }
522
523 num = vp8_mbsplit_count[part_idx];
524 mbsplits_cur = vp8_mbsplits[part_idx];
525 firstidx = vp8_mbfirstidx[part_idx];
526 mb->partitioning = part_idx;
527
528 for (n = 0; n < num; n++) {
529 int k = firstidx[n];
530 uint32_t left, above;
531 const uint8_t *submv_prob;
532
533 if (!(k & 3))
534 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
535 else
536 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
537 if (k <= 3)
538 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
539 else
540 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
541
542 submv_prob = get_submv_prob(left, above);
543
544 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
545 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
546 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
547 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
548 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
549 } else {
550 AV_ZERO32(&mb->bmv[n]);
551 }
552 } else {
553 AV_WN32A(&mb->bmv[n], above);
554 }
555 } else {
556 AV_WN32A(&mb->bmv[n], left);
557 }
558 }
559
560 return num;
561 }
562
563 static av_always_inline
564 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
565 {
566 VP8Macroblock *mb_edge[3] = { 0 /* top */,
567 mb - 1 /* left */,
568 0 /* top-left */ };
569 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
570 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
571 int idx = CNT_ZERO;
572 int cur_sign_bias = s->sign_bias[mb->ref_frame];
573 int8_t *sign_bias = s->sign_bias;
574 VP56mv near_mv[4];
575 uint8_t cnt[4] = { 0 };
576 VP56RangeCoder *c = &s->c;
577
578 if (!layout) { // layout is inlined (s->mb_layout is not)
579 mb_edge[0] = mb + 2;
580 mb_edge[2] = mb + 1;
581 }
582 else {
583 mb_edge[0] = mb - s->mb_width-1;
584 mb_edge[2] = mb - s->mb_width-2;
585 }
586
587 AV_ZERO32(&near_mv[0]);
588 AV_ZERO32(&near_mv[1]);
589 AV_ZERO32(&near_mv[2]);
590
591 /* Process MB on top, left and top-left */
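/* Added note: near_mv[] entries are (x,y) int16 pairs handled as single
 * uint32 words. The "SWAR negate" below flips both components at once: per
 * 16-bit lane it computes ~v + 1, with masking so the carry cannot cross
 * lanes. E.g. a lane holding 3 (0x0003): ~0x0003 = 0xFFFC, +1 = 0xFFFD = -3. */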
592 #define MV_EDGE_CHECK(n)\
593 {\
594 VP8Macroblock *edge = mb_edge[n];\
595 int edge_ref = edge->ref_frame;\
596 if (edge_ref != VP56_FRAME_CURRENT) {\
597 uint32_t mv = AV_RN32A(&edge->mv);\
598 if (mv) {\
599 if (cur_sign_bias != sign_bias[edge_ref]) {\
600 /* SWAR negate of the values in mv. */\
601 mv = ~mv;\
602 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
603 }\
604 if (!n || mv != AV_RN32A(&near_mv[idx]))\
605 AV_WN32A(&near_mv[++idx], mv);\
606 cnt[idx] += 1 + (n != 2);\
607 } else\
608 cnt[CNT_ZERO] += 1 + (n != 2);\
609 }\
610 }
611
612 MV_EDGE_CHECK(0)
613 MV_EDGE_CHECK(1)
614 MV_EDGE_CHECK(2)
615
616 mb->partitioning = VP8_SPLITMVMODE_NONE;
617 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
618 mb->mode = VP8_MVMODE_MV;
619
620 /* If we have three distinct MVs, merge first and last if they're the same */
621 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
622 cnt[CNT_NEAREST] += 1;
623
624 /* Swap near and nearest if necessary */
625 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
626 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
627 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
628 }
629
630 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
631 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
632
633 /* Choose the best mv out of 0,0 and the nearest mv */
634 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
635 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
636 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
637 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
638
639 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
640 mb->mode = VP8_MVMODE_SPLIT;
641 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
642 } else {
643 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
644 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
645 mb->bmv[0] = mb->mv;
646 }
647 } else {
648 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
649 mb->bmv[0] = mb->mv;
650 }
651 } else {
652 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
653 mb->bmv[0] = mb->mv;
654 }
655 } else {
656 mb->mode = VP8_MVMODE_ZERO;
657 AV_ZERO32(&mb->mv);
658 mb->bmv[0] = mb->mv;
659 }
660 }
661
662 static av_always_inline
663 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
664 int mb_x, int keyframe, int layout)
665 {
666 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
667
668 if (layout == 1) {
669 VP8Macroblock *mb_top = mb - s->mb_width - 1;
670 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
671 }
672 if (keyframe) {
673 int x, y;
674 uint8_t* top;
675 uint8_t* const left = s->intra4x4_pred_mode_left;
676 if (layout == 1)
677 top = mb->intra4x4_pred_mode_top;
678 else
679 top = s->intra4x4_pred_mode_top + 4 * mb_x;
680 for (y = 0; y < 4; y++) {
681 for (x = 0; x < 4; x++) {
682 const uint8_t *ctx;
683 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
684 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
685 left[y] = top[x] = *intra4x4;
686 intra4x4++;
687 }
688 }
689 } else {
690 int i;
691 for (i = 0; i < 16; i++)
692 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
693 }
694 }
695
696 static av_always_inline
697 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
698 uint8_t *segment, uint8_t *ref, int layout)
699 {
700 VP56RangeCoder *c = &s->c;
701
702 if (s->segmentation.update_map)
703 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
704 else if (s->segmentation.enabled)
705 *segment = ref ? *ref : *segment;
706 mb->segment = *segment;
707
708 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
709
710 if (s->keyframe) {
711 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
712
713 if (mb->mode == MODE_I4x4) {
714 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
715 } else {
716 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
717 if (s->mb_layout == 1)
718 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
719 else
720 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
721 AV_WN32A( s->intra4x4_pred_mode_left, modes);
722 }
723
724 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
725 mb->ref_frame = VP56_FRAME_CURRENT;
726 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
727 // inter MB, 16.2
728 if (vp56_rac_get_prob_branchy(c, s->prob->last))
729 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
730 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
731 else
732 mb->ref_frame = VP56_FRAME_PREVIOUS;
733 s->ref_count[mb->ref_frame-1]++;
734
735 // motion vectors, 16.3
736 decode_mvs(s, mb, mb_x, mb_y, layout);
737 } else {
738 // intra MB, 16.1
739 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
740
741 if (mb->mode == MODE_I4x4)
742 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
743
744 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
745 mb->ref_frame = VP56_FRAME_CURRENT;
746 mb->partitioning = VP8_SPLITMVMODE_NONE;
747 AV_ZERO32(&mb->bmv[0]);
748 }
749 }
750
751 #ifndef decode_block_coeffs_internal
752 /**
753 * @param c arithmetic bitstream reader context
754 * @param block destination for block coefficients
755 * @param probs probabilities to use when reading trees from the bitstream
756 * @param i initial coeff index, 0 unless a separate DC block is coded
757 * @param qmul array holding the dc/ac dequant factor at position 0/1
758 * @return 0 if no coeffs were decoded
759 * otherwise, the index of the last coeff decoded plus one
760 */
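/* Added note: coefficient token categories and their value ranges: DCT_CAT1
 * adds one extra bit for 5-6, DCT_CAT2 two bits for 7-10, and CAT3-CAT6
 * cover 11-18, 19-34, 35-66 and 67+. That is where coeff = 3 + (8 << cat)
 * below comes from: bases 11, 19, 35 and 67 for cat = 0..3. */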
761 static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
762 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
763 int i, uint8_t *token_prob, int16_t qmul[2])
764 {
765 VP56RangeCoder c = *r;
766 goto skip_eob;
767 do {
768 int coeff;
769 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
770 break;
771
772 skip_eob:
773 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
774 if (++i == 16)
775 break; // invalid input; blocks should end with EOB
776 token_prob = probs[i][0];
777 goto skip_eob;
778 }
779
780 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
781 coeff = 1;
782 token_prob = probs[i+1][1];
783 } else {
784 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
785 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
786 if (coeff)
787 coeff += vp56_rac_get_prob(&c, token_prob[5]);
788 coeff += 2;
789 } else {
790 // DCT_CAT*
791 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
792 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
793 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
794 } else { // DCT_CAT2
795 coeff = 7;
796 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
797 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
798 }
799 } else { // DCT_CAT3 and up
800 int a = vp56_rac_get_prob(&c, token_prob[8]);
801 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
802 int cat = (a<<1) + b;
803 coeff = 3 + (8<<cat);
804 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
805 }
806 }
807 token_prob = probs[i+1][2];
808 }
809 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
810 } while (++i < 16);
811
812 *r = c;
813 return i;
814 }
815 #endif
816
817 /**
818 * @param c arithmetic bitstream reader context
819 * @param block destination for block coefficients
820 * @param probs probabilities to use when reading trees from the bitstream
821 * @param i initial coeff index, 0 unless a separate DC block is coded
822 * @param zero_nhood the initial prediction context for number of surrounding
823 * all-zero blocks (only left/top, so 0-2)
824 * @param qmul array holding the dc/ac dequant factor at position 0/1
825 * @return 0 if no coeffs were decoded
826 * otherwise, the index of the last coeff decoded plus one
827 */
828 static av_always_inline
829 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
830 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
831 int i, int zero_nhood, int16_t qmul[2])
832 {
833 uint8_t *token_prob = probs[i][zero_nhood];
834 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
835 return 0;
836 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
837 }
838
839 static av_always_inline
840 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
841 uint8_t t_nnz[9], uint8_t l_nnz[9])
842 {
843 int i, x, y, luma_start = 0, luma_ctx = 3;
844 int nnz_pred, nnz, nnz_total = 0;
845 int segment = mb->segment;
846 int block_dc = 0;
847
848 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
849 nnz_pred = t_nnz[8] + l_nnz[8];
850
851 // decode DC values and do hadamard
852 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
853 s->qmat[segment].luma_dc_qmul);
854 l_nnz[8] = t_nnz[8] = !!nnz;
855 if (nnz) {
856 nnz_total += nnz;
857 block_dc = 1;
858 if (nnz == 1)
859 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
860 else
861 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
862 }
863 luma_start = 1;
864 luma_ctx = 0;
865 }
866
867 // luma blocks
868 for (y = 0; y < 4; y++)
869 for (x = 0; x < 4; x++) {
870 nnz_pred = l_nnz[y] + t_nnz[x];
871 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
872 nnz_pred, s->qmat[segment].luma_qmul);
873 // nnz+block_dc may be one more than the actual last index, but we don't care
874 td->non_zero_count_cache[y][x] = nnz + block_dc;
875 t_nnz[x] = l_nnz[y] = !!nnz;
876 nnz_total += nnz;
877 }
878
879 // chroma blocks
880 // TODO: what to do about dimensions? 2nd dim for luma is x,
881 // but for chroma it's (y<<1)|x
882 for (i = 4; i < 6; i++)
883 for (y = 0; y < 2; y++)
884 for (x = 0; x < 2; x++) {
885 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
886 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
887 nnz_pred, s->qmat[segment].chroma_qmul);
888 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
889 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
890 nnz_total += nnz;
891 }
892
893 // if there were no coded coeffs despite the macroblock not being marked skip,
894 // we MUST not do the inner loop filter and should not do IDCT
895 // Since skip isn't used for bitstream prediction, just manually set it.
896 if (!nnz_total)
897 mb->skip = 1;
898 }
899
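/* Added note: each s->top_border entry is 32 bytes per macroblock: 16 luma
 * pixels from the bottom row followed by 8 Cb and 8 Cr. backup_mb_border()
 * saves them once a row is done; xchg_mb_border() swaps them in and out
 * around intra prediction, with top_border-32 (the left neighbour's entry)
 * supplying the top-left pixels. */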
900 static av_always_inline
901 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
902 int linesize, int uvlinesize, int simple)
903 {
904 AV_COPY128(top_border, src_y + 15*linesize);
905 if (!simple) {
906 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
907 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
908 }
909 }
910
911 static av_always_inline
912 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
913 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
914 int simple, int xchg)
915 {
916 uint8_t *top_border_m1 = top_border-32; // for TL prediction
917 src_y -= linesize;
918 src_cb -= uvlinesize;
919 src_cr -= uvlinesize;
920
921 #define XCHG(a,b,xchg) do { \
922 if (xchg) AV_SWAP64(b,a); \
923 else AV_COPY64(b,a); \
924 } while (0)
925
926 XCHG(top_border_m1+8, src_y-8, xchg);
927 XCHG(top_border, src_y, xchg);
928 XCHG(top_border+8, src_y+8, 1);
929 if (mb_x < mb_width-1)
930 XCHG(top_border+32, src_y+16, 1);
931
932 // only copy chroma for normal loop filter
933 // or to initialize the top row to 127
934 if (!simple || !mb_y) {
935 XCHG(top_border_m1+16, src_cb-8, xchg);
936 XCHG(top_border_m1+24, src_cr-8, xchg);
937 XCHG(top_border+16, src_cb, 1);
938 XCHG(top_border+24, src_cr, 1);
939 }
940 }
941
942 static av_always_inline
943 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
944 {
945 if (!mb_x) {
946 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
947 } else {
948 return mb_y ? mode : LEFT_DC_PRED8x8;
949 }
950 }
951
952 static av_always_inline
953 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
954 {
955 if (!mb_x) {
956 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
957 } else {
958 return mb_y ? mode : HOR_PRED8x8;
959 }
960 }
961
962 static av_always_inline
963 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
964 {
965 if (mode == DC_PRED8x8) {
966 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
967 } else {
968 return mode;
969 }
970 }
971
972 static av_always_inline
973 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
974 {
975 switch (mode) {
976 case DC_PRED8x8:
977 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
978 case VERT_PRED8x8:
979 return !mb_y ? DC_127_PRED8x8 : mode;
980 case HOR_PRED8x8:
981 return !mb_x ? DC_129_PRED8x8 : mode;
982 case PLANE_PRED8x8 /*TM*/:
983 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
984 }
985 return mode;
986 }
987
988 static av_always_inline
989 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
990 {
991 if (!mb_x) {
992 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
993 } else {
994 return mb_y ? mode : HOR_VP8_PRED;
995 }
996 }
997
998 static av_always_inline
999 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1000 {
1001 switch (mode) {
1002 case VERT_PRED:
1003 if (!mb_x && mb_y) {
1004 *copy_buf = 1;
1005 return mode;
1006 }
1007 /* fall-through */
1008 case DIAG_DOWN_LEFT_PRED:
1009 case VERT_LEFT_PRED:
1010 return !mb_y ? DC_127_PRED : mode;
1011 case HOR_PRED:
1012 if (!mb_y) {
1013 *copy_buf = 1;
1014 return mode;
1015 }
1016 /* fall-through */
1017 case HOR_UP_PRED:
1018 return !mb_x ? DC_129_PRED : mode;
1019 case TM_VP8_PRED:
1020 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1021 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1022 case DIAG_DOWN_RIGHT_PRED:
1023 case VERT_RIGHT_PRED:
1024 case HOR_DOWN_PRED:
1025 if (!mb_y || !mb_x)
1026 *copy_buf = 1;
1027 return mode;
1028 }
1029 return mode;
1030 }
1031
1032 static av_always_inline
1033 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1034 VP8Macroblock *mb, int mb_x, int mb_y)
1035 {
1036 AVCodecContext *avctx = s->avctx;
1037 int x, y, mode, nnz;
1038 uint32_t tr;
1039
1040 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1041 // otherwise, skip it if we aren't going to deblock
1042 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1043 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1044 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1045 s->filter.simple, 1);
1046
1047 if (mb->mode < MODE_I4x4) {
1048 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1049 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1050 } else {
1051 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1052 }
1053 s->hpc.pred16x16[mode](dst[0], s->linesize);
1054 } else {
1055 uint8_t *ptr = dst[0];
1056 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1057 uint8_t tr_top[4] = { 127, 127, 127, 127 };
1058
1059 // all blocks on the right edge of the macroblock use the bottom edge of
1060 // the top macroblock for their topright edge
1061 uint8_t *tr_right = ptr - s->linesize + 16;
1062
1063 // if we're on the right edge of the frame, said edge is extended
1064 // from the top macroblock
1065 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1066 mb_x == s->mb_width-1) {
1067 tr = tr_right[-1]*0x01010101u;
1068 tr_right = (uint8_t *)&tr;
1069 }
1070
1071 if (mb->skip)
1072 AV_ZERO128(td->non_zero_count_cache);
1073
1074 for (y = 0; y < 4; y++) {
1075 uint8_t *topright = ptr + 4 - s->linesize;
1076 for (x = 0; x < 4; x++) {
1077 int copy = 0, linesize = s->linesize;
1078 uint8_t *dst = ptr+4*x;
1079 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
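                    /* Added note: copy_dst stages an edge-emulated 4x4 block as
                     * 5 rows with an 8-byte stride. copy_dst[3] is the top-left
                     * pixel, copy_dst[4..7] the top row, bytes 11/19/27/35 the
                     * left column, and the block itself lives at copy_dst+12. */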
1080
1081 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1082 topright = tr_top;
1083 } else if (x == 3)
1084 topright = tr_right;
1085
1086 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1087 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1088 if (copy) {
1089 dst = copy_dst + 12;
1090 linesize = 8;
1091 if (!(mb_y + y)) {
1092 copy_dst[3] = 127U;
1093 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1094 } else {
1095 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1096 if (!(mb_x + x)) {
1097 copy_dst[3] = 129U;
1098 } else {
1099 copy_dst[3] = ptr[4*x-s->linesize-1];
1100 }
1101 }
1102 if (!(mb_x + x)) {
1103 copy_dst[11] =
1104 copy_dst[19] =
1105 copy_dst[27] =
1106 copy_dst[35] = 129U;
1107 } else {
1108 copy_dst[11] = ptr[4*x -1];
1109 copy_dst[19] = ptr[4*x+s->linesize -1];
1110 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1111 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1112 }
1113 }
1114 } else {
1115 mode = intra4x4[x];
1116 }
1117 s->hpc.pred4x4[mode](dst, topright, linesize);
1118 if (copy) {
1119 AV_COPY32(ptr+4*x , copy_dst+12);
1120 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1121 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1122 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1123 }
1124
1125 nnz = td->non_zero_count_cache[y][x];
1126 if (nnz) {
1127 if (nnz == 1)
1128 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1129 else
1130 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1131 }
1132 topright += 4;
1133 }
1134
1135 ptr += 4*s->linesize;
1136 intra4x4 += 4;
1137 }
1138 }
1139
1140 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1141 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1142 } else {
1143 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1144 }
1145 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1146 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1147
1148 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1149 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1150 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1151 s->filter.simple, 0);
1152 }
1153
1154 static const uint8_t subpel_idx[3][8] = {
1155 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1156 // also function pointer index
1157 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1158 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1159 };
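/* Added note: an even luma fraction such as mx = 2 selects the 6-tap filter
 * (2 extra pixels to the left, 5 extra in total, 3 to the right), while odd
 * fractions need only 1/3/2, matching VP8's narrower 4-tap kernels at those
 * positions. */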
1160
1161 /**
1162 * luma MC function
1163 *
1164 * @param s VP8 decoding context
1165 * @param dst target buffer for block data at block position
1166 * @param ref reference picture buffer at origin (0, 0)
1167 * @param mv motion vector (relative to block position) to get pixel data from
1168 * @param x_off horizontal position of block from origin (0, 0)
1169 * @param y_off vertical position of block from origin (0, 0)
1170 * @param block_w width of block (16, 8 or 4)
1171 * @param block_h height of block (always same as block_w)
1172 * @param width width of src/dst plane data
1173 * @param height height of src/dst plane data
1174 * @param linesize size of a single line of plane data, including padding
1175 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1176 */
1177 static av_always_inline
1178 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1179 AVFrame *ref, const VP56mv *mv,
1180 int x_off, int y_off, int block_w, int block_h,
1181 int width, int height, int linesize,
1182 vp8_mc_func mc_func[3][3])
1183 {
1184 uint8_t *src = ref->data[0];
1185
1186 if (AV_RN32A(mv)) {
1187
1188 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1189 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1190
1191 x_off += mv->x >> 2;
1192 y_off += mv->y >> 2;
1193
1194 // edge emulation
1195 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1196 src += y_off * linesize + x_off;
1197 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1198 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1199 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1200 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1201 x_off - mx_idx, y_off - my_idx, width, height);
1202 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1203 }
1204 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1205 } else {
1206 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1207 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1208 }
1209 }
1210
1211 /**
1212 * chroma MC function
1213 *
1214 * @param s VP8 decoding context
1215 * @param dst1 target buffer for block data at block position (U plane)
1216 * @param dst2 target buffer for block data at block position (V plane)
1217 * @param ref reference picture buffer at origin (0, 0)
1218 * @param mv motion vector (relative to block position) to get pixel data from
1219 * @param x_off horizontal position of block from origin (0, 0)
1220 * @param y_off vertical position of block from origin (0, 0)
1221 * @param block_w width of block (16, 8 or 4)
1222 * @param block_h height of block (always same as block_w)
1223 * @param width width of src/dst plane data
1224 * @param height height of src/dst plane data
1225 * @param linesize size of a single line of plane data, including padding
1226 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1227 */
1228 static av_always_inline
1229 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1230 AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1231 int block_w, int block_h, int width, int height, int linesize,
1232 vp8_mc_func mc_func[3][3])
1233 {
1234 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1235
1236 if (AV_RN32A(mv)) {
1237 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1238 int my = mv->y&7, my_idx = subpel_idx[0][my];
1239
1240 x_off += mv->x >> 3;
1241 y_off += mv->y >> 3;
1242
1243 // edge emulation
1244 src1 += y_off * linesize + x_off;
1245 src2 += y_off * linesize + x_off;
1246 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1247 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1248 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1249 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1250 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1251 x_off - mx_idx, y_off - my_idx, width, height);
1252 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1253 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1254
1255 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1256 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1257 x_off - mx_idx, y_off - my_idx, width, height);
1258 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1259 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1260 } else {
1261 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1262 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1263 }
1264 } else {
1265 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1266 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1267 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1268 }
1269 }
1270
1271 static av_always_inline
1272 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1273 AVFrame *ref_frame, int x_off, int y_off,
1274 int bx_off, int by_off,
1275 int block_w, int block_h,
1276 int width, int height, VP56mv *mv)
1277 {
1278 VP56mv uvmv = *mv;
1279
1280 /* Y */
1281 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1282 ref_frame, mv, x_off + bx_off, y_off + by_off,
1283 block_w, block_h, width, height, s->linesize,
1284 s->put_pixels_tab[block_w == 8]);
1285
1286 /* U/V */
1287 if (s->profile == 3) {
1288 uvmv.x &= ~7;
1289 uvmv.y &= ~7;
1290 }
1291 x_off >>= 1; y_off >>= 1;
1292 bx_off >>= 1; by_off >>= 1;
1293 width >>= 1; height >>= 1;
1294 block_w >>= 1; block_h >>= 1;
1295 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1296 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1297 &uvmv, x_off + bx_off, y_off + by_off,
1298 block_w, block_h, width, height, s->uvlinesize,
1299 s->put_pixels_tab[1 + (block_w == 4)]);
1300 }
1301
1302 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1303 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1304 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1305 {
1306 /* Don't prefetch refs that haven't been used very often this frame. */
1307 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1308 int x_off = mb_x << 4, y_off = mb_y << 4;
1309 int mx = (mb->mv.x>>2) + x_off + 8;
1310 int my = (mb->mv.y>>2) + y_off;
1311 uint8_t **src= s->framep[ref]->data;
1312 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1313 /* For threading, a ff_thread_await_progress here might be useful, but
1314 * it actually slows down the decoder. Since a bad prefetch doesn't
1315 * generate bad decoder output, we don't run it here. */
1316 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1317 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1318 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1319 }
1320 }
1321
1322 /**
1323 * Apply motion vectors to prediction buffer, chapter 18.
1324 */
1325 static av_always_inline
1326 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1327 VP8Macroblock *mb, int mb_x, int mb_y)
1328 {
1329 int x_off = mb_x << 4, y_off = mb_y << 4;
1330 int width = 16*s->mb_width, height = 16*s->mb_height;
1331 AVFrame *ref = s->framep[mb->ref_frame];
1332 VP56mv *bmv = mb->bmv;
1333
1334 switch (mb->partitioning) {
1335 case VP8_SPLITMVMODE_NONE:
1336 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1337 0, 0, 16, 16, width, height, &mb->mv);
1338 break;
1339 case VP8_SPLITMVMODE_4x4: {
1340 int x, y;
1341 VP56mv uvmv;
1342
1343 /* Y */
1344 for (y = 0; y < 4; y++) {
1345 for (x = 0; x < 4; x++) {
1346 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1347 ref, &bmv[4*y + x],
1348 4*x + x_off, 4*y + y_off, 4, 4,
1349 width, height, s->linesize,
1350 s->put_pixels_tab[2]);
1351 }
1352 }
1353
1354 /* U/V */
1355 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1356 for (y = 0; y < 2; y++) {
1357 for (x = 0; x < 2; x++) {
1358 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1359 mb->bmv[ 2*y * 4 + 2*x+1].x +
1360 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1361 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1362 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1363 mb->bmv[ 2*y * 4 + 2*x+1].y +
1364 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1365 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1366 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1367 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1368 if (s->profile == 3) {
1369 uvmv.x &= ~7;
1370 uvmv.y &= ~7;
1371 }
1372 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1373 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1374 4*x + x_off, 4*y + y_off, 4, 4,
1375 width, height, s->uvlinesize,
1376 s->put_pixels_tab[2]);
1377 }
1378 }
1379 break;
1380 }
1381 case VP8_SPLITMVMODE_16x8:
1382 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1383 0, 0, 16, 8, width, height, &bmv[0]);
1384 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1385 0, 8, 16, 8, width, height, &bmv[1]);
1386 break;
1387 case VP8_SPLITMVMODE_8x16:
1388 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1389 0, 0, 8, 16, width, height, &bmv[0]);
1390 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1391 8, 0, 8, 16, width, height, &bmv[1]);
1392 break;
1393 case VP8_SPLITMVMODE_8x8:
1394 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1395 0, 0, 8, 8, width, height, &bmv[0]);
1396 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397 8, 0, 8, 8, width, height, &bmv[1]);
1398 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399 0, 8, 8, 8, width, height, &bmv[2]);
1400 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1401 8, 8, 8, 8, width, height, &bmv[3]);
1402 break;
1403 }
1404 }
1405
1406 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1407 uint8_t *dst[3], VP8Macroblock *mb)
1408 {
1409 int x, y, ch;
1410
1411 if (mb->mode != MODE_I4x4) {
1412 uint8_t *y_dst = dst[0];
1413 for (y = 0; y < 4; y++) {
1414 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1415 if (nnz4) {
1416 if (nnz4&~0x01010101) {
1417 for (x = 0; x < 4; x++) {
1418 if ((uint8_t)nnz4 == 1)
1419 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1420 else if((uint8_t)nnz4 > 1)
1421 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1422 nnz4 >>= 8;
1423 if (!nnz4)
1424 break;
1425 }
1426 } else {
1427 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1428 }
1429 }
1430 y_dst += 4*s->linesize;
1431 }
1432 }
1433
1434 for (ch = 0; ch < 2; ch++) {
1435 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1436 if (nnz4) {
1437 uint8_t *ch_dst = dst[1+ch];
1438 if (nnz4&~0x01010101) {
1439 for (y = 0; y < 2; y++) {
1440 for (x = 0; x < 2; x++) {
1441 if ((uint8_t)nnz4 == 1)
1442 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1443 else if((uint8_t)nnz4 > 1)
1444 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1445 nnz4 >>= 8;
1446 if (!nnz4)
1447 goto chroma_idct_end;
1448 }
1449 ch_dst += 4*s->uvlinesize;
1450 }
1451 } else {
1452 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1453 }
1454 }
1455 chroma_idct_end: ;
1456 }
1457 }
1458
1459 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1460 {
1461 int interior_limit, filter_level;
1462
1463 if (s->segmentation.enabled) {
1464 filter_level = s->segmentation.filter_level[mb->segment];
1465 if (!s->segmentation.absolute_vals)
1466 filter_level += s->filter.level;
1467 } else
1468 filter_level = s->filter.level;
1469
1470 if (s->lf_delta.enabled) {
1471 filter_level += s->lf_delta.ref[mb->ref_frame];
1472 filter_level += s->lf_delta.mode[mb->mode];
1473 }
1474
1475 filter_level = av_clip_uintp2(filter_level, 6);
1476
1477 interior_limit = filter_level;
1478 if (s->filter.sharpness) {
1479 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1480 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1481 }
1482 interior_limit = FFMAX(interior_limit, 1);
1483
1484 f->filter_level = filter_level;
1485 f->inner_limit = interior_limit;
1486 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1487 }
1488
1489 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1490 {
1491 int mbedge_lim, bedge_lim, hev_thresh;
1492 int filter_level = f->filter_level;
1493 int inner_limit = f->inner_limit;
1494 int inner_filter = f->inner_filter;
1495 int linesize = s->linesize;
1496 int uvlinesize = s->uvlinesize;
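    /* Added note: high-edge-variance threshold as a function of loop filter
     * level, precomputed from the spec's per-level thresholds. Row 0 (inter
     * frames) allows values up to 3, row 1 (keyframes) up to 2; the row is
     * picked by s->keyframe below. */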
1497 static const uint8_t hev_thresh_lut[2][64] = {
1498 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1499 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1500 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1501 3, 3, 3, 3 },
1502 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1503 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1504 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1505 2, 2, 2, 2 }
1506 };
1507
1508 if (!filter_level)
1509 return;
1510
1511 bedge_lim = 2*filter_level + inner_limit;
1512 mbedge_lim = bedge_lim + 4;
1513
1514 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1515
1516 if (mb_x) {
1517 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1518 mbedge_lim, inner_limit, hev_thresh);
1519 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1520 mbedge_lim, inner_limit, hev_thresh);
1521 }
1522
1523 if (inner_filter) {
1524 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1525 inner_limit, hev_thresh);
1526 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1527 inner_limit, hev_thresh);
1528 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1529 inner_limit, hev_thresh);
1530 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1531 uvlinesize, bedge_lim,
1532 inner_limit, hev_thresh);
1533 }
1534
1535 if (mb_y) {
1536 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1537 mbedge_lim, inner_limit, hev_thresh);
1538 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1539 mbedge_lim, inner_limit, hev_thresh);
1540 }
1541
1542 if (inner_filter) {
1543 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1544 linesize, bedge_lim,
1545 inner_limit, hev_thresh);
1546 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1547 linesize, bedge_lim,
1548 inner_limit, hev_thresh);
1549 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1550 linesize, bedge_lim,
1551 inner_limit, hev_thresh);
1552 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1553 dst[2] + 4 * uvlinesize,
1554 uvlinesize, bedge_lim,
1555 inner_limit, hev_thresh);
1556 }
1557 }
1558
1559 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1560 {
1561 int mbedge_lim, bedge_lim;
1562 int filter_level = f->filter_level;
1563 int inner_limit = f->inner_limit;
1564 int inner_filter = f->inner_filter;
1565 int linesize = s->linesize;
1566
1567 if (!filter_level)
1568 return;
1569
1570 bedge_lim = 2*filter_level + inner_limit;
1571 mbedge_lim = bedge_lim + 4;
1572
1573 if (mb_x)
1574 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1575 if (inner_filter) {
1576 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1577 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1578 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1579 }
1580
1581 if (mb_y)
1582 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1583 if (inner_filter) {
1584 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1585 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1586 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1587 }
1588 }
1589
1590 static void release_queued_segmaps(VP8Context *s, int is_close)
1591 {
1592 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1593 while (s->num_maps_to_be_freed > leave_behind)
1594 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1595 s->maps_are_invalid = 0;
1596 }
1597
1598 #define MARGIN (16 << 2)
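/* Added note: MARGIN is how far motion vectors may point outside the frame,
 * in quarter-pel units (16 pixels). mv_min/mv_max are slid along per row and
 * column so clamp_mv() can apply them directly. */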
1599 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
1600 AVFrame *prev_frame)
1601 {
1602 VP8Context *s = avctx->priv_data;
1603 int mb_x, mb_y;
1604
1605 s->mv_min.y = -MARGIN;
1606 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1607 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1608 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1609 int mb_xy = mb_y*s->mb_width;
1610
1611 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1612
1613 s->mv_min.x = -MARGIN;
1614 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1615 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1616 if (mb_y == 0)
1617 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1618 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1619 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
1620 s->mv_min.x -= 64;
1621 s->mv_max.x -= 64;
1622 }
1623 s->mv_min.y -= 64;
1624 s->mv_max.y -= 64;
1625 }
1626 }
1627
1628 #if HAVE_THREADS
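/* Added note: sliced-threading synchronisation. Each thread publishes its
 * progress as (mb_y << 16) | mb_x in td->thread_mb_pos. check_thread_pos()
 * blocks on the other thread's condition variable until that thread has
 * passed the requested macroblock; update_pos() advances our own position
 * and wakes any waiter. With threading disabled both compile to nothing,
 * which is why the pthread calls must only appear under HAVE_THREADS (the
 * subject of this commit). */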
1629 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1630 do {\
1631 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1632 if (otd->thread_mb_pos < tmp) {\
1633 pthread_mutex_lock(&otd->lock);\
1634 td->wait_mb_pos = tmp;\
1635 do {\
1636 if (otd->thread_mb_pos >= tmp)\
1637 break;\
1638 pthread_cond_wait(&otd->cond, &otd->lock);\
1639 } while (1);\
1640 td->wait_mb_pos = INT_MAX;\
1641 pthread_mutex_unlock(&otd->lock);\
1642 }\
1643 } while(0)
1644
1645 #define update_pos(td, mb_y, mb_x)\
1646 do {\
1647 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1648 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1649 int is_null = (next_td == NULL) || (prev_td == NULL);\
1650 int pos_check = (is_null) ? 1 :\
1651 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1652 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1653 td->thread_mb_pos = pos;\
1654 if (sliced_threading && pos_check) {\
1655 pthread_mutex_lock(&td->lock);\
1656 pthread_cond_broadcast(&td->cond);\
1657 pthread_mutex_unlock(&td->lock);\
1658 }\
1659 } while(0)
1660 #else
1661 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1662 #define update_pos(td, mb_y, mb_x)
1663 #endif
1664
1665 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1666 int jobnr, int threadnr)
1667 {
1668 VP8Context *s = avctx->priv_data;
1669 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1670 int mb_y = td->thread_mb_pos>>16;
1671 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1672 int num_jobs = s->num_jobs;
1673 AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
1674 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1675 VP8Macroblock *mb;
1676 uint8_t *dst[3] = {
1677 curframe->data[0] + 16*mb_y*s->linesize,
1678 curframe->data[1] + 8*mb_y*s->uvlinesize,
1679 curframe->data[2] + 8*mb_y*s->uvlinesize
1680 };
1681 if (mb_y == 0) prev_td = td;
1682 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1683 if (mb_y == s->mb_height-1) next_td = td;
1684 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1685 if (s->mb_layout == 1)
1686 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1687 else {
1688 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1689 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1690 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1691 }
1692
1693 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1694 // left edge of 129 for intra prediction
1695 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1696 for (i = 0; i < 3; i++)
1697 for (y = 0; y < 16>>!!i; y++) /* 16 rows for luma (i == 0), 8 for chroma */
1698 dst[i][y*curframe->linesize[i]-1] = 129;
1699 if (mb_y == 1) {
1700 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1701 }
1702 }
1703
1704 s->mv_min.x = -MARGIN;
1705 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1706
1707 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1708 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1709 if (prev_td != td) {
1710 if (threadnr != 0) {
1711 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1712 } else {
1713 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1714 }
1715 }
1716
1717 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1718 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1719
1720 if (!s->mb_layout)
1721 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1722 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
1723
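/* Prefetches for the previous, golden and altref reference frames are
 * interleaved with the decode stages to hide memory latency. */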
1724 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1725
1726 if (!mb->skip)
1727 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1728
1729 if (mb->mode <= MODE_I4x4)
1730 intra_predict(s, td, dst, mb, mb_x, mb_y);
1731 else
1732 inter_predict(s, td, dst, mb, mb_x, mb_y);
1733
1734 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1735
1736 if (!mb->skip) {
1737 idct_mb(s, td, dst, mb);
1738 } else {
1739 AV_ZERO64(td->left_nnz);
1740 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1741
1742 // Reset the DC block predictors that would have been set if the mb had coefficients
1743 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1744 td->left_nnz[8] = 0;
1745 s->top_nnz[mb_x][8] = 0;
1746 }
1747 }
1748
1749 if (s->deblock_filter)
1750 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1751
1752 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1753 if (s->filter.simple)
1754 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1755 else
1756 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1757 }
1758
1759 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1760
1761 dst[0] += 16;
1762 dst[1] += 8;
1763 dst[2] += 8;
1764 s->mv_min.x -= 64;
1765 s->mv_max.x -= 64;
1766
1767 if (mb_x == s->mb_width+1) {
1768 update_pos(td, mb_y, s->mb_width+3);
1769 } else {
1770 update_pos(td, mb_y, mb_x);
1771 }
1772 }
1773 }
1774
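/* Apply the in-loop deblocking filter to one already-decoded macroblock row.
 * Filtering touches pixels of the row above, so it is gated on the filter
 * progress of that row and on the decode progress of the row below. */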
1775 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1776 int jobnr, int threadnr)
1777 {
1778 VP8Context *s = avctx->priv_data;
1779 VP8ThreadData *td = &s->thread_data[threadnr];
1780 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1781 AVFrame *curframe = s->curframe;
1782 VP8Macroblock *mb;
1783 VP8ThreadData *prev_td, *next_td;
1784 uint8_t *dst[3] = {
1785 curframe->data[0] + 16*mb_y*s->linesize,
1786 curframe->data[1] + 8*mb_y*s->uvlinesize,
1787 curframe->data[2] + 8*mb_y*s->uvlinesize
1788 };
1789
1790 if (s->mb_layout == 1)
1791 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1792 else
1793 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1794
1795 if (mb_y == 0) prev_td = td;
1796 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1797 if (mb_y == s->mb_height-1) next_td = td;
1798 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1799
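/* Progress x-positions 0..mb_width-1 track decode progress within a row;
 * the same positions offset by mb_width+3 track filter progress, so one
 * packed value per job covers both stages. */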
1800 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1801 VP8FilterStrength *f = &td->filter_strength[mb_x];
1802 if (prev_td != td) {
1803 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1804 }
1805 if (next_td != td && next_td != &s->thread_data[0]) {
1806     check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1807 }
1809
1810 if (num_jobs == 1) {
1811 if (s->filter.simple)
1812 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1813 else
1814 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1815 }
1816
1817 if (s->filter.simple)
1818 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1819 else
1820 filter_mb(s, dst, f, mb_x, mb_y);
1821 dst[0] += 16;
1822 dst[1] += 8;
1823 dst[2] += 8;
1824
1825 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1826 }
1827 }
1828
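/* execute2() entry point: job n handles rows n, n + num_jobs, n + 2*num_jobs,
 * ..., so vertically adjacent rows always belong to different jobs. With
 * frame threading, each finished row is also reported as decoding progress
 * for any thread waiting on this frame. */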
1829 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1830 int jobnr, int threadnr)
1831 {
1832 VP8Context *s = avctx->priv_data;
1833 VP8ThreadData *td = &s->thread_data[jobnr];
1834 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1835 AVFrame *curframe = s->curframe;
1836 int mb_y, num_jobs = s->num_jobs;
1837 td->thread_nr = threadnr;
1838 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1840 td->thread_mb_pos = mb_y<<16;
1841 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1842 if (s->deblock_filter)
1843 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1844 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1845
1846 s->mv_min.y -= 64;
1847 s->mv_max.y -= 64;
1848
1849 if (avctx->active_thread_type == FF_THREAD_FRAME)
1850 ff_thread_report_progress(curframe, mb_y, 0);
1851 }
1852
1853 return 0;
1854 }
1855
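/* Decode one packet: parse the frame header, pick a free frame buffer,
 * decode all macroblock rows (possibly split across sliced-threading jobs),
 * then rotate the last/golden/altref reference pointers for the next frame. */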
1856 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1857 AVPacket *avpkt)
1858 {
1859 VP8Context *s = avctx->priv_data;
1860 int ret, i, referenced, num_jobs;
1861 enum AVDiscard skip_thresh;
1862 AVFrame *av_uninit(curframe), *prev_frame;
1863
1864 release_queued_segmaps(s, 0);
1865
1866 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1867 goto err;
1868
1869 prev_frame = s->framep[VP56_FRAME_CURRENT];
1870
1871 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1872 || s->update_altref == VP56_FRAME_CURRENT;
1873
1874 skip_thresh = !referenced ? AVDISCARD_NONREF :
1875 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1876
1877 if (avctx->skip_frame >= skip_thresh) {
1878 s->invisible = 1;
1879 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1880 goto skip_decode;
1881 }
1882 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1883
1884 // release no longer referenced frames
1885 for (i = 0; i < 5; i++)
1886 if (s->frames[i].data[0] &&
1887 &s->frames[i] != prev_frame &&
1888 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1889 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1890 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1891 vp8_release_frame(s, &s->frames[i], 1, 0);
1892
1893 // find a free buffer
1894 for (i = 0; i < 5; i++)
1895 if (&s->frames[i] != prev_frame &&
1896 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1897 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1898 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1899 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1900 break;
1901 }
1902 if (i == 5) {
1903 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1904 abort();
1905 }
1906 if (curframe->data[0])
1907 vp8_release_frame(s, curframe, 1, 0);
1908
1909 // Given that arithmetic probabilities are updated every frame, it's quite likely
1910 // that the values we have on a random interframe are complete junk if we didn't
1911 // start decode on a keyframe. So just don't display anything rather than junk.
1912 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1913 !s->framep[VP56_FRAME_GOLDEN] ||
1914 !s->framep[VP56_FRAME_GOLDEN2])) {
1915 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1916 ret = AVERROR_INVALIDDATA;
1917 goto err;
1918 }
1919
1920 curframe->key_frame = s->keyframe;
1921 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1922 curframe->reference = referenced ? 3 : 0;
1923 if ((ret = vp8_alloc_frame(s, curframe))) {
1924 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1925 goto err;
1926 }
1927
1928 // select the next golden/altref frames; update_golden/update_altref may point them at another reference
1929 if (s->update_altref != VP56_FRAME_NONE) {
1930 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1931 } else {
1932 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1933 }
1934 if (s->update_golden != VP56_FRAME_NONE) {
1935 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1936 } else {
1937 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1938 }
1939 if (s->update_last) {
1940 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1941 } else {
1942 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1943 }
1944 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1945
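/* All shared state a frame-threading child needs (the next_framep setup
 * above) is now in place, so the next frame's decode may start. */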
1946 ff_thread_finish_setup(avctx);
1947
1948 s->linesize = curframe->linesize[0];
1949 s->uvlinesize = curframe->linesize[1];
1950
1951 if (!s->thread_data[0].edge_emu_buffer)
1952     for (i = 0; i < MAX_THREADS; i++) {
1953         s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
         if (!s->thread_data[i].edge_emu_buffer) {
             ret = AVERROR(ENOMEM);
             goto err;
         }
     }
1954
1955 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1956 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1957 if (!s->mb_layout)
1958 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1959 if (!s->mb_layout && s->keyframe)
1960 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1961
1962 // top edge of 127 for intra prediction
1963 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1964 s->top_border[0][15] = s->top_border[0][23] = 127;
1965 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1966 }
1967 memset(s->ref_count, 0, sizeof(s->ref_count));
1968
1969
1970 // Make sure the previous frame has read its segmentation map,
1971 // if we re-use the same map.
1972 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1973 ff_thread_await_progress(prev_frame, 1, 0);
1974
1975 if (s->mb_layout == 1)
1976 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1977
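/* Frame threading decodes this frame on a single thread, so use one job;
 * with slice threading, cap jobs at the coefficient partition count, since
 * the concurrently active rows are consecutive and must map to distinct
 * range coders. */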
1978 if (avctx->active_thread_type == FF_THREAD_FRAME)
1979 num_jobs = 1;
1980 else
1981 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1982 s->num_jobs = num_jobs;
1983 s->curframe = curframe;
1984 s->prev_frame = prev_frame;
1985 s->mv_min.y = -MARGIN;
1986 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1987 for (i = 0; i < MAX_THREADS; i++) {
1988 s->thread_data[i].thread_mb_pos = 0;
1989 s->thread_data[i].wait_mb_pos = INT_MAX;
1990 }
1991 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1992
1993 ff_thread_report_progress(curframe, INT_MAX, 0);
1994 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1995
1996 skip_decode:
1997 // if future frames don't use the updated probabilities,
1998 // reset them to the values we saved
1999 if (!s->update_probabilities)
2000 s->prob[0] = s->prob[1];
2001
2002 if (!s->invisible) {
2003 *(AVFrame*)data = *curframe;
2004 *data_size = sizeof(AVFrame);
2005 }
2006
2007 return avpkt->size;
2008 err:
2009 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2010 return ret;
2011 }
2012
2013 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2014 {
2015 VP8Context *s = avctx->priv_data;
2016
2017 s->avctx = avctx;
2018 avctx->pix_fmt = PIX_FMT_YUV420P;
2019
2020 ff_dsputil_init(&s->dsp, avctx);
2021 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
2022 ff_vp8dsp_init(&s->vp8dsp);
2023
2024 return 0;
2025 }
2026
2027 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2028 {
2029 vp8_decode_flush_impl(avctx, 0, 1, 1);
2030 release_queued_segmaps(avctx->priv_data, 1);
2031 return 0;
2032 }
2033
2034 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2035 {
2036 VP8Context *s = avctx->priv_data;
2037
2038 s->avctx = avctx;
2039
2040 return 0;
2041 }
2042
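/* Translate a frame pointer from the source thread's context into the
 * matching entry of this context's own frames[] array (e.g. a pointer to
 * s_src->frames[2] becomes a pointer to s->frames[2]). */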
2043 #define REBASE(pic) \
2044 (pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL
2045
2046 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2047 {
2048 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2049
2050 if (s->macroblocks_base &&
2051 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2052 free_buffers(s);
2053 s->maps_are_invalid = 1;
2054 s->mb_width = s_src->mb_width;
2055 s->mb_height = s_src->mb_height;
2056 }
2057
2058 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2059 s->segmentation = s_src->segmentation;
2060 s->lf_delta = s_src->lf_delta;
2061 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2062
2063 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2064 s->framep[0] = REBASE(s_src->next_framep[0]);
2065 s->framep[1] = REBASE(s_src->next_framep[1]);
2066 s->framep[2] = REBASE(s_src->next_framep[2]);
2067 s->framep[3] = REBASE(s_src->next_framep[3]);
2068
2069 return 0;
2070 }
2071
2072 AVCodec ff_vp8_decoder = {
2073 .name = "vp8",
2074 .type = AVMEDIA_TYPE_VIDEO,
2075 .id = CODEC_ID_VP8,
2076 .priv_data_size = sizeof(VP8Context),
2077 .init = vp8_decode_init,
2078 .close = vp8_decode_free,
2079 .decode = vp8_decode_frame,
2080 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2081 .flush = vp8_decode_flush,
2082 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2083 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2084 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2085 };