Commit | Line | Data |
---|---|---|
32f3c541 | 1 | /* |
3b636f21 DC |
2 | * VP8 compatible video decoder |
3 | * | |
4 | * Copyright (C) 2010 David Conrad | |
5 | * Copyright (C) 2010 Ronald S. Bultje | |
13a1304b | 6 | * Copyright (C) 2010 Jason Garrett-Glaser |
951455c1 | 7 | * Copyright (C) 2012 Daniel Kang |
3b636f21 | 8 | * |
2912e87a | 9 | * This file is part of Libav. |
3b636f21 | 10 | * |
2912e87a | 11 | * Libav is free software; you can redistribute it and/or |
3b636f21 DC |
12 | * modify it under the terms of the GNU Lesser General Public |
13 | * License as published by the Free Software Foundation; either | |
14 | * version 2.1 of the License, or (at your option) any later version. | |
15 | * | |
2912e87a | 16 | * Libav is distributed in the hope that it will be useful, |
3b636f21 DC |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * Lesser General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU Lesser General Public | |
2912e87a | 22 | * License along with Libav; if not, write to the Free Software |
3b636f21 DC |
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
24 | */ | |
25 | ||
737eb597 | 26 | #include "libavutil/imgutils.h" |
3b636f21 | 27 | #include "avcodec.h" |
f3a29b75 | 28 | #include "internal.h" |
bcf4568f | 29 | #include "vp8.h" |
3b636f21 | 30 | #include "vp8data.h" |
3b636f21 | 31 | #include "rectangle.h" |
4773d904 | 32 | #include "thread.h" |
3b636f21 | 33 | |
a7878c9f MR |
34 | #if ARCH_ARM |
35 | # include "arm/vp8.h" | |
36 | #endif | |
37 | ||
56535793 RB |
38 | static void free_buffers(VP8Context *s) |
39 | { | |
951455c1 DK |
40 | int i; |
41 | if (s->thread_data) | |
42 | for (i = 0; i < MAX_THREADS; i++) { | |
43 | av_freep(&s->thread_data[i].filter_strength); | |
44 | av_freep(&s->thread_data[i].edge_emu_buffer); | |
45 | } | |
46 | av_freep(&s->thread_data); | |
56535793 | 47 | av_freep(&s->macroblocks_base); |
56535793 RB |
48 | av_freep(&s->intra4x4_pred_mode_top); |
49 | av_freep(&s->top_nnz); | |
56535793 | 50 | av_freep(&s->top_border); |
56535793 RB |
51 | |
52 | s->macroblocks = NULL; | |
53 | } | |
54 | ||
ce42a048 RB |
55 | static int vp8_alloc_frame(VP8Context *s, AVFrame *f) |
56 | { | |
57 | int ret; | |
58 | if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0) | |
59 | return ret; | |
e02dec25 | 60 | if (s->num_maps_to_be_freed && !s->maps_are_invalid) { |
ce42a048 RB |
61 | f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed]; |
62 | } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) { | |
63 | ff_thread_release_buffer(s->avctx, f); | |
64 | return AVERROR(ENOMEM); | |
65 | } | |
66 | return 0; | |
67 | } | |
68 | ||
bfa0f965 | 69 | static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free) |
ce42a048 | 70 | { |
bfa0f965 RB |
71 | if (f->ref_index[0]) { |
72 | if (prefer_delayed_free) { | |
73 | /* Upon a size change, we want to free the maps but other threads may still | |
74 | * be using them, so queue them. Upon a seek, all threads are inactive so | |
75 | * we want to cache one to prevent re-allocation in the next decoding | |
76 | * iteration, but the rest we can free directly. */ | |
77 | int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps); | |
78 | if (s->num_maps_to_be_freed < max_queued_maps) { | |
79 | s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0]; | |
80 | } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ { | |
81 | av_free(f->ref_index[0]); | |
82 | } /* else: MEMLEAK (should never happen, but better that than crash) */ | |
ce42a048 | 83 | f->ref_index[0] = NULL; |
bfa0f965 RB |
84 | } else /* vp8_decode_free() */ { |
85 | av_free(f->ref_index[0]); | |
ce42a048 | 86 | } |
ce42a048 RB |
87 | } |
88 | ff_thread_release_buffer(s->avctx, f); | |
89 | } | |
90 | ||
bfa0f965 RB |
91 | static void vp8_decode_flush_impl(AVCodecContext *avctx, |
92 | int prefer_delayed_free, int can_direct_free, int free_mem) | |
3b636f21 DC |
93 | { |
94 | VP8Context *s = avctx->priv_data; | |
95 | int i; | |
96 | ||
f3a29b75 | 97 | if (!avctx->internal->is_copy) { |
4773d904 RB |
98 | for (i = 0; i < 5; i++) |
99 | if (s->frames[i].data[0]) | |
bfa0f965 | 100 | vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free); |
4773d904 | 101 | } |
3b636f21 DC |
102 | memset(s->framep, 0, sizeof(s->framep)); |
103 | ||
bfa0f965 RB |
104 | if (free_mem) { |
105 | free_buffers(s); | |
106 | s->maps_are_invalid = 1; | |
107 | } | |
ce42a048 RB |
108 | } |
109 | ||
110 | static void vp8_decode_flush(AVCodecContext *avctx) | |
111 | { | |
bfa0f965 | 112 | vp8_decode_flush_impl(avctx, 1, 1, 0); |
3b636f21 DC |
113 | } |
114 | ||
115 | static int update_dimensions(VP8Context *s, int width, int height) | |
116 | { | |
951455c1 DK |
117 | AVCodecContext *avctx = s->avctx; |
118 | int i; | |
119 | ||
4773d904 RB |
120 | if (width != s->avctx->width || |
121 | height != s->avctx->height) { | |
122 | if (av_image_check_size(width, height, 0, s->avctx)) | |
123 | return AVERROR_INVALIDDATA; | |
3b636f21 | 124 | |
bfa0f965 | 125 | vp8_decode_flush_impl(s->avctx, 1, 0, 1); |
3b636f21 | 126 | |
4773d904 RB |
127 | avcodec_set_dimensions(s->avctx, width, height); |
128 | } | |
3b636f21 DC |
129 | |
130 | s->mb_width = (s->avctx->coded_width +15) / 16; | |
131 | s->mb_height = (s->avctx->coded_height+15) / 16; | |
132 | ||
951455c1 DK |
133 | s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1); |
134 | if (!s->mb_layout) { // Frame threading and one thread | |
135 | s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks)); | |
136 | s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); | |
137 | } | |
138 | else // Sliced threading | |
139 | s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks)); | |
140 | s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); | |
141 | s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); | |
142 | s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData)); | |
3b636f21 | 143 | |
951455c1 DK |
144 | for (i = 0; i < MAX_THREADS; i++) { |
145 | s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength)); | |
25f056e6 | 146 | #if HAVE_THREADS |
951455c1 DK |
147 | pthread_mutex_init(&s->thread_data[i].lock, NULL); |
148 | pthread_cond_init(&s->thread_data[i].cond, NULL); | |
25f056e6 | 149 | #endif |
951455c1 DK |
150 | } |
151 | ||
152 | if (!s->macroblocks_base || !s->top_nnz || !s->top_border || | |
153 | (!s->intra4x4_pred_mode_top && !s->mb_layout)) | |
b6c420ce DC |
154 | return AVERROR(ENOMEM); |
155 | ||
c55e0d34 | 156 | s->macroblocks = s->macroblocks_base + 1; |
3b636f21 DC |
157 | |
158 | return 0; | |
159 | } | |
160 | ||
161 | static void parse_segment_info(VP8Context *s) | |
162 | { | |
163 | VP56RangeCoder *c = &s->c; | |
164 | int i; | |
165 | ||
166 | s->segmentation.update_map = vp8_rac_get(c); | |
167 | ||
168 | if (vp8_rac_get(c)) { // update segment feature data | |
169 | s->segmentation.absolute_vals = vp8_rac_get(c); | |
170 | ||
171 | for (i = 0; i < 4; i++) | |
172 | s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); | |
173 | ||
174 | for (i = 0; i < 4; i++) | |
175 | s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); | |
176 | } | |
177 | if (s->segmentation.update_map) | |
178 | for (i = 0; i < 3; i++) | |
179 | s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; | |
180 | } | |
181 | ||
182 | static void update_lf_deltas(VP8Context *s) | |
183 | { | |
184 | VP56RangeCoder *c = &s->c; | |
185 | int i; | |
186 | ||
14ba7472 JS |
187 | for (i = 0; i < 4; i++) { |
188 | if (vp8_rac_get(c)) { | |
189 | s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6); | |
3b636f21 | 190 | |
14ba7472 JS |
191 | if (vp8_rac_get(c)) |
192 | s->lf_delta.ref[i] = -s->lf_delta.ref[i]; | |
193 | } | |
194 | } | |
195 | ||
196 | for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) { | |
197 | if (vp8_rac_get(c)) { | |
198 | s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6); | |
199 | ||
200 | if (vp8_rac_get(c)) | |
201 | s->lf_delta.mode[i] = -s->lf_delta.mode[i]; | |
202 | } | |
203 | } | |
3b636f21 DC |
204 | } |
205 | ||
206 | static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) | |
207 | { | |
208 | const uint8_t *sizes = buf; | |
209 | int i; | |
210 | ||
211 | s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); | |
212 | ||
213 | buf += 3*(s->num_coeff_partitions-1); | |
214 | buf_size -= 3*(s->num_coeff_partitions-1); | |
215 | if (buf_size < 0) | |
216 | return -1; | |
217 | ||
218 | for (i = 0; i < s->num_coeff_partitions-1; i++) { | |
06d50ca8 | 219 | int size = AV_RL24(sizes + 3*i); |
3b636f21 DC |
220 | if (buf_size - size < 0) |
221 | return -1; | |
222 | ||
905ef0d0 | 223 | ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size); |
3b636f21 DC |
224 | buf += size; |
225 | buf_size -= size; | |
226 | } | |
905ef0d0 | 227 | ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); |
3b636f21 DC |
228 | |
229 | return 0; | |
230 | } | |
231 | ||
232 | static void get_quants(VP8Context *s) | |
233 | { | |
234 | VP56RangeCoder *c = &s->c; | |
235 | int i, base_qi; | |
236 | ||
237 | int yac_qi = vp8_rac_get_uint(c, 7); | |
238 | int ydc_delta = vp8_rac_get_sint(c, 4); | |
239 | int y2dc_delta = vp8_rac_get_sint(c, 4); | |
240 | int y2ac_delta = vp8_rac_get_sint(c, 4); | |
241 | int uvdc_delta = vp8_rac_get_sint(c, 4); | |
242 | int uvac_delta = vp8_rac_get_sint(c, 4); | |
243 | ||
244 | for (i = 0; i < 4; i++) { | |
245 | if (s->segmentation.enabled) { | |
246 | base_qi = s->segmentation.base_quant[i]; | |
247 | if (!s->segmentation.absolute_vals) | |
248 | base_qi += yac_qi; | |
249 | } else | |
250 | base_qi = yac_qi; | |
251 | ||
42761122 MR |
252 | s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)]; |
253 | s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)]; | |
254 | s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)]; | |
255 | s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100; | |
256 | s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)]; | |
257 | s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)]; | |
a8ab0ccc PM |
258 | |
259 | s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); | |
260 | s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); | |
3b636f21 DC |
261 | } |
262 | } | |
263 | ||
264 | /** | |
265 | * Determine which buffers golden and altref should be updated with after this frame. | |
266 | * The spec isn't clear here, so I'm going by my understanding of what libvpx does | |
267 | * | |
268 | * Intra frames update all 3 references | |
269 | * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set | |
270 | * If the update (golden|altref) flag is set, it's updated with the current frame | |
271 | * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. | |
272 | * If the flag is not set, the number read means: | |
273 | * 0: no update | |
274 | * 1: VP56_FRAME_PREVIOUS | |
275 | * 2: update golden with altref, or update altref with golden | |
276 | */ | |
277 | static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) | |
278 | { | |
279 | VP56RangeCoder *c = &s->c; | |
280 | ||
281 | if (update) | |
282 | return VP56_FRAME_CURRENT; | |
283 | ||
284 | switch (vp8_rac_get_uint(c, 2)) { | |
285 | case 1: | |
286 | return VP56_FRAME_PREVIOUS; | |
287 | case 2: | |
288 | return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; | |
289 | } | |
290 | return VP56_FRAME_NONE; | |
291 | } | |
292 | ||
293 | static void update_refs(VP8Context *s) | |
294 | { | |
295 | VP56RangeCoder *c = &s->c; | |
296 | ||
297 | int update_golden = vp8_rac_get(c); | |
298 | int update_altref = vp8_rac_get(c); | |
299 | ||
300 | s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); | |
301 | s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); | |
302 | } | |
303 | ||
304 | static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) | |
305 | { | |
306 | VP56RangeCoder *c = &s->c; | |
370b622a | 307 | int header_size, hscale, vscale, i, j, k, l, m, ret; |
3b636f21 DC |
308 | int width = s->avctx->width; |
309 | int height = s->avctx->height; | |
310 | ||
311 | s->keyframe = !(buf[0] & 1); | |
312 | s->profile = (buf[0]>>1) & 7; | |
313 | s->invisible = !(buf[0] & 0x10); | |
06d50ca8 | 314 | header_size = AV_RL24(buf) >> 5; |
3b636f21 DC |
315 | buf += 3; |
316 | buf_size -= 3; | |
317 | ||
0ef1dbed DC |
318 | if (s->profile > 3) |
319 | av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); | |
320 | ||
321 | if (!s->profile) | |
322 | memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); | |
323 | else // profile 1-3 use bilinear, 4+ aren't defined so whatever | |
324 | memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); | |
3b636f21 DC |
325 | |
326 | if (header_size > buf_size - 7*s->keyframe) { | |
327 | av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); | |
328 | return AVERROR_INVALIDDATA; | |
329 | } | |
330 | ||
331 | if (s->keyframe) { | |
06d50ca8 JGG |
332 | if (AV_RL24(buf) != 0x2a019d) { |
333 | av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf)); | |
3b636f21 DC |
334 | return AVERROR_INVALIDDATA; |
335 | } | |
336 | width = AV_RL16(buf+3) & 0x3fff; | |
337 | height = AV_RL16(buf+5) & 0x3fff; | |
338 | hscale = buf[4] >> 6; | |
339 | vscale = buf[6] >> 6; | |
340 | buf += 7; | |
341 | buf_size -= 7; | |
342 | ||
92a54426 MR |
343 | if (hscale || vscale) |
344 | av_log_missing_feature(s->avctx, "Upscaling", 1); | |
345 | ||
3b636f21 | 346 | s->update_golden = s->update_altref = VP56_FRAME_CURRENT; |
370b622a JGG |
347 | for (i = 0; i < 4; i++) |
348 | for (j = 0; j < 16; j++) | |
349 | memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], | |
350 | sizeof(s->prob->token[i][j])); | |
3b636f21 DC |
351 | memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); |
352 | memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); | |
353 | memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); | |
354 | memset(&s->segmentation, 0, sizeof(s->segmentation)); | |
355 | } | |
356 | ||
905ef0d0 | 357 | ff_vp56_init_range_decoder(c, buf, header_size); |
3b636f21 DC |
358 | buf += header_size; |
359 | buf_size -= header_size; | |
360 | ||
361 | if (s->keyframe) { | |
362 | if (vp8_rac_get(c)) | |
363 | av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); | |
364 | vp8_rac_get(c); // whether we can skip clamping in dsp functions | |
365 | } | |
366 | ||
367 | if ((s->segmentation.enabled = vp8_rac_get(c))) | |
368 | parse_segment_info(s); | |
369 | else | |
370 | s->segmentation.update_map = 0; // FIXME: move this to some init function? | |
371 | ||
372 | s->filter.simple = vp8_rac_get(c); | |
373 | s->filter.level = vp8_rac_get_uint(c, 6); | |
374 | s->filter.sharpness = vp8_rac_get_uint(c, 3); | |
375 | ||
376 | if ((s->lf_delta.enabled = vp8_rac_get(c))) | |
377 | if (vp8_rac_get(c)) | |
378 | update_lf_deltas(s); | |
379 | ||
380 | if (setup_partitions(s, buf, buf_size)) { | |
381 | av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); | |
382 | return AVERROR_INVALIDDATA; | |
383 | } | |
384 | ||
951455c1 DK |
385 | if (!s->macroblocks_base || /* first frame */ |
386 | width != s->avctx->width || height != s->avctx->height) { | |
387 | if ((ret = update_dimensions(s, width, height)) < 0) | |
388 | return ret; | |
389 | } | |
390 | ||
3b636f21 DC |
391 | get_quants(s); |
392 | ||
393 | if (!s->keyframe) { | |
394 | update_refs(s); | |
395 | s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); | |
396 | s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); | |
397 | } | |
398 | ||
399 | // if we aren't saving this frame's probabilities for future frames, | |
400 | // make a copy of the current probabilities | |
401 | if (!(s->update_probabilities = vp8_rac_get(c))) | |
402 | s->prob[1] = s->prob[0]; | |
403 | ||
404 | s->update_last = s->keyframe || vp8_rac_get(c); | |
405 | ||
406 | for (i = 0; i < 4; i++) | |
407 | for (j = 0; j < 8; j++) | |
408 | for (k = 0; k < 3; k++) | |
409 | for (l = 0; l < NUM_DCT_TOKENS-1; l++) | |
370b622a JGG |
410 | if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) { |
411 | int prob = vp8_rac_get_uint(c, 8); | |
b0d58795 JGG |
412 | for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) |
413 | s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob; | |
370b622a | 414 | } |
3b636f21 DC |
415 | |
416 | if ((s->mbskip_enabled = vp8_rac_get(c))) | |
a8ab0ccc | 417 | s->prob->mbskip = vp8_rac_get_uint(c, 8); |
3b636f21 DC |
418 | |
419 | if (!s->keyframe) { | |
a8ab0ccc PM |
420 | s->prob->intra = vp8_rac_get_uint(c, 8); |
421 | s->prob->last = vp8_rac_get_uint(c, 8); | |
422 | s->prob->golden = vp8_rac_get_uint(c, 8); | |
3b636f21 DC |
423 | |
424 | if (vp8_rac_get(c)) | |
425 | for (i = 0; i < 4; i++) | |
426 | s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); | |
427 | if (vp8_rac_get(c)) | |
428 | for (i = 0; i < 3; i++) | |
429 | s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); | |
430 | ||
431 | // 17.2 MV probability update | |
432 | for (i = 0; i < 2; i++) | |
433 | for (j = 0; j < 19; j++) | |
7697cdcf | 434 | if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) |
3b636f21 DC |
435 | s->prob->mvc[i][j] = vp8_rac_get_nn(c); |
436 | } | |
437 | ||
438 | return 0; | |
439 | } | |
440 | ||
7634771e | 441 | static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src) |
3b636f21 | 442 | { |
7634771e JGG |
443 | dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x); |
444 | dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y); | |
3b636f21 DC |
445 | } |
446 | ||
3b636f21 DC |
447 | /** |
448 | * Motion vector coding, 17.1. | |
449 | */ | |
450 | static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) | |
451 | { | |
ca18a478 | 452 | int bit, x = 0; |
3b636f21 | 453 | |
7697cdcf | 454 | if (vp56_rac_get_prob_branchy(c, p[0])) { |
3b636f21 DC |
455 | int i; |
456 | ||
457 | for (i = 0; i < 3; i++) | |
458 | x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
459 | for (i = 9; i > 3; i--) | |
460 | x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
461 | if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) | |
462 | x += 8; | |
ca18a478 DC |
463 | } else { |
464 | // small_mvtree | |
465 | const uint8_t *ps = p+2; | |
466 | bit = vp56_rac_get_prob(c, *ps); | |
467 | ps += 1 + 3*bit; | |
468 | x += 4*bit; | |
469 | bit = vp56_rac_get_prob(c, *ps); | |
470 | ps += 1 + bit; | |
471 | x += 2*bit; | |
472 | x += vp56_rac_get_prob(c, *ps); | |
473 | } | |
3b636f21 DC |
474 | |
475 | return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; | |
476 | } | |
477 | ||
414ac27d JGG |
478 | static av_always_inline |
479 | const uint8_t *get_submv_prob(uint32_t left, uint32_t top) | |
3b636f21 | 480 | { |
7bf254c4 JGG |
481 | if (left == top) |
482 | return vp8_submv_prob[4-!!left]; | |
483 | if (!top) | |
3b636f21 | 484 | return vp8_submv_prob[2]; |
7bf254c4 | 485 | return vp8_submv_prob[1-!!left]; |
3b636f21 DC |
486 | } |
487 | ||
488 | /** | |
489 | * Split motion vector prediction, 16.4. | |
7ed06b2b | 490 | * @returns the number of motion vectors parsed (2, 4 or 16) |
3b636f21 | 491 | */ |
414ac27d | 492 | static av_always_inline |
951455c1 | 493 | int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout) |
3b636f21 | 494 | { |
0908f1b9 JGG |
495 | int part_idx; |
496 | int n, num; | |
951455c1 | 497 | VP8Macroblock *top_mb; |
7bf254c4 JGG |
498 | VP8Macroblock *left_mb = &mb[-1]; |
499 | const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], | |
951455c1 | 500 | *mbsplits_top, |
0908f1b9 | 501 | *mbsplits_cur, *firstidx; |
951455c1 | 502 | VP56mv *top_mv; |
c55e0d34 JGG |
503 | VP56mv *left_mv = left_mb->bmv; |
504 | VP56mv *cur_mv = mb->bmv; | |
3b636f21 | 505 | |
951455c1 DK |
506 | if (!layout) // layout is inlined, s->mb_layout is not |
507 | top_mb = &mb[2]; | |
508 | else | |
509 | top_mb = &mb[-s->mb_width-1]; | |
510 | mbsplits_top = vp8_mbsplits[top_mb->partitioning]; | |
511 | top_mv = top_mb->bmv; | |
512 | ||
0908f1b9 JGG |
513 | if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { |
514 | if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { | |
515 | part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); | |
516 | } else { | |
517 | part_idx = VP8_SPLITMVMODE_8x8; | |
518 | } | |
519 | } else { | |
520 | part_idx = VP8_SPLITMVMODE_4x4; | |
521 | } | |
522 | ||
523 | num = vp8_mbsplit_count[part_idx]; | |
524 | mbsplits_cur = vp8_mbsplits[part_idx], | |
525 | firstidx = vp8_mbfirstidx[part_idx]; | |
526 | mb->partitioning = part_idx; | |
527 | ||
3b636f21 | 528 | for (n = 0; n < num; n++) { |
7ed06b2b | 529 | int k = firstidx[n]; |
7bf254c4 | 530 | uint32_t left, above; |
7ed06b2b RB |
531 | const uint8_t *submv_prob; |
532 | ||
7bf254c4 JGG |
533 | if (!(k & 3)) |
534 | left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); | |
535 | else | |
536 | left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); | |
537 | if (k <= 3) | |
538 | above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); | |
539 | else | |
540 | above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); | |
7ed06b2b RB |
541 | |
542 | submv_prob = get_submv_prob(left, above); | |
3b636f21 | 543 | |
c5dec7f1 JGG |
544 | if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { |
545 | if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { | |
546 | if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { | |
547 | mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); | |
548 | mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); | |
549 | } else { | |
550 | AV_ZERO32(&mb->bmv[n]); | |
551 | } | |
552 | } else { | |
553 | AV_WN32A(&mb->bmv[n], above); | |
554 | } | |
555 | } else { | |
7bf254c4 | 556 | AV_WN32A(&mb->bmv[n], left); |
3b636f21 | 557 | } |
3b636f21 | 558 | } |
7ed06b2b RB |
559 | |
560 | return num; | |
3b636f21 DC |
561 | } |
562 | ||
414ac27d | 563 | static av_always_inline |
951455c1 | 564 | void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout) |
f3d09d44 | 565 | { |
951455c1 | 566 | VP8Macroblock *mb_edge[3] = { 0 /* top */, |
f3d09d44 | 567 | mb - 1 /* left */, |
951455c1 | 568 | 0 /* top-left */ }; |
f3d09d44 | 569 | enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; |
66f608a6 | 570 | enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; |
f3d09d44 JGG |
571 | int idx = CNT_ZERO; |
572 | int cur_sign_bias = s->sign_bias[mb->ref_frame]; | |
1eeca886 | 573 | int8_t *sign_bias = s->sign_bias; |
f3d09d44 JGG |
574 | VP56mv near_mv[4]; |
575 | uint8_t cnt[4] = { 0 }; | |
576 | VP56RangeCoder *c = &s->c; | |
577 | ||
951455c1 DK |
578 | if (!layout) { // layout is inlined (s->mb_layout is not) |
579 | mb_edge[0] = mb + 2; | |
580 | mb_edge[2] = mb + 1; | |
581 | } | |
582 | else { | |
583 | mb_edge[0] = mb - s->mb_width-1; | |
584 | mb_edge[2] = mb - s->mb_width-2; | |
585 | } | |
586 | ||
f3d09d44 JGG |
587 | AV_ZERO32(&near_mv[0]); |
588 | AV_ZERO32(&near_mv[1]); | |
0f0b5d64 | 589 | AV_ZERO32(&near_mv[2]); |
f3d09d44 JGG |
590 | |
591 | /* Process MB on top, left and top-left */ | |
592 | #define MV_EDGE_CHECK(n)\ | |
593 | {\ | |
594 | VP8Macroblock *edge = mb_edge[n];\ | |
595 | int edge_ref = edge->ref_frame;\ | |
596 | if (edge_ref != VP56_FRAME_CURRENT) {\ | |
597 | uint32_t mv = AV_RN32A(&edge->mv);\ | |
598 | if (mv) {\ | |
599 | if (cur_sign_bias != sign_bias[edge_ref]) {\ | |
600 | /* SWAR negate of the values in mv. */\ | |
601 | mv = ~mv;\ | |
602 | mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ | |
603 | }\ | |
604 | if (!n || mv != AV_RN32A(&near_mv[idx]))\ | |
605 | AV_WN32A(&near_mv[++idx], mv);\ | |
606 | cnt[idx] += 1 + (n != 2);\ | |
607 | } else\ | |
608 | cnt[CNT_ZERO] += 1 + (n != 2);\ | |
609 | }\ | |
610 | } | |
611 | ||
612 | MV_EDGE_CHECK(0) | |
613 | MV_EDGE_CHECK(1) | |
614 | MV_EDGE_CHECK(2) | |
615 | ||
616 | mb->partitioning = VP8_SPLITMVMODE_NONE; | |
617 | if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { | |
618 | mb->mode = VP8_MVMODE_MV; | |
619 | ||
620 | /* If we have three distinct MVs, merge first and last if they're the same */ | |
66f608a6 | 621 | if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) |
f3d09d44 JGG |
622 | cnt[CNT_NEAREST] += 1; |
623 | ||
624 | /* Swap near and nearest if necessary */ | |
625 | if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { | |
626 | FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); | |
627 | FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); | |
628 | } | |
629 | ||
630 | if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { | |
631 | if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { | |
632 | ||
633 | /* Choose the best mv out of 0,0 and the nearest mv */ | |
7634771e | 634 | clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); |
66f608a6 AS |
635 | cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + |
636 | (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + | |
637 | (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); | |
f3d09d44 JGG |
638 | |
639 | if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { | |
640 | mb->mode = VP8_MVMODE_SPLIT; | |
951455c1 | 641 | mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1]; |
f3d09d44 JGG |
642 | } else { |
643 | mb->mv.y += read_mv_component(c, s->prob->mvc[0]); | |
644 | mb->mv.x += read_mv_component(c, s->prob->mvc[1]); | |
645 | mb->bmv[0] = mb->mv; | |
646 | } | |
647 | } else { | |
7634771e | 648 | clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]); |
f3d09d44 JGG |
649 | mb->bmv[0] = mb->mv; |
650 | } | |
651 | } else { | |
7634771e | 652 | clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]); |
f3d09d44 JGG |
653 | mb->bmv[0] = mb->mv; |
654 | } | |
655 | } else { | |
656 | mb->mode = VP8_MVMODE_ZERO; | |
657 | AV_ZERO32(&mb->mv); | |
658 | mb->bmv[0] = mb->mv; | |
659 | } | |
660 | } | |
661 | ||
662 | static av_always_inline | |
17343e39 | 663 | void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, |
951455c1 | 664 | int mb_x, int keyframe, int layout) |
3b636f21 | 665 | { |
17343e39 DK |
666 | uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; |
667 | ||
951455c1 DK |
668 | if (layout == 1) { |
669 | VP8Macroblock *mb_top = mb - s->mb_width - 1; | |
670 | memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4); | |
671 | } | |
d1c58fce | 672 | if (keyframe) { |
d2840fa4 | 673 | int x, y; |
951455c1 | 674 | uint8_t* top; |
d2840fa4 | 675 | uint8_t* const left = s->intra4x4_pred_mode_left; |
951455c1 DK |
676 | if (layout == 1) |
677 | top = mb->intra4x4_pred_mode_top; | |
678 | else | |
679 | top = s->intra4x4_pred_mode_top + 4 * mb_x; | |
d1c58fce JGG |
680 | for (y = 0; y < 4; y++) { |
681 | for (x = 0; x < 4; x++) { | |
d2840fa4 PM |
682 | const uint8_t *ctx; |
683 | ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; | |
684 | *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); | |
685 | left[y] = top[x] = *intra4x4; | |
686 | intra4x4++; | |
3b636f21 | 687 | } |
3b636f21 | 688 | } |
d1c58fce | 689 | } else { |
d2840fa4 | 690 | int i; |
d1c58fce JGG |
691 | for (i = 0; i < 16; i++) |
692 | intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); | |
3b636f21 DC |
693 | } |
694 | } | |
695 | ||
414ac27d | 696 | static av_always_inline |
951455c1 DK |
697 | void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
698 | uint8_t *segment, uint8_t *ref, int layout) | |
3b636f21 DC |
699 | { |
700 | VP56RangeCoder *c = &s->c; | |
3b636f21 DC |
701 | |
702 | if (s->segmentation.update_map) | |
c55e0d34 | 703 | *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); |
30011bf2 | 704 | else if (s->segmentation.enabled) |
4773d904 | 705 | *segment = ref ? *ref : *segment; |
17343e39 | 706 | mb->segment = *segment; |
3b636f21 | 707 | |
a8ab0ccc | 708 | mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; |
3b636f21 DC |
709 | |
710 | if (s->keyframe) { | |
711 | mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); | |
712 | ||
713 | if (mb->mode == MODE_I4x4) { | |
951455c1 | 714 | decode_intra4x4_modes(s, c, mb, mb_x, 1, layout); |
d2840fa4 PM |
715 | } else { |
716 | const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u; | |
951455c1 DK |
717 | if (s->mb_layout == 1) |
718 | AV_WN32A(mb->intra4x4_pred_mode_top, modes); | |
719 | else | |
720 | AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); | |
721 | AV_WN32A( s->intra4x4_pred_mode_left, modes); | |
d2840fa4 | 722 | } |
3b636f21 | 723 | |
17343e39 | 724 | mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); |
3b636f21 | 725 | mb->ref_frame = VP56_FRAME_CURRENT; |
a8ab0ccc | 726 | } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { |
3b636f21 | 727 | // inter MB, 16.2 |
a8ab0ccc PM |
728 | if (vp56_rac_get_prob_branchy(c, s->prob->last)) |
729 | mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? | |
3b636f21 DC |
730 | VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; |
731 | else | |
732 | mb->ref_frame = VP56_FRAME_PREVIOUS; | |
c4211046 | 733 | s->ref_count[mb->ref_frame-1]++; |
3b636f21 DC |
734 | |
735 | // motion vectors, 16.3 | |
951455c1 | 736 | decode_mvs(s, mb, mb_x, mb_y, layout); |
3b636f21 DC |
737 | } else { |
738 | // intra MB, 16.1 | |
739 | mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); | |
740 | ||
158e062c | 741 | if (mb->mode == MODE_I4x4) |
951455c1 | 742 | decode_intra4x4_modes(s, c, mb, mb_x, 0, layout); |
3b636f21 | 743 | |
17343e39 | 744 | mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); |
3b636f21 | 745 | mb->ref_frame = VP56_FRAME_CURRENT; |
b946111f | 746 | mb->partitioning = VP8_SPLITMVMODE_NONE; |
14767f35 | 747 | AV_ZERO32(&mb->bmv[0]); |
3b636f21 DC |
748 | } |
749 | } | |
750 | ||
a7878c9f | 751 | #ifndef decode_block_coeffs_internal |
3b636f21 | 752 | /** |
e394953e RB |
753 | * @param c arithmetic bitstream reader context |
754 | * @param block destination for block coefficients | |
755 | * @param probs probabilities to use when reading trees from the bitstream | |
3b636f21 | 756 | * @param i initial coeff index, 0 unless a separate DC block is coded |
3fa76268 | 757 | * @param qmul array holding the dc/ac dequant factor at position 0/1 |
3b636f21 DC |
758 | * @return 0 if no coeffs were decoded |
759 | * otherwise, the index of the last coeff decoded plus one | |
760 | */ | |
6163d880 | 761 | static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16], |
3efbe137 | 762 | uint8_t probs[16][3][NUM_DCT_TOKENS-1], |
1e739679 | 763 | int i, uint8_t *token_prob, int16_t qmul[2]) |
3b636f21 | 764 | { |
6163d880 | 765 | VP56RangeCoder c = *r; |
afb54a85 | 766 | goto skip_eob; |
fe1b5d97 | 767 | do { |
1e739679 | 768 | int coeff; |
6163d880 RB |
769 | if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB |
770 | break; | |
3b636f21 | 771 | |
fe1b5d97 | 772 | skip_eob: |
6163d880 | 773 | if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0 |
c22b4468 | 774 | if (++i == 16) |
6163d880 | 775 | break; // invalid input; blocks should end with EOB |
370b622a | 776 | token_prob = probs[i][0]; |
c22b4468 | 777 | goto skip_eob; |
fe1b5d97 DC |
778 | } |
779 | ||
6163d880 | 780 | if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1 |
fe1b5d97 | 781 | coeff = 1; |
370b622a | 782 | token_prob = probs[i+1][1]; |
fe1b5d97 | 783 | } else { |
6163d880 RB |
784 | if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4 |
785 | coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]); | |
fe1b5d97 | 786 | if (coeff) |
6163d880 | 787 | coeff += vp56_rac_get_prob(&c, token_prob[5]); |
fe1b5d97 DC |
788 | coeff += 2; |
789 | } else { | |
790 | // DCT_CAT* | |
6163d880 RB |
791 | if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) { |
792 | if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1 | |
793 | coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]); | |
fe1b5d97 DC |
794 | } else { // DCT_CAT2 |
795 | coeff = 7; | |
6163d880 RB |
796 | coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1; |
797 | coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]); | |
fe1b5d97 DC |
798 | } |
799 | } else { // DCT_CAT3 and up | |
6163d880 RB |
800 | int a = vp56_rac_get_prob(&c, token_prob[8]); |
801 | int b = vp56_rac_get_prob(&c, token_prob[9+a]); | |
fe1b5d97 DC |
802 | int cat = (a<<1) + b; |
803 | coeff = 3 + (8<<cat); | |
6163d880 | 804 | coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]); |
fe1b5d97 DC |
805 | } |
806 | } | |
370b622a | 807 | token_prob = probs[i+1][2]; |
fe1b5d97 | 808 | } |
6163d880 | 809 | block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; |
afb54a85 | 810 | } while (++i < 16); |
fe1b5d97 | 811 | |
6163d880 | 812 | *r = c; |
afb54a85 | 813 | return i; |
3b636f21 | 814 | } |
a7878c9f | 815 | #endif |
3b636f21 | 816 | |
3c432e11 DB |
817 | /** |
818 | * @param c arithmetic bitstream reader context | |
819 | * @param block destination for block coefficients | |
820 | * @param probs probabilities to use when reading trees from the bitstream | |
821 | * @param i initial coeff index, 0 unless a separate DC block is coded | |
822 | * @param zero_nhood the initial prediction context for number of surrounding | |
823 | * all-zero blocks (only left/top, so 0-2) | |
824 | * @param qmul array holding the dc/ac dequant factor at position 0/1 | |
825 | * @return 0 if no coeffs were decoded | |
826 | * otherwise, the index of the last coeff decoded plus one | |
827 | */ | |
414ac27d | 828 | static av_always_inline |
1e739679 | 829 | int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], |
81a13131 | 830 | uint8_t probs[16][3][NUM_DCT_TOKENS-1], |
1e739679 JGG |
831 | int i, int zero_nhood, int16_t qmul[2]) |
832 | { | |
833 | uint8_t *token_prob = probs[i][zero_nhood]; | |
834 | if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB | |
835 | return 0; | |
836 | return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul); | |
837 | } | |
838 | ||
839 | static av_always_inline | |
951455c1 | 840 | void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb, |
414ac27d | 841 | uint8_t t_nnz[9], uint8_t l_nnz[9]) |
3b636f21 | 842 | { |
3b636f21 DC |
843 | int i, x, y, luma_start = 0, luma_ctx = 3; |
844 | int nnz_pred, nnz, nnz_total = 0; | |
17343e39 | 845 | int segment = mb->segment; |
f311208c | 846 | int block_dc = 0; |
3b636f21 | 847 | |
3b636f21 | 848 | if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { |
3b636f21 DC |
849 | nnz_pred = t_nnz[8] + l_nnz[8]; |
850 | ||
851 | // decode DC values and do hadamard | |
951455c1 | 852 | nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred, |
3b636f21 DC |
853 | s->qmat[segment].luma_dc_qmul); |
854 | l_nnz[8] = t_nnz[8] = !!nnz; | |
f311208c JGG |
855 | if (nnz) { |
856 | nnz_total += nnz; | |
857 | block_dc = 1; | |
858 | if (nnz == 1) | |
951455c1 | 859 | s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc); |
f311208c | 860 | else |
951455c1 | 861 | s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc); |
f311208c | 862 | } |
3b636f21 DC |
863 | luma_start = 1; |
864 | luma_ctx = 0; | |
865 | } | |
866 | ||
867 | // luma blocks | |
868 | for (y = 0; y < 4; y++) | |
869 | for (x = 0; x < 4; x++) { | |
ffbf0794 | 870 | nnz_pred = l_nnz[y] + t_nnz[x]; |
951455c1 | 871 | nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start, |
ffbf0794 | 872 | nnz_pred, s->qmat[segment].luma_qmul); |
f311208c | 873 | // nnz+block_dc may be one more than the actual last index, but we don't care |
951455c1 | 874 | td->non_zero_count_cache[y][x] = nnz + block_dc; |
3b636f21 DC |
875 | t_nnz[x] = l_nnz[y] = !!nnz; |
876 | nnz_total += nnz; | |
877 | } | |
878 | ||
879 | // chroma blocks | |
880 | // TODO: what to do about dimensions? 2nd dim for luma is x, | |
881 | // but for chroma it's (y<<1)|x | |
882 | for (i = 4; i < 6; i++) | |
883 | for (y = 0; y < 2; y++) | |
884 | for (x = 0; x < 2; x++) { | |
885 | nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; | |
951455c1 | 886 | nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0, |
3b636f21 | 887 | nnz_pred, s->qmat[segment].chroma_qmul); |
951455c1 | 888 | td->non_zero_count_cache[i][(y<<1)+x] = nnz; |
3b636f21 DC |
889 | t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; |
890 | nnz_total += nnz; | |
891 | } | |
892 | ||
893 | // if there were no coded coeffs despite the macroblock not being marked skip, | |
894 | // we MUST not do the inner loop filter and should not do IDCT | |
895 | // Since skip isn't used for bitstream prediction, just manually set it. | |
896 | if (!nnz_total) | |
897 | mb->skip = 1; | |
898 | } | |
899 | ||
9ac831c2 DC |
900 | static av_always_inline |
901 | void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, | |
902 | int linesize, int uvlinesize, int simple) | |
903 | { | |
904 | AV_COPY128(top_border, src_y + 15*linesize); | |
905 | if (!simple) { | |
906 | AV_COPY64(top_border+16, src_cb + 7*uvlinesize); | |
907 | AV_COPY64(top_border+24, src_cr + 7*uvlinesize); | |
908 | } | |
909 | } | |
910 | ||
911 | static av_always_inline | |
912 | void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, | |
913 | int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, | |
914 | int simple, int xchg) | |
915 | { | |
916 | uint8_t *top_border_m1 = top_border-32; // for TL prediction | |
917 | src_y -= linesize; | |
918 | src_cb -= uvlinesize; | |
919 | src_cr -= uvlinesize; | |
920 | ||
096971e8 MR |
921 | #define XCHG(a,b,xchg) do { \ |
922 | if (xchg) AV_SWAP64(b,a); \ | |
923 | else AV_COPY64(b,a); \ | |
924 | } while (0) | |
9ac831c2 DC |
925 | |
926 | XCHG(top_border_m1+8, src_y-8, xchg); | |
927 | XCHG(top_border, src_y, xchg); | |
928 | XCHG(top_border+8, src_y+8, 1); | |
070ce7ef | 929 | if (mb_x < mb_width-1) |
9ac831c2 | 930 | XCHG(top_border+32, src_y+16, 1); |
070ce7ef | 931 | |
9ac831c2 DC |
932 | // only copy chroma for normal loop filter |
933 | // or to initialize the top row to 127 | |
934 | if (!simple || !mb_y) { | |
935 | XCHG(top_border_m1+16, src_cb-8, xchg); | |
936 | XCHG(top_border_m1+24, src_cr-8, xchg); | |
937 | XCHG(top_border+16, src_cb, 1); | |
938 | XCHG(top_border+24, src_cr, 1); | |
939 | } | |
940 | } | |
941 | ||
414ac27d | 942 | static av_always_inline |
ee555de7 RB |
943 | int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) |
944 | { | |
945 | if (!mb_x) { | |
946 | return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; | |
947 | } else { | |
948 | return mb_y ? mode : LEFT_DC_PRED8x8; | |
949 | } | |
950 | } | |
951 | ||
952 | static av_always_inline | |
953 | int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y) | |
954 | { | |
955 | if (!mb_x) { | |
956 | return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8; | |
957 | } else { | |
958 | return mb_y ? mode : HOR_PRED8x8; | |
959 | } | |
960 | } | |
961 | ||
962 | static av_always_inline | |
963 | int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y) | |
3b636f21 DC |
964 | { |
965 | if (mode == DC_PRED8x8) { | |
ee555de7 RB |
966 | return check_dc_pred8x8_mode(mode, mb_x, mb_y); |
967 | } else { | |
968 | return mode; | |
969 | } | |
970 | } | |
971 | ||
972 | static av_always_inline | |
973 | int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y) | |
974 | { | |
975 | switch (mode) { | |
976 | case DC_PRED8x8: | |
977 | return check_dc_pred8x8_mode(mode, mb_x, mb_y); | |
978 | case VERT_PRED8x8: | |
979 | return !mb_y ? DC_127_PRED8x8 : mode; | |
980 | case HOR_PRED8x8: | |
981 | return !mb_x ? DC_129_PRED8x8 : mode; | |
982 | case PLANE_PRED8x8 /*TM*/: | |
983 | return check_tm_pred8x8_mode(mode, mb_x, mb_y); | |
984 | } | |
985 | return mode; | |
986 | } | |
987 | ||
988 | static av_always_inline | |
989 | int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y) | |
990 | { | |
991 | if (!mb_x) { | |
992 | return mb_y ? VERT_VP8_PRED : DC_129_PRED; | |
993 | } else { | |
994 | return mb_y ? mode : HOR_VP8_PRED; | |
995 | } | |
996 | } | |
997 | ||
998 | static av_always_inline | |
999 | int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf) | |
1000 | { | |
1001 | switch (mode) { | |
1002 | case VERT_PRED: | |
1003 | if (!mb_x && mb_y) { | |
1004 | *copy_buf = 1; | |
1005 | return mode; | |
1006 | } | |
1007 | /* fall-through */ | |
1008 | case DIAG_DOWN_LEFT_PRED: | |
1009 | case VERT_LEFT_PRED: | |
1010 | return !mb_y ? DC_127_PRED : mode; | |
1011 | case HOR_PRED: | |
1012 | if (!mb_y) { | |
1013 | *copy_buf = 1; | |
1014 | return mode; | |
a71abb71 | 1015 | } |
ee555de7 RB |
1016 | /* fall-through */ |
1017 | case HOR_UP_PRED: | |
1018 | return !mb_x ? DC_129_PRED : mode; | |
1019 | case TM_VP8_PRED: | |
1020 | return check_tm_pred4x4_mode(mode, mb_x, mb_y); | |
1021 | case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC | |
1022 | case DIAG_DOWN_RIGHT_PRED: | |
1023 | case VERT_RIGHT_PRED: | |
1024 | case HOR_DOWN_PRED: | |
1025 | if (!mb_y || !mb_x) | |
1026 | *copy_buf = 1; | |
1027 | return mode; | |
3b636f21 DC |
1028 | } |
1029 | return mode; | |
1030 | } | |
1031 | ||
414ac27d | 1032 | static av_always_inline |
951455c1 DK |
1033 | void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], |
1034 | VP8Macroblock *mb, int mb_x, int mb_y) | |
3b636f21 | 1035 | { |
ee555de7 | 1036 | AVCodecContext *avctx = s->avctx; |
bb591566 MR |
1037 | int x, y, mode, nnz; |
1038 | uint32_t tr; | |
3b636f21 | 1039 | |
9ac831c2 DC |
1040 | // for the first row, we need to run xchg_mb_border to init the top edge to 127 |
1041 | // otherwise, skip it if we aren't going to deblock | |
951455c1 | 1042 | if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0) |
9ac831c2 DC |
1043 | xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
1044 | s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, | |
1045 | s->filter.simple, 1); | |
1046 | ||
3b636f21 | 1047 | if (mb->mode < MODE_I4x4) { |
ee555de7 RB |
1048 | if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested |
1049 | mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y); | |
1050 | } else { | |
1051 | mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y); | |
1052 | } | |
3b636f21 DC |
1053 | s->hpc.pred16x16[mode](dst[0], s->linesize); |
1054 | } else { | |
1055 | uint8_t *ptr = dst[0]; | |
17343e39 | 1056 | uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; |
ee555de7 | 1057 | uint8_t tr_top[4] = { 127, 127, 127, 127 }; |
3b636f21 DC |
1058 | |
1059 | // all blocks on the right edge of the macroblock use bottom edge | |
1060 | // the top macroblock for their topright edge | |
1061 | uint8_t *tr_right = ptr - s->linesize + 16; | |
1062 | ||
1063 | // if we're on the right edge of the frame, said edge is extended | |
1064 | // from the top macroblock | |
7148da48 RB |
1065 | if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) && |
1066 | mb_x == s->mb_width-1) { | |
bb591566 | 1067 | tr = tr_right[-1]*0x01010101u; |
3b636f21 DC |
1068 | tr_right = (uint8_t *)&tr; |
1069 | } | |
1070 | ||
b74f70d6 | 1071 | if (mb->skip) |
951455c1 | 1072 | AV_ZERO128(td->non_zero_count_cache); |
b74f70d6 | 1073 | |
3b636f21 DC |
1074 | for (y = 0; y < 4; y++) { |
1075 | uint8_t *topright = ptr + 4 - s->linesize; | |
1076 | for (x = 0; x < 4; x++) { | |
ee555de7 RB |
1077 | int copy = 0, linesize = s->linesize; |
1078 | uint8_t *dst = ptr+4*x; | |
1079 | DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8]; | |
1080 | ||
1081 | if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) { | |
1082 | topright = tr_top; | |
1083 | } else if (x == 3) | |
3b636f21 DC |
1084 | topright = tr_right; |
1085 | ||
ee555de7 RB |
1086 | if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works |
1087 | mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©); | |
1088 | if (copy) { | |
1089 | dst = copy_dst + 12; | |
1090 | linesize = 8; | |
1091 | if (!(mb_y + y)) { | |
1092 | copy_dst[3] = 127U; | |
9d4bdcb7 | 1093 | AV_WN32A(copy_dst+4, 127U * 0x01010101U); |
ee555de7 | 1094 | } else { |
9d4bdcb7 | 1095 | AV_COPY32(copy_dst+4, ptr+4*x-s->linesize); |
ee555de7 RB |
1096 | if (!(mb_x + x)) { |
1097 | copy_dst[3] = 129U; | |
1098 | } else { | |
1099 | copy_dst[3] = ptr[4*x-s->linesize-1]; | |
1100 | } | |
1101 | } | |
1102 | if (!(mb_x + x)) { | |
1103 | copy_dst[11] = | |
1104 | copy_dst[19] = | |
1105 | copy_dst[27] = | |
1106 | copy_dst[35] = 129U; | |
1107 | } else { | |
1108 | copy_dst[11] = ptr[4*x -1]; | |
1109 | copy_dst[19] = ptr[4*x+s->linesize -1]; | |
1110 | copy_dst[27] = ptr[4*x+s->linesize*2-1]; | |
1111 | copy_dst[35] = ptr[4*x+s->linesize*3-1]; | |
1112 | } | |
1113 | } | |
1114 | } else { | |
1115 | mode = intra4x4[x]; | |
1116 | } | |
1117 | s->hpc.pred4x4[mode](dst, topright, linesize); | |
1118 | if (copy) { | |
9d4bdcb7 RB |
1119 | AV_COPY32(ptr+4*x , copy_dst+12); |
1120 | AV_COPY32(ptr+4*x+s->linesize , copy_dst+20); | |
1121 | AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28); | |
1122 | AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36); | |
ee555de7 | 1123 | } |
3b636f21 | 1124 | |
951455c1 | 1125 | nnz = td->non_zero_count_cache[y][x]; |
3b636f21 DC |
1126 | if (nnz) { |
1127 | if (nnz == 1) | |
951455c1 | 1128 | s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize); |
3b636f21 | 1129 | else |
951455c1 | 1130 | s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize); |
3b636f21 DC |
1131 | } |
1132 | topright += 4; | |
1133 | } | |
1134 | ||
1135 | ptr += 4*s->linesize; | |
d2840fa4 | 1136 | intra4x4 += 4; |
3b636f21 DC |
1137 | } |
1138 | } | |
1139 | ||
ee555de7 | 1140 | if (avctx->flags & CODEC_FLAG_EMU_EDGE) { |
17343e39 | 1141 | mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y); |
ee555de7 | 1142 | } else { |
17343e39 | 1143 | mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y); |
ee555de7 | 1144 | } |
3b636f21 DC |
1145 | s->hpc.pred8x8[mode](dst[1], s->uvlinesize); |
1146 | s->hpc.pred8x8[mode](dst[2], s->uvlinesize); | |
9ac831c2 | 1147 | |
951455c1 | 1148 | if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0) |
9ac831c2 DC |
1149 | xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
1150 | s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, | |
1151 | s->filter.simple, 0); | |
3b636f21 DC |
1152 | } |
1153 | ||
/* Per-subpel-position lookup, indexed by the 3-bit fractional part of a
 * motion vector component. */
static const uint8_t subpel_idx[3][8] = {
    /* [0] extra pixels needed on the left/top; doubles as the index into
     *     the MC function pointer table */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1] total number of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2] extra pixels needed on the right/bottom */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1160 | ||
3b636f21 | 1161 | /** |
3c432e11 | 1162 | * luma MC function |
3b636f21 DC |
1163 | * |
1164 | * @param s VP8 decoding context | |
3b636f21 | 1165 | * @param dst target buffer for block data at block position |
24c9baba | 1166 | * @param ref reference picture buffer at origin (0, 0) |
3b636f21 DC |
1167 | * @param mv motion vector (relative to block position) to get pixel data from |
1168 | * @param x_off horizontal position of block from origin (0, 0) | |
1169 | * @param y_off vertical position of block from origin (0, 0) | |
1170 | * @param block_w width of block (16, 8 or 4) | |
1171 | * @param block_h height of block (always same as block_w) | |
1172 | * @param width width of src/dst plane data | |
1173 | * @param height height of src/dst plane data | |
1174 | * @param linesize size of a single line of plane data, including padding | |
e394953e | 1175 | * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
3b636f21 | 1176 | */ |
414ac27d | 1177 | static av_always_inline |
951455c1 DK |
1178 | void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, |
1179 | AVFrame *ref, const VP56mv *mv, | |
64233e70 JGG |
1180 | int x_off, int y_off, int block_w, int block_h, |
1181 | int width, int height, int linesize, | |
1182 | vp8_mc_func mc_func[3][3]) | |
3b636f21 | 1183 | { |
4773d904 RB |
1184 | uint8_t *src = ref->data[0]; |
1185 | ||
c0498b30 | 1186 | if (AV_RN32A(mv)) { |
64233e70 JGG |
1187 | |
1188 | int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx]; | |
1189 | int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my]; | |
1190 | ||
1191 | x_off += mv->x >> 2; | |
1192 | y_off += mv->y >> 2; | |
c0498b30 JGG |
1193 | |
1194 | // edge emulation | |
4773d904 | 1195 | ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0); |
c0498b30 | 1196 | src += y_off * linesize + x_off; |
64233e70 JGG |
1197 | if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || |
1198 | y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { | |
951455c1 | 1199 | s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize, |
64233e70 JGG |
1200 | block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], |
1201 | x_off - mx_idx, y_off - my_idx, width, height); | |
951455c1 | 1202 | src = td->edge_emu_buffer + mx_idx + linesize * my_idx; |
c0498b30 JGG |
1203 | } |
1204 | mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); | |
4773d904 RB |
1205 | } else { |
1206 | ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0); | |
c0498b30 | 1207 | mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); |
4773d904 | 1208 | } |
3b636f21 DC |
1209 | } |
1210 | ||
3c432e11 DB |
1211 | /** |
1212 | * chroma MC function | |
1213 | * | |
1214 | * @param s VP8 decoding context | |
1215 | * @param dst1 target buffer for block data at block position (U plane) | |
1216 | * @param dst2 target buffer for block data at block position (V plane) | |
1217 | * @param ref reference picture buffer at origin (0, 0) | |
1218 | * @param mv motion vector (relative to block position) to get pixel data from | |
1219 | * @param x_off horizontal position of block from origin (0, 0) | |
1220 | * @param y_off vertical position of block from origin (0, 0) | |
1221 | * @param block_w width of block (16, 8 or 4) | |
1222 | * @param block_h height of block (always same as block_w) | |
1223 | * @param width width of src/dst plane data | |
1224 | * @param height height of src/dst plane data | |
1225 | * @param linesize size of a single line of plane data, including padding | |
1226 | * @param mc_func motion compensation function pointers (bilinear or sixtap MC) | |
1227 | */ | |
414ac27d | 1228 | static av_always_inline |
951455c1 DK |
1229 | void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2, |
1230 | AVFrame *ref, const VP56mv *mv, int x_off, int y_off, | |
64233e70 JGG |
1231 | int block_w, int block_h, int width, int height, int linesize, |
1232 | vp8_mc_func mc_func[3][3]) | |
1233 | { | |
4773d904 RB |
1234 | uint8_t *src1 = ref->data[1], *src2 = ref->data[2]; |
1235 | ||
64233e70 JGG |
1236 | if (AV_RN32A(mv)) { |
1237 | int mx = mv->x&7, mx_idx = subpel_idx[0][mx]; | |
1238 | int my = mv->y&7, my_idx = subpel_idx[0][my]; | |
1239 | ||
1240 | x_off += mv->x >> 3; | |
1241 | y_off += mv->y >> 3; | |
1242 | ||
1243 | // edge emulation | |
1244 | src1 += y_off * linesize + x_off; | |
1245 | src2 += y_off * linesize + x_off; | |
4773d904 | 1246 | ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0); |
64233e70 JGG |
1247 | if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || |
1248 | y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { | |
951455c1 | 1249 | s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize, |
64233e70 JGG |
1250 | block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], |
1251 | x_off - mx_idx, y_off - my_idx, width, height); | |
951455c1 | 1252 | src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx; |
64233e70 JGG |
1253 | mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); |
1254 | ||
951455c1 | 1255 | s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize, |
64233e70 JGG |
1256 | block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], |
1257 | x_off - mx_idx, y_off - my_idx, width, height); | |
951455c1 | 1258 | src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx; |
64233e70 JGG |
1259 | mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); |
1260 | } else { | |
1261 | mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); | |
1262 | mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); | |
1263 | } | |
1264 | } else { | |
4773d904 | 1265 | ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0); |
64233e70 JGG |
1266 | mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); |
1267 | mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); | |
1268 | } | |
1269 | } | |
1270 | ||
1271 | static av_always_inline | |
951455c1 | 1272 | void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], |
414ac27d JGG |
1273 | AVFrame *ref_frame, int x_off, int y_off, |
1274 | int bx_off, int by_off, | |
1275 | int block_w, int block_h, | |
1276 | int width, int height, VP56mv *mv) | |
7c4dcf81 RB |
1277 | { |
1278 | VP56mv uvmv = *mv; | |
1279 | ||
1280 | /* Y */ | |
951455c1 | 1281 | vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off, |
4773d904 | 1282 | ref_frame, mv, x_off + bx_off, y_off + by_off, |
64233e70 JGG |
1283 | block_w, block_h, width, height, s->linesize, |
1284 | s->put_pixels_tab[block_w == 8]); | |
7c4dcf81 RB |
1285 | |
1286 | /* U/V */ | |
1287 | if (s->profile == 3) { | |
1288 | uvmv.x &= ~7; | |
1289 | uvmv.y &= ~7; | |
1290 | } | |
1291 | x_off >>= 1; y_off >>= 1; | |
1292 | bx_off >>= 1; by_off >>= 1; | |
1293 | width >>= 1; height >>= 1; | |
1294 | block_w >>= 1; block_h >>= 1; | |
951455c1 | 1295 | vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off, |
4773d904 RB |
1296 | dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, |
1297 | &uvmv, x_off + bx_off, y_off + by_off, | |
64233e70 JGG |
1298 | block_w, block_h, width, height, s->uvlinesize, |
1299 | s->put_pixels_tab[1 + (block_w == 4)]); | |
7c4dcf81 RB |
1300 | } |
1301 | ||
d864dee8 JGG |
1302 | /* Fetch pixels for estimated mv 4 macroblocks ahead. |
1303 | * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ | |
414ac27d | 1304 | static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) |
d864dee8 | 1305 | { |
ef38842f JGG |
1306 | /* Don't prefetch refs that haven't been used very often this frame. */ |
1307 | if (s->ref_count[ref-1] > (mb_xy >> 5)) { | |
c4211046 | 1308 | int x_off = mb_x << 4, y_off = mb_y << 4; |
7e13022a JGG |
1309 | int mx = (mb->mv.x>>2) + x_off + 8; |
1310 | int my = (mb->mv.y>>2) + y_off; | |
c4211046 JGG |
1311 | uint8_t **src= s->framep[ref]->data; |
1312 | int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; | |
4773d904 RB |
1313 | /* For threading, a ff_thread_await_progress here might be useful, but |
1314 | * it actually slows down the decoder. Since a bad prefetch doesn't | |
1315 | * generate bad decoder output, we don't run it here. */ | |
c4211046 JGG |
1316 | s->dsp.prefetch(src[0]+off, s->linesize, 4); |
1317 | off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; | |
1318 | s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |
1319 | } | |
d864dee8 JGG |
1320 | } |
1321 | ||
3b636f21 DC |
1322 | /** |
1323 | * Apply motion vectors to prediction buffer, chapter 18. | |
1324 | */ | |
414ac27d | 1325 | static av_always_inline |
951455c1 DK |
1326 | void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], |
1327 | VP8Macroblock *mb, int mb_x, int mb_y) | |
3b636f21 DC |
1328 | { |
1329 | int x_off = mb_x << 4, y_off = mb_y << 4; | |
1330 | int width = 16*s->mb_width, height = 16*s->mb_height; | |
d292c345 JGG |
1331 | AVFrame *ref = s->framep[mb->ref_frame]; |
1332 | VP56mv *bmv = mb->bmv; | |
3b636f21 | 1333 | |
73be29b0 JGG |
1334 | switch (mb->partitioning) { |
1335 | case VP8_SPLITMVMODE_NONE: | |
951455c1 | 1336 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
7c4dcf81 | 1337 | 0, 0, 16, 16, width, height, &mb->mv); |
73be29b0 | 1338 | break; |
7c4dcf81 | 1339 | case VP8_SPLITMVMODE_4x4: { |
3b636f21 | 1340 | int x, y; |
7c4dcf81 | 1341 | VP56mv uvmv; |
3b636f21 DC |
1342 | |
1343 | /* Y */ | |
1344 | for (y = 0; y < 4; y++) { | |
1345 | for (x = 0; x < 4; x++) { | |
951455c1 | 1346 | vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4, |
4773d904 | 1347 | ref, &bmv[4*y + x], |
64233e70 JGG |
1348 | 4*x + x_off, 4*y + y_off, 4, 4, |
1349 | width, height, s->linesize, | |
1350 | s->put_pixels_tab[2]); | |
3b636f21 DC |
1351 | } |
1352 | } | |
1353 | ||
1354 | /* U/V */ | |
1355 | x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; | |
1356 | for (y = 0; y < 2; y++) { | |
1357 | for (x = 0; x < 2; x++) { | |
1358 | uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + | |
1359 | mb->bmv[ 2*y * 4 + 2*x+1].x + | |
1360 | mb->bmv[(2*y+1) * 4 + 2*x ].x + | |
1361 | mb->bmv[(2*y+1) * 4 + 2*x+1].x; | |
1362 | uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + | |
1363 | mb->bmv[ 2*y * 4 + 2*x+1].y + | |
1364 | mb->bmv[(2*y+1) * 4 + 2*x ].y + | |
1365 | mb->bmv[(2*y+1) * 4 + 2*x+1].y; | |
8f910a56 SG |
1366 | uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2; |
1367 | uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2; | |
3b636f21 DC |
1368 | if (s->profile == 3) { |
1369 | uvmv.x &= ~7; | |
1370 | uvmv.y &= ~7; | |
1371 | } | |
951455c1 | 1372 | vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4, |
4773d904 | 1373 | dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv, |
64233e70 JGG |
1374 | 4*x + x_off, 4*y + y_off, 4, 4, |
1375 | width, height, s->uvlinesize, | |
1376 | s->put_pixels_tab[2]); | |
3b636f21 DC |
1377 | } |
1378 | } | |
7c4dcf81 RB |
1379 | break; |
1380 | } | |
1381 | case VP8_SPLITMVMODE_16x8: | |
951455c1 | 1382 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1383 | 0, 0, 16, 8, width, height, &bmv[0]); |
951455c1 | 1384 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1385 | 0, 8, 16, 8, width, height, &bmv[1]); |
7c4dcf81 RB |
1386 | break; |
1387 | case VP8_SPLITMVMODE_8x16: | |
951455c1 | 1388 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1389 | 0, 0, 8, 16, width, height, &bmv[0]); |
951455c1 | 1390 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1391 | 8, 0, 8, 16, width, height, &bmv[1]); |
7c4dcf81 RB |
1392 | break; |
1393 | case VP8_SPLITMVMODE_8x8: | |
951455c1 | 1394 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1395 | 0, 0, 8, 8, width, height, &bmv[0]); |
951455c1 | 1396 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1397 | 8, 0, 8, 8, width, height, &bmv[1]); |
951455c1 | 1398 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1399 | 0, 8, 8, 8, width, height, &bmv[2]); |
951455c1 | 1400 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
d292c345 | 1401 | 8, 8, 8, 8, width, height, &bmv[3]); |
7c4dcf81 | 1402 | break; |
3b636f21 DC |
1403 | } |
1404 | } | |
1405 | ||
951455c1 DK |
1406 | static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td, |
1407 | uint8_t *dst[3], VP8Macroblock *mb) | |
3b636f21 | 1408 | { |
3df56f41 | 1409 | int x, y, ch; |
3b636f21 | 1410 | |
8a467b2d JGG |
1411 | if (mb->mode != MODE_I4x4) { |
1412 | uint8_t *y_dst = dst[0]; | |
3b636f21 | 1413 | for (y = 0; y < 4; y++) { |
951455c1 | 1414 | uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]); |
3df56f41 JGG |
1415 | if (nnz4) { |
1416 | if (nnz4&~0x01010101) { | |
8a467b2d | 1417 | for (x = 0; x < 4; x++) { |
62457f90 | 1418 | if ((uint8_t)nnz4 == 1) |
951455c1 | 1419 | s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize); |
62457f90 | 1420 | else if((uint8_t)nnz4 > 1) |
951455c1 | 1421 | s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize); |
62457f90 JGG |
1422 | nnz4 >>= 8; |
1423 | if (!nnz4) | |
1424 | break; | |
8a467b2d JGG |
1425 | } |
1426 | } else { | |
951455c1 | 1427 | s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize); |
3b636f21 DC |
1428 | } |
1429 | } | |
1430 | y_dst += 4*s->linesize; | |
1431 | } | |
8a467b2d | 1432 | } |
3b636f21 | 1433 | |
8a467b2d | 1434 | for (ch = 0; ch < 2; ch++) { |
951455c1 | 1435 | uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]); |
3ae079a3 | 1436 | if (nnz4) { |
8a467b2d | 1437 | uint8_t *ch_dst = dst[1+ch]; |
3ae079a3 JGG |
1438 | if (nnz4&~0x01010101) { |
1439 | for (y = 0; y < 2; y++) { | |
1440 | for (x = 0; x < 2; x++) { | |
62457f90 | 1441 | if ((uint8_t)nnz4 == 1) |
951455c1 | 1442 | s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize); |
62457f90 | 1443 | else if((uint8_t)nnz4 > 1) |
951455c1 | 1444 | s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize); |
62457f90 JGG |
1445 | nnz4 >>= 8; |
1446 | if (!nnz4) | |
628b48db | 1447 | goto chroma_idct_end; |
8a467b2d | 1448 | } |
3ae079a3 | 1449 | ch_dst += 4*s->uvlinesize; |
8a467b2d | 1450 | } |
3ae079a3 | 1451 | } else { |
951455c1 | 1452 | s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize); |
3b636f21 DC |
1453 | } |
1454 | } | |
628b48db | 1455 | chroma_idct_end: ; |
3b636f21 DC |
1456 | } |
1457 | } | |
1458 | ||
414ac27d | 1459 | static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) |
3b636f21 DC |
1460 | { |
1461 | int interior_limit, filter_level; | |
1462 | ||
1463 | if (s->segmentation.enabled) { | |
17343e39 | 1464 | filter_level = s->segmentation.filter_level[mb->segment]; |
3b636f21 DC |
1465 | if (!s->segmentation.absolute_vals) |
1466 | filter_level += s->filter.level; | |
1467 | } else | |
1468 | filter_level = s->filter.level; | |
1469 | ||
1470 | if (s->lf_delta.enabled) { | |
1471 | filter_level += s->lf_delta.ref[mb->ref_frame]; | |
dd18c9a0 | 1472 | filter_level += s->lf_delta.mode[mb->mode]; |
3b636f21 | 1473 | } |
a1b227bb | 1474 | |
1550f45a | 1475 | filter_level = av_clip_uintp2(filter_level, 6); |
3b636f21 DC |
1476 | |
1477 | interior_limit = filter_level; | |
1478 | if (s->filter.sharpness) { | |
8a2c99b4 | 1479 | interior_limit >>= (s->filter.sharpness + 3) >> 2; |
3b636f21 DC |
1480 | interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); |
1481 | } | |
1482 | interior_limit = FFMAX(interior_limit, 1); | |
1483 | ||
968570d6 JGG |
1484 | f->filter_level = filter_level; |
1485 | f->inner_limit = interior_limit; | |
c55e0d34 | 1486 | f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; |
3b636f21 DC |
1487 | } |
1488 | ||
414ac27d | 1489 | static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) |
3b636f21 | 1490 | { |
968570d6 JGG |
1491 | int mbedge_lim, bedge_lim, hev_thresh; |
1492 | int filter_level = f->filter_level; | |
1493 | int inner_limit = f->inner_limit; | |
c55e0d34 | 1494 | int inner_filter = f->inner_filter; |
145d3186 JGG |
1495 | int linesize = s->linesize; |
1496 | int uvlinesize = s->uvlinesize; | |
79dec154 JGG |
1497 | static const uint8_t hev_thresh_lut[2][64] = { |
1498 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, | |
1499 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
1500 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
1501 | 3, 3, 3, 3 }, | |
1502 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, | |
1503 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1504 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
1505 | 2, 2, 2, 2 } | |
1506 | }; | |
3b636f21 | 1507 | |
3b636f21 DC |
1508 | if (!filter_level) |
1509 | return; | |
1510 | ||
79dec154 JGG |
1511 | bedge_lim = 2*filter_level + inner_limit; |
1512 | mbedge_lim = bedge_lim + 4; | |
968570d6 | 1513 | |
79dec154 | 1514 | hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; |
5245c04d | 1515 | |
3b636f21 | 1516 | if (mb_x) { |
145d3186 | 1517 | s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, |
3facfc99 | 1518 | mbedge_lim, inner_limit, hev_thresh); |
145d3186 | 1519 | s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, |
3facfc99 | 1520 | mbedge_lim, inner_limit, hev_thresh); |
3b636f21 DC |
1521 | } |
1522 | ||
c55e0d34 | 1523 | if (inner_filter) { |
145d3186 JGG |
1524 | s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim, |
1525 | inner_limit, hev_thresh); | |
1526 | s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim, | |
1527 | inner_limit, hev_thresh); | |
1528 | s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim, | |
1529 | inner_limit, hev_thresh); | |
1530 | s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, | |
1531 | uvlinesize, bedge_lim, | |
1532 | inner_limit, hev_thresh); | |
3b636f21 DC |
1533 | } |
1534 | ||
1535 | if (mb_y) { | |
145d3186 | 1536 | s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, |
3facfc99 | 1537 | mbedge_lim, inner_limit, hev_thresh); |
145d3186 | 1538 | s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, |
3facfc99 | 1539 | mbedge_lim, inner_limit, hev_thresh); |
3b636f21 DC |
1540 | } |
1541 | ||
c55e0d34 | 1542 | if (inner_filter) { |
145d3186 JGG |
1543 | s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize, |
1544 | linesize, bedge_lim, | |
1545 | inner_limit, hev_thresh); | |
1546 | s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize, | |
1547 | linesize, bedge_lim, | |
1548 | inner_limit, hev_thresh); | |
1549 | s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize, | |
1550 | linesize, bedge_lim, | |
1551 | inner_limit, hev_thresh); | |
1552 | s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, | |
1553 | dst[2] + 4 * uvlinesize, | |
1554 | uvlinesize, bedge_lim, | |
3facfc99 | 1555 | inner_limit, hev_thresh); |
3b636f21 DC |
1556 | } |
1557 | } | |
1558 | ||
414ac27d | 1559 | static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) |
3b636f21 | 1560 | { |
968570d6 JGG |
1561 | int mbedge_lim, bedge_lim; |
1562 | int filter_level = f->filter_level; | |
1563 | int inner_limit = f->inner_limit; | |
c55e0d34 | 1564 | int inner_filter = f->inner_filter; |
145d3186 | 1565 | int linesize = s->linesize; |
3b636f21 | 1566 | |
3b636f21 DC |
1567 | if (!filter_level) |
1568 | return; | |
1569 | ||
79dec154 JGG |
1570 | bedge_lim = 2*filter_level + inner_limit; |
1571 | mbedge_lim = bedge_lim + 4; | |
3b636f21 DC |
1572 | |
1573 | if (mb_x) | |
145d3186 | 1574 | s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); |
c55e0d34 | 1575 | if (inner_filter) { |
145d3186 JGG |
1576 | s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim); |
1577 | s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim); | |
1578 | s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim); | |
3b636f21 DC |
1579 | } |
1580 | ||
1581 | if (mb_y) | |
145d3186 | 1582 | s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); |
c55e0d34 | 1583 | if (inner_filter) { |
145d3186 JGG |
1584 | s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim); |
1585 | s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim); | |
1586 | s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim); | |
3b636f21 DC |
1587 | } |
1588 | } | |
1589 | ||
ce42a048 RB |
1590 | static void release_queued_segmaps(VP8Context *s, int is_close) |
1591 | { | |
1592 | int leave_behind = is_close ? 0 : !s->maps_are_invalid; | |
1593 | while (s->num_maps_to_be_freed > leave_behind) | |
1594 | av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]); | |
1595 | s->maps_are_invalid = 0; | |
1596 | } | |
1597 | ||
337ade52 | 1598 | #define MARGIN (16 << 2) |
951455c1 DK |
1599 | static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe, |
1600 | AVFrame *prev_frame) | |
337ade52 DK |
1601 | { |
1602 | VP8Context *s = avctx->priv_data; | |
951455c1 DK |
1603 | int mb_x, mb_y; |
1604 | ||
1605 | s->mv_min.y = -MARGIN; | |
1606 | s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; | |
1607 | for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | |
1608 | VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1); | |
1609 | int mb_xy = mb_y*s->mb_width; | |
1610 | ||
1611 | AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); | |
1612 | ||
1613 | s->mv_min.x = -MARGIN; | |
1614 | s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; | |
1615 | for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { | |
1616 | if (mb_y == 0) | |
1617 | AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101); | |
1618 | decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy, | |
1619 | prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1); | |
1620 | s->mv_min.x -= 64; | |
1621 | s->mv_max.x -= 64; | |
1622 | } | |
1623 | s->mv_min.y -= 64; | |
1624 | s->mv_max.y -= 64; | |
1625 | } | |
1626 | } | |
1627 | ||
#if HAVE_THREADS
/*
 * Row-level synchronisation between slice threads.
 *
 * A position is packed as (row << 16) | (column & 0xFFFF), so comparing two
 * packed positions orders them by row first and column second.
 *
 * Both macros expand to a single statement and must be followed by ';'.
 */

/*
 * Block until thread data `otd` has published a position that is at least
 * (mb_y_check, mb_x_check). While waiting, `td` advertises the position it is
 * waiting for in td->wait_mb_pos; it is reset to INT_MAX afterwards.
 *
 * NOTE(review): the first read of otd->thread_mb_pos happens without holding
 * otd->lock -- confirm this is acceptable on all supported targets.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

/*
 * Publish (mb_y, mb_x) as the current position of `td`. Waiters are woken only
 * in slice-threading mode with more than one job, and only when a neighbouring
 * thread is waiting for a position that has now been reached, or when either
 * neighbour pointer is NULL.
 *
 * Uses `avctx`, `num_jobs`, `next_td` and `prev_td` from the calling scope.
 */
#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
#else
/* Without thread support both operations are no-ops that still behave as a
 * single statement, so call sites look the same in both configurations. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
951455c1 DK |
1664 | |
1665 | static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, | |
1666 | int jobnr, int threadnr) | |
1667 | { | |
1668 | VP8Context *s = avctx->priv_data; | |
1669 | VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr]; | |
1670 | int mb_y = td->thread_mb_pos>>16; | |
337ade52 | 1671 | int i, y, mb_x, mb_xy = mb_y*s->mb_width; |
951455c1 DK |
1672 | int num_jobs = s->num_jobs; |
1673 | AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame; | |
1674 | VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; | |
1675 | VP8Macroblock *mb; | |
337ade52 DK |
1676 | uint8_t *dst[3] = { |
1677 | curframe->data[0] + 16*mb_y*s->linesize, | |
1678 | curframe->data[1] + 8*mb_y*s->uvlinesize, | |
1679 | curframe->data[2] + 8*mb_y*s->uvlinesize | |
1680 | }; | |
951455c1 DK |
1681 | if (mb_y == 0) prev_td = td; |
1682 | else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs]; | |
1683 | if (mb_y == s->mb_height-1) next_td = td; | |
1684 | else next_td = &s->thread_data[(jobnr + 1)%num_jobs]; | |
1685 | if (s->mb_layout == 1) | |
1686 | mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1); | |
1687 | else { | |
1688 | mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; | |
1689 | memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock | |
1690 | AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); | |
1691 | } | |
1692 | ||
1693 | memset(td->left_nnz, 0, sizeof(td->left_nnz)); | |
337ade52 DK |
1694 | // left edge of 129 for intra prediction |
1695 | if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { | |
1696 | for (i = 0; i < 3; i++) | |
1697 | for (y = 0; y < 16>>!!i; y++) | |
1698 | dst[i][y*curframe->linesize[i]-1] = 129; | |
951455c1 | 1699 | if (mb_y == 1) { |
337ade52 | 1700 | s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129; |
951455c1 | 1701 | } |
337ade52 DK |
1702 | } |
1703 | ||
1704 | s->mv_min.x = -MARGIN; | |
1705 | s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; | |
1706 | ||
1707 | for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { | |
951455c1 DK |
1708 | // Wait for previous thread to read mb_x+2, and reach mb_y-1. |
1709 | if (prev_td != td) { | |
1710 | if (threadnr != 0) { | |
1711 | check_thread_pos(td, prev_td, mb_x+1, mb_y-1); | |
1712 | } else { | |
1713 | check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1); | |
1714 | } | |
1715 | } | |
1716 | ||
337ade52 DK |
1717 | s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); |
1718 | s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | |
1719 | ||
951455c1 DK |
1720 | if (!s->mb_layout) |
1721 | decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy, | |
1722 | prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0); | |
337ade52 DK |
1723 | |
1724 | prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); | |
1725 | ||
1726 | if (!mb->skip) | |
951455c1 | 1727 | decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz); |
337ade52 DK |
1728 | |
1729 | if (mb->mode <= MODE_I4x4) | |
951455c1 | 1730 | intra_predict(s, td, dst, mb, mb_x, mb_y); |
337ade52 | 1731 | else |
951455c1 | 1732 | inter_predict(s, td, dst, mb, mb_x, mb_y); |
337ade52 DK |
1733 | |
1734 | prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); | |
1735 | ||
1736 | if (!mb->skip) { | |
951455c1 | 1737 | idct_mb(s, td, dst, mb); |
337ade52 | 1738 | } else { |
951455c1 | 1739 | AV_ZERO64(td->left_nnz); |
337ade52 DK |
1740 | AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned |
1741 | ||
1742 | // Reset DC block predictors if they would exist if the mb had coefficients | |
1743 | if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
951455c1 | 1744 | td->left_nnz[8] = 0; |
337ade52 DK |
1745 | s->top_nnz[mb_x][8] = 0; |
1746 | } | |
1747 | } | |
1748 | ||
1749 | if (s->deblock_filter) | |
951455c1 DK |
1750 | filter_level_for_mb(s, mb, &td->filter_strength[mb_x]); |
1751 | ||
1752 | if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) { | |
1753 | if (s->filter.simple) | |
1754 | backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1); | |
1755 | else | |
1756 | backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); | |
1757 | } | |
337ade52 DK |
1758 | |
1759 | prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); | |
1760 | ||
1761 | dst[0] += 16; | |
1762 | dst[1] += 8; | |
1763 | dst[2] += 8; | |
1764 | s->mv_min.x -= 64; | |
1765 | s->mv_max.x -= 64; | |
951455c1 DK |
1766 | |
1767 | if (mb_x == s->mb_width+1) { | |
1768 | update_pos(td, mb_y, s->mb_width+3); | |
1769 | } else { | |
1770 | update_pos(td, mb_y, mb_x); | |
1771 | } | |
337ade52 | 1772 | } |
951455c1 DK |
1773 | } |
1774 | ||
1775 | static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, | |
1776 | int jobnr, int threadnr) | |
1777 | { | |
1778 | VP8Context *s = avctx->priv_data; | |
1779 | VP8ThreadData *td = &s->thread_data[threadnr]; | |
1780 | int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs; | |
1781 | AVFrame *curframe = s->curframe; | |
1782 | VP8Macroblock *mb; | |
1783 | VP8ThreadData *prev_td, *next_td; | |
1784 | uint8_t *dst[3] = { | |
1785 | curframe->data[0] + 16*mb_y*s->linesize, | |
1786 | curframe->data[1] + 8*mb_y*s->uvlinesize, | |
1787 | curframe->data[2] + 8*mb_y*s->uvlinesize | |
1788 | }; | |
1789 | ||
1790 | if (s->mb_layout == 1) | |
1791 | mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1); | |
1792 | else | |
1793 | mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; | |
1794 | ||
1795 | if (mb_y == 0) prev_td = td; | |
1796 | else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs]; | |
1797 | if (mb_y == s->mb_height-1) next_td = td; | |
1798 | else next_td = &s->thread_data[(jobnr + 1)%num_jobs]; | |
1799 | ||
1800 | for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) { | |
1801 | VP8FilterStrength *f = &td->filter_strength[mb_x]; | |
1802 | if (prev_td != td) { | |
1803 | check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1); | |
1804 | } | |
1805 | if (next_td != td) | |
1806 | if (next_td != &s->thread_data[0]) { | |
1807 | check_thread_pos(td, next_td, mb_x+1, mb_y+1); | |
1808 | } | |
1809 | ||
1810 | if (num_jobs == 1) { | |
1811 | if (s->filter.simple) | |
1812 | backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1); | |
1813 | else | |
1814 | backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); | |
1815 | } | |
1816 | ||
337ade52 | 1817 | if (s->filter.simple) |
951455c1 | 1818 | filter_mb_simple(s, dst[0], f, mb_x, mb_y); |
337ade52 | 1819 | else |
951455c1 DK |
1820 | filter_mb(s, dst, f, mb_x, mb_y); |
1821 | dst[0] += 16; | |
1822 | dst[1] += 8; | |
1823 | dst[2] += 8; | |
1824 | ||
1825 | update_pos(td, mb_y, (s->mb_width+3) + mb_x); | |
1826 | } | |
1827 | } | |
1828 | ||
1829 | static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, | |
1830 | int jobnr, int threadnr) | |
1831 | { | |
1832 | VP8Context *s = avctx->priv_data; | |
1833 | VP8ThreadData *td = &s->thread_data[jobnr]; | |
1834 | VP8ThreadData *next_td = NULL, *prev_td = NULL; | |
1835 | AVFrame *curframe = s->curframe; | |
1836 | int mb_y, num_jobs = s->num_jobs; | |
1837 | td->thread_nr = threadnr; | |
1838 | for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { | |
1839 | if (mb_y >= s->mb_height) break; | |
1840 | td->thread_mb_pos = mb_y<<16; | |
1841 | vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); | |
1842 | if (s->deblock_filter) | |
1843 | vp8_filter_mb_row(avctx, tdata, jobnr, threadnr); | |
1844 | update_pos(td, mb_y, INT_MAX & 0xFFFF); | |
1845 | ||
1846 | s->mv_min.y -= 64; | |
1847 | s->mv_max.y -= 64; | |
1848 | ||
1849 | if (avctx->active_thread_type == FF_THREAD_FRAME) | |
1850 | ff_thread_report_progress(curframe, mb_y, 0); | |
337ade52 | 1851 | } |
951455c1 DK |
1852 | |
1853 | return 0; | |
337ade52 DK |
1854 | } |
1855 | ||
3b636f21 DC |
1856 | static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, |
1857 | AVPacket *avpkt) | |
1858 | { | |
1859 | VP8Context *s = avctx->priv_data; | |
951455c1 | 1860 | int ret, i, referenced, num_jobs; |
3b636f21 | 1861 | enum AVDiscard skip_thresh; |
e02dec25 | 1862 | AVFrame *av_uninit(curframe), *prev_frame; |
3b636f21 | 1863 | |
ce42a048 RB |
1864 | release_queued_segmaps(s, 0); |
1865 | ||
3b636f21 | 1866 | if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) |
fb90785e | 1867 | goto err; |
3b636f21 | 1868 | |
e02dec25 AC |
1869 | prev_frame = s->framep[VP56_FRAME_CURRENT]; |
1870 | ||
3b636f21 DC |
1871 | referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT |
1872 | || s->update_altref == VP56_FRAME_CURRENT; | |
1873 | ||
1874 | skip_thresh = !referenced ? AVDISCARD_NONREF : | |
1875 | !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; | |
1876 | ||
1877 | if (avctx->skip_frame >= skip_thresh) { | |
1878 | s->invisible = 1; | |
fb90785e | 1879 | memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); |
3b636f21 DC |
1880 | goto skip_decode; |
1881 | } | |
9ac831c2 | 1882 | s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; |
3b636f21 | 1883 | |
4773d904 RB |
1884 | // release no longer referenced frames |
1885 | for (i = 0; i < 5; i++) | |
1886 | if (s->frames[i].data[0] && | |
1887 | &s->frames[i] != prev_frame && | |
1888 | &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1889 | &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1890 | &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) | |
bfa0f965 | 1891 | vp8_release_frame(s, &s->frames[i], 1, 0); |
4773d904 RB |
1892 | |
1893 | // find a free buffer | |
1894 | for (i = 0; i < 5; i++) | |
1895 | if (&s->frames[i] != prev_frame && | |
1896 | &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
3b636f21 DC |
1897 | &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && |
1898 | &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { | |
1899 | curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; | |
1900 | break; | |
1901 | } | |
4773d904 RB |
1902 | if (i == 5) { |
1903 | av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n"); | |
1904 | abort(); | |
1905 | } | |
3b636f21 | 1906 | if (curframe->data[0]) |
bfa0f965 | 1907 | vp8_release_frame(s, curframe, 1, 0); |
3b636f21 | 1908 | |
fb90785e RB |
1909 | // Given that arithmetic probabilities are updated every frame, it's quite likely |
1910 | // that the values we have on a random interframe are complete junk if we didn't | |
1911 | // start decode on a keyframe. So just don't display anything rather than junk. | |
1912 | if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || | |
1913 | !s->framep[VP56_FRAME_GOLDEN] || | |
1914 | !s->framep[VP56_FRAME_GOLDEN2])) { | |
1915 | av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); | |
1916 | ret = AVERROR_INVALIDDATA; | |
1917 | goto err; | |
1918 | } | |
1919 | ||
3b636f21 | 1920 | curframe->key_frame = s->keyframe; |
975a1447 | 1921 | curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; |
3b636f21 | 1922 | curframe->reference = referenced ? 3 : 0; |
ce42a048 | 1923 | if ((ret = vp8_alloc_frame(s, curframe))) { |
3b636f21 | 1924 | av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); |
fb90785e | 1925 | goto err; |
3b636f21 DC |
1926 | } |
1927 | ||
4773d904 RB |
1928 | // check if golden and altref are swapped |
1929 | if (s->update_altref != VP56_FRAME_NONE) { | |
1930 | s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; | |
1931 | } else { | |
1932 | s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2]; | |
1933 | } | |
1934 | if (s->update_golden != VP56_FRAME_NONE) { | |
1935 | s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; | |
1936 | } else { | |
1937 | s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN]; | |
1938 | } | |
1939 | if (s->update_last) { | |
1940 | s->next_framep[VP56_FRAME_PREVIOUS] = curframe; | |
1941 | } else { | |
1942 | s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS]; | |
1943 | } | |
1944 | s->next_framep[VP56_FRAME_CURRENT] = curframe; | |
1945 | ||
1946 | ff_thread_finish_setup(avctx); | |
1947 | ||
3b636f21 DC |
1948 | s->linesize = curframe->linesize[0]; |
1949 | s->uvlinesize = curframe->linesize[1]; | |
1950 | ||
951455c1 DK |
1951 | if (!s->thread_data[0].edge_emu_buffer) |
1952 | for (i = 0; i < MAX_THREADS; i++) | |
1953 | s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize); | |
3b636f21 DC |
1954 | |
1955 | memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); | |
aa93c52c | 1956 | /* Zero macroblock structures for top/top-left prediction from outside the frame. */ |
951455c1 DK |
1957 | if (!s->mb_layout) |
1958 | memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks)); | |
1959 | if (!s->mb_layout && s->keyframe) | |
1960 | memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4); | |
c55e0d34 | 1961 | |
3b636f21 | 1962 | // top edge of 127 for intra prediction |
ee555de7 RB |
1963 | if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { |
1964 | s->top_border[0][15] = s->top_border[0][23] = 127; | |
1965 | memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1); | |
1966 | } | |
c4211046 | 1967 | memset(s->ref_count, 0, sizeof(s->ref_count)); |
3b636f21 | 1968 | |
7634771e | 1969 | |
951455c1 DK |
1970 | // Make sure the previous frame has read its segmentation map, |
1971 | // if we re-use the same map. | |
1972 | if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map) | |
1973 | ff_thread_await_progress(prev_frame, 1, 0); | |
7634771e | 1974 | |
951455c1 DK |
1975 | if (s->mb_layout == 1) |
1976 | vp8_decode_mv_mb_modes(avctx, curframe, prev_frame); | |
4773d904 | 1977 | |
951455c1 DK |
1978 | if (avctx->active_thread_type == FF_THREAD_FRAME) |
1979 | num_jobs = 1; | |
1980 | else | |
1981 | num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count); | |
1982 | s->num_jobs = num_jobs; | |
1983 | s->curframe = curframe; | |
1984 | s->prev_frame = prev_frame; | |
1985 | s->mv_min.y = -MARGIN; | |
1986 | s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; | |
1987 | for (i = 0; i < MAX_THREADS; i++) { | |
1988 | s->thread_data[i].thread_mb_pos = 0; | |
1989 | s->thread_data[i].wait_mb_pos = INT_MAX; | |
1990 | } | |
1991 | avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs); | |
3b636f21 | 1992 | |
4773d904 | 1993 | ff_thread_report_progress(curframe, INT_MAX, 0); |
fb90785e RB |
1994 | memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); |
1995 | ||
3b636f21 DC |
1996 | skip_decode: |
1997 | // if future frames don't use the updated probabilities, | |
1998 | // reset them to the values we saved | |
1999 | if (!s->update_probabilities) | |
2000 | s->prob[0] = s->prob[1]; | |
2001 | ||
3b636f21 | 2002 | if (!s->invisible) { |
4773d904 | 2003 | *(AVFrame*)data = *curframe; |
3b636f21 DC |
2004 | *data_size = sizeof(AVFrame); |
2005 | } | |
2006 | ||
2007 | return avpkt->size; | |
fb90785e RB |
2008 | err: |
2009 | memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); | |
2010 | return ret; | |
3b636f21 DC |
2011 | } |
2012 | ||
2013 | static av_cold int vp8_decode_init(AVCodecContext *avctx) | |
2014 | { | |
2015 | VP8Context *s = avctx->priv_data; | |
2016 | ||
2017 | s->avctx = avctx; | |
2018 | avctx->pix_fmt = PIX_FMT_YUV420P; | |
2019 | ||
9cf0841e | 2020 | ff_dsputil_init(&s->dsp, avctx); |
76741b0e | 2021 | ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1); |
3b636f21 DC |
2022 | ff_vp8dsp_init(&s->vp8dsp); |
2023 | ||
3b636f21 DC |
2024 | return 0; |
2025 | } | |
2026 | ||
2027 | static av_cold int vp8_decode_free(AVCodecContext *avctx) | |
2028 | { | |
bfa0f965 | 2029 | vp8_decode_flush_impl(avctx, 0, 1, 1); |
ce42a048 | 2030 | release_queued_segmaps(avctx->priv_data, 1); |
3b636f21 DC |
2031 | return 0; |
2032 | } | |
2033 | ||
4773d904 RB |
2034 | static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx) |
2035 | { | |
2036 | VP8Context *s = avctx->priv_data; | |
2037 | ||
2038 | s->avctx = avctx; | |
2039 | ||
2040 | return 0; | |
2041 | } | |
2042 | ||
2043 | #define REBASE(pic) \ | |
2044 | pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL | |
2045 | ||
2046 | static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) | |
2047 | { | |
2048 | VP8Context *s = dst->priv_data, *s_src = src->priv_data; | |
2049 | ||
56535793 RB |
2050 | if (s->macroblocks_base && |
2051 | (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) { | |
2052 | free_buffers(s); | |
e02dec25 | 2053 | s->maps_are_invalid = 1; |
82a0497c RB |
2054 | s->mb_width = s_src->mb_width; |
2055 | s->mb_height = s_src->mb_height; | |
56535793 RB |
2056 | } |
2057 | ||
4773d904 RB |
2058 | s->prob[0] = s_src->prob[!s_src->update_probabilities]; |
2059 | s->segmentation = s_src->segmentation; | |
2060 | s->lf_delta = s_src->lf_delta; | |
2061 | memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias)); | |
2062 | ||
2063 | memcpy(&s->frames, &s_src->frames, sizeof(s->frames)); | |
2064 | s->framep[0] = REBASE(s_src->next_framep[0]); | |
2065 | s->framep[1] = REBASE(s_src->next_framep[1]); | |
2066 | s->framep[2] = REBASE(s_src->next_framep[2]); | |
2067 | s->framep[3] = REBASE(s_src->next_framep[3]); | |
2068 | ||
2069 | return 0; | |
2070 | } | |
2071 | ||
d36beb3f | 2072 | AVCodec ff_vp8_decoder = { |
00c3b67b MS |
2073 | .name = "vp8", |
2074 | .type = AVMEDIA_TYPE_VIDEO, | |
2075 | .id = CODEC_ID_VP8, | |
2076 | .priv_data_size = sizeof(VP8Context), | |
2077 | .init = vp8_decode_init, | |
2078 | .close = vp8_decode_free, | |
2079 | .decode = vp8_decode_frame, | |
951455c1 | 2080 | .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS, |
00c3b67b MS |
2081 | .flush = vp8_decode_flush, |
2082 | .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), | |
4773d904 RB |
2083 | .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy), |
2084 | .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context), | |
3b636f21 | 2085 | }; |