lavc: Prettify printing of codec tags containing non alphanumeric characters
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265 1/*
ff3d4310 2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
0da71265
MN
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
2912e87a 5 * This file is part of Libav.
b78e7197 6 *
2912e87a 7 * Libav is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
2912e87a 12 * Libav is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
2912e87a 18 * License along with Libav; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
ba87f080 23 * @file
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
737eb597 28#include "libavutil/imgutils.h"
40e5d31b 29#include "internal.h"
55b9ef18
DB
30#include "cabac.h"
31#include "cabac_functions.h"
0da71265
MN
32#include "dsputil.h"
33#include "avcodec.h"
34#include "mpegvideo.h"
26b4fe82 35#include "h264.h"
0da71265 36#include "h264data.h"
79dad2a9 37#include "h264chroma.h"
188d3c51 38#include "h264_mvpred.h"
0da71265 39#include "golomb.h"
199436b9 40#include "mathops.h"
626464fb 41#include "rectangle.h"
75d5156a 42#include "svq3.h"
6a9c8594 43#include "thread.h"
369122dd 44#include "vdpau_internal.h"
cfa5a81e 45#include "libavutil/avassert.h"
0da71265 46
e5d40372 47// #undef NDEBUG
0da71265
MN
48#include <assert.h>
49
0becb078
DB
/* Four 16-bit size constants: 256 (= 16 * 16) and that value plus 128,
 * 256 and 512.
 * NOTE(review): presumably the number of samples in one macroblock for
 * each chroma format -- confirm against the users of this table. */
const uint16_t ff_h264_mb_sizes[4] = {
    256,
    384,
    512,
    768,
};
51
e5d40372
DB
52static const uint8_t rem6[QP_MAX_NUM + 1] = {
53 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
54 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
55 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
acd8d10f
PI
56};
57
e5d40372
DB
58static const uint8_t div6[QP_MAX_NUM + 1] = {
59 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
60 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
61 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
acd8d10f
PI
62};
63
8d061989
RB
64static const enum AVPixelFormat h264_hwaccel_pixfmt_list_420[] = {
65#if CONFIG_H264_DXVA2_HWACCEL
66 AV_PIX_FMT_DXVA2_VLD,
67#endif
68#if CONFIG_H264_VAAPI_HWACCEL
69 AV_PIX_FMT_VAAPI_VLD,
70#endif
71#if CONFIG_H264_VDA_HWACCEL
72 AV_PIX_FMT_VDA_VLD,
73#endif
74#if CONFIG_H264_VDPAU_HWACCEL
75 AV_PIX_FMT_VDPAU,
76#endif
77 AV_PIX_FMT_YUV420P,
78 AV_PIX_FMT_NONE
79};
80
d65522e8 81static const enum AVPixelFormat h264_hwaccel_pixfmt_list_jpeg_420[] = {
78bc4d69 82#if CONFIG_H264_DXVA2_HWACCEL
716d413c 83 AV_PIX_FMT_DXVA2_VLD,
78bc4d69
RDC
84#endif
85#if CONFIG_H264_VAAPI_HWACCEL
716d413c 86 AV_PIX_FMT_VAAPI_VLD,
78bc4d69
RDC
87#endif
88#if CONFIG_H264_VDA_HWACCEL
716d413c 89 AV_PIX_FMT_VDA_VLD,
78bc4d69
RDC
90#endif
91#if CONFIG_H264_VDPAU_HWACCEL
ec0e9200 92 AV_PIX_FMT_VDPAU,
78bc4d69 93#endif
716d413c
AK
94 AV_PIX_FMT_YUVJ420P,
95 AV_PIX_FMT_NONE
0435fb16
BC
96};
97
54974c62
AK
98static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
99 int (*mv)[2][4][2],
100 int mb_x, int mb_y, int mb_intra, int mb_skipped)
101{
102 H264Context *h = opaque;
54974c62 103
2c541554
AK
104 h->mb_x = mb_x;
105 h->mb_y = mb_y;
106 h->mb_xy = mb_x + mb_y * h->mb_stride;
54974c62
AK
107 memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache));
108 assert(ref >= 0);
109 /* FIXME: It is possible albeit uncommon that slice references
110 * differ between slices. We take the easy approach and ignore
111 * it for now. If this turns out to have any relevance in
112 * practice then correct remapping should be added. */
113 if (ref >= h->ref_count[0])
114 ref = 0;
2c541554 115 fill_rectangle(&h->cur_pic.f.ref_index[0][4 * h->mb_xy],
54974c62
AK
116 2, 2, 2, ref, 1);
117 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
118 fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
2c541554 119 pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
54974c62
AK
120 assert(!FRAME_MBAFF);
121 ff_h264_hl_decode_mb(h);
122}
123
2c541554
AK
124void ff_h264_draw_horiz_band(H264Context *h, int y, int height)
125{
fae6fd5b 126 ff_draw_horiz_band(h->avctx, NULL, &h->cur_pic,
2c541554 127 h->ref_list[0][0].f.data[0] ? &h->ref_list[0][0] : NULL,
fae6fd5b 128 y, height, h->picture_structure, h->first_field, 0,
2c541554
AK
129 h->low_delay, h->mb_height * 16, h->mb_width * 16);
130}
131
132static void free_frame_buffer(H264Context *h, Picture *pic)
133{
134 ff_thread_release_buffer(h->avctx, &pic->f);
135 av_freep(&pic->f.hwaccel_picture_private);
136}
137
138static void free_picture(H264Context *h, Picture *pic)
139{
140 int i;
141
142 if (pic->f.data[0])
143 free_frame_buffer(h, pic);
144
145 av_freep(&pic->qscale_table_base);
146 pic->f.qscale_table = NULL;
147 av_freep(&pic->mb_type_base);
148 pic->f.mb_type = NULL;
149 for (i = 0; i < 2; i++) {
150 av_freep(&pic->motion_val_base[i]);
151 av_freep(&pic->f.ref_index[i]);
152 pic->f.motion_val[i] = NULL;
153 }
154}
155
156static void release_unused_pictures(H264Context *h, int remove_current)
157{
158 int i;
159
160 /* release non reference frames */
161 for (i = 0; i < h->picture_count; i++) {
162 if (h->DPB[i].f.data[0] && !h->DPB[i].f.reference &&
163 (!h->DPB[i].owner2 || h->DPB[i].owner2 == h) &&
164 (remove_current || &h->DPB[i] != h->cur_pic_ptr)) {
165 free_frame_buffer(h, &h->DPB[i]);
166 }
167 }
168}
169
170static int alloc_scratch_buffers(H264Context *h, int linesize)
171{
172 int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);
173
174 if (h->bipred_scratchpad)
175 return 0;
176
177 h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size);
178 // edge emu needs blocksize + filter length - 1
179 // (= 21x21 for h264)
180 h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21);
181 h->me.scratchpad = av_mallocz(alloc_size * 2 * 16 * 2);
182
183 if (!h->bipred_scratchpad || !h->edge_emu_buffer || !h->me.scratchpad) {
184 av_freep(&h->bipred_scratchpad);
185 av_freep(&h->edge_emu_buffer);
186 av_freep(&h->me.scratchpad);
187 return AVERROR(ENOMEM);
188 }
189
190 h->me.temp = h->me.scratchpad;
191
192 return 0;
193}
194
195static int alloc_picture(H264Context *h, Picture *pic)
196{
197 const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1;
198 const int mb_array_size = h->mb_stride * h->mb_height;
199 const int b4_stride = h->mb_width * 4 + 1;
200 const int b4_array_size = b4_stride * h->mb_height * 4;
201 int i, ret = 0;
202
203 av_assert0(!pic->f.data[0]);
204
205 if (h->avctx->hwaccel) {
206 const AVHWAccel *hwaccel = h->avctx->hwaccel;
207 av_assert0(!pic->f.hwaccel_picture_private);
208 if (hwaccel->priv_data_size) {
209 pic->f.hwaccel_picture_private = av_mallocz(hwaccel->priv_data_size);
210 if (!pic->f.hwaccel_picture_private)
211 return AVERROR(ENOMEM);
212 }
213 }
214 ret = ff_thread_get_buffer(h->avctx, &pic->f);
215 if (ret < 0)
216 goto fail;
217
218 h->linesize = pic->f.linesize[0];
219 h->uvlinesize = pic->f.linesize[1];
220
221 if (pic->f.qscale_table == NULL) {
222 FF_ALLOCZ_OR_GOTO(h->avctx, pic->qscale_table_base,
223 (big_mb_num + h->mb_stride) * sizeof(uint8_t),
224 fail)
225 FF_ALLOCZ_OR_GOTO(h->avctx, pic->mb_type_base,
226 (big_mb_num + h->mb_stride) * sizeof(uint32_t),
227 fail)
228 pic->f.mb_type = pic->mb_type_base + 2 * h->mb_stride + 1;
229 pic->f.qscale_table = pic->qscale_table_base + 2 * h->mb_stride + 1;
230
231 for (i = 0; i < 2; i++) {
232 FF_ALLOCZ_OR_GOTO(h->avctx, pic->motion_val_base[i],
233 2 * (b4_array_size + 4) * sizeof(int16_t),
234 fail)
235 pic->f.motion_val[i] = pic->motion_val_base[i] + 4;
236 FF_ALLOCZ_OR_GOTO(h->avctx, pic->f.ref_index[i],
237 4 * mb_array_size * sizeof(uint8_t), fail)
238 }
239 pic->f.motion_subsample_log2 = 2;
240
241 pic->f.qstride = h->mb_stride;
242 }
243
244 pic->owner2 = h;
245
246 return 0;
247fail:
248 free_frame_buffer(h, pic);
249 return (ret < 0) ? ret : AVERROR(ENOMEM);
250}
251
252static inline int pic_is_unused(H264Context *h, Picture *pic)
253{
254 if (pic->f.data[0] == NULL)
255 return 1;
256 if (pic->needs_realloc && !(pic->f.reference & DELAYED_PIC_REF))
257 if (!pic->owner2 || pic->owner2 == h)
258 return 1;
259 return 0;
260}
261
262static int find_unused_picture(H264Context *h)
263{
264 int i;
265
266 for (i = h->picture_range_start; i < h->picture_range_end; i++) {
267 if (pic_is_unused(h, &h->DPB[i]))
268 break;
269 }
270 if (i == h->picture_range_end)
271 return AVERROR_INVALIDDATA;
272
273 if (h->DPB[i].needs_realloc) {
274 h->DPB[i].needs_realloc = 0;
275 free_picture(h, &h->DPB[i]);
276 avcodec_get_frame_defaults(&h->DPB[i].f);
277 }
278
279 return i;
280}
281
0da71265 282/**
58c42af7
DB
283 * Check if the top & left blocks are available if needed and
284 * change the dc mode so it only uses the available blocks.
0da71265 285 */
e5d40372
DB
286int ff_h264_check_intra4x4_pred_mode(H264Context *h)
287{
e5d40372
DB
288 static const int8_t top[12] = {
289 -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
290 };
291 static const int8_t left[12] = {
292 0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
293 };
2bedc0e8
MN
294 int i;
295
e5d40372
DB
296 if (!(h->top_samples_available & 0x8000)) {
297 for (i = 0; i < 4; i++) {
298 int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
299 if (status < 0) {
2c541554 300 av_log(h->avctx, AV_LOG_ERROR,
e5d40372 301 "top block unavailable for requested intra4x4 mode %d at %d %d\n",
2c541554 302 status, h->mb_x, h->mb_y);
2bedc0e8 303 return -1;
e5d40372
DB
304 } else if (status) {
305 h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
2bedc0e8
MN
306 }
307 }
308 }
309
e5d40372
DB
310 if ((h->left_samples_available & 0x8888) != 0x8888) {
311 static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
312 for (i = 0; i < 4; i++)
313 if (!(h->left_samples_available & mask[i])) {
314 int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
315 if (status < 0) {
2c541554 316 av_log(h->avctx, AV_LOG_ERROR,
e5d40372 317 "left block unavailable for requested intra4x4 mode %d at %d %d\n",
2c541554 318 status, h->mb_x, h->mb_y);
2bedc0e8 319 return -1;
e5d40372
DB
320 } else if (status) {
321 h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
2bedc0e8
MN
322 }
323 }
2bedc0e8
MN
324 }
325
326 return 0;
e5d40372 327} // FIXME cleanup like ff_h264_check_intra_pred_mode
2bedc0e8
MN
328
329/**
58c42af7
DB
330 * Check if the top & left blocks are available if needed and
331 * change the dc mode so it only uses the available blocks.
2bedc0e8 332 */
e5d40372
DB
333int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
334{
e5d40372
DB
335 static const int8_t top[7] = { LEFT_DC_PRED8x8, 1, -1, -1 };
336 static const int8_t left[7] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 };
337
338 if (mode > 6U) {
2c541554 339 av_log(h->avctx, AV_LOG_ERROR,
e5d40372 340 "out of range intra chroma pred mode at %d %d\n",
2c541554 341 h->mb_x, h->mb_y);
7440fe83 342 return -1;
5175b937 343 }
115329f1 344
e5d40372
DB
345 if (!(h->top_samples_available & 0x8000)) {
346 mode = top[mode];
347 if (mode < 0) {
2c541554 348 av_log(h->avctx, AV_LOG_ERROR,
e5d40372 349 "top block unavailable for requested intra mode at %d %d\n",
2c541554 350 h->mb_x, h->mb_y);
0da71265
MN
351 return -1;
352 }
353 }
115329f1 354
e5d40372
DB
355 if ((h->left_samples_available & 0x8080) != 0x8080) {
356 mode = left[mode];
357 if (is_chroma && (h->left_samples_available & 0x8080)) {
358 // mad cow disease mode, aka MBAFF + constrained_intra_pred
359 mode = ALZHEIMER_DC_L0T_PRED8x8 +
360 (!(h->left_samples_available & 0x8000)) +
361 2 * (mode == DC_128_PRED8x8);
d1d10e91 362 }
e5d40372 363 if (mode < 0) {
2c541554 364 av_log(h->avctx, AV_LOG_ERROR,
e5d40372 365 "left block unavailable for requested intra mode at %d %d\n",
2c541554 366 h->mb_x, h->mb_y);
0da71265 367 return -1;
115329f1 368 }
0da71265
MN
369 }
370
371 return mode;
372}
373
e5d40372
DB
374const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
375 int *dst_length, int *consumed, int length)
376{
0da71265
MN
377 int i, si, di;
378 uint8_t *dst;
24456882 379 int bufidx;
0da71265 380
e5d40372
DB
381 // src[0]&0x80; // forbidden bit
382 h->nal_ref_idc = src[0] >> 5;
383 h->nal_unit_type = src[0] & 0x1F;
0da71265 384
e5d40372
DB
385 src++;
386 length--;
e08715d3 387
58db34aa
RB
388#define STARTCODE_TEST \
389 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
390 if (src[i + 2] != 3) { \
391 /* startcode, so we must be past the end */ \
392 length = i; \
393 } \
394 break; \
395 }
b250f9c6 396#if HAVE_FAST_UNALIGNED
58db34aa
RB
397#define FIND_FIRST_ZERO \
398 if (i > 0 && !src[i]) \
399 i--; \
400 while (src[i]) \
401 i++
e5d40372 402#if HAVE_FAST_64BIT
e5d40372
DB
403 for (i = 0; i + 1 < length; i += 9) {
404 if (!((~AV_RN64A(src + i) &
405 (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
406 0x8000800080008080ULL))
58db34aa
RB
407 continue;
408 FIND_FIRST_ZERO;
409 STARTCODE_TEST;
410 i -= 7;
411 }
e5d40372 412#else
e5d40372
DB
413 for (i = 0; i + 1 < length; i += 5) {
414 if (!((~AV_RN32A(src + i) &
415 (AV_RN32A(src + i) - 0x01000101U)) &
416 0x80008080U))
e08715d3 417 continue;
58db34aa
RB
418 FIND_FIRST_ZERO;
419 STARTCODE_TEST;
420 i -= 3;
421 }
422#endif
e08715d3 423#else
e5d40372
DB
424 for (i = 0; i + 1 < length; i += 2) {
425 if (src[i])
426 continue;
427 if (i > 0 && src[i - 1] == 0)
428 i--;
58db34aa 429 STARTCODE_TEST;
0da71265 430 }
58db34aa 431#endif
0da71265 432
e5d40372
DB
433 if (i >= length - 1) { // no escaped 0
434 *dst_length = length;
435 *consumed = length + 1; // +1 for the header
115329f1 436 return src;
0da71265
MN
437 }
438
e5d40372
DB
439 // use second escape buffer for inter data
440 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
441 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx],
442 length + FF_INPUT_BUFFER_PADDING_SIZE);
443 dst = h->rbsp_buffer[bufidx];
0da71265 444
e5d40372 445 if (dst == NULL)
ac658be5 446 return NULL;
ac658be5 447
593af7cd 448 memcpy(dst, src, i);
e5d40372
DB
449 si = di = i;
450 while (si + 2 < length) {
451 // remove escapes (very rare 1:2^22)
452 if (src[si + 2] > 3) {
453 dst[di++] = src[si++];
454 dst[di++] = src[si++];
455 } else if (src[si] == 0 && src[si + 1] == 0) {
456 if (src[si + 2] == 3) { // escape
457 dst[di++] = 0;
458 dst[di++] = 0;
459 si += 3;
c8470cc1 460 continue;
e5d40372 461 } else // next start code
593af7cd 462 goto nsc;
0da71265
MN
463 }
464
e5d40372 465 dst[di++] = src[si++];
0da71265 466 }
e5d40372
DB
467 while (si < length)
468 dst[di++] = src[si++];
593af7cd 469nsc:
0da71265 470
e5d40372 471 memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
d4369630 472
e5d40372
DB
473 *dst_length = di;
474 *consumed = si + 1; // +1 for the header
475 /* FIXME store exact number of bits in the getbitcontext
476 * (it is needed for decoding) */
0da71265
MN
477 return dst;
478}
479
85297319
DEP
480/**
481 * Identify the exact end of the bitstream
482 * @return the length of the trailing, or 0 if damaged
483 */
b691fd7a 484static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
e5d40372
DB
485{
486 int v = *src;
0da71265
MN
487 int r;
488
2c541554 489 tprintf(h->avctx, "rbsp trailing %X\n", v);
0da71265 490
e5d40372
DB
491 for (r = 1; r < 9; r++) {
492 if (v & 1)
493 return r;
494 v >>= 1;
0da71265
MN
495 }
496 return 0;
497}
498
e5d40372
DB
499static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
500 int height, int y_offset, int list)
501{
502 int raw_my = h->mv_cache[list][scan8[n]][1];
fb845ffd
RB
503 int filter_height_up = (raw_my & 3) ? 2 : 0;
504 int filter_height_down = (raw_my & 3) ? 3 : 0;
e5d40372 505 int full_my = (raw_my >> 2) + y_offset;
fb845ffd
RB
506 int top = full_my - filter_height_up;
507 int bottom = full_my + filter_height_down + height;
6a9c8594
AS
508
509 return FFMAX(abs(top), bottom);
510}
511
e5d40372
DB
512static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
513 int height, int y_offset, int list0,
514 int list1, int *nrefs)
515{
6a9c8594
AS
516 int my;
517
2c541554 518 y_offset += 16 * (h->mb_y >> MB_FIELD);
6a9c8594 519
e5d40372
DB
520 if (list0) {
521 int ref_n = h->ref_cache[0][scan8[n]];
522 Picture *ref = &h->ref_list[0][ref_n];
6a9c8594
AS
523
524 // Error resilience puts the current picture in the ref list.
525 // Don't try to wait on these as it will cause a deadlock.
526 // Fields can wait on each other, though.
2c541554
AK
527 if (ref->f.thread_opaque != h->cur_pic.f.thread_opaque ||
528 (ref->f.reference & 3) != h->picture_structure) {
6a9c8594 529 my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
e5d40372
DB
530 if (refs[0][ref_n] < 0)
531 nrefs[0] += 1;
6a9c8594
AS
532 refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
533 }
534 }
535
e5d40372
DB
536 if (list1) {
537 int ref_n = h->ref_cache[1][scan8[n]];
538 Picture *ref = &h->ref_list[1][ref_n];
6a9c8594 539
2c541554
AK
540 if (ref->f.thread_opaque != h->cur_pic.f.thread_opaque ||
541 (ref->f.reference & 3) != h->picture_structure) {
6a9c8594 542 my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
e5d40372
DB
543 if (refs[1][ref_n] < 0)
544 nrefs[1] += 1;
6a9c8594
AS
545 refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
546 }
547 }
548}
549
550/**
551 * Wait until all reference frames are available for MC operations.
552 *
553 * @param h the H264 context
554 */
e5d40372
DB
555static void await_references(H264Context *h)
556{
e5d40372 557 const int mb_xy = h->mb_xy;
2c541554 558 const int mb_type = h->cur_pic.f.mb_type[mb_xy];
6a9c8594 559 int refs[2][48];
e5d40372 560 int nrefs[2] = { 0 };
6a9c8594
AS
561 int ref, list;
562
563 memset(refs, -1, sizeof(refs));
564
e5d40372 565 if (IS_16X16(mb_type)) {
6a9c8594 566 get_lowest_part_y(h, refs, 0, 16, 0,
e5d40372
DB
567 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
568 } else if (IS_16X8(mb_type)) {
6a9c8594 569 get_lowest_part_y(h, refs, 0, 8, 0,
e5d40372 570 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
6a9c8594 571 get_lowest_part_y(h, refs, 8, 8, 8,
e5d40372
DB
572 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
573 } else if (IS_8X16(mb_type)) {
6a9c8594 574 get_lowest_part_y(h, refs, 0, 16, 0,
e5d40372 575 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
6a9c8594 576 get_lowest_part_y(h, refs, 4, 16, 0,
e5d40372
DB
577 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
578 } else {
6a9c8594
AS
579 int i;
580
581 assert(IS_8X8(mb_type));
582
e5d40372
DB
583 for (i = 0; i < 4; i++) {
584 const int sub_mb_type = h->sub_mb_type[i];
585 const int n = 4 * i;
586 int y_offset = (i & 2) << 2;
587
588 if (IS_SUB_8X8(sub_mb_type)) {
589 get_lowest_part_y(h, refs, n, 8, y_offset,
590 IS_DIR(sub_mb_type, 0, 0),
591 IS_DIR(sub_mb_type, 0, 1),
592 nrefs);
593 } else if (IS_SUB_8X4(sub_mb_type)) {
594 get_lowest_part_y(h, refs, n, 4, y_offset,
595 IS_DIR(sub_mb_type, 0, 0),
596 IS_DIR(sub_mb_type, 0, 1),
597 nrefs);
598 get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
599 IS_DIR(sub_mb_type, 0, 0),
600 IS_DIR(sub_mb_type, 0, 1),
601 nrefs);
602 } else if (IS_SUB_4X8(sub_mb_type)) {
603 get_lowest_part_y(h, refs, n, 8, y_offset,
604 IS_DIR(sub_mb_type, 0, 0),
605 IS_DIR(sub_mb_type, 0, 1),
606 nrefs);
607 get_lowest_part_y(h, refs, n + 1, 8, y_offset,
608 IS_DIR(sub_mb_type, 0, 0),
609 IS_DIR(sub_mb_type, 0, 1),
610 nrefs);
611 } else {
6a9c8594
AS
612 int j;
613 assert(IS_SUB_4X4(sub_mb_type));
e5d40372
DB
614 for (j = 0; j < 4; j++) {
615 int sub_y_offset = y_offset + 2 * (j & 2);
616 get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
617 IS_DIR(sub_mb_type, 0, 0),
618 IS_DIR(sub_mb_type, 0, 1),
619 nrefs);
6a9c8594
AS
620 }
621 }
622 }
623 }
624
e5d40372
DB
625 for (list = h->list_count - 1; list >= 0; list--)
626 for (ref = 0; ref < 48 && nrefs[list]; ref++) {
6a9c8594 627 int row = refs[list][ref];
e5d40372
DB
628 if (row >= 0) {
629 Picture *ref_pic = &h->ref_list[list][ref];
630 int ref_field = ref_pic->f.reference - 1;
6a9c8594 631 int ref_field_picture = ref_pic->field_picture;
2c541554 632 int pic_height = 16 * h->mb_height >> ref_field_picture;
6a9c8594
AS
633
634 row <<= MB_MBAFF;
635 nrefs[list]--;
636
e5d40372
DB
637 if (!FIELD_PICTURE && ref_field_picture) { // frame referencing two fields
638 ff_thread_await_progress(&ref_pic->f,
639 FFMIN((row >> 1) - !(row & 1),
640 pic_height - 1),
641 1);
642 ff_thread_await_progress(&ref_pic->f,
643 FFMIN((row >> 1), pic_height - 1),
644 0);
645 } else if (FIELD_PICTURE && !ref_field_picture) { // field referencing one field of a frame
646 ff_thread_await_progress(&ref_pic->f,
647 FFMIN(row * 2 + ref_field,
648 pic_height - 1),
649 0);
650 } else if (FIELD_PICTURE) {
651 ff_thread_await_progress(&ref_pic->f,
652 FFMIN(row, pic_height - 1),
653 ref_field);
654 } else {
655 ff_thread_await_progress(&ref_pic->f,
656 FFMIN(row, pic_height - 1),
657 0);
6a9c8594
AS
658 }
659 }
660 }
66c6b5e2 661}
66c6b5e2 662
e5d40372
DB
663static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
664 int n, int square, int height,
665 int delta, int list,
666 uint8_t *dest_y, uint8_t *dest_cb,
667 uint8_t *dest_cr,
668 int src_x_offset, int src_y_offset,
669 qpel_mc_func *qpix_op,
670 h264_chroma_mc_func chroma_op,
671 int pixel_shift, int chroma_idc)
05fb63f5 672{
e5d40372
DB
673 const int mx = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
674 int my = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
675 const int luma_xy = (mx & 3) + ((my & 3) << 2);
676 int offset = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
677 uint8_t *src_y = pic->f.data[0] + offset;
678 uint8_t *src_cb, *src_cr;
fae6fd5b
RB
679 int extra_width = 0;
680 int extra_height = 0;
e5d40372
DB
681 int emu = 0;
682 const int full_mx = mx >> 2;
683 const int full_my = my >> 2;
2c541554
AK
684 const int pic_width = 16 * h->mb_width;
685 const int pic_height = 16 * h->mb_height >> MB_FIELD;
229d263c 686 int ysh;
115329f1 687
e5d40372
DB
688 if (mx & 7)
689 extra_width -= 3;
690 if (my & 7)
691 extra_height -= 3;
692
693 if (full_mx < 0 - extra_width ||
694 full_my < 0 - extra_height ||
695 full_mx + 16 /*FIXME*/ > pic_width + extra_width ||
696 full_my + 16 /*FIXME*/ > pic_height + extra_height) {
2c541554 697 h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
8c53d39e
RB
698 src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
699 h->mb_linesize,
700 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
701 full_my - 2, pic_width, pic_height);
2c541554 702 src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
e5d40372
DB
703 emu = 1;
704 }
705
706 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
707 if (!square)
5d18eaad 708 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
115329f1 709
2c541554 710 if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY)
e5d40372 711 return;
7b442ad9 712
e5d40372 713 if (chroma_idc == 3 /* yuv444 */) {
657ccb5a 714 src_cb = pic->f.data[1] + offset;
e5d40372 715 if (emu) {
2c541554 716 h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
8c53d39e
RB
717 src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
718 h->mb_linesize,
719 16 + 5, 16 + 5 /*FIXME*/,
720 full_mx - 2, full_my - 2,
721 pic_width, pic_height);
2c541554 722 src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
c90b9442 723 }
e5d40372
DB
724 qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
725 if (!square)
c90b9442 726 qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
c90b9442 727
657ccb5a 728 src_cr = pic->f.data[2] + offset;
e5d40372 729 if (emu) {
2c541554 730 h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
8c53d39e
RB
731 src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
732 h->mb_linesize,
733 16 + 5, 16 + 5 /*FIXME*/,
734 full_mx - 2, full_my - 2,
735 pic_width, pic_height);
2c541554 736 src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
c90b9442 737 }
e5d40372
DB
738 qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
739 if (!square)
c90b9442 740 qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
c90b9442
JGG
741 return;
742 }
743
05fb63f5 744 ysh = 3 - (chroma_idc == 2 /* yuv422 */);
e5d40372 745 if (chroma_idc == 1 /* yuv420 */ && MB_FIELD) {
5d18eaad 746 // chroma offset when predicting from a field of opposite parity
2c541554 747 my += 2 * ((h->mb_y & 1) - (pic->f.reference - 1));
e5d40372 748 emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
5d18eaad 749 }
229d263c 750
e5d40372
DB
751 src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
752 (my >> ysh) * h->mb_uvlinesize;
753 src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
754 (my >> ysh) * h->mb_uvlinesize;
5d18eaad 755
e5d40372 756 if (emu) {
2c541554 757 h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb, h->mb_uvlinesize,
8c53d39e
RB
758 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
759 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
2c541554 760 src_cb = h->edge_emu_buffer;
0da71265 761 }
e5d40372
DB
762 chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
763 height >> (chroma_idc == 1 /* yuv420 */),
764 mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
0da71265 765
e5d40372 766 if (emu) {
2c541554 767 h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr, h->mb_uvlinesize,
8c53d39e
RB
768 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
769 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
2c541554 770 src_cr = h->edge_emu_buffer;
0da71265 771 }
05fb63f5 772 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
e5d40372 773 mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
0da71265
MN
774}
775
e5d40372
DB
776static av_always_inline void mc_part_std(H264Context *h, int n, int square,
777 int height, int delta,
778 uint8_t *dest_y, uint8_t *dest_cb,
779 uint8_t *dest_cr,
780 int x_offset, int y_offset,
781 qpel_mc_func *qpix_put,
782 h264_chroma_mc_func chroma_put,
783 qpel_mc_func *qpix_avg,
784 h264_chroma_mc_func chroma_avg,
785 int list0, int list1,
786 int pixel_shift, int chroma_idc)
05fb63f5 787{
e5d40372
DB
788 qpel_mc_func *qpix_op = qpix_put;
789 h264_chroma_mc_func chroma_op = chroma_put;
115329f1 790
e5d40372 791 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
05fb63f5 792 if (chroma_idc == 3 /* yuv444 */) {
e5d40372
DB
793 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
794 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
05fb63f5 795 } else if (chroma_idc == 2 /* yuv422 */) {
e5d40372
DB
796 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
797 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
798 } else { /* yuv420 */
799 dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
800 dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
c90b9442 801 }
2c541554
AK
802 x_offset += 8 * h->mb_x;
803 y_offset += 8 * (h->mb_y >> MB_FIELD);
115329f1 804
e5d40372
DB
805 if (list0) {
806 Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
c2d33742 807 mc_dir_part(h, ref, n, square, height, delta, 0,
e5d40372
DB
808 dest_y, dest_cb, dest_cr, x_offset, y_offset,
809 qpix_op, chroma_op, pixel_shift, chroma_idc);
0da71265 810
e5d40372
DB
811 qpix_op = qpix_avg;
812 chroma_op = chroma_avg;
0da71265
MN
813 }
814
e5d40372
DB
815 if (list1) {
816 Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
c2d33742 817 mc_dir_part(h, ref, n, square, height, delta, 1,
e5d40372
DB
818 dest_y, dest_cb, dest_cr, x_offset, y_offset,
819 qpix_op, chroma_op, pixel_shift, chroma_idc);
0da71265
MN
820 }
821}
822
e5d40372
DB
823static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
824 int height, int delta,
825 uint8_t *dest_y, uint8_t *dest_cb,
826 uint8_t *dest_cr,
827 int x_offset, int y_offset,
828 qpel_mc_func *qpix_put,
829 h264_chroma_mc_func chroma_put,
830 h264_weight_func luma_weight_op,
831 h264_weight_func chroma_weight_op,
832 h264_biweight_func luma_weight_avg,
833 h264_biweight_func chroma_weight_avg,
834 int list0, int list1,
835 int pixel_shift, int chroma_idc)
836{
c2d33742 837 int chroma_height;
9f2d1b4f 838
e5d40372 839 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
05fb63f5 840 if (chroma_idc == 3 /* yuv444 */) {
e5d40372 841 chroma_height = height;
c90b9442 842 chroma_weight_avg = luma_weight_avg;
e5d40372
DB
843 chroma_weight_op = luma_weight_op;
844 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
845 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
05fb63f5 846 } else if (chroma_idc == 2 /* yuv422 */) {
c2d33742 847 chroma_height = height;
e5d40372
DB
848 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
849 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
850 } else { /* yuv420 */
c2d33742 851 chroma_height = height >> 1;
e5d40372
DB
852 dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
853 dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
c90b9442 854 }
2c541554
AK
855 x_offset += 8 * h->mb_x;
856 y_offset += 8 * (h->mb_y >> MB_FIELD);
115329f1 857
e5d40372 858 if (list0 && list1) {
9f2d1b4f
LM
859 /* don't optimize for luma-only case, since B-frames usually
860 * use implicit weights => chroma too. */
a394959b
JG
861 uint8_t *tmp_cb = h->bipred_scratchpad;
862 uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
863 uint8_t *tmp_y = h->bipred_scratchpad + 16 * h->mb_uvlinesize;
e5d40372
DB
864 int refn0 = h->ref_cache[0][scan8[n]];
865 int refn1 = h->ref_cache[1][scan8[n]];
9f2d1b4f 866
c2d33742 867 mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
9f2d1b4f 868 dest_y, dest_cb, dest_cr,
05fb63f5
RB
869 x_offset, y_offset, qpix_put, chroma_put,
870 pixel_shift, chroma_idc);
c2d33742 871 mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
9f2d1b4f 872 tmp_y, tmp_cb, tmp_cr,
05fb63f5
RB
873 x_offset, y_offset, qpix_put, chroma_put,
874 pixel_shift, chroma_idc);
9f2d1b4f 875
e5d40372 876 if (h->use_weight == 2) {
2c541554 877 int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1];
9f2d1b4f 878 int weight1 = 64 - weight0;
e5d40372
DB
879 luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
880 height, 5, weight0, weight1, 0);
c2d33742
RB
881 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
882 chroma_height, 5, weight0, weight1, 0);
883 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
884 chroma_height, 5, weight0, weight1, 0);
e5d40372
DB
885 } else {
886 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
887 h->luma_log2_weight_denom,
888 h->luma_weight[refn0][0][0],
889 h->luma_weight[refn1][1][0],
890 h->luma_weight[refn0][0][1] +
891 h->luma_weight[refn1][1][1]);
892 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
893 h->chroma_log2_weight_denom,
894 h->chroma_weight[refn0][0][0][0],
895 h->chroma_weight[refn1][1][0][0],
896 h->chroma_weight[refn0][0][0][1] +
897 h->chroma_weight[refn1][1][0][1]);
898 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
899 h->chroma_log2_weight_denom,
900 h->chroma_weight[refn0][0][1][0],
901 h->chroma_weight[refn1][1][1][0],
902 h->chroma_weight[refn0][0][1][1] +
903 h->chroma_weight[refn1][1][1][1]);
9f2d1b4f 904 }
e5d40372
DB
905 } else {
906 int list = list1 ? 1 : 0;
907 int refn = h->ref_cache[list][scan8[n]];
908 Picture *ref = &h->ref_list[list][refn];
c2d33742 909 mc_dir_part(h, ref, n, square, height, delta, list,
9f2d1b4f 910 dest_y, dest_cb, dest_cr, x_offset, y_offset,
05fb63f5 911 qpix_put, chroma_put, pixel_shift, chroma_idc);
9f2d1b4f 912
e5d40372
DB
913 luma_weight_op(dest_y, h->mb_linesize, height,
914 h->luma_log2_weight_denom,
915 h->luma_weight[refn][list][0],
916 h->luma_weight[refn][list][1]);
917 if (h->use_weight_chroma) {
918 chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
919 h->chroma_log2_weight_denom,
920 h->chroma_weight[refn][list][0][0],
921 h->chroma_weight[refn][list][0][1]);
922 chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
923 h->chroma_log2_weight_denom,
924 h->chroma_weight[refn][list][1][0],
925 h->chroma_weight[refn][list][1][1]);
9f2d1b4f
LM
926 }
927 }
928}
929
e5d40372
DB
930static av_always_inline void prefetch_motion(H264Context *h, int list,
931 int pixel_shift, int chroma_idc)
27209bb1 932{
513fbd8e
LM
933 /* fetch pixels for estimated mv 4 macroblocks ahead
934 * optimized for 64byte cache lines */
513fbd8e 935 const int refn = h->ref_cache[list][scan8[0]];
e5d40372 936 if (refn >= 0) {
2c541554
AK
937 const int mx = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8;
938 const int my = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y;
657ccb5a 939 uint8_t **src = h->ref_list[list][refn].f.data;
e5d40372 940 int off = (mx << pixel_shift) +
2c541554 941 (my + (h->mb_x & 3) * 4) * h->mb_linesize +
e5d40372 942 (64 << pixel_shift);
2c541554 943 h->vdsp.prefetch(src[0] + off, h->linesize, 4);
05fb63f5 944 if (chroma_idc == 3 /* yuv444 */) {
2c541554
AK
945 h->vdsp.prefetch(src[1] + off, h->linesize, 4);
946 h->vdsp.prefetch(src[2] + off, h->linesize, 4);
e5d40372
DB
947 } else {
948 off = ((mx >> 1) << pixel_shift) +
2c541554 949 ((my >> 1) + (h->mb_x & 7)) * h->uvlinesize +
e5d40372 950 (64 << pixel_shift);
2c541554 951 h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
c90b9442 952 }
513fbd8e
LM
953 }
954}
955
e5d40372
DB
956static void free_tables(H264Context *h, int free_rbsp)
957{
7978debd 958 int i;
afebe2f7 959 H264Context *hx;
6a9c8594 960
0da71265 961 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
962 av_freep(&h->chroma_pred_mode_table);
963 av_freep(&h->cbp_table);
9e528114
LA
964 av_freep(&h->mvd_table[0]);
965 av_freep(&h->mvd_table[1]);
5ad984c9 966 av_freep(&h->direct_table);
0da71265
MN
967 av_freep(&h->non_zero_count);
968 av_freep(&h->slice_table_base);
e5d40372 969 h->slice_table = NULL;
c988f975 970 av_freep(&h->list_counts);
e5017ab8 971
0da71265 972 av_freep(&h->mb2b_xy);
d43c1922 973 av_freep(&h->mb2br_xy);
9f2d1b4f 974
2c541554
AK
975 if (free_rbsp) {
976 for (i = 0; i < h->picture_count && !h->avctx->internal->is_copy; i++)
977 free_picture(h, &h->DPB[i]);
978 av_freep(&h->DPB);
979 h->picture_count = 0;
980 } else if (h->DPB) {
981 for (i = 0; i < h->picture_count; i++)
982 h->DPB[i].needs_realloc = 1;
983 }
984
985 h->cur_pic_ptr = NULL;
986
e5d40372 987 for (i = 0; i < MAX_THREADS; i++) {
afebe2f7 988 hx = h->thread_context[i];
e5d40372
DB
989 if (!hx)
990 continue;
afebe2f7
991 av_freep(&hx->top_borders[1]);
992 av_freep(&hx->top_borders[0]);
a394959b 993 av_freep(&hx->bipred_scratchpad);
2c541554
AK
994 av_freep(&hx->edge_emu_buffer);
995 av_freep(&hx->dc_val_base);
996 av_freep(&hx->me.scratchpad);
997 av_freep(&hx->er.mb_index2xy);
998 av_freep(&hx->er.error_status_table);
999 av_freep(&hx->er.er_temp_buffer);
1000 av_freep(&hx->er.mbintra_table);
1001 av_freep(&hx->er.mbskip_table);
1002
e5d40372 1003 if (free_rbsp) {
fcb7e535
RB
1004 av_freep(&hx->rbsp_buffer[1]);
1005 av_freep(&hx->rbsp_buffer[0]);
1006 hx->rbsp_buffer_size[0] = 0;
1007 hx->rbsp_buffer_size[1] = 0;
91078926 1008 }
e5d40372
DB
1009 if (i)
1010 av_freep(&h->thread_context[i]);
afebe2f7 1011 }
0da71265
MN
1012}
1013
e5d40372
DB
1014static void init_dequant8_coeff_table(H264Context *h)
1015{
1016 int i, j, q, x;
1017 const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
239ea04c 1018
e5d40372 1019 for (i = 0; i < 6; i++) {
c90b9442 1020 h->dequant8_coeff[i] = h->dequant8_buffer[i];
e5d40372
DB
1021 for (j = 0; j < i; j++)
1022 if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i],
1023 64 * sizeof(uint8_t))) {
c90b9442
JGG
1024 h->dequant8_coeff[i] = h->dequant8_buffer[j];
1025 break;
1026 }
e5d40372 1027 if (j < i)
c90b9442 1028 continue;
239ea04c 1029
e5d40372 1030 for (q = 0; q < max_qp + 1; q++) {
d9ec210b 1031 int shift = div6[q];
e5d40372
DB
1032 int idx = rem6[q];
1033 for (x = 0; x < 64; x++)
1034 h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] =
1035 ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
1036 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
1037 }
1038 }
1039}
1040
e5d40372
DB
1041static void init_dequant4_coeff_table(H264Context *h)
1042{
1043 int i, j, q, x;
1044 const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
1045 for (i = 0; i < 6; i++) {
239ea04c 1046 h->dequant4_coeff[i] = h->dequant4_buffer[i];
e5d40372
DB
1047 for (j = 0; j < i; j++)
1048 if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i],
1049 16 * sizeof(uint8_t))) {
239ea04c
LM
1050 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1051 break;
1052 }
e5d40372 1053 if (j < i)
239ea04c
LM
1054 continue;
1055
e5d40372 1056 for (q = 0; q < max_qp + 1; q++) {
d9ec210b 1057 int shift = div6[q] + 2;
e5d40372
DB
1058 int idx = rem6[q];
1059 for (x = 0; x < 16; x++)
1060 h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] =
1061 ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
1062 h->pps.scaling_matrix4[i][x]) << shift;
239ea04c
LM
1063 }
1064 }
1065}
1066
e5d40372
DB
1067static void init_dequant_tables(H264Context *h)
1068{
1069 int i, x;
239ea04c 1070 init_dequant4_coeff_table(h);
e5d40372 1071 if (h->pps.transform_8x8_mode)
239ea04c 1072 init_dequant8_coeff_table(h);
e5d40372
DB
1073 if (h->sps.transform_bypass) {
1074 for (i = 0; i < 6; i++)
1075 for (x = 0; x < 16; x++)
1076 h->dequant4_coeff[i][0][x] = 1 << 6;
1077 if (h->pps.transform_8x8_mode)
1078 for (i = 0; i < 6; i++)
1079 for (x = 0; x < 64; x++)
1080 h->dequant8_coeff[i][0][x] = 1 << 6;
239ea04c
LM
1081 }
1082}
1083
e5d40372
DB
1084int ff_h264_alloc_tables(H264Context *h)
1085{
2c541554
AK
1086 const int big_mb_num = h->mb_stride * (h->mb_height + 1);
1087 const int row_mb_num = h->mb_stride * 2 * h->avctx->thread_count;
1088 int x, y, i;
e5d40372 1089
2c541554 1090 FF_ALLOCZ_OR_GOTO(h->avctx, h->intra4x4_pred_mode,
e5d40372 1091 row_mb_num * 8 * sizeof(uint8_t), fail)
2c541554 1092 FF_ALLOCZ_OR_GOTO(h->avctx, h->non_zero_count,
e5d40372 1093 big_mb_num * 48 * sizeof(uint8_t), fail)
2c541554
AK
1094 FF_ALLOCZ_OR_GOTO(h->avctx, h->slice_table_base,
1095 (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base), fail)
1096 FF_ALLOCZ_OR_GOTO(h->avctx, h->cbp_table,
e5d40372 1097 big_mb_num * sizeof(uint16_t), fail)
2c541554 1098 FF_ALLOCZ_OR_GOTO(h->avctx, h->chroma_pred_mode_table,
e5d40372 1099 big_mb_num * sizeof(uint8_t), fail)
2c541554 1100 FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[0],
e5d40372 1101 16 * row_mb_num * sizeof(uint8_t), fail);
2c541554 1102 FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[1],
e5d40372 1103 16 * row_mb_num * sizeof(uint8_t), fail);
2c541554 1104 FF_ALLOCZ_OR_GOTO(h->avctx, h->direct_table,
e5d40372 1105 4 * big_mb_num * sizeof(uint8_t), fail);
2c541554 1106 FF_ALLOCZ_OR_GOTO(h->avctx, h->list_counts,
e5d40372
DB
1107 big_mb_num * sizeof(uint8_t), fail)
1108
1109 memset(h->slice_table_base, -1,
2c541554
AK
1110 (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base));
1111 h->slice_table = h->slice_table_base + h->mb_stride * 2 + 1;
e5d40372 1112
2c541554 1113 FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2b_xy,
e5d40372 1114 big_mb_num * sizeof(uint32_t), fail);
2c541554 1115 FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2br_xy,
e5d40372 1116 big_mb_num * sizeof(uint32_t), fail);
2c541554
AK
1117 for (y = 0; y < h->mb_height; y++)
1118 for (x = 0; x < h->mb_width; x++) {
1119 const int mb_xy = x + y * h->mb_stride;
e5d40372
DB
1120 const int b_xy = 4 * x + 4 * y * h->b_stride;
1121
1122 h->mb2b_xy[mb_xy] = b_xy;
2c541554 1123 h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * h->mb_stride)));
0da71265 1124 }
9f2d1b4f 1125
e5d40372 1126 if (!h->dequant4_coeff[0])
56edbd81
LM
1127 init_dequant_tables(h);
1128
2c541554
AK
1129 if (!h->DPB) {
1130 h->picture_count = MAX_PICTURE_COUNT * FFMAX(1, h->avctx->thread_count);
1131 h->DPB = av_mallocz_array(h->picture_count, sizeof(*h->DPB));
1132 if (!h->DPB)
1133 return AVERROR(ENOMEM);
1134 for (i = 0; i < h->picture_count; i++)
1135 avcodec_get_frame_defaults(&h->DPB[i].f);
1136 avcodec_get_frame_defaults(&h->cur_pic.f);
1137 }
1138
0da71265 1139 return 0;
e5d40372 1140
0da71265 1141fail:
91078926 1142 free_tables(h, 1);
0da71265
MN
1143 return -1;
1144}
1145
afebe2f7
1146/**
1147 * Mimic alloc_tables(), but for every context thread.
1148 */
e5d40372
DB
1149static void clone_tables(H264Context *dst, H264Context *src, int i)
1150{
2c541554 1151 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride;
e5d40372
DB
1152 dst->non_zero_count = src->non_zero_count;
1153 dst->slice_table = src->slice_table;
1154 dst->cbp_table = src->cbp_table;
1155 dst->mb2b_xy = src->mb2b_xy;
1156 dst->mb2br_xy = src->mb2br_xy;
1157 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2c541554
AK
1158 dst->mvd_table[0] = src->mvd_table[0] + i * 8 * 2 * src->mb_stride;
1159 dst->mvd_table[1] = src->mvd_table[1] + i * 8 * 2 * src->mb_stride;
e5d40372
DB
1160 dst->direct_table = src->direct_table;
1161 dst->list_counts = src->list_counts;
2c541554
AK
1162 dst->DPB = src->DPB;
1163 dst->cur_pic_ptr = src->cur_pic_ptr;
1164 dst->cur_pic = src->cur_pic;
a394959b 1165 dst->bipred_scratchpad = NULL;
2c541554
AK
1166 dst->edge_emu_buffer = NULL;
1167 dst->me.scratchpad = NULL;
1168 ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma,
e5d40372 1169 src->sps.chroma_format_idc);
afebe2f7
1170}
1171
1172/**
1173 * Init context
1174 * Allocate buffers which are not shared amongst multiple threads.
1175 */
e5d40372
DB
1176static int context_init(H264Context *h)
1177{
2c541554
AK
1178 ERContext *er = &h->er;
1179 int mb_array_size = h->mb_height * h->mb_stride;
1180 int y_size = (2 * h->mb_width + 1) * (2 * h->mb_height + 1);
1181 int c_size = h->mb_stride * (h->mb_height + 1);
1182 int yc_size = y_size + 2 * c_size;
1183 int x, y, i;
1184
1185 FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[0],
1186 h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
1187 FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[1],
1188 h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
e5d40372
DB
1189
1190 h->ref_cache[0][scan8[5] + 1] =
1191 h->ref_cache[0][scan8[7] + 1] =
1192 h->ref_cache[0][scan8[13] + 1] =
1193 h->ref_cache[1][scan8[5] + 1] =
1194 h->ref_cache[1][scan8[7] + 1] =
1195 h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;
145061a1 1196
2c541554
AK
1197 /* init ER */
1198 er->avctx = h->avctx;
1199 er->dsp = &h->dsp;
1200 er->decode_mb = h264_er_decode_mb;
1201 er->opaque = h;
1202 er->quarter_sample = 1;
1203
1204 er->mb_num = h->mb_num;
1205 er->mb_width = h->mb_width;
1206 er->mb_height = h->mb_height;
1207 er->mb_stride = h->mb_stride;
1208 er->b8_stride = h->mb_width * 2 + 1;
1209
1210 FF_ALLOCZ_OR_GOTO(h->avctx, er->mb_index2xy, (h->mb_num + 1) * sizeof(int),
1211 fail); // error ressilience code looks cleaner with this
1212 for (y = 0; y < h->mb_height; y++)
1213 for (x = 0; x < h->mb_width; x++)
1214 er->mb_index2xy[x + y * h->mb_width] = x + y * h->mb_stride;
1215
1216 er->mb_index2xy[h->mb_height * h->mb_width] = (h->mb_height - 1) *
1217 h->mb_stride + h->mb_width;
1218
1219 FF_ALLOCZ_OR_GOTO(h->avctx, er->error_status_table,
1220 mb_array_size * sizeof(uint8_t), fail);
1221
1222 FF_ALLOC_OR_GOTO(h->avctx, er->mbintra_table, mb_array_size, fail);
1223 memset(er->mbintra_table, 1, mb_array_size);
1224
1225 FF_ALLOCZ_OR_GOTO(h->avctx, er->mbskip_table, mb_array_size + 2, fail);
1226
1227 FF_ALLOC_OR_GOTO(h->avctx, er->er_temp_buffer, h->mb_height * h->mb_stride,
1228 fail);
1229
1230 FF_ALLOCZ_OR_GOTO(h->avctx, h->dc_val_base, yc_size * sizeof(int16_t), fail);
1231 er->dc_val[0] = h->dc_val_base + h->mb_width * 2 + 2;
1232 er->dc_val[1] = h->dc_val_base + y_size + h->mb_stride + 1;
1233 er->dc_val[2] = er->dc_val[1] + c_size;
1234 for (i = 0; i < yc_size; i++)
1235 h->dc_val_base[i] = 1024;
54974c62 1236
afebe2f7 1237 return 0;
e5d40372 1238
afebe2f7
1239fail:
1240 return -1; // free_tables will clean up for us
1241}
1242
61c6eef5
JG
1243static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
1244 int parse_extradata);
9855b2e3 1245
e5d40372
DB
1246static av_cold void common_init(H264Context *h)
1247{
0da71265 1248
2c541554
AK
1249 h->width = h->avctx->width;
1250 h->height = h->avctx->height;
1251
1252 h->bit_depth_luma = 8;
1253 h->chroma_format_idc = 1;
115329f1 1254
76741b0e 1255 ff_h264dsp_init(&h->h264dsp, 8, 1);
79dad2a9 1256 ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
e9d81735 1257 ff_h264qpel_init(&h->h264qpel, 8);
2c541554 1258 ff_h264_pred_init(&h->hpc, h->avctx->codec_id, 8, 1);
0da71265 1259
e5d40372 1260 h->dequant_coeff_pps = -1;
56edbd81 1261
e5d40372 1262 /* needed so that IDCT permutation is known early */
2c541554
AK
1263 ff_dsputil_init(&h->dsp, h->avctx);
1264 ff_videodsp_init(&h->vdsp, 8);
a5805aa9 1265
e5d40372
DB
1266 memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1267 memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
0da71265
MN
1268}
1269
05e95319
HC
1270int ff_h264_decode_extradata(H264Context *h)
1271{
2c541554 1272 AVCodecContext *avctx = h->avctx;
9855b2e3 1273
e5d40372 1274 if (avctx->extradata[0] == 1) {
9855b2e3
MN
1275 int i, cnt, nalsize;
1276 unsigned char *p = avctx->extradata;
1277
1278 h->is_avc = 1;
1279
e5d40372 1280 if (avctx->extradata_size < 7) {
9855b2e3
MN
1281 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
1282 return -1;
1283 }
1284 /* sps and pps in the avcC always have length coded with 2 bytes,
e5d40372 1285 * so put a fake nal_length_size = 2 while parsing them */
9855b2e3
MN
1286 h->nal_length_size = 2;
1287 // Decode sps from avcC
e5d40372
DB
1288 cnt = *(p + 5) & 0x1f; // Number of sps
1289 p += 6;
9855b2e3
MN
1290 for (i = 0; i < cnt; i++) {
1291 nalsize = AV_RB16(p) + 2;
d1186ff7
LA
1292 if (p - avctx->extradata + nalsize > avctx->extradata_size)
1293 return -1;
61c6eef5 1294 if (decode_nal_units(h, p, nalsize, 1) < 0) {
e5d40372
DB
1295 av_log(avctx, AV_LOG_ERROR,
1296 "Decoding sps %d from avcC failed\n", i);
9855b2e3
MN
1297 return -1;
1298 }
1299 p += nalsize;
1300 }
1301 // Decode pps from avcC
1302 cnt = *(p++); // Number of pps
1303 for (i = 0; i < cnt; i++) {
1304 nalsize = AV_RB16(p) + 2;
d1186ff7
LA
1305 if (p - avctx->extradata + nalsize > avctx->extradata_size)
1306 return -1;
61c6eef5 1307 if (decode_nal_units(h, p, nalsize, 1) < 0) {
e5d40372
DB
1308 av_log(avctx, AV_LOG_ERROR,
1309 "Decoding pps %d from avcC failed\n", i);
9855b2e3
MN
1310 return -1;
1311 }
1312 p += nalsize;
1313 }
e5d40372 1314 // Now store right nal length size, that will be used to parse all other nals
0c17beba 1315 h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
9855b2e3
MN
1316 } else {
1317 h->is_avc = 0;
61c6eef5 1318 if (decode_nal_units(h, avctx->extradata, avctx->extradata_size, 1) < 0)
9855b2e3
MN
1319 return -1;
1320 }
05e95319
HC
1321 return 0;
1322}
1323
e5d40372
DB
1324av_cold int ff_h264_decode_init(AVCodecContext *avctx)
1325{
1326 H264Context *h = avctx->priv_data;
ea2bb12e 1327 int i;
05e95319 1328
2c541554 1329 h->avctx = avctx;
05e95319
HC
1330 common_init(h);
1331
2c541554
AK
1332 h->picture_structure = PICT_FRAME;
1333 h->picture_range_start = 0;
1334 h->picture_range_end = MAX_PICTURE_COUNT;
1335 h->slice_context_count = 1;
1336 h->workaround_bugs = avctx->workaround_bugs;
1337 h->flags = avctx->flags;
05e95319 1338
e5d40372
DB
1339 /* set defaults */
1340 // s->decode_mb = ff_h263_decode_mb;
e5d40372 1341 if (!avctx->has_b_frames)
2c541554 1342 h->low_delay = 1;
05e95319
HC
1343
1344 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
1345
1346 ff_h264_decode_init_vlc();
1347
6e3ef511 1348 h->pixel_shift = 0;
19a0729b 1349 h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
6e3ef511 1350
05e95319 1351 h->thread_context[0] = h;
e5d40372 1352 h->outputed_poc = h->next_outputed_poc = INT_MIN;
ea2bb12e
RB
1353 for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
1354 h->last_pocs[i] = INT_MIN;
e5d40372
DB
1355 h->prev_poc_msb = 1 << 16;
1356 h->x264_build = -1;
05e95319 1357 ff_h264_reset_sei(h);
36ef5369 1358 if (avctx->codec_id == AV_CODEC_ID_H264) {
e5d40372 1359 if (avctx->ticks_per_frame == 1)
2c541554 1360 h->avctx->time_base.den *= 2;
05e95319
HC
1361 avctx->ticks_per_frame = 2;
1362 }
1363
e5d40372 1364 if (avctx->extradata_size > 0 && avctx->extradata &&
05e95319
HC
1365 ff_h264_decode_extradata(h))
1366 return -1;
1367
e5d40372 1368 if (h->sps.bitstream_restriction_flag &&
2c541554
AK
1369 h->avctx->has_b_frames < h->sps.num_reorder_frames) {
1370 h->avctx->has_b_frames = h->sps.num_reorder_frames;
1371 h->low_delay = 0;
db8cb47d 1372 }
9855b2e3 1373
0da71265
MN
1374 return 0;
1375}
1376
e5d40372 1377#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
2c541554
AK
1378#undef REBASE_PICTURE
1379#define REBASE_PICTURE(pic, new_ctx, old_ctx) \
1380 ((pic && pic >= old_ctx->DPB && \
1381 pic < old_ctx->DPB + old_ctx->picture_count) ? \
1382 &new_ctx->DPB[pic - old_ctx->DPB] : NULL)
e5d40372
DB
1383
1384static void copy_picture_range(Picture **to, Picture **from, int count,
2c541554
AK
1385 H264Context *new_base,
1386 H264Context *old_base)
6a9c8594
AS
1387{
1388 int i;
1389
e5d40372 1390 for (i = 0; i < count; i++) {
6a9c8594 1391 assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
2c541554 1392 IN_RANGE(from[i], old_base->DPB,
e5d40372 1393 sizeof(Picture) * old_base->picture_count) ||
6a9c8594
AS
1394 !from[i]));
1395 to[i] = REBASE_PICTURE(from[i], new_base, old_base);
1396 }
1397}
1398
/**
 * Synchronise an array of parameter set buffers with another one.
 *
 * For each index: an entry missing in from is freed in to, an entry
 * missing in to is allocated, and existing source entries are copied.
 * If an allocation fails the destination entry is left NULL instead of
 * being written through.
 *
 * @param to    destination array of buffer pointers
 * @param from  source array of buffer pointers
 * @param count number of entries in both arrays
 * @param size  size in bytes of one parameter set
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i])
            av_freep(&to[i]);
        else if (from[i] && !to[i])
            to[i] = av_malloc(size);

        /* to[i] is NULL here only if the allocation above failed */
        if (from[i] && to[i])
            memcpy(to[i], from[i], size);
    }
}
1413
e5d40372
DB
1414static int decode_init_thread_copy(AVCodecContext *avctx)
1415{
1416 H264Context *h = avctx->priv_data;
6a9c8594 1417
f3a29b75
JR
1418 if (!avctx->internal->is_copy)
1419 return 0;
6a9c8594
AS
1420 memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1421 memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
1422
2c541554 1423 h->context_initialized = 0;
0eae920c 1424
6a9c8594
AS
1425 return 0;
1426}
1427
/* Copy the struct members from start_field up to, but not including,
 * end_field from one context to another. The byte count is the distance
 * between the two members in to. Arguments are parenthesized so that any
 * pointer expression can be passed. */
#define copy_fields(to, from, start_field, end_field)                     \
    memcpy(&(to)->start_field, &(from)->start_field,                      \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1431
9e696d2e
JG
1432static int h264_slice_header_init(H264Context *, int);
1433
1434static int h264_set_parameter_from_sps(H264Context *h);
1435
e5d40372
DB
1436static int decode_update_thread_context(AVCodecContext *dst,
1437 const AVCodecContext *src)
1438{
1439 H264Context *h = dst->priv_data, *h1 = src->priv_data;
2c541554
AK
1440 int inited = h->context_initialized, err = 0;
1441 int context_reinitialized = 0;
6a9c8594
AS
1442 int i;
1443
2c541554 1444 if (dst == src || !h1->context_initialized)
e5d40372 1445 return 0;
6a9c8594 1446
9e696d2e 1447 if (inited &&
2c541554
AK
1448 (h->width != h1->width ||
1449 h->height != h1->height ||
1450 h->mb_width != h1->mb_width ||
1451 h->mb_height != h1->mb_height ||
9e696d2e
JG
1452 h->sps.bit_depth_luma != h1->sps.bit_depth_luma ||
1453 h->sps.chroma_format_idc != h1->sps.chroma_format_idc ||
1454 h->sps.colorspace != h1->sps.colorspace)) {
1455
1456 av_freep(&h->bipred_scratchpad);
1457
2c541554
AK
1458 h->width = h1->width;
1459 h->height = h1->height;
1460 h->mb_height = h1->mb_height;
1461 h->mb_width = h1->mb_width;
1462 h->mb_num = h1->mb_num;
1463 h->mb_stride = h1->mb_stride;
9e696d2e
JG
1464 h->b_stride = h1->b_stride;
1465
1466 if ((err = h264_slice_header_init(h, 1)) < 0) {
2c541554 1467 av_log(h->avctx, AV_LOG_ERROR, "h264_slice_header_init() failed");
9e696d2e
JG
1468 return err;
1469 }
2c541554 1470 context_reinitialized = 1;
9e696d2e 1471
2c541554
AK
1472 /* update linesize on resize. The decoder doesn't
1473 * necessarily call ff_h264_frame_start in the new thread */
1474 h->linesize = h1->linesize;
1475 h->uvlinesize = h1->uvlinesize;
9e696d2e
JG
1476
1477 /* copy block_offset since frame_start may not be called */
1478 memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset));
9e696d2e
JG
1479 }
1480
e5d40372
DB
1481 if (!inited) {
1482 for (i = 0; i < MAX_SPS_COUNT; i++)
6a9c8594
AS
1483 av_freep(h->sps_buffers + i);
1484
e5d40372 1485 for (i = 0; i < MAX_PPS_COUNT; i++)
6a9c8594
AS
1486 av_freep(h->pps_buffers + i);
1487
2c541554 1488 memcpy(h, h1, sizeof(*h1));
6a9c8594
AS
1489 memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1490 memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
2c541554
AK
1491 memset(&h->er, 0, sizeof(h->er));
1492 memset(&h->me, 0, sizeof(h->me));
1493 h->context_initialized = 0;
1494
1495 h->picture_range_start += MAX_PICTURE_COUNT;
1496 h->picture_range_end += MAX_PICTURE_COUNT;
1497
1498 h->avctx = dst;
1499 h->DPB = NULL;
1500 h->cur_pic.f.extended_data = h->cur_pic.f.data;
1501
bac3ab13
DB
1502 if (ff_h264_alloc_tables(h) < 0) {
1503 av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
1504 return AVERROR(ENOMEM);
1505 }
6a9c8594
AS
1506 context_init(h);
1507
e5d40372
DB
1508 for (i = 0; i < 2; i++) {
1509 h->rbsp_buffer[i] = NULL;
6a9c8594
AS
1510 h->rbsp_buffer_size[i] = 0;
1511 }
a394959b 1512 h->bipred_scratchpad = NULL;
2c541554 1513 h->edge_emu_buffer = NULL;
6a9c8594
AS
1514
1515 h->thread_context[0] = h;
1516
2c541554
AK
1517 h->dsp.clear_blocks(h->mb);
1518 h->dsp.clear_blocks(h->mb + (24 * 16 << h->pixel_shift));
1519 h->context_initialized = 1;
6a9c8594
AS
1520 }
1521
2c541554
AK
1522 h->avctx->coded_height = h1->avctx->coded_height;
1523 h->avctx->coded_width = h1->avctx->coded_width;
1524 h->avctx->width = h1->avctx->width;
1525 h->avctx->height = h1->avctx->height;
1526 h->coded_picture_number = h1->coded_picture_number;
1527 h->first_field = h1->first_field;
1528 h->picture_structure = h1->picture_structure;
1529 h->qscale = h1->qscale;
1530 h->droppable = h1->droppable;
1531 h->data_partitioning = h1->data_partitioning;
1532 h->low_delay = h1->low_delay;
1533
1534 memcpy(h->DPB, h1->DPB, h1->picture_count * sizeof(*h1->DPB));
1535
1536 // reset s->picture[].f.extended_data to s->picture[].f.data
1537 for (i = 0; i < h->picture_count; i++)
1538 h->DPB[i].f.extended_data = h->DPB[i].f.data;
1539
1540 h->cur_pic_ptr = REBASE_PICTURE(h1->cur_pic_ptr, h, h1);
1541 h->cur_pic = h1->cur_pic;
1542 h->cur_pic.f.extended_data = h->cur_pic.f.data;
1543
1544 h->workaround_bugs = h1->workaround_bugs;
1545 h->low_delay = h1->low_delay;
1546 h->droppable = h1->droppable;
1547
a394959b
JG
1548 /* frame_start may not be called for the next thread (if it's decoding
1549 * a bottom field) so this has to be allocated here */
2c541554
AK
1550 err = alloc_scratch_buffers(h, h1->linesize);
1551 if (err < 0)
1552 return err;
a394959b 1553
e5d40372
DB
1554 // extradata/NAL handling
1555 h->is_avc = h1->is_avc;
6a9c8594 1556
e5d40372
DB
1557 // SPS/PPS
1558 copy_parameter_set((void **)h->sps_buffers, (void **)h1->sps_buffers,
1559 MAX_SPS_COUNT, sizeof(SPS));
1560 h->sps = h1->sps;
1561 copy_parameter_set((void **)h->pps_buffers, (void **)h1->pps_buffers,
1562 MAX_PPS_COUNT, sizeof(PPS));
1563 h->pps = h1->pps;
6a9c8594 1564
e5d40372
DB
1565 // Dequantization matrices
1566 // FIXME these are big - can they be only copied when PPS changes?
6a9c8594
AS
1567 copy_fields(h, h1, dequant4_buffer, dequant4_coeff);
1568
e5d40372
DB
1569 for (i = 0; i < 6; i++)
1570 h->dequant4_coeff[i] = h->dequant4_buffer[0] +
1571 (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
6a9c8594 1572
e5d40372
DB
1573 for (i = 0; i < 6; i++)
1574 h->dequant8_coeff[i] = h->dequant8_buffer[0] +
1575 (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
6a9c8594
AS
1576
1577 h->dequant_coeff_pps = h1->dequant_coeff_pps;
1578
e5d40372 1579 // POC timing
6a9c8594
AS
1580 copy_fields(h, h1, poc_lsb, redundant_pic_count);
1581
e5d40372 1582 // reference lists
6a9c8594
AS
1583 copy_fields(h, h1, short_ref, cabac_init_idc);
1584
2c541554
AK
1585 copy_picture_range(h->short_ref, h1->short_ref, 32, h, h1);
1586 copy_picture_range(h->long_ref, h1->long_ref, 32, h, h1);
e5d40372 1587 copy_picture_range(h->delayed_pic, h1->delayed_pic,
2c541554 1588 MAX_DELAYED_PIC_COUNT + 2, h, h1);
6a9c8594
AS
1589
1590 h->last_slice_type = h1->last_slice_type;
1591
2c541554
AK
1592 if (context_reinitialized)
1593 h264_set_parameter_from_sps(h);
1594
1595 if (!h->cur_pic_ptr)
e5d40372 1596 return 0;
6a9c8594 1597
2c541554 1598 if (!h->droppable) {
12fe7594 1599 err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
e5d40372
DB
1600 h->prev_poc_msb = h->poc_msb;
1601 h->prev_poc_lsb = h->poc_lsb;
6a9c8594 1602 }
e5d40372
DB
1603 h->prev_frame_num_offset = h->frame_num_offset;
1604 h->prev_frame_num = h->frame_num;
1605 h->outputed_poc = h->next_outputed_poc;
6a9c8594 1606
12fe7594 1607 return err;
6a9c8594
AS
1608}
1609
e5d40372
DB
1610int ff_h264_frame_start(H264Context *h)
1611{
2c541554
AK
1612 Picture *pic;
1613 int i, ret;
6e3ef511 1614 const int pixel_shift = h->pixel_shift;
0da71265 1615
2c541554
AK
1616 release_unused_pictures(h, 1);
1617 h->cur_pic_ptr = NULL;
1618
1619 i = find_unused_picture(h);
1620 if (i < 0) {
1621 av_log(h->avctx, AV_LOG_ERROR, "no frame buffer available\n");
1622 return i;
1623 }
1624 pic = &h->DPB[i];
1625
1626 pic->f.reference = h->droppable ? 0 : h->picture_structure;
1627 pic->f.coded_picture_number = h->coded_picture_number++;
1628 pic->field_picture = h->picture_structure != PICT_FRAME;
3a22d7fa 1629 /*
2c541554
AK
1630 * Zero key_frame here; IDR markings per slice in frame or fields are ORed
1631 * in later.
3a22d7fa
JD
1632 * See decode_nal_units().
1633 */
2c541554
AK
1634 pic->f.key_frame = 0;
1635 pic->mmco_reset = 0;
1636
1637 if ((ret = alloc_picture(h, pic)) < 0)
1638 return ret;
1639
1640 h->cur_pic_ptr = pic;
1641 h->cur_pic = *h->cur_pic_ptr;
1642 h->cur_pic.f.extended_data = h->cur_pic.f.data;
1643
1644 ff_er_frame_start(&h->er);
0da71265 1645
2c541554 1646 assert(h->linesize && h->uvlinesize);
0da71265 1647
e5d40372 1648 for (i = 0; i < 16; i++) {
2c541554
AK
1649 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
1650 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
0da71265 1651 }
e5d40372
DB
1652 for (i = 0; i < 16; i++) {
1653 h->block_offset[16 + i] =
2c541554 1654 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
e5d40372 1655 h->block_offset[48 + 16 + i] =
2c541554 1656 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
0da71265
MN
1657 }
1658
934b0821
LM
1659 /* can't be in alloc_tables because linesize isn't known there.
1660 * FIXME: redo bipred weight to not require extra buffer? */
2c541554
AK
1661 for (i = 0; i < h->slice_context_count; i++)
1662 if (h->thread_context[i]) {
1663 ret = alloc_scratch_buffers(h->thread_context[i], h->linesize);
1664 if (ret < 0)
1665 return ret;
1666 }
e5d40372
DB
1667
1668 /* Some macroblocks can be accessed before they're available in case
1669 * of lost slices, MBAFF or threading. */
1670 memset(h->slice_table, -1,
2c541554 1671 (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
e5d40372
DB
1672
1673 // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
1674 // s->current_picture.f.reference /* || h->contains_intra */ || 1;
1675
1676 /* We mark the current picture as non-reference after allocating it, so
1677 * that if we break out due to an error it can be released automatically
1678 * in the next ff_MPV_frame_start().
1679 * SVQ3 as well as most other codecs have only last/next/current and thus
1680 * get released even with set reference, besides SVQ3 and others do not
1681 * mark frames as reference later "naturally". */
2c541554
AK
1682 if (h->avctx->codec_id != AV_CODEC_ID_SVQ3)
1683 h->cur_pic_ptr->f.reference = 0;
357282c6 1684
2c541554 1685 h->cur_pic_ptr->field_poc[0] = h->cur_pic_ptr->field_poc[1] = INT_MAX;
6a9c8594
AS
1686
1687 h->next_output_pic = NULL;
1688
2c541554 1689 assert(h->cur_pic_ptr->long_ref == 0);
357282c6 1690
af8aa846 1691 return 0;
0da71265
MN
1692}
1693
6a9c8594 1694/**
e5d40372
DB
1695 * Run setup operations that must be run after slice header decoding.
1696 * This includes finding the next displayed frame.
1697 *
1698 * @param h h264 master context
1699 * @param setup_finished enough NALs have been read that we can call
1700 * ff_thread_finish_setup()
1701 */
1702static void decode_postinit(H264Context *h, int setup_finished)
1703{
2c541554
AK
1704 Picture *out = h->cur_pic_ptr;
1705 Picture *cur = h->cur_pic_ptr;
6a9c8594 1706 int i, pics, out_of_order, out_idx;
adedd840 1707 int invalid = 0, cnt = 0;
6a9c8594 1708
2c541554
AK
1709 h->cur_pic_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
1710 h->cur_pic_ptr->f.pict_type = h->pict_type;
6a9c8594 1711
e5d40372
DB
1712 if (h->next_output_pic)
1713 return;
6a9c8594 1714
e5d40372
DB
1715 if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) {
1716 /* FIXME: if we have two PAFF fields in one packet, we can't start
1717 * the next thread here. If we have one field per packet, we can.
1718 * The check in decode_nal_units() is not good enough to find this
1719 * yet, so we assume the worst for now. */
1720 // if (setup_finished)
2c541554 1721 // ff_thread_finish_setup(h->avctx);
6a9c8594
AS
1722 return;
1723 }
1724
657ccb5a
DB
1725 cur->f.interlaced_frame = 0;
1726 cur->f.repeat_pict = 0;
6a9c8594
AS
1727
1728 /* Signal interlacing information externally. */
e5d40372
DB
1729 /* Prioritize picture timing SEI information over used
1730 * decoding process if it exists. */
6a9c8594 1731
e5d40372
DB
1732 if (h->sps.pic_struct_present_flag) {
1733 switch (h->sei_pic_struct) {
6a9c8594
AS
1734 case SEI_PIC_STRUCT_FRAME:
1735 break;
1736 case SEI_PIC_STRUCT_TOP_FIELD:
1737 case SEI_PIC_STRUCT_BOTTOM_FIELD:
657ccb5a 1738 cur->f.interlaced_frame = 1;
6a9c8594
AS
1739 break;
1740 case SEI_PIC_STRUCT_TOP_BOTTOM:
1741 case SEI_PIC_STRUCT_BOTTOM_TOP:
1742 if (FIELD_OR_MBAFF_PICTURE)
657ccb5a 1743 cur->f.interlaced_frame = 1;
6a9c8594
AS
1744 else
1745 // try to flag soft telecine progressive
657ccb5a 1746 cur->f.interlaced_frame = h->prev_interlaced_frame;
6a9c8594
AS
1747 break;
1748 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
1749 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
e5d40372
DB
1750 /* Signal the possibility of telecined film externally
1751 * (pic_struct 5,6). From these hints, let the applications
1752 * decide if they apply deinterlacing. */
657ccb5a 1753 cur->f.repeat_pict = 1;
6a9c8594
AS
1754 break;
1755 case SEI_PIC_STRUCT_FRAME_DOUBLING:
657ccb5a 1756 cur->f.repeat_pict = 2;
6a9c8594
AS
1757 break;
1758 case SEI_PIC_STRUCT_FRAME_TRIPLING:
657ccb5a 1759 cur->f.repeat_pict = 4;
6a9c8594
AS
1760 break;
1761 }
1762
e5d40372
DB
1763 if ((h->sei_ct_type & 3) &&
1764 h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
657ccb5a 1765 cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
e5d40372 1766 } else {
6a9c8594 1767 /* Derive interlacing flag from used decoding process. */
657ccb5a 1768 cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
6a9c8594 1769 }
657ccb5a 1770 h->prev_interlaced_frame = cur->f.interlaced_frame;
6a9c8594 1771
e5d40372 1772 if (cur->field_poc[0] != cur->field_poc[1]) {
6a9c8594 1773 /* Derive top_field_first from field pocs. */
657ccb5a 1774 cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
e5d40372 1775 } else {
657ccb5a 1776 if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
e5d40372
DB
1777 /* Use picture timing SEI information. Even if it is a
1778 * information of a past frame, better than nothing. */
1779 if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ||
1780 h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
657ccb5a 1781 cur->f.top_field_first = 1;
6a9c8594 1782 else
657ccb5a 1783 cur->f.top_field_first = 0;
e5d40372 1784 } else {
6a9c8594 1785 /* Most likely progressive */
657ccb5a 1786 cur->f.top_field_first = 0;
6a9c8594
AS
1787 }
1788 }
1789
e5d40372 1790 // FIXME do something with unavailable reference frames
6a9c8594
AS
1791
1792 /* Sort B-frames into display order */
1793
e5d40372 1794 if (h->sps.bitstream_restriction_flag &&
2c541554
AK
1795 h->avctx->has_b_frames < h->sps.num_reorder_frames) {
1796 h->avctx->has_b_frames = h->sps.num_reorder_frames;
1797 h->low_delay = 0;
6a9c8594
AS
1798 }
1799
2c541554 1800 if (h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT &&
e5d40372 1801 !h->sps.bitstream_restriction_flag) {
2c541554
AK
1802 h->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
1803 h->low_delay = 0;
6a9c8594
AS
1804 }
1805
1806 pics = 0;
e5d40372
DB
1807 while (h->delayed_pic[pics])
1808 pics++;
6a9c8594
AS
1809
1810 assert(pics <= MAX_DELAYED_PIC_COUNT);
1811
1812 h->delayed_pic[pics++] = cur;
657ccb5a
DB
1813 if (cur->f.reference == 0)
1814 cur->f.reference = DELAYED_PIC_REF;
6a9c8594 1815
adedd840
RB
1816 /* Frame reordering. This code takes pictures from coding order and sorts
1817 * them by their incremental POC value into display order. It supports POC
1818 * gaps, MMCO reset codes and random resets.
1819 * A "display group" can start either with a IDR frame (f.key_frame = 1),
1820 * and/or can be closed down with a MMCO reset code. In sequences where
1821 * there is no delay, we can't detect that (since the frame was already
1822 * output to the user), so we also set h->mmco_reset to detect the MMCO
1823 * reset code.
2c541554 1824 * FIXME: if we detect insufficient delays (as per h->avctx->has_b_frames),
adedd840
RB
1825 * we increase the delay between input and output. All frames affected by
1826 * the lag (e.g. those that should have been output before another frame
1827 * that we already returned to the user) will be dropped. This is a bug
1828 * that we will fix later. */
1829 for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
1830 cnt += out->poc < h->last_pocs[i];
1831 invalid += out->poc == INT_MIN;
1832 }
e5d40372
DB
1833 if (!h->mmco_reset && !cur->f.key_frame &&
1834 cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
adedd840
RB
1835 h->mmco_reset = 2;
1836 if (pics > 1)
1837 h->delayed_pic[pics - 2]->mmco_reset = 2;
1838 }
1839 if (h->mmco_reset || cur->f.key_frame) {
1840 for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
1841 h->last_pocs[i] = INT_MIN;
1842 cnt = 0;
1843 invalid = MAX_DELAYED_PIC_COUNT;
1844 }
e5d40372 1845 out = h->delayed_pic[0];
6a9c8594 1846 out_idx = 0;
e5d40372
DB
1847 for (i = 1; i < MAX_DELAYED_PIC_COUNT &&
1848 h->delayed_pic[i] &&
1849 !h->delayed_pic[i - 1]->mmco_reset &&
1850 !h->delayed_pic[i]->f.key_frame;
1851 i++)
1852 if (h->delayed_pic[i]->poc < out->poc) {
1853 out = h->delayed_pic[i];
6a9c8594
AS
1854 out_idx = i;
1855 }
2c541554 1856 if (h->avctx->has_b_frames == 0 &&
e5d40372 1857 (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
adedd840 1858 h->next_outputed_poc = INT_MIN;
e5d40372
DB
1859 out_of_order = !out->f.key_frame && !h->mmco_reset &&
1860 (out->poc < h->next_outputed_poc);
6a9c8594 1861
e5d40372 1862 if (h->sps.bitstream_restriction_flag &&
2c541554
AK
1863 h->avctx->has_b_frames >= h->sps.num_reorder_frames) {
1864 } else if (out_of_order && pics - 1 == h->avctx->has_b_frames &&
1865 h->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
ea2bb12e 1866 if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
2c541554 1867 h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, cnt);
ea2bb12e 1868 }
2c541554
AK
1869 h->low_delay = 0;
1870 } else if (h->low_delay &&
e5d40372
DB
1871 ((h->next_outputed_poc != INT_MIN &&
1872 out->poc > h->next_outputed_poc + 2) ||
ea2bb12e 1873 cur->f.pict_type == AV_PICTURE_TYPE_B)) {
2c541554
AK
1874 h->low_delay = 0;
1875 h->avctx->has_b_frames++;
6a9c8594
AS
1876 }
1877
2c541554 1878 if (pics > h->avctx->has_b_frames) {
657ccb5a 1879 out->f.reference &= ~DELAYED_PIC_REF;
e5d40372
DB
1880 // for frame threading, the owner must be the second field's thread or
1881 // else the first thread can release the picture and reuse it unsafely
2c541554 1882 out->owner2 = h;
e5d40372
DB
1883 for (i = out_idx; h->delayed_pic[i]; i++)
1884 h->delayed_pic[i] = h->delayed_pic[i + 1];
1885 }
1886 memmove(h->last_pocs, &h->last_pocs[1],
1887 sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
adedd840 1888 h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
2c541554 1889 if (!out_of_order && pics > h->avctx->has_b_frames) {
6a9c8594 1890 h->next_output_pic = out;
adedd840
RB
1891 if (out->mmco_reset) {
1892 if (out_idx > 0) {
e5d40372 1893 h->next_outputed_poc = out->poc;
adedd840
RB
1894 h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
1895 } else {
1896 h->next_outputed_poc = INT_MIN;
1897 }
1898 } else {
0b4c3232
RB
1899 if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
1900 h->next_outputed_poc = INT_MIN;
1901 } else {
1902 h->next_outputed_poc = out->poc;
1903 }
adedd840
RB
1904 }
1905 h->mmco_reset = 0;
e5d40372 1906 } else {
2c541554 1907 av_log(h->avctx, AV_LOG_DEBUG, "no picture\n");
6a9c8594
AS
1908 }
1909
1910 if (setup_finished)
2c541554 1911 ff_thread_finish_setup(h->avctx);
6a9c8594
AS
1912}
1913
76741b0e
BC
1914static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
1915 uint8_t *src_cb, uint8_t *src_cr,
e5d40372
DB
1916 int linesize, int uvlinesize,
1917 int simple)
76741b0e 1918{
0b69d625 1919 uint8_t *top_border;
5f7f9719 1920 int top_idx = 1;
6e3ef511 1921 const int pixel_shift = h->pixel_shift;
76741b0e
BC
1922 int chroma444 = CHROMA444;
1923 int chroma422 = CHROMA422;
115329f1 1924
e5d40372 1925 src_y -= linesize;
53c05b1e
MN
1926 src_cb -= uvlinesize;
1927 src_cr -= uvlinesize;
1928
e5d40372 1929 if (!simple && FRAME_MBAFF) {
2c541554 1930 if (h->mb_y & 1) {
e5d40372 1931 if (!MB_MBAFF) {
2c541554 1932 top_border = h->top_borders[0][h->mb_x];
e5d40372 1933 AV_COPY128(top_border, src_y + 15 * linesize);
6e3ef511 1934 if (pixel_shift)
e5d40372 1935 AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
2c541554 1936 if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
e5d40372
DB
1937 if (chroma444) {
1938 if (pixel_shift) {
1939 AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
1940 AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16);
1941 AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize);
1942 AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16);
c90b9442 1943 } else {
e5d40372
DB
1944 AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize);
1945 AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize);
c90b9442 1946 }
e5d40372 1947 } else if (chroma422) {
76741b0e 1948 if (pixel_shift) {
e5d40372
DB
1949 AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
1950 AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize);
76741b0e 1951 } else {
e5d40372
DB
1952 AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize);
1953 AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize);
76741b0e 1954 }
6e3ef511 1955 } else {
c90b9442 1956 if (pixel_shift) {
e5d40372
DB
1957 AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize);
1958 AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize);
c90b9442 1959 } else {
e5d40372
DB
1960 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1961 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
c90b9442 1962 }
6e3ef511 1963 }
5f7f9719
MN
1964 }
1965 }
e5d40372 1966 } else if (MB_MBAFF) {
c988f975 1967 top_idx = 0;
e5d40372 1968 } else
c988f975 1969 return;
5f7f9719
MN
1970 }
1971
2c541554 1972 top_border = h->top_borders[top_idx][h->mb_x];
09f21198 1973 /* There are two lines saved, the line above the top macroblock
e5d40372
DB
1974 * of a pair, and the line above the bottom macroblock. */
1975 AV_COPY128(top_border, src_y + 16 * linesize);
6e3ef511 1976 if (pixel_shift)
e5d40372
DB
1977 AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);
1978
2c541554 1979 if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
e5d40372
DB
1980 if (chroma444) {
1981 if (pixel_shift) {
1982 AV_COPY128(top_border + 32, src_cb + 16 * linesize);
1983 AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16);
1984 AV_COPY128(top_border + 64, src_cr + 16 * linesize);
1985 AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16);
c90b9442 1986 } else {
e5d40372
DB
1987 AV_COPY128(top_border + 16, src_cb + 16 * linesize);
1988 AV_COPY128(top_border + 32, src_cr + 16 * linesize);
c90b9442 1989 }
e5d40372 1990 } else if (chroma422) {
76741b0e 1991 if (pixel_shift) {
e5d40372
DB
1992 AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize);
1993 AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize);
76741b0e 1994 } else {
e5d40372
DB
1995 AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize);
1996 AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize);
76741b0e 1997 }
6e3ef511 1998 } else {
c90b9442 1999 if (pixel_shift) {
e5d40372
DB
2000 AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize);
2001 AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize);
c90b9442 2002 } else {
e5d40372
DB
2003 AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize);
2004 AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize);
c90b9442 2005 }
6e3ef511 2006 }
53c05b1e
MN
2007 }
2008}
2009
bbdd52ed 2010static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
e5d40372
DB
2011 uint8_t *src_cb, uint8_t *src_cr,
2012 int linesize, int uvlinesize,
2013 int xchg, int chroma444,
2014 int simple, int pixel_shift)
2015{
4e987f82 2016 int deblock_topleft;
b69378e2 2017 int deblock_top;
5f7f9719 2018 int top_idx = 1;
1e4f1c56
AS
2019 uint8_t *top_border_m1;
2020 uint8_t *top_border;
5f7f9719 2021
e5d40372 2022 if (!simple && FRAME_MBAFF) {
2c541554 2023 if (h->mb_y & 1) {
e5d40372 2024 if (!MB_MBAFF)
c988f975 2025 return;
e5d40372 2026 } else {
5f7f9719
MN
2027 top_idx = MB_MBAFF ? 0 : 1;
2028 }
5f7f9719 2029 }
b69378e2 2030
e5d40372 2031 if (h->deblocking_filter == 2) {
2c541554 2032 deblock_topleft = h->slice_table[h->mb_xy - 1 - h->mb_stride] == h->slice_num;
4e987f82 2033 deblock_top = h->top_type;
b69378e2 2034 } else {
2c541554
AK
2035 deblock_topleft = (h->mb_x > 0);
2036 deblock_top = (h->mb_y > !!MB_FIELD);
b69378e2 2037 }
53c05b1e 2038
e5d40372 2039 src_y -= linesize + 1 + pixel_shift;
6e3ef511
OA
2040 src_cb -= uvlinesize + 1 + pixel_shift;
2041 src_cr -= uvlinesize + 1 + pixel_shift;
53c05b1e 2042
2c541554
AK
2043 top_border_m1 = h->top_borders[top_idx][h->mb_x - 1];
2044 top_border = h->top_borders[top_idx][h->mb_x];
1e4f1c56 2045
e5d40372
DB
2046#define XCHG(a, b, xchg) \
2047 if (pixel_shift) { \
2048 if (xchg) { \
2049 AV_SWAP64(b + 0, a + 0); \
2050 AV_SWAP64(b + 8, a + 8); \
2051 } else { \
2052 AV_COPY128(b, a); \
2053 } \
2054 } else if (xchg) \
2055 AV_SWAP64(b, a); \
2056 else \
2057 AV_COPY64(b, a);
2058
2059 if (deblock_top) {
2060 if (deblock_topleft) {
2061 XCHG(top_border_m1 + (8 << pixel_shift),
2062 src_y - (7 << pixel_shift), 1);
c988f975 2063 }
6e3ef511
OA
2064 XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
2065 XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
2c541554
AK
2066 if (h->mb_x + 1 < h->mb_width) {
2067 XCHG(h->top_borders[top_idx][h->mb_x + 1],
e5d40372 2068 src_y + (17 << pixel_shift), 1);
43efd19a 2069 }
53c05b1e 2070 }
2c541554 2071 if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
e5d40372
DB
2072 if (chroma444) {
2073 if (deblock_topleft) {
c90b9442
JGG
2074 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
2075 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
2076 }
2077 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
2078 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
2079 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
2080 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
2c541554
AK
2081 if (h->mb_x + 1 < h->mb_width) {
2082 XCHG(h->top_borders[top_idx][h->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
2083 XCHG(h->top_borders[top_idx][h->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
c90b9442
JGG
2084 }
2085 } else {
e5d40372
DB
2086 if (deblock_top) {
2087 if (deblock_topleft) {
c90b9442
JGG
2088 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
2089 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
2090 }
e5d40372
DB
2091 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
2092 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
c988f975 2093 }
53c05b1e 2094 }
53c05b1e
MN
2095 }
2096}
2097
88bd7fdc 2098static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
e5d40372
DB
2099 int index)
2100{
6e3ef511 2101 if (high_bit_depth) {
e5d40372 2102 return AV_RN32A(((int32_t *)mb) + index);
6e3ef511
OA
2103 } else
2104 return AV_RN16A(mb + index);
2105}
2106
88bd7fdc 2107static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
e5d40372
DB
2108 int index, int value)
2109{
6e3ef511 2110 if (high_bit_depth) {
e5d40372 2111 AV_WN32A(((int32_t *)mb) + index, value);
6e3ef511
OA
2112 } else
2113 AV_WN16A(mb + index, value);
2114}
2115
e5d40372
DB
2116static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
2117 int mb_type, int is_h264,
2118 int simple,
2119 int transform_bypass,
2120 int pixel_shift,
2121 int *block_offset,
2122 int linesize,
2123 uint8_t *dest_y, int p)
c90b9442 2124{
88bd7fdc
DB
2125 void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
2126 void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
c90b9442 2127 int i;
2c541554 2128 int qscale = p == 0 ? h->qscale : h->chroma_qp[p - 1];
e5d40372
DB
2129 block_offset += 16 * p;
2130 if (IS_INTRA4x4(mb_type)) {
2491f9ee
AK
2131 if (IS_8x8DCT(mb_type)) {
2132 if (transform_bypass) {
2133 idct_dc_add =
2ed00820 2134 idct_add = h->h264dsp.h264_add_pixels8;
2491f9ee
AK
2135 } else {
2136 idct_dc_add = h->h264dsp.h264_idct8_dc_add;
2137 idct_add = h->h264dsp.h264_idct8_add;
2138 }
2139 for (i = 0; i < 16; i += 4) {
2140 uint8_t *const ptr = dest_y + block_offset[i];
2141 const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
2142 if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
2143 h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
e5d40372 2144 } else {
2491f9ee
AK
2145 const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
2146 h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
2147 (h->topright_samples_available << i) & 0x4000, linesize);
2148 if (nnz) {
2149 if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2150 idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2151 else
2152 idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
c90b9442
JGG
2153 }
2154 }
2491f9ee
AK
2155 }
2156 } else {
2157 if (transform_bypass) {
2158 idct_dc_add =
2ed00820 2159 idct_add = h->h264dsp.h264_add_pixels4;
e5d40372 2160 } else {
2491f9ee
AK
2161 idct_dc_add = h->h264dsp.h264_idct_dc_add;
2162 idct_add = h->h264dsp.h264_idct_add;
2163 }
2164 for (i = 0; i < 16; i++) {
2165 uint8_t *const ptr = dest_y + block_offset[i];
2166 const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
c90b9442 2167
2491f9ee
AK
2168 if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
2169 h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2170 } else {
2171 uint8_t *topright;
2172 int nnz, tr;
2173 uint64_t tr_high;
2174 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
2175 const int topright_avail = (h->topright_samples_available << i) & 0x8000;
2c541554 2176 assert(h->mb_y || linesize <= block_offset[i]);
2491f9ee
AK
2177 if (!topright_avail) {
2178 if (pixel_shift) {
2179 tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
2180 topright = (uint8_t *)&tr_high;
2181 } else {
2182 tr = ptr[3 - linesize] * 0x01010101u;
2183 topright = (uint8_t *)&tr;
2184 }
e5d40372 2185 } else
2491f9ee
AK
2186 topright = ptr + (4 << pixel_shift) - linesize;
2187 } else
2188 topright = NULL;
2189
2190 h->hpc.pred4x4[dir](ptr, topright, linesize);
2191 nnz = h->non_zero_count_cache[scan8[i + p * 16]];
2192 if (nnz) {
2193 if (is_h264) {
2194 if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2195 idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2196 else
2197 idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2198 } else if (CONFIG_SVQ3_DECODER)
2199 ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0);
c90b9442
JGG
2200 }
2201 }
2202 }
2203 }
e5d40372
DB
2204 } else {
2205 h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize);
2206 if (is_h264) {
2207 if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
2208 if (!transform_bypass)
2209 h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift),
2210 h->mb_luma_dc[p],
2211 h->dequant4_coeff[p][qscale][0]);
2212 else {
2213 static const uint8_t dc_mapping[16] = {
2214 0 * 16, 1 * 16, 4 * 16, 5 * 16,
2215 2 * 16, 3 * 16, 6 * 16, 7 * 16,
2216 8 * 16, 9 * 16, 12 * 16, 13 * 16,
2217 10 * 16, 11 * 16, 14 * 16, 15 * 16 };
2218 for (i = 0; i < 16; i++)
2219 dctcoef_set(h->mb + (p * 256 << pixel_shift),
2220 pixel_shift, dc_mapping[i],
2221 dctcoef_get(h->mb_luma_dc[p],
2222 pixel_shift, i));
c90b9442
JGG
2223 }
2224 }
301fb921 2225 } else if (CONFIG_SVQ3_DECODER)
e5d40372
DB
2226 ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256,
2227 h->mb_luma_dc[p], qscale);
c90b9442
JGG
2228 }
2229}
2230
e5d40372
DB
2231static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
2232 int is_h264, int simple,
2233 int transform_bypass,
2234 int pixel_shift,
2235 int *block_offset,
2236 int linesize,
2237 uint8_t *dest_y, int p)
c90b9442 2238{
88bd7fdc 2239 void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
c90b9442 2240 int i;
e5d40372
DB
2241 block_offset += 16 * p;
2242 if (!IS_INTRA4x4(mb_type)) {
2243 if (is_h264) {
2244 if (IS_INTRA16x16(mb_type)) {
2245 if (transform_bypass) {
2246 if (h->sps.profile_idc == 244 &&
2247 (h->intra16x16_pred_mode == VERT_PRED8x8 ||
2248 h->intra16x16_pred_mode == HOR_PRED8x8)) {
2249 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset,
2250 h->mb + (p * 256 << pixel_shift),
2251 linesize);
2252 } else {
2253 for (i = 0; i < 16; i++)
2254 if (h->non_zero_count_cache[scan8[i + p * 16]] ||
2255 dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2ed00820
RB
2256 h->h264dsp.h264_add_pixels4(dest_y + block_offset[i],
2257 h->mb + (i * 16 + p * 256 << pixel_shift),
2258 linesize);
c90b9442 2259 }
e5d40372
DB
2260 } else {
2261 h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
2262 h->mb + (p * 256 << pixel_shift),
2263 linesize,
2264 h->non_zero_count_cache + p * 5 * 8);
c90b9442 2265 }
e5d40372
DB
2266 } else if (h->cbp & 15) {
2267 if (transform_bypass) {
c90b9442 2268 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2ed00820
RB
2269 idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8
2270 : h->h264dsp.h264_add_pixels4;
e5d40372
DB
2271 for (i = 0; i < 16; i += di)
2272 if (h->non_zero_count_cache[scan8[i + p * 16]])
2273 idct_add(dest_y + block_offset[i],
2274 h->mb + (i * 16 + p * 256 << pixel_shift),
2275 linesize);
2276 } else {
2277 if (IS_8x8DCT(mb_type))
2278 h->h264dsp.h264_idct8_add4(dest_y, block_offset,
2279 h->mb + (p * 256 << pixel_shift),
2280 linesize,
2281 h->non_zero_count_cache + p * 5 * 8);
2282 else
2283 h->h264dsp.h264_idct_add16(dest_y, block_offset,
2284 h->mb + (p * 256 << pixel_shift),
2285 linesize,
2286 h->non_zero_count_cache + p * 5 * 8);
c90b9442
JGG
2287 }
2288 }
301fb921 2289 } else if (CONFIG_SVQ3_DECODER) {
e5d40372
DB
2290 for (i = 0; i < 16; i++)
2291 if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) {
2292 // FIXME benchmark weird rule, & below
2293 uint8_t *const ptr = dest_y + block_offset[i];
2294 ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize,
2c541554 2295 h->qscale, IS_INTRA(mb_type) ? 1 : 0);
c90b9442 2296 }
c90b9442
JGG
2297 }
2298 }
2299}
2300
28fff0d9
MR
2301#define BITS 8
2302#define SIMPLE 1
2303#include "h264_mb_template.c"
e5d40372 2304
28fff0d9
MR
2305#undef BITS
2306#define BITS 16
2307#include "h264_mb_template.c"
bd91fee3 2308
28fff0d9
MR
2309#undef SIMPLE
2310#define SIMPLE 0
2311#include "h264_mb_template.c"
c90b9442 2312
e5d40372
DB
2313void ff_h264_hl_decode_mb(H264Context *h)
2314{
e5d40372 2315 const int mb_xy = h->mb_xy;
2c541554
AK
2316 const int mb_type = h->cur_pic.f.mb_type[mb_xy];
2317 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || h->qscale == 0;
bd91fee3 2318
c90b9442 2319 if (CHROMA444) {
e5d40372 2320 if (is_complex || h->pixel_shift)
c90b9442
JGG
2321 hl_decode_mb_444_complex(h);
2322 else
28fff0d9 2323 hl_decode_mb_444_simple_8(h);
c90b9442 2324 } else if (is_complex) {
bd91fee3 2325 hl_decode_mb_complex(h);
6e3ef511
OA
2326 } else if (h->pixel_shift) {
2327 hl_decode_mb_simple_16(h);
2328 } else
2329 hl_decode_mb_simple_8(h);
bd91fee3
AS
2330}
2331
e5d40372
DB
2332static int pred_weight_table(H264Context *h)
2333{
0da71265 2334 int list, i;
9f2d1b4f 2335 int luma_def, chroma_def;
115329f1 2336
e5d40372
DB
2337 h->use_weight = 0;
2338 h->use_weight_chroma = 0;
2c541554 2339 h->luma_log2_weight_denom = get_ue_golomb(&h->gb);
e5d40372 2340 if (h->sps.chroma_format_idc)
2c541554 2341 h->chroma_log2_weight_denom = get_ue_golomb(&h->gb);
e5d40372
DB
2342 luma_def = 1 << h->luma_log2_weight_denom;
2343 chroma_def = 1 << h->chroma_log2_weight_denom;
0da71265 2344
e5d40372 2345 for (list = 0; list < 2; list++) {
cb99c652
GB
2346 h->luma_weight_flag[list] = 0;
2347 h->chroma_weight_flag[list] = 0;
e5d40372 2348 for (i = 0; i < h->ref_count[list]; i++) {
0da71265 2349 int luma_weight_flag, chroma_weight_flag;
115329f1 2350
2c541554 2351 luma_weight_flag = get_bits1(&h->gb);
e5d40372 2352 if (luma_weight_flag) {
2c541554
AK
2353 h->luma_weight[i][list][0] = get_se_golomb(&h->gb);
2354 h->luma_weight[i][list][1] = get_se_golomb(&h->gb);
e5d40372
DB
2355 if (h->luma_weight[i][list][0] != luma_def ||
2356 h->luma_weight[i][list][1] != 0) {
2357 h->use_weight = 1;
2358 h->luma_weight_flag[list] = 1;
cb99c652 2359 }
e5d40372
DB
2360 } else {
2361 h->luma_weight[i][list][0] = luma_def;
2362 h->luma_weight[i][list][1] = 0;
0da71265
MN
2363 }
2364
e5d40372 2365 if (h->sps.chroma_format_idc) {
2c541554 2366 chroma_weight_flag = get_bits1(&h->gb);
e5d40372 2367 if (chroma_weight_flag) {
fef744d4 2368 int j;
e5d40372 2369 for (j = 0; j < 2; j++) {
2c541554
AK
2370 h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb);
2371 h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb);
e5d40372
DB
2372 if (h->chroma_weight[i][list][j][0] != chroma_def ||
2373 h->chroma_weight[i][list][j][1] != 0) {
2374 h->use_weight_chroma = 1;
2375 h->chroma_weight_flag[list] = 1;
cb99c652 2376 }
fef744d4 2377 }
e5d40372 2378 } else {
fef744d4 2379 int j;
e5d40372
DB
2380 for (j = 0; j < 2; j++) {
2381 h->chroma_weight[i][list][j][0] = chroma_def;
2382 h->chroma_weight[i][list][j][1] = 0;
fef744d4 2383 }
0da71265
MN
2384 }
2385 }
2386 }
e5d40372
DB
2387 if (h->slice_type_nos != AV_PICTURE_TYPE_B)
2388 break;
0da71265 2389 }
e5d40372 2390 h->use_weight = h->use_weight || h->use_weight_chroma;
0da71265
MN
2391 return 0;
2392}
2393
1052b76f
MN
2394/**
2395 * Initialize implicit_weight table.
6da88bd3 2396 * @param field 0/1 initialize the weight for interlaced MBAFF
1052b76f
MN
2397 * -1 initializes the rest
2398 */
e5d40372
DB
2399static void implicit_weight_table(H264Context *h, int field)
2400{
1052b76f 2401 int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
9f2d1b4f 2402
ce09f927
GB
2403 for (i = 0; i < 2; i++) {
2404 h->luma_weight_flag[i] = 0;
2405 h->chroma_weight_flag[i] = 0;
2406 }
2407
e5d40372 2408 if (field < 0) {
2c541554
AK
2409 if (h->picture_structure == PICT_FRAME) {
2410 cur_poc = h->cur_pic_ptr->poc;
4418aa9c 2411 } else {
2c541554 2412 cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1];
4418aa9c 2413 }
e5d40372
DB
2414 if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF &&
2415 h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
2416 h->use_weight = 0;
2417 h->use_weight_chroma = 0;
2418 return;
2419 }
2420 ref_start = 0;
2421 ref_count0 = h->ref_count[0];
2422 ref_count1 = h->ref_count[1];
2423 } else {
2c541554 2424 cur_poc = h->cur_pic_ptr->field_poc[field];
e5d40372
DB
2425 ref_start = 16;
2426 ref_count0 = 16 + 2 * h->ref_count[0];
2427 ref_count1 = 16 + 2 * h->ref_count[1];
1052b76f 2428 }
9f2d1b4f 2429
e5d40372
DB
2430 h->use_weight = 2;
2431 h->use_weight_chroma = 2;
2432 h->luma_log2_weight_denom = 5;
2433 h->chroma_log2_weight_denom = 5;
9f2d1b4f 2434
e5d40372 2435 for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
9f2d1b4f 2436 int poc0 = h->ref_list[0][ref0].poc;
e5d40372 2437 for (ref1 = ref_start; ref1 < ref_count1; ref1++) {
87cf70eb
JD
2438 int w = 32;
2439 if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
2440 int poc1 = h->ref_list[1][ref1].poc;
e5d40372
DB
2441 int td = av_clip(poc1 - poc0, -128, 127);
2442 if (td) {
87cf70eb
JD
2443 int tb = av_clip(cur_poc - poc0, -128, 127);
2444 int tx = (16384 + (FFABS(td) >> 1)) / td;
e5d40372
DB
2445 int dist_scale_factor = (tb * tx + 32) >> 8;
2446 if (dist_scale_factor >= -64 && dist_scale_factor <= 128)
87cf70eb
JD
2447 w = 64 - dist_scale_factor;
2448 }
1052b76f 2449 }
e5d40372
DB
2450 if (field < 0) {
2451 h->implicit_weight[ref0][ref1][0] =
2452 h->implicit_weight[ref0][ref1][1] = w;
2453 } else {
2454 h->implicit_weight[ref0][ref1][field] = w;
72f86ec0 2455 }
9f2d1b4f
LM
2456 }
2457 }
2458}
2459
8fd57a66 2460/**
5175b937 2461 * instantaneous decoder refresh.
0da71265 2462 */
e5d40372
DB
2463static void idr(H264Context *h)
2464{
ea6f00c4 2465 ff_h264_remove_all_refs(h);
e5d40372
DB
2466 h->prev_frame_num = 0;
2467 h->prev_frame_num_offset = 0;
2468 h->prev_poc_msb =
2469 h->prev_poc_lsb = 0;
0da71265
MN
2470}
2471
7c33ad19 2472/* forget old pics after a seek */
9e696d2e 2473static void flush_change(H264Context *h)
e5d40372 2474{
7c33ad19 2475 int i;
adedd840
RB
2476 for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
2477 h->last_pocs[i] = INT_MIN;
e5d40372 2478 h->outputed_poc = h->next_outputed_poc = INT_MIN;
b19d493f 2479 h->prev_interlaced_frame = 1;
7c33ad19 2480 idr(h);
2c541554
AK
2481 if (h->cur_pic_ptr)
2482 h->cur_pic_ptr->f.reference = 0;
2483 h->first_field = 0;
9e696d2e
JG
2484 memset(h->ref_list[0], 0, sizeof(h->ref_list[0]));
2485 memset(h->ref_list[1], 0, sizeof(h->ref_list[1]));
2486 memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0]));
2487 memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1]));
9c095463 2488 ff_h264_reset_sei(h);
9e696d2e
JG
2489}
2490
2491/* forget old pics after a seek */
2492static void flush_dpb(AVCodecContext *avctx)
2493{
2494 H264Context *h = avctx->priv_data;
2495 int i;
2496
2497 for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
2498 if (h->delayed_pic[i])
2499 h->delayed_pic[i]->f.reference = 0;
2500 h->delayed_pic[i] = NULL;
2501 }
2502
2503 flush_change(h);
2c541554
AK
2504
2505 for (i = 0; i < h->picture_count; i++) {
2506 if (h->DPB[i].f.data[0])
2507 free_frame_buffer(h, &h->DPB[i]);
2508 }
2509 h->cur_pic_ptr = NULL;
2510
2511 h->mb_x = h->mb_y = 0;
2512
2513 h->parse_context.state = -1;
2514 h->parse_context.frame_start_found = 0;
2515 h->parse_context.overread = 0;
2516 h->parse_context.overread_index = 0;
2517 h->parse_context.index = 0;
2518 h->parse_context.last_index = 0;
7c33ad19
LM
2519}
2520
e5d40372
DB
2521static int init_poc(H264Context *h)
2522{
e5d40372 2523 const int max_frame_num = 1 << h->sps.log2_max_frame_num;
0da71265 2524 int field_poc[2];
2c541554 2525 Picture *cur = h->cur_pic_ptr;
0da71265 2526
e5d40372
DB
2527 h->frame_num_offset = h->prev_frame_num_offset;
2528 if (h->frame_num < h->prev_frame_num)
b78a6baa 2529 h->frame_num_offset += max_frame_num;
0da71265 2530
e5d40372
DB
2531 if (h->sps.poc_type == 0) {
2532 const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
0da71265 2533
e5d40372 2534 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
0da71265 2535 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
e5d40372 2536 else if (h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
0da71265
MN
2537 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
2538 else
2539 h->poc_msb = h->prev_poc_msb;
115329f1 2540 field_poc[0] =
0da71265 2541 field_poc[1] = h->poc_msb + h->poc_lsb;
2c541554 2542 if (h->picture_structure == PICT_FRAME)
0da71265 2543 field_poc[1] += h->delta_poc_bottom;
e5d40372 2544 } else if (h->sps.poc_type == 1) {
0da71265
MN
2545 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
2546 int i;
2547
e5d40372 2548 if (h->sps.poc_cycle_length != 0)
0da71265
MN
2549 abs_frame_num = h->frame_num_offset + h->frame_num;
2550 else
2551 abs_frame_num = 0;
2552
e5d40372 2553 if (h->nal_ref_idc == 0 && abs_frame_num > 0)
0da71265 2554 abs_frame_num--;
115329f1 2555
0da71265 2556 expected_delta_per_poc_cycle = 0;
e5d40372
DB
2557 for (i = 0; i < h->sps.poc_cycle_length; i++)
2558 // FIXME integrate during sps parse
2559 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i];
0da71265 2560
e5d40372 2561 if (abs_frame_num > 0) {
0da71265
MN
2562 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
2563 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
2564
2565 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
e5d40372
DB
2566 for (i = 0; i <= frame_num_in_poc_cycle; i++)
2567 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i];
0da71265
MN
2568 } else
2569 expectedpoc = 0;
2570
e5d40372 2571 if (h->nal_ref_idc == 0)
0da71265 2572 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 2573
0da71265
MN
2574 field_poc[0] = expectedpoc + h->delta_poc[0];
2575 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
2576
2c541554 2577 if (h->picture_structure == PICT_FRAME)
0da71265 2578 field_poc[1] += h->delta_poc[1];
e5d40372
DB
2579 } else {
2580 int poc = 2 * (h->frame_num_offset + h->frame_num);
5710b371 2581
e5d40372 2582 if (!h->nal_ref_idc)
b78a6baa 2583 poc--;
5710b371 2584
e5d40372
DB
2585 field_poc[0] = poc;
2586 field_poc[1] = poc;
0da71265 2587 }
115329f1 2588
2c541554
AK
2589 if (h->picture_structure != PICT_BOTTOM_FIELD)
2590 h->cur_pic_ptr->field_poc[0] = field_poc[0];
2591 if (h->picture_structure != PICT_TOP_FIELD)
2592 h->cur_pic_ptr->field_poc[1] = field_poc[1];
e5d40372 2593 cur->poc = FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
2594
2595 return 0;
2596}
2597
b41c1db3
2598/**
2599 * initialize scan tables
2600 */
e5d40372
DB
2601static void init_scan_tables(H264Context *h)
2602{
b41c1db3 2603 int i;
e5d40372
DB
2604 for (i = 0; i < 16; i++) {
2605#define T(x) (x >> 2) | ((x << 2) & 0xF)
ca32f7f2 2606 h->zigzag_scan[i] = T(zigzag_scan[i]);
e5d40372 2607 h->field_scan[i] = T(field_scan[i]);
b41c1db3 2608#undef T
b41c1db3 2609 }
e5d40372
DB
2610 for (i = 0; i < 64; i++) {
2611#define T(x) (x >> 3) | ((x & 7) << 3)
ca32f7f2
JGG
2612 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
2613 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
2614 h->field_scan8x8[i] = T(field_scan8x8[i]);
2615 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
b41c1db3 2616#undef T
b41c1db3 2617 }
e5d40372 2618 if (h->sps.transform_bypass) { // FIXME same ugly
b41c1db3 2619 h->zigzag_scan_q0 = zigzag_scan;
45beb850 2620 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
2621 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
2622 h->field_scan_q0 = field_scan;
2623 h->field_scan8x8_q0 = field_scan8x8;
2624 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
e5d40372 2625 } else {
b41c1db3
2626 h->zigzag_scan_q0 = h->zigzag_scan;
2627 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
2628 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
2629 h->field_scan_q0 = h->field_scan;
2630 h->field_scan8x8_q0 = h->field_scan8x8;
2631 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
2632 }
2633}
afebe2f7 2634
e5d40372
DB
2635static int field_end(H264Context *h, int in_setup)
2636{
2c541554 2637 AVCodecContext *const avctx = h->avctx;
12fe7594 2638 int err = 0;
2c541554 2639 h->mb_y = 0;
256299d3 2640
2c541554
AK
2641 if (!in_setup && !h->droppable)
2642 ff_thread_report_progress(&h->cur_pic_ptr->f, INT_MAX,
2643 h->picture_structure == PICT_BOTTOM_FIELD);
256299d3 2644
e5d40372 2645 if (CONFIG_H264_VDPAU_DECODER &&
2c541554
AK
2646 h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
2647 ff_vdpau_h264_set_reference_frames(h);
256299d3 2648
e5d40372 2649 if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
2c541554 2650 if (!h->droppable) {
12fe7594 2651 err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
e5d40372
DB
2652 h->prev_poc_msb = h->poc_msb;
2653 h->prev_poc_lsb = h->poc_lsb;
6a9c8594 2654 }
e5d40372
DB
2655 h->prev_frame_num_offset = h->frame_num_offset;
2656 h->prev_frame_num = h->frame_num;
2657 h->outputed_poc = h->next_outputed_poc;
256299d3 2658 }
256299d3
MN
2659
2660 if (avctx->hwaccel) {
2661 if (avctx->hwaccel->end_frame(avctx) < 0)
e5d40372
DB
2662 av_log(avctx, AV_LOG_ERROR,
2663 "hardware accelerator failed to decode picture\n");
256299d3
MN
2664 }
2665
e5d40372 2666 if (CONFIG_H264_VDPAU_DECODER &&
2c541554
AK
2667 h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
2668 ff_vdpau_h264_picture_complete(h);
256299d3
MN
2669
2670 /*
2671 * FIXME: Error handling code does not seem to support interlaced
2672 * when slices span multiple rows
2673 * The ff_er_add_slice calls don't work right for bottom
2674 * fields; they cause massive erroneous error concealing
2675 * Error marking covers both fields (top and bottom).
2676 * This causes a mismatched s->error_count
2677 * and a bad error table. Further, the error count goes to
2678 * INT_MAX when called for bottom field, because mb_y is
2679 * past end by one (callers fault) and resync_mb_y != 0
2680 * causes problems for the first MB line, too.
2681 */
2c541554
AK
2682 if (!FIELD_PICTURE) {
2683 h->er.cur_pic = h->cur_pic_ptr;
2684 h->er.last_pic = h->ref_count[0] ? &h->ref_list[0][0] : NULL;
2685 h->er.next_pic = h->ref_count[1] ? &h->ref_list[1][0] : NULL;
2686 ff_er_frame_end(&h->er);
2687 }
2c541554 2688 emms_c();
d225a1e2 2689
e5d40372 2690 h->current_slice = 0;
12fe7594
DB
2691
2692 return err;
256299d3
MN
2693}
2694
afebe2f7 2695/**
49bd8e4b 2696 * Replicate H264 "master" context to thread contexts.
afebe2f7 2697 */
f1d8763a 2698static int clone_slice(H264Context *dst, H264Context *src)
afebe2f7 2699{
e5d40372 2700 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
2c541554
AK
2701 dst->cur_pic_ptr = src->cur_pic_ptr;
2702 dst->cur_pic = src->cur_pic;
2703 dst->linesize = src->linesize;
2704 dst->uvlinesize = src->uvlinesize;
2705 dst->first_field = src->first_field;
f1d8763a 2706
e5d40372
DB
2707 dst->prev_poc_msb = src->prev_poc_msb;
2708 dst->prev_poc_lsb = src->prev_poc_lsb;
2709 dst->prev_frame_num_offset = src->prev_frame_num_offset;
2710 dst->prev_frame_num = src->prev_frame_num;
2711 dst->short_ref_count = src->short_ref_count;
afebe2f7
2712
2713 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
2714 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
2715 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
50c21814
2716
2717 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
2718 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
f1d8763a
JG
2719
2720 return 0;
afebe2f7
2721}
2722
0da71265 2723/**
58c42af7 2724 * Compute profile from profile_idc and constraint_set?_flags.
fe9a3fbe
JG
2725 *
2726 * @param sps SPS
2727 *
2728 * @return profile as defined by FF_PROFILE_H264_*
2729 */
2730int ff_h264_get_profile(SPS *sps)
2731{
2732 int profile = sps->profile_idc;
2733
e5d40372 2734 switch (sps->profile_idc) {
fe9a3fbe
JG
2735 case FF_PROFILE_H264_BASELINE:
2736 // constraint_set1_flag set to 1
e5d40372 2737 profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0;
fe9a3fbe
JG
2738 break;
2739 case FF_PROFILE_H264_HIGH_10:
2740 case FF_PROFILE_H264_HIGH_422:
2741 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
2742 // constraint_set3_flag set to 1
e5d40372 2743 profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0;
fe9a3fbe
JG
2744 break;
2745 }
2746
2747 return profile;
2748}
2749
072be3e8
JG
2750static int h264_set_parameter_from_sps(H264Context *h)
2751{
2c541554 2752 if (h->flags & CODEC_FLAG_LOW_DELAY ||
072be3e8
JG
2753 (h->sps.bitstream_restriction_flag &&
2754 !h->sps.num_reorder_frames)) {
2c541554
AK
2755 if (h->avctx->has_b_frames > 1 || h->delayed_pic[0])
2756 av_log(h->avctx, AV_LOG_WARNING, "Delayed frames seen. "
072be3e8
JG
2757 "Reenabling low delay requires a codec flush.\n");
2758 else
2c541554 2759 h->low_delay = 1;
072be3e8
JG
2760 }
2761
2c541554
AK
2762 if (h->avctx->has_b_frames < 2)
2763 h->avctx->has_b_frames = !h->low_delay;
072be3e8 2764
4987faee
LB
2765 if (h->sps.bit_depth_luma != h->sps.bit_depth_chroma) {
2766 av_log_missing_feature(h->avctx,
2767 "Different bit depth between chroma and luma", 1);
2768 return AVERROR_PATCHWELCOME;
2769 }
2770
2c541554 2771 if (h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
072be3e8 2772 h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
2c541554
AK
2773 if (h->avctx->codec &&
2774 h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU &&
072be3e8 2775 (h->sps.bit_depth_luma != 8 || h->sps.chroma_format_idc > 1)) {
2c541554 2776 av_log(h->avctx, AV_LOG_ERROR,
072be3e8
JG
2777 "VDPAU decoding does not support video colorspace.\n");
2778 return AVERROR_INVALIDDATA;
2779 }
2780 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
2c541554 2781 h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
072be3e8
JG
2782 h->cur_chroma_format_idc = h->sps.chroma_format_idc;
2783 h->pixel_shift = h->sps.bit_depth_luma > 8;
2784
2785 ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma,
2786 h->sps.chroma_format_idc);
79dad2a9 2787 ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
e9d81735 2788 ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
2c541554 2789 ff_h264_pred_init(&h->hpc, h->avctx->codec_id, h->sps.bit_depth_luma,
072be3e8 2790 h->sps.chroma_format_idc);
2c541554
AK
2791 h->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
2792 ff_dsputil_init(&h->dsp, h->avctx);
2793 ff_videodsp_init(&h->vdsp, h->sps.bit_depth_luma);
072be3e8 2794 } else {
2c541554 2795 av_log(h->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n",
072be3e8
JG
2796 h->sps.bit_depth_luma);
2797 return AVERROR_INVALIDDATA;
2798 }
2799 }
2800 return 0;
2801}
2802
9e696d2e
JG
2803static enum PixelFormat get_pixel_format(H264Context *h)
2804{
9e696d2e
JG
2805 switch (h->sps.bit_depth_luma) {
2806 case 9:
2807 if (CHROMA444) {
2c541554 2808 if (h->avctx->colorspace == AVCOL_SPC_RGB) {
9e696d2e
JG
2809 return AV_PIX_FMT_GBRP9;
2810 } else
2811 return AV_PIX_FMT_YUV444P9;
2812 } else if (CHROMA422)
2813 return AV_PIX_FMT_YUV422P9;
2814 else
2815 return AV_PIX_FMT_YUV420P9;
2816 break;
2817 case 10:
2818 if (CHROMA444) {
2c541554 2819 if (h->avctx->colorspace == AVCOL_SPC_RGB) {
9e696d2e
JG
2820 return AV_PIX_FMT_GBRP10;
2821 } else
2822 return AV_PIX_FMT_YUV444P10;
2823 } else if (CHROMA422)
2824 return AV_PIX_FMT_YUV422P10;
2825 else
2826 return AV_PIX_FMT_YUV420P10;
2827 break;
2828 case 8:
2829 if (CHROMA444) {
2c541554 2830 if (h->avctx->colorspace == AVCOL_SPC_RGB) {
9e696d2e
JG
2831 return AV_PIX_FMT_GBRP;
2832 } else
2c541554 2833 return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P
9e696d2e
JG
2834 : AV_PIX_FMT_YUV444P;
2835 } else if (CHROMA422) {
2c541554 2836 return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P
9e696d2e
JG
2837 : AV_PIX_FMT_YUV422P;
2838 } else {
2c541554
AK
2839 return h->avctx->get_format(h->avctx, h->avctx->codec->pix_fmts ?
2840 h->avctx->codec->pix_fmts :
2841 h->avctx->color_range == AVCOL_RANGE_JPEG ?
d65522e8 2842 h264_hwaccel_pixfmt_list_jpeg_420 :
8d061989 2843 h264_hwaccel_pixfmt_list_420);
9e696d2e
JG
2844 }
2845 break;
2846 default:
2c541554 2847 av_log(h->avctx, AV_LOG_ERROR,
9e696d2e
JG
2848 "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
2849 return AVERROR_INVALIDDATA;
2850 }
2851}
2852
2853static int h264_slice_header_init(H264Context *h, int reinit)
2854{
2c541554
AK
2855 int nb_slices = (HAVE_THREADS &&
2856 h->avctx->active_thread_type & FF_THREAD_SLICE) ?
2857 h->avctx->thread_count : 1;
2858 int i;
9e696d2e 2859
2c541554
AK
2860 avcodec_set_dimensions(h->avctx, h->width, h->height);
2861 h->avctx->sample_aspect_ratio = h->sps.sar;
2862 av_assert0(h->avctx->sample_aspect_ratio.den);
2863 av_pix_fmt_get_chroma_sub_sample(h->avctx->pix_fmt,
2864 &h->chroma_x_shift, &h->chroma_y_shift);
9e696d2e
JG
2865
2866 if (h->sps.timing_info_present_flag) {
2867 int64_t den = h->sps.time_scale;
2868 if (h->x264_build < 44U)
2869 den *= 2;
2c541554 2870 av_reduce(&h->avctx->time_base.num, &h->avctx->time_base.den,
9e696d2e
JG
2871 h->sps.num_units_in_tick, den, 1 << 30);
2872 }
2873
2c541554 2874 h->avctx->hwaccel = ff_find_hwaccel(h->avctx->codec->id, h->avctx->pix_fmt);
9e696d2e 2875
2c541554 2876 if (reinit)
9e696d2e 2877 free_tables(h, 0);
2c541554 2878 h->first_field = 0;
9e696d2e
JG
2879 h->prev_interlaced_frame = 1;
2880
2881 init_scan_tables(h);
2882 if (ff_h264_alloc_tables(h) < 0) {
2c541554 2883 av_log(h->avctx, AV_LOG_ERROR,
9e696d2e
JG
2884 "Could not allocate memory for h264\n");
2885 return AVERROR(ENOMEM);
2886 }
2887
2c541554
AK
2888 if (nb_slices > MAX_THREADS || (nb_slices > h->mb_height && h->mb_height)) {
2889 int max_slices;
2890 if (h->mb_height)
2891 max_slices = FFMIN(MAX_THREADS, h->mb_height);
2892 else
2893 max_slices = MAX_THREADS;
2894 av_log(h->avctx, AV_LOG_WARNING, "too many threads/slices (%d),"
2895 " reducing to %d\n", nb_slices, max_slices);
2896 nb_slices = max_slices;
2897 }
2898 h->slice_context_count = nb_slices;
2899
2900 if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_SLICE)) {
9e696d2e 2901 if (context_init(h) < 0) {
2c541554 2902 av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n");
9e696d2e
JG
2903 return -1;
2904 }
2905 } else {
2c541554 2906 for (i = 1; i < h->slice_context_count; i++) {
9e696d2e 2907 H264Context *c;
2c541554
AK
2908 c = h->thread_context[i] = av_mallocz(sizeof(H264Context));
2909 c->avctx = h->avctx;
2910 c->dsp = h->dsp;
2911 c->vdsp = h->vdsp;
9e696d2e 2912 c->h264dsp = h->h264dsp;
6bdb841b 2913 c->h264qpel = h->h264qpel;
4c51fe48 2914 c->h264chroma = h->h264chroma;
9e696d2e
JG
2915 c->sps = h->sps;
2916 c->pps = h->pps;
2917 c->pixel_shift = h->pixel_shift;
2c541554
AK
2918 c->width = h->width;
2919 c->height = h->height;
2920 c->linesize = h->linesize;
2921 c->uvlinesize = h->uvlinesize;
2922 c->chroma_x_shift = h->chroma_x_shift;
2923 c->chroma_y_shift = h->chroma_y_shift;
2924 c->qscale = h->qscale;
2925 c->droppable = h->droppable;
2926 c->data_partitioning = h->data_partitioning;
2927 c->low_delay = h->low_delay;
2928 c->mb_width = h->mb_width;
2929 c->mb_height = h->mb_height;
2930 c->mb_stride = h->mb_stride;
2931 c->mb_num = h->mb_num;
2932 c->flags = h->flags;
2933 c->workaround_bugs = h->workaround_bugs;
2934 c->pict_type = h->pict_type;
2935
9e696d2e
JG
2936 init_scan_tables(c);
2937 clone_tables(c, h, i);
2c541554 2938 c->context_initialized = 1;
9e696d2e
JG
2939 }
2940
2c541554 2941 for (i = 0; i < h->slice_context_count; i++)
9e696d2e 2942 if (context_init(h->thread_context[i]) < 0) {
2c541554 2943 av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n");
9e696d2e
JG
2944 return -1;
2945 }
2946 }
2947
2c541554
AK
2948 h->context_initialized = 1;
2949
9e696d2e
JG
2950 return 0;
2951}
2952
fe9a3fbe 2953/**
58c42af7 2954 * Decode a slice header.
efd29844 2955 * This will also call ff_MPV_common_init() and frame_start() as needed.
afebe2f7
2956 *
2957 * @param h h264context
e5d40372
DB
2958 * @param h0 h264 master context (differs from 'h' when doing sliced based
2959 * parallel decoding)
afebe2f7 2960 *
d9526386 2961 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 2962 */
e5d40372
DB
2963static int decode_slice_header(H264Context *h, H264Context *h0)
2964{
88e7a4d1 2965 unsigned int first_mb_in_slice;
ac658be5 2966 unsigned int pps_id;
072be3e8 2967 int num_ref_idx_active_override_flag, max_refs, ret;
41f5c62f 2968 unsigned int slice_type, tmp, i, j;
0bf79634 2969 int default_ref_list_done = 0;
ba0c8981 2970 int last_pic_structure, last_pic_droppable;
9e696d2e 2971 int needs_reinit = 0;
0da71265 2972
2c541554
AK
2973 h->me.qpel_put = h->h264qpel.put_h264_qpel_pixels_tab;
2974 h->me.qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab;
cf653d08 2975
2c541554 2976 first_mb_in_slice = get_ue_golomb(&h->gb);
0da71265 2977
e5d40372
DB
2978 if (first_mb_in_slice == 0) { // FIXME better field boundary detection
2979 if (h0->current_slice && FIELD_PICTURE) {
6a9c8594 2980 field_end(h, 1);
d225a1e2
MN
2981 }
2982
afebe2f7 2983 h0->current_slice = 0;
2c541554
AK
2984 if (!h0->first_field) {
2985 if (h->cur_pic_ptr && !h->droppable &&
2986 h->cur_pic_ptr->owner2 == h) {
2987 ff_thread_report_progress(&h->cur_pic_ptr->f, INT_MAX,
2988 h->picture_structure == PICT_BOTTOM_FIELD);
1e26a48f 2989 }
2c541554 2990 h->cur_pic_ptr = NULL;
1e26a48f 2991 }
66a4b2c1
MN
2992 }
2993
2c541554 2994 slice_type = get_ue_golomb_31(&h->gb);
e5d40372 2995 if (slice_type > 9) {
2c541554 2996 av_log(h->avctx, AV_LOG_ERROR,
e5d40372 2997 "slice type too large (%d) at %d %d\n",
2c541554 2998 h->slice_type, h->mb_x, h->mb_y);
5175b937 2999 return -1;
0da71265 3000 }
e5d40372 3001 if (slice_type > 4) {
0bf79634 3002 slice_type -= 5;
e5d40372
DB
3003 h->slice_type_fixed = 1;
3004 } else
3005 h->slice_type_fixed = 0;
115329f1 3006
e5d40372
DB
3007 slice_type = golomb_to_pict_type[slice_type];
3008 if (slice_type == AV_PICTURE_TYPE_I ||
3009 (h0->current_slice != 0 && slice_type == h0->last_slice_type)) {
0bf79634
LLL
3010 default_ref_list_done = 1;
3011 }
e5d40372
DB
3012 h->slice_type = slice_type;
3013 h->slice_type_nos = slice_type & 3;
0bf79634 3014
e5d40372 3015 // to make a few old functions happy, it's wrong though
2c541554 3016 h->pict_type = h->slice_type;
115329f1 3017
2c541554 3018 pps_id = get_ue_golomb(&h->gb);
e5d40372 3019 if (pps_id >= MAX_PPS_COUNT) {
2c541554 3020 av_log(h->avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
3021 return -1;
3022 }
e5d40372 3023 if (!h0->pps_buffers[pps_id]) {
2c541554 3024 av_log(h->avctx, AV_LOG_ERROR,
e5d40372
DB
3025 "non-existing PPS %u referenced\n",
3026 pps_id);
8b92b792
MN
3027 return -1;
3028 }
e5d40372 3029 h->pps = *h0->pps_buffers[pps_id];
8b92b792 3030
e5d40372 3031 if (!h0->sps_buffers[h->pps.sps_id]) {
2c541554 3032 av_log(h->avctx, AV_LOG_ERROR,
e5d40372
DB
3033 "non-existing SPS %u referenced\n",
3034 h->pps.sps_id);
8b92b792
MN
3035 return -1;
3036 }
072be3e8
JG
3037
3038 if (h->pps.sps_id != h->current_sps_id ||
3039 h0->sps_buffers[h->pps.sps_id]->new) {
3040 h0->sps_buffers[h->pps.sps_id]->new = 0;
3041
3042 h->current_sps_id = h->pps.sps_id;
3043 h->sps = *h0->sps_buffers[h->pps.sps_id];
3044
2c541554
AK
3045 if (h->bit_depth_luma != h->sps.bit_depth_luma ||
3046 h->chroma_format_idc != h->sps.chroma_format_idc) {
3047 h->bit_depth_luma = h->sps.bit_depth_luma;
3048 h->chroma_format_idc = h->sps.chroma_format_idc;
3049 needs_reinit = 1;
3050 }
072be3e8
JG
3051 if ((ret = h264_set_parameter_from_sps(h)) < 0)
3052 return ret;
3053 }
239ea04c 3054
2c541554
AK
3055 h->avctx->profile = ff_h264_get_profile(&h->sps);
3056 h->avctx->level = h->sps.level_idc;
3057 h->avctx->refs = h->sps.ref_frame_count;
b08e38e8 3058
2c541554
AK
3059 if (h->mb_width != h->sps.mb_width ||
3060 h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag))
9e696d2e
JG
3061 needs_reinit = 1;
3062
2c541554
AK
3063 h->mb_width = h->sps.mb_width;
3064 h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3065 h->mb_num = h->mb_width * h->mb_height;
3066 h->mb_stride = h->mb_width + 1;
115329f1 3067
2c541554 3068 h->b_stride = h->mb_width * 4;
0da71265 3069
2c541554 3070 h->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
76741b0e 3071
2c541554 3072 h->width = 16 * h->mb_width - (2 >> CHROMA444) * FFMIN(h->sps.crop_right, (8 << CHROMA444) - 1);
e5d40372 3073 if (h->sps.frame_mbs_only_flag)
2c541554 3074 h->height = 16 * h->mb_height - (1 << h->chroma_y_shift) * FFMIN(h->sps.crop_bottom, (16 >> h->chroma_y_shift) - 1);
0da71265 3075 else
2c541554 3076 h->height = 16 * h->mb_height - (2 << h->chroma_y_shift) * FFMIN(h->sps.crop_bottom, (16 >> h->chroma_y_shift) - 1);
115329f1 3077
2c541554
AK
3078 if (FFALIGN(h->avctx->width, 16) == h->width &&
3079 FFALIGN(h->avctx->height, 16) == h->height) {
3080 h->width = h->avctx->width;
3081 h->height = h->avctx->height;
30f51509
MR
3082 }
3083
9e696d2e 3084 if (h->sps.video_signal_type_present_flag) {
2c541554 3085 h->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG
9e696d2e
JG
3086 : AVCOL_RANGE_MPEG;
3087 if (h->sps.colour_description_present_flag) {
2c541554 3088 if (h->avctx->colorspace != h->sps.colorspace)
9ac44ad9 3089 needs_reinit = 1;
2c541554
AK
3090 h->avctx->color_primaries = h->sps.color_primaries;
3091 h->avctx->color_trc = h->sps.color_trc;
3092 h->avctx->colorspace = h->sps.colorspace;
6a9c8594 3093 }
0da71265 3094 }
f3bdc3da 3095
2c541554
AK
3096 if (h->context_initialized &&
3097 (h->width != h->avctx->width ||
3098 h->height != h->avctx->height ||
3099 needs_reinit)) {
d545cf80 3100
9e696d2e 3101 if (h != h0) {
2c541554 3102 av_log(h->avctx, AV_LOG_ERROR, "changing width/height on "
9e696d2e 3103 "slice %d\n", h0->current_slice + 1);
e5d40372 3104 return AVERROR_INVALIDDATA;
d545cf80
OA
3105 }
3106
9e696d2e 3107 flush_change(h);
115329f1 3108
9ac44ad9
JG
3109 if ((ret = get_pixel_format(h)) < 0)
3110 return ret;
2c541554 3111 h->avctx->pix_fmt = ret;
9ac44ad9 3112
2c541554
AK
3113 av_log(h->avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
3114 "pix_fmt: %d\n", h->width, h->height, h->avctx->pix_fmt);
9e696d2e
JG
3115
3116 if ((ret = h264_slice_header_init(h, 1)) < 0) {
2c541554 3117 av_log(h->avctx, AV_LOG_ERROR,
9e696d2e
JG
3118 "h264_slice_header_init() failed\n");
3119 return ret;
bac3ab13 3120 }
9e696d2e 3121 }
2c541554 3122 if (!h->context_initialized) {
9e696d2e 3123 if (h != h0) {
2c541554 3124 av_log(h->avctx, AV_LOG_ERROR,
9e696d2e
JG
3125 "Cannot (re-)initialize context during parallel decoding.\n");
3126 return -1;
3127 }
9ac44ad9
JG
3128
3129 if ((ret = get_pixel_format(h)) < 0)
3130 return ret;
2c541554 3131 h->avctx->pix_fmt = ret;
9ac44ad9 3132
9e696d2e 3133 if ((ret = h264_slice_header_init(h, 0)) < 0) {
2c541554 3134 av_log(h->avctx, AV_LOG_ERROR,
9e696d2e
JG
3135 "h264_slice_header_init() failed\n");
3136 return ret;
6a9c8594 3137 }
0da71265
MN
3138 }
3139
e5d40372 3140 if (h == h0 && h->dequant_coeff_pps != pps_id) {
0ce4fe48
RB
3141 h->dequant_coeff_pps = pps_id;
3142 init_dequant_tables(h);
3143 }
3144
2c541554 3145 h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num);
0da71265 3146
e5d40372
DB
3147 h->mb_mbaff = 0;
3148 h->mb_aff_frame = 0;
2c541554
AK
3149 last_pic_structure = h0->picture_structure;
3150 last_pic_droppable = h0->droppable;
3151 h->droppable = h->nal_ref_idc == 0;
e5d40372 3152 if (h->sps.frame_mbs_only_flag) {
2c541554 3153 h->picture_structure = PICT_FRAME;
e5d40372 3154 } else {
2c541554
AK
3155 if (get_bits1(&h->gb)) { // field_pic_flag
3156 h->picture_structure = PICT_TOP_FIELD + get_bits1(&h->gb); // bottom_field_flag
6ba71fc4 3157 } else {
2c541554 3158 h->picture_structure = PICT_FRAME;
e5d40372 3159 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 3160 }
0da71265 3161 }
2c541554 3162 h->mb_field_decoding_flag = h->picture_structure != PICT_FRAME;
2ddcf84b 3163
1e26a48f 3164 if (h0->current_slice != 0) {
2c541554
AK
3165 if (last_pic_structure != h->picture_structure ||
3166 last_pic_droppable != h->droppable) {
3167 av_log(h->avctx, AV_LOG_ERROR,
1e26a48f 3168 "Changing field mode (%d -> %d) between slices is not allowed\n",
2c541554
AK