Commit | Line | Data |
---|---|---|
0da71265 | 1 | /* |
ff3d4310 | 2 | * H.26L/H.264/AVC/JVT/14496-10/... decoder |
0da71265 MN |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 | * | |
2912e87a | 5 | * This file is part of Libav. |
b78e7197 | 6 | * |
2912e87a | 7 | * Libav is free software; you can redistribute it and/or |
0da71265 MN |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either | |
b78e7197 | 10 | * version 2.1 of the License, or (at your option) any later version. |
0da71265 | 11 | * |
2912e87a | 12 | * Libav is distributed in the hope that it will be useful, |
0da71265 MN |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
2912e87a | 18 | * License along with Libav; if not, write to the Free Software |
5509bffa | 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0da71265 | 20 | */ |
115329f1 | 21 | |
0da71265 | 22 | /** |
ba87f080 | 23 | * @file |
0da71265 MN |
24 | * H.264 / AVC / MPEG4 part10 codec. |
25 | * @author Michael Niedermayer <michaelni@gmx.at> | |
26 | */ | |
27 | ||
737eb597 | 28 | #include "libavutil/imgutils.h" |
40e5d31b | 29 | #include "internal.h" |
55b9ef18 DB |
30 | #include "cabac.h" |
31 | #include "cabac_functions.h" | |
0da71265 MN |
32 | #include "dsputil.h" |
33 | #include "avcodec.h" | |
34 | #include "mpegvideo.h" | |
26b4fe82 | 35 | #include "h264.h" |
0da71265 | 36 | #include "h264data.h" |
188d3c51 | 37 | #include "h264_mvpred.h" |
0da71265 | 38 | #include "golomb.h" |
199436b9 | 39 | #include "mathops.h" |
626464fb | 40 | #include "rectangle.h" |
6a9c8594 | 41 | #include "thread.h" |
369122dd | 42 | #include "vdpau_internal.h" |
cfa5a81e | 43 | #include "libavutil/avassert.h" |
0da71265 | 44 | |
e5d40372 | 45 | // #undef NDEBUG |
0da71265 MN |
46 | #include <assert.h> |
47 | ||
0becb078 DB |
48 | const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 }; |
49 | ||
e5d40372 DB |
50 | static const uint8_t rem6[QP_MAX_NUM + 1] = { |
51 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, | |
52 | 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, | |
53 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, | |
acd8d10f PI |
54 | }; |
55 | ||
e5d40372 DB |
56 | static const uint8_t div6[QP_MAX_NUM + 1] = { |
57 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, | |
58 | 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, | |
59 | 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, | |
acd8d10f PI |
60 | }; |
61 | ||
716d413c AK |
62 | static const enum AVPixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = { |
63 | AV_PIX_FMT_DXVA2_VLD, | |
64 | AV_PIX_FMT_VAAPI_VLD, | |
65 | AV_PIX_FMT_VDA_VLD, | |
66 | AV_PIX_FMT_YUVJ420P, | |
67 | AV_PIX_FMT_NONE | |
0435fb16 BC |
68 | }; |
69 | ||
0da71265 | 70 | /** |
58c42af7 DB |
71 | * Check if the top & left blocks are available if needed and |
72 | * change the dc mode so it only uses the available blocks. | |
0da71265 | 73 | */ |
e5d40372 DB |
74 | int ff_h264_check_intra4x4_pred_mode(H264Context *h) |
75 | { | |
76 | MpegEncContext *const s = &h->s; | |
77 | static const int8_t top[12] = { | |
78 | -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0 | |
79 | }; | |
80 | static const int8_t left[12] = { | |
81 | 0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED | |
82 | }; | |
2bedc0e8 MN |
83 | int i; |
84 | ||
e5d40372 DB |
85 | if (!(h->top_samples_available & 0x8000)) { |
86 | for (i = 0; i < 4; i++) { | |
87 | int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]]; | |
88 | if (status < 0) { | |
89 | av_log(h->s.avctx, AV_LOG_ERROR, | |
90 | "top block unavailable for requested intra4x4 mode %d at %d %d\n", | |
91 | status, s->mb_x, s->mb_y); | |
2bedc0e8 | 92 | return -1; |
e5d40372 DB |
93 | } else if (status) { |
94 | h->intra4x4_pred_mode_cache[scan8[0] + i] = status; | |
2bedc0e8 MN |
95 | } |
96 | } | |
97 | } | |
98 | ||
e5d40372 DB |
99 | if ((h->left_samples_available & 0x8888) != 0x8888) { |
100 | static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 }; | |
101 | for (i = 0; i < 4; i++) | |
102 | if (!(h->left_samples_available & mask[i])) { | |
103 | int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]]; | |
104 | if (status < 0) { | |
105 | av_log(h->s.avctx, AV_LOG_ERROR, | |
106 | "left block unavailable for requested intra4x4 mode %d at %d %d\n", | |
107 | status, s->mb_x, s->mb_y); | |
2bedc0e8 | 108 | return -1; |
e5d40372 DB |
109 | } else if (status) { |
110 | h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status; | |
2bedc0e8 MN |
111 | } |
112 | } | |
2bedc0e8 MN |
113 | } |
114 | ||
115 | return 0; | |
e5d40372 | 116 | } // FIXME cleanup like ff_h264_check_intra_pred_mode |
2bedc0e8 MN |
117 | |
118 | /** | |
58c42af7 DB |
119 | * Check if the top & left blocks are available if needed and |
120 | * change the dc mode so it only uses the available blocks. | |
2bedc0e8 | 121 | */ |
e5d40372 DB |
122 | int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma) |
123 | { | |
124 | MpegEncContext *const s = &h->s; | |
125 | static const int8_t top[7] = { LEFT_DC_PRED8x8, 1, -1, -1 }; | |
126 | static const int8_t left[7] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 }; | |
127 | ||
128 | if (mode > 6U) { | |
129 | av_log(h->s.avctx, AV_LOG_ERROR, | |
130 | "out of range intra chroma pred mode at %d %d\n", | |
131 | s->mb_x, s->mb_y); | |
7440fe83 | 132 | return -1; |
5175b937 | 133 | } |
115329f1 | 134 | |
e5d40372 DB |
135 | if (!(h->top_samples_available & 0x8000)) { |
136 | mode = top[mode]; | |
137 | if (mode < 0) { | |
138 | av_log(h->s.avctx, AV_LOG_ERROR, | |
139 | "top block unavailable for requested intra mode at %d %d\n", | |
140 | s->mb_x, s->mb_y); | |
0da71265 MN |
141 | return -1; |
142 | } | |
143 | } | |
115329f1 | 144 | |
e5d40372 DB |
145 | if ((h->left_samples_available & 0x8080) != 0x8080) { |
146 | mode = left[mode]; | |
147 | if (is_chroma && (h->left_samples_available & 0x8080)) { | |
148 | // mad cow disease mode, aka MBAFF + constrained_intra_pred | |
149 | mode = ALZHEIMER_DC_L0T_PRED8x8 + | |
150 | (!(h->left_samples_available & 0x8000)) + | |
151 | 2 * (mode == DC_128_PRED8x8); | |
d1d10e91 | 152 | } |
e5d40372 DB |
153 | if (mode < 0) { |
154 | av_log(h->s.avctx, AV_LOG_ERROR, | |
155 | "left block unavailable for requested intra mode at %d %d\n", | |
156 | s->mb_x, s->mb_y); | |
0da71265 | 157 | return -1; |
115329f1 | 158 | } |
0da71265 MN |
159 | } |
160 | ||
161 | return mode; | |
162 | } | |
163 | ||
e5d40372 DB |
164 | const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, |
165 | int *dst_length, int *consumed, int length) | |
166 | { | |
0da71265 MN |
167 | int i, si, di; |
168 | uint8_t *dst; | |
24456882 | 169 | int bufidx; |
0da71265 | 170 | |
e5d40372 DB |
171 | // src[0]&0x80; // forbidden bit |
172 | h->nal_ref_idc = src[0] >> 5; | |
173 | h->nal_unit_type = src[0] & 0x1F; | |
0da71265 | 174 | |
e5d40372 DB |
175 | src++; |
176 | length--; | |
e08715d3 | 177 | |
58db34aa RB |
178 | #define STARTCODE_TEST \ |
179 | if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \ | |
180 | if (src[i + 2] != 3) { \ | |
181 | /* startcode, so we must be past the end */ \ | |
182 | length = i; \ | |
183 | } \ | |
184 | break; \ | |
185 | } | |
b250f9c6 | 186 | #if HAVE_FAST_UNALIGNED |
58db34aa RB |
187 | #define FIND_FIRST_ZERO \ |
188 | if (i > 0 && !src[i]) \ | |
189 | i--; \ | |
190 | while (src[i]) \ | |
191 | i++ | |
e5d40372 | 192 | #if HAVE_FAST_64BIT |
e5d40372 DB |
193 | for (i = 0; i + 1 < length; i += 9) { |
194 | if (!((~AV_RN64A(src + i) & | |
195 | (AV_RN64A(src + i) - 0x0100010001000101ULL)) & | |
196 | 0x8000800080008080ULL)) | |
58db34aa RB |
197 | continue; |
198 | FIND_FIRST_ZERO; | |
199 | STARTCODE_TEST; | |
200 | i -= 7; | |
201 | } | |
e5d40372 | 202 | #else |
e5d40372 DB |
203 | for (i = 0; i + 1 < length; i += 5) { |
204 | if (!((~AV_RN32A(src + i) & | |
205 | (AV_RN32A(src + i) - 0x01000101U)) & | |
206 | 0x80008080U)) | |
e08715d3 | 207 | continue; |
58db34aa RB |
208 | FIND_FIRST_ZERO; |
209 | STARTCODE_TEST; | |
210 | i -= 3; | |
211 | } | |
212 | #endif | |
e08715d3 | 213 | #else |
e5d40372 DB |
214 | for (i = 0; i + 1 < length; i += 2) { |
215 | if (src[i]) | |
216 | continue; | |
217 | if (i > 0 && src[i - 1] == 0) | |
218 | i--; | |
58db34aa | 219 | STARTCODE_TEST; |
0da71265 | 220 | } |
58db34aa | 221 | #endif |
0da71265 | 222 | |
e5d40372 DB |
223 | if (i >= length - 1) { // no escaped 0 |
224 | *dst_length = length; | |
225 | *consumed = length + 1; // +1 for the header | |
115329f1 | 226 | return src; |
0da71265 MN |
227 | } |
228 | ||
e5d40372 DB |
229 | // use second escape buffer for inter data |
230 | bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; | |
231 | av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], | |
232 | length + FF_INPUT_BUFFER_PADDING_SIZE); | |
233 | dst = h->rbsp_buffer[bufidx]; | |
0da71265 | 234 | |
e5d40372 | 235 | if (dst == NULL) |
ac658be5 | 236 | return NULL; |
ac658be5 | 237 | |
593af7cd | 238 | memcpy(dst, src, i); |
e5d40372 DB |
239 | si = di = i; |
240 | while (si + 2 < length) { | |
241 | // remove escapes (very rare 1:2^22) | |
242 | if (src[si + 2] > 3) { | |
243 | dst[di++] = src[si++]; | |
244 | dst[di++] = src[si++]; | |
245 | } else if (src[si] == 0 && src[si + 1] == 0) { | |
246 | if (src[si + 2] == 3) { // escape | |
247 | dst[di++] = 0; | |
248 | dst[di++] = 0; | |
249 | si += 3; | |
c8470cc1 | 250 | continue; |
e5d40372 | 251 | } else // next start code |
593af7cd | 252 | goto nsc; |
0da71265 MN |
253 | } |
254 | ||
e5d40372 | 255 | dst[di++] = src[si++]; |
0da71265 | 256 | } |
e5d40372 DB |
257 | while (si < length) |
258 | dst[di++] = src[si++]; | |
593af7cd | 259 | nsc: |
0da71265 | 260 | |
e5d40372 | 261 | memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE); |
d4369630 | 262 | |
e5d40372 DB |
263 | *dst_length = di; |
264 | *consumed = si + 1; // +1 for the header | |
265 | /* FIXME store exact number of bits in the getbitcontext | |
266 | * (it is needed for decoding) */ | |
0da71265 MN |
267 | return dst; |
268 | } | |
269 | ||
85297319 DEP |
270 | /** |
271 | * Identify the exact end of the bitstream | |
272 | * @return the length of the trailing, or 0 if damaged | |
273 | */ | |
b691fd7a | 274 | static int decode_rbsp_trailing(H264Context *h, const uint8_t *src) |
e5d40372 DB |
275 | { |
276 | int v = *src; | |
0da71265 MN |
277 | int r; |
278 | ||
a9c9a240 | 279 | tprintf(h->s.avctx, "rbsp trailing %X\n", v); |
0da71265 | 280 | |
e5d40372 DB |
281 | for (r = 1; r < 9; r++) { |
282 | if (v & 1) | |
283 | return r; | |
284 | v >>= 1; | |
0da71265 MN |
285 | } |
286 | return 0; | |
287 | } | |
288 | ||
e5d40372 DB |
289 | static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, |
290 | int height, int y_offset, int list) | |
291 | { | |
292 | int raw_my = h->mv_cache[list][scan8[n]][1]; | |
293 | int filter_height = (raw_my & 3) ? 2 : 0; | |
294 | int full_my = (raw_my >> 2) + y_offset; | |
295 | int top = full_my - filter_height; | |
296 | int bottom = full_my + filter_height + height; | |
6a9c8594 AS |
297 | |
298 | return FFMAX(abs(top), bottom); | |
299 | } | |
300 | ||
e5d40372 DB |
301 | static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, |
302 | int height, int y_offset, int list0, | |
303 | int list1, int *nrefs) | |
304 | { | |
305 | MpegEncContext *const s = &h->s; | |
6a9c8594 AS |
306 | int my; |
307 | ||
e5d40372 | 308 | y_offset += 16 * (s->mb_y >> MB_FIELD); |
6a9c8594 | 309 | |
e5d40372 DB |
310 | if (list0) { |
311 | int ref_n = h->ref_cache[0][scan8[n]]; | |
312 | Picture *ref = &h->ref_list[0][ref_n]; | |
6a9c8594 AS |
313 | |
314 | // Error resilience puts the current picture in the ref list. | |
315 | // Don't try to wait on these as it will cause a deadlock. | |
316 | // Fields can wait on each other, though. | |
e5d40372 DB |
317 | if (ref->f.thread_opaque != s->current_picture.f.thread_opaque || |
318 | (ref->f.reference & 3) != s->picture_structure) { | |
6a9c8594 | 319 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0); |
e5d40372 DB |
320 | if (refs[0][ref_n] < 0) |
321 | nrefs[0] += 1; | |
6a9c8594 AS |
322 | refs[0][ref_n] = FFMAX(refs[0][ref_n], my); |
323 | } | |
324 | } | |
325 | ||
e5d40372 DB |
326 | if (list1) { |
327 | int ref_n = h->ref_cache[1][scan8[n]]; | |
328 | Picture *ref = &h->ref_list[1][ref_n]; | |
6a9c8594 | 329 | |
e5d40372 DB |
330 | if (ref->f.thread_opaque != s->current_picture.f.thread_opaque || |
331 | (ref->f.reference & 3) != s->picture_structure) { | |
6a9c8594 | 332 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1); |
e5d40372 DB |
333 | if (refs[1][ref_n] < 0) |
334 | nrefs[1] += 1; | |
6a9c8594 AS |
335 | refs[1][ref_n] = FFMAX(refs[1][ref_n], my); |
336 | } | |
337 | } | |
338 | } | |
339 | ||
340 | /** | |
341 | * Wait until all reference frames are available for MC operations. | |
342 | * | |
343 | * @param h the H264 context | |
344 | */ | |
e5d40372 DB |
345 | static void await_references(H264Context *h) |
346 | { | |
347 | MpegEncContext *const s = &h->s; | |
348 | const int mb_xy = h->mb_xy; | |
657ccb5a | 349 | const int mb_type = s->current_picture.f.mb_type[mb_xy]; |
6a9c8594 | 350 | int refs[2][48]; |
e5d40372 | 351 | int nrefs[2] = { 0 }; |
6a9c8594 AS |
352 | int ref, list; |
353 | ||
354 | memset(refs, -1, sizeof(refs)); | |
355 | ||
e5d40372 | 356 | if (IS_16X16(mb_type)) { |
6a9c8594 | 357 | get_lowest_part_y(h, refs, 0, 16, 0, |
e5d40372 DB |
358 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
359 | } else if (IS_16X8(mb_type)) { | |
6a9c8594 | 360 | get_lowest_part_y(h, refs, 0, 8, 0, |
e5d40372 | 361 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
6a9c8594 | 362 | get_lowest_part_y(h, refs, 8, 8, 8, |
e5d40372 DB |
363 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); |
364 | } else if (IS_8X16(mb_type)) { | |
6a9c8594 | 365 | get_lowest_part_y(h, refs, 0, 16, 0, |
e5d40372 | 366 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
6a9c8594 | 367 | get_lowest_part_y(h, refs, 4, 16, 0, |
e5d40372 DB |
368 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); |
369 | } else { | |
6a9c8594 AS |
370 | int i; |
371 | ||
372 | assert(IS_8X8(mb_type)); | |
373 | ||
e5d40372 DB |
374 | for (i = 0; i < 4; i++) { |
375 | const int sub_mb_type = h->sub_mb_type[i]; | |
376 | const int n = 4 * i; | |
377 | int y_offset = (i & 2) << 2; | |
378 | ||
379 | if (IS_SUB_8X8(sub_mb_type)) { | |
380 | get_lowest_part_y(h, refs, n, 8, y_offset, | |
381 | IS_DIR(sub_mb_type, 0, 0), | |
382 | IS_DIR(sub_mb_type, 0, 1), | |
383 | nrefs); | |
384 | } else if (IS_SUB_8X4(sub_mb_type)) { | |
385 | get_lowest_part_y(h, refs, n, 4, y_offset, | |
386 | IS_DIR(sub_mb_type, 0, 0), | |
387 | IS_DIR(sub_mb_type, 0, 1), | |
388 | nrefs); | |
389 | get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4, | |
390 | IS_DIR(sub_mb_type, 0, 0), | |
391 | IS_DIR(sub_mb_type, 0, 1), | |
392 | nrefs); | |
393 | } else if (IS_SUB_4X8(sub_mb_type)) { | |
394 | get_lowest_part_y(h, refs, n, 8, y_offset, | |
395 | IS_DIR(sub_mb_type, 0, 0), | |
396 | IS_DIR(sub_mb_type, 0, 1), | |
397 | nrefs); | |
398 | get_lowest_part_y(h, refs, n + 1, 8, y_offset, | |
399 | IS_DIR(sub_mb_type, 0, 0), | |
400 | IS_DIR(sub_mb_type, 0, 1), | |
401 | nrefs); | |
402 | } else { | |
6a9c8594 AS |
403 | int j; |
404 | assert(IS_SUB_4X4(sub_mb_type)); | |
e5d40372 DB |
405 | for (j = 0; j < 4; j++) { |
406 | int sub_y_offset = y_offset + 2 * (j & 2); | |
407 | get_lowest_part_y(h, refs, n + j, 4, sub_y_offset, | |
408 | IS_DIR(sub_mb_type, 0, 0), | |
409 | IS_DIR(sub_mb_type, 0, 1), | |
410 | nrefs); | |
6a9c8594 AS |
411 | } |
412 | } | |
413 | } | |
414 | } | |
415 | ||
e5d40372 DB |
416 | for (list = h->list_count - 1; list >= 0; list--) |
417 | for (ref = 0; ref < 48 && nrefs[list]; ref++) { | |
6a9c8594 | 418 | int row = refs[list][ref]; |
e5d40372 DB |
419 | if (row >= 0) { |
420 | Picture *ref_pic = &h->ref_list[list][ref]; | |
421 | int ref_field = ref_pic->f.reference - 1; | |
6a9c8594 | 422 | int ref_field_picture = ref_pic->field_picture; |
e5d40372 | 423 | int pic_height = 16 * s->mb_height >> ref_field_picture; |
6a9c8594 AS |
424 | |
425 | row <<= MB_MBAFF; | |
426 | nrefs[list]--; | |
427 | ||
e5d40372 DB |
428 | if (!FIELD_PICTURE && ref_field_picture) { // frame referencing two fields |
429 | ff_thread_await_progress(&ref_pic->f, | |
430 | FFMIN((row >> 1) - !(row & 1), | |
431 | pic_height - 1), | |
432 | 1); | |
433 | ff_thread_await_progress(&ref_pic->f, | |
434 | FFMIN((row >> 1), pic_height - 1), | |
435 | 0); | |
436 | } else if (FIELD_PICTURE && !ref_field_picture) { // field referencing one field of a frame | |
437 | ff_thread_await_progress(&ref_pic->f, | |
438 | FFMIN(row * 2 + ref_field, | |
439 | pic_height - 1), | |
440 | 0); | |
441 | } else if (FIELD_PICTURE) { | |
442 | ff_thread_await_progress(&ref_pic->f, | |
443 | FFMIN(row, pic_height - 1), | |
444 | ref_field); | |
445 | } else { | |
446 | ff_thread_await_progress(&ref_pic->f, | |
447 | FFMIN(row, pic_height - 1), | |
448 | 0); | |
6a9c8594 AS |
449 | } |
450 | } | |
451 | } | |
66c6b5e2 | 452 | } |
66c6b5e2 | 453 | |
e5d40372 DB |
454 | static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, |
455 | int n, int square, int height, | |
456 | int delta, int list, | |
457 | uint8_t *dest_y, uint8_t *dest_cb, | |
458 | uint8_t *dest_cr, | |
459 | int src_x_offset, int src_y_offset, | |
460 | qpel_mc_func *qpix_op, | |
461 | h264_chroma_mc_func chroma_op, | |
462 | int pixel_shift, int chroma_idc) | |
05fb63f5 | 463 | { |
e5d40372 DB |
464 | MpegEncContext *const s = &h->s; |
465 | const int mx = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8; | |
466 | int my = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8; | |
467 | const int luma_xy = (mx & 3) + ((my & 3) << 2); | |
468 | int offset = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize; | |
469 | uint8_t *src_y = pic->f.data[0] + offset; | |
470 | uint8_t *src_cb, *src_cr; | |
471 | int extra_width = h->emu_edge_width; | |
472 | int extra_height = h->emu_edge_height; | |
473 | int emu = 0; | |
474 | const int full_mx = mx >> 2; | |
475 | const int full_my = my >> 2; | |
476 | const int pic_width = 16 * s->mb_width; | |
477 | const int pic_height = 16 * s->mb_height >> MB_FIELD; | |
229d263c | 478 | int ysh; |
115329f1 | 479 | |
e5d40372 DB |
480 | if (mx & 7) |
481 | extra_width -= 3; | |
482 | if (my & 7) | |
483 | extra_height -= 3; | |
484 | ||
485 | if (full_mx < 0 - extra_width || | |
486 | full_my < 0 - extra_height || | |
487 | full_mx + 16 /*FIXME*/ > pic_width + extra_width || | |
488 | full_my + 16 /*FIXME*/ > pic_height + extra_height) { | |
489 | s->dsp.emulated_edge_mc(s->edge_emu_buffer, | |
490 | src_y - (2 << pixel_shift) - 2 * h->mb_linesize, | |
491 | h->mb_linesize, | |
492 | 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, | |
493 | full_my - 2, pic_width, pic_height); | |
494 | src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; | |
495 | emu = 1; | |
496 | } | |
497 | ||
498 | qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps? | |
499 | if (!square) | |
5d18eaad | 500 | qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); |
115329f1 | 501 | |
e5d40372 DB |
502 | if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY) |
503 | return; | |
7b442ad9 | 504 | |
e5d40372 | 505 | if (chroma_idc == 3 /* yuv444 */) { |
657ccb5a | 506 | src_cb = pic->f.data[1] + offset; |
e5d40372 DB |
507 | if (emu) { |
508 | s->dsp.emulated_edge_mc(s->edge_emu_buffer, | |
509 | src_cb - (2 << pixel_shift) - 2 * h->mb_linesize, | |
510 | h->mb_linesize, | |
511 | 16 + 5, 16 + 5 /*FIXME*/, | |
512 | full_mx - 2, full_my - 2, | |
513 | pic_width, pic_height); | |
514 | src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; | |
c90b9442 | 515 | } |
e5d40372 DB |
516 | qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps? |
517 | if (!square) | |
c90b9442 | 518 | qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize); |
c90b9442 | 519 | |
657ccb5a | 520 | src_cr = pic->f.data[2] + offset; |
e5d40372 DB |
521 | if (emu) { |
522 | s->dsp.emulated_edge_mc(s->edge_emu_buffer, | |
523 | src_cr - (2 << pixel_shift) - 2 * h->mb_linesize, | |
524 | h->mb_linesize, | |
525 | 16 + 5, 16 + 5 /*FIXME*/, | |
526 | full_mx - 2, full_my - 2, | |
527 | pic_width, pic_height); | |
528 | src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; | |
c90b9442 | 529 | } |
e5d40372 DB |
530 | qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps? |
531 | if (!square) | |
c90b9442 | 532 | qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize); |
c90b9442 JGG |
533 | return; |
534 | } | |
535 | ||
05fb63f5 | 536 | ysh = 3 - (chroma_idc == 2 /* yuv422 */); |
e5d40372 | 537 | if (chroma_idc == 1 /* yuv420 */ && MB_FIELD) { |
5d18eaad | 538 | // chroma offset when predicting from a field of opposite parity |
e5d40372 DB |
539 | my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1)); |
540 | emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1); | |
5d18eaad | 541 | } |
229d263c | 542 | |
e5d40372 DB |
543 | src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + |
544 | (my >> ysh) * h->mb_uvlinesize; | |
545 | src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + | |
546 | (my >> ysh) * h->mb_uvlinesize; | |
5d18eaad | 547 | |
e5d40372 | 548 | if (emu) { |
05fb63f5 RB |
549 | s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, |
550 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
551 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
e5d40372 | 552 | src_cb = s->edge_emu_buffer; |
0da71265 | 553 | } |
e5d40372 DB |
554 | chroma_op(dest_cb, src_cb, h->mb_uvlinesize, |
555 | height >> (chroma_idc == 1 /* yuv420 */), | |
556 | mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
0da71265 | 557 | |
e5d40372 | 558 | if (emu) { |
05fb63f5 RB |
559 | s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, |
560 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
561 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
e5d40372 | 562 | src_cr = s->edge_emu_buffer; |
0da71265 | 563 | } |
05fb63f5 | 564 | chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), |
e5d40372 | 565 | mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); |
0da71265 MN |
566 | } |
567 | ||
e5d40372 DB |
568 | static av_always_inline void mc_part_std(H264Context *h, int n, int square, |
569 | int height, int delta, | |
570 | uint8_t *dest_y, uint8_t *dest_cb, | |
571 | uint8_t *dest_cr, | |
572 | int x_offset, int y_offset, | |
573 | qpel_mc_func *qpix_put, | |
574 | h264_chroma_mc_func chroma_put, | |
575 | qpel_mc_func *qpix_avg, | |
576 | h264_chroma_mc_func chroma_avg, | |
577 | int list0, int list1, | |
578 | int pixel_shift, int chroma_idc) | |
05fb63f5 | 579 | { |
e5d40372 DB |
580 | MpegEncContext *const s = &h->s; |
581 | qpel_mc_func *qpix_op = qpix_put; | |
582 | h264_chroma_mc_func chroma_op = chroma_put; | |
115329f1 | 583 | |
e5d40372 | 584 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
05fb63f5 | 585 | if (chroma_idc == 3 /* yuv444 */) { |
e5d40372 DB |
586 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
587 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; | |
05fb63f5 | 588 | } else if (chroma_idc == 2 /* yuv422 */) { |
e5d40372 DB |
589 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
590 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; | |
591 | } else { /* yuv420 */ | |
592 | dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; | |
593 | dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; | |
c90b9442 | 594 | } |
e5d40372 DB |
595 | x_offset += 8 * s->mb_x; |
596 | y_offset += 8 * (s->mb_y >> MB_FIELD); | |
115329f1 | 597 | |
e5d40372 DB |
598 | if (list0) { |
599 | Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]]; | |
c2d33742 | 600 | mc_dir_part(h, ref, n, square, height, delta, 0, |
e5d40372 DB |
601 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
602 | qpix_op, chroma_op, pixel_shift, chroma_idc); | |
0da71265 | 603 | |
e5d40372 DB |
604 | qpix_op = qpix_avg; |
605 | chroma_op = chroma_avg; | |
0da71265 MN |
606 | } |
607 | ||
e5d40372 DB |
608 | if (list1) { |
609 | Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]]; | |
c2d33742 | 610 | mc_dir_part(h, ref, n, square, height, delta, 1, |
e5d40372 DB |
611 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
612 | qpix_op, chroma_op, pixel_shift, chroma_idc); | |
0da71265 MN |
613 | } |
614 | } | |
615 | ||
e5d40372 DB |
616 | static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, |
617 | int height, int delta, | |
618 | uint8_t *dest_y, uint8_t *dest_cb, | |
619 | uint8_t *dest_cr, | |
620 | int x_offset, int y_offset, | |
621 | qpel_mc_func *qpix_put, | |
622 | h264_chroma_mc_func chroma_put, | |
623 | h264_weight_func luma_weight_op, | |
624 | h264_weight_func chroma_weight_op, | |
625 | h264_biweight_func luma_weight_avg, | |
626 | h264_biweight_func chroma_weight_avg, | |
627 | int list0, int list1, | |
628 | int pixel_shift, int chroma_idc) | |
629 | { | |
630 | MpegEncContext *const s = &h->s; | |
c2d33742 | 631 | int chroma_height; |
9f2d1b4f | 632 | |
e5d40372 | 633 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
05fb63f5 | 634 | if (chroma_idc == 3 /* yuv444 */) { |
e5d40372 | 635 | chroma_height = height; |
c90b9442 | 636 | chroma_weight_avg = luma_weight_avg; |
e5d40372 DB |
637 | chroma_weight_op = luma_weight_op; |
638 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; | |
639 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; | |
05fb63f5 | 640 | } else if (chroma_idc == 2 /* yuv422 */) { |
c2d33742 | 641 | chroma_height = height; |
e5d40372 DB |
642 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
643 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; | |
644 | } else { /* yuv420 */ | |
c2d33742 | 645 | chroma_height = height >> 1; |
e5d40372 DB |
646 | dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
647 | dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; | |
c90b9442 | 648 | } |
e5d40372 DB |
649 | x_offset += 8 * s->mb_x; |
650 | y_offset += 8 * (s->mb_y >> MB_FIELD); | |
115329f1 | 651 | |
e5d40372 | 652 | if (list0 && list1) { |
9f2d1b4f LM |
653 | /* don't optimize for luma-only case, since B-frames usually |
654 | * use implicit weights => chroma too. */ | |
a394959b JG |
655 | uint8_t *tmp_cb = h->bipred_scratchpad; |
656 | uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift); | |
657 | uint8_t *tmp_y = h->bipred_scratchpad + 16 * h->mb_uvlinesize; | |
e5d40372 DB |
658 | int refn0 = h->ref_cache[0][scan8[n]]; |
659 | int refn1 = h->ref_cache[1][scan8[n]]; | |
9f2d1b4f | 660 | |
c2d33742 | 661 | mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0, |
9f2d1b4f | 662 | dest_y, dest_cb, dest_cr, |
05fb63f5 RB |
663 | x_offset, y_offset, qpix_put, chroma_put, |
664 | pixel_shift, chroma_idc); | |
c2d33742 | 665 | mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1, |
9f2d1b4f | 666 | tmp_y, tmp_cb, tmp_cr, |
05fb63f5 RB |
667 | x_offset, y_offset, qpix_put, chroma_put, |
668 | pixel_shift, chroma_idc); | |
9f2d1b4f | 669 | |
e5d40372 DB |
670 | if (h->use_weight == 2) { |
671 | int weight0 = h->implicit_weight[refn0][refn1][s->mb_y & 1]; | |
9f2d1b4f | 672 | int weight1 = 64 - weight0; |
e5d40372 DB |
673 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, |
674 | height, 5, weight0, weight1, 0); | |
c2d33742 RB |
675 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, |
676 | chroma_height, 5, weight0, weight1, 0); | |
677 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, | |
678 | chroma_height, 5, weight0, weight1, 0); | |
e5d40372 DB |
679 | } else { |
680 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, | |
681 | h->luma_log2_weight_denom, | |
682 | h->luma_weight[refn0][0][0], | |
683 | h->luma_weight[refn1][1][0], | |
684 | h->luma_weight[refn0][0][1] + | |
685 | h->luma_weight[refn1][1][1]); | |
686 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, | |
687 | h->chroma_log2_weight_denom, | |
688 | h->chroma_weight[refn0][0][0][0], | |
689 | h->chroma_weight[refn1][1][0][0], | |
690 | h->chroma_weight[refn0][0][0][1] + | |
691 | h->chroma_weight[refn1][1][0][1]); | |
692 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, | |
693 | h->chroma_log2_weight_denom, | |
694 | h->chroma_weight[refn0][0][1][0], | |
695 | h->chroma_weight[refn1][1][1][0], | |
696 | h->chroma_weight[refn0][0][1][1] + | |
697 | h->chroma_weight[refn1][1][1][1]); | |
9f2d1b4f | 698 | } |
e5d40372 DB |
699 | } else { |
700 | int list = list1 ? 1 : 0; | |
701 | int refn = h->ref_cache[list][scan8[n]]; | |
702 | Picture *ref = &h->ref_list[list][refn]; | |
c2d33742 | 703 | mc_dir_part(h, ref, n, square, height, delta, list, |
9f2d1b4f | 704 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
05fb63f5 | 705 | qpix_put, chroma_put, pixel_shift, chroma_idc); |
9f2d1b4f | 706 | |
e5d40372 DB |
707 | luma_weight_op(dest_y, h->mb_linesize, height, |
708 | h->luma_log2_weight_denom, | |
709 | h->luma_weight[refn][list][0], | |
710 | h->luma_weight[refn][list][1]); | |
711 | if (h->use_weight_chroma) { | |
712 | chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, | |
713 | h->chroma_log2_weight_denom, | |
714 | h->chroma_weight[refn][list][0][0], | |
715 | h->chroma_weight[refn][list][0][1]); | |
716 | chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, | |
717 | h->chroma_log2_weight_denom, | |
718 | h->chroma_weight[refn][list][1][0], | |
719 | h->chroma_weight[refn][list][1][1]); | |
9f2d1b4f LM |
720 | } |
721 | } | |
722 | } | |
723 | ||
e5d40372 DB |
724 | static av_always_inline void prefetch_motion(H264Context *h, int list, |
725 | int pixel_shift, int chroma_idc) | |
27209bb1 | 726 | { |
513fbd8e LM |
727 | /* fetch pixels for estimated mv 4 macroblocks ahead |
728 | * optimized for 64byte cache lines */ | |
e5d40372 | 729 | MpegEncContext *const s = &h->s; |
513fbd8e | 730 | const int refn = h->ref_cache[list][scan8[0]]; |
e5d40372 DB |
731 | if (refn >= 0) { |
732 | const int mx = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * s->mb_x + 8; | |
733 | const int my = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * s->mb_y; | |
657ccb5a | 734 | uint8_t **src = h->ref_list[list][refn].f.data; |
e5d40372 DB |
735 | int off = (mx << pixel_shift) + |
736 | (my + (s->mb_x & 3) * 4) * h->mb_linesize + | |
737 | (64 << pixel_shift); | |
738 | s->dsp.prefetch(src[0] + off, s->linesize, 4); | |
05fb63f5 | 739 | if (chroma_idc == 3 /* yuv444 */) { |
e5d40372 DB |
740 | s->dsp.prefetch(src[1] + off, s->linesize, 4); |
741 | s->dsp.prefetch(src[2] + off, s->linesize, 4); | |
742 | } else { | |
743 | off = ((mx >> 1) << pixel_shift) + | |
744 | ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize + | |
745 | (64 << pixel_shift); | |
746 | s->dsp.prefetch(src[1] + off, src[2] - src[1], 2); | |
c90b9442 | 747 | } |
513fbd8e LM |
748 | } |
749 | } | |
750 | ||
e5d40372 DB |
751 | static void free_tables(H264Context *h, int free_rbsp) |
752 | { | |
7978debd | 753 | int i; |
afebe2f7 | 754 | H264Context *hx; |
6a9c8594 | 755 | |
0da71265 | 756 | av_freep(&h->intra4x4_pred_mode); |
e5017ab8 LA |
757 | av_freep(&h->chroma_pred_mode_table); |
758 | av_freep(&h->cbp_table); | |
9e528114 LA |
759 | av_freep(&h->mvd_table[0]); |
760 | av_freep(&h->mvd_table[1]); | |
5ad984c9 | 761 | av_freep(&h->direct_table); |
0da71265 MN |
762 | av_freep(&h->non_zero_count); |
763 | av_freep(&h->slice_table_base); | |
e5d40372 | 764 | h->slice_table = NULL; |
c988f975 | 765 | av_freep(&h->list_counts); |
e5017ab8 | 766 | |
0da71265 | 767 | av_freep(&h->mb2b_xy); |
d43c1922 | 768 | av_freep(&h->mb2br_xy); |
9f2d1b4f | 769 | |
e5d40372 | 770 | for (i = 0; i < MAX_THREADS; i++) { |
afebe2f7 | 771 | hx = h->thread_context[i]; |
e5d40372 DB |
772 | if (!hx) |
773 | continue; | |
afebe2f7 AÖ |
774 | av_freep(&hx->top_borders[1]); |
775 | av_freep(&hx->top_borders[0]); | |
a394959b | 776 | av_freep(&hx->bipred_scratchpad); |
e5d40372 | 777 | if (free_rbsp) { |
fcb7e535 RB |
778 | av_freep(&hx->rbsp_buffer[1]); |
779 | av_freep(&hx->rbsp_buffer[0]); | |
780 | hx->rbsp_buffer_size[0] = 0; | |
781 | hx->rbsp_buffer_size[1] = 0; | |
91078926 | 782 | } |
e5d40372 DB |
783 | if (i) |
784 | av_freep(&h->thread_context[i]); | |
afebe2f7 | 785 | } |
0da71265 MN |
786 | } |
787 | ||
e5d40372 DB |
788 | static void init_dequant8_coeff_table(H264Context *h) |
789 | { | |
790 | int i, j, q, x; | |
791 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); | |
239ea04c | 792 | |
e5d40372 | 793 | for (i = 0; i < 6; i++) { |
c90b9442 | 794 | h->dequant8_coeff[i] = h->dequant8_buffer[i]; |
e5d40372 DB |
795 | for (j = 0; j < i; j++) |
796 | if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], | |
797 | 64 * sizeof(uint8_t))) { | |
c90b9442 JGG |
798 | h->dequant8_coeff[i] = h->dequant8_buffer[j]; |
799 | break; | |
800 | } | |
e5d40372 | 801 | if (j < i) |
c90b9442 | 802 | continue; |
239ea04c | 803 | |
e5d40372 | 804 | for (q = 0; q < max_qp + 1; q++) { |
d9ec210b | 805 | int shift = div6[q]; |
e5d40372 DB |
806 | int idx = rem6[q]; |
807 | for (x = 0; x < 64; x++) | |
808 | h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] = | |
809 | ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] * | |
810 | h->pps.scaling_matrix8[i][x]) << shift; | |
239ea04c LM |
811 | } |
812 | } | |
813 | } | |
814 | ||
e5d40372 DB |
815 | static void init_dequant4_coeff_table(H264Context *h) |
816 | { | |
817 | int i, j, q, x; | |
818 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); | |
819 | for (i = 0; i < 6; i++) { | |
239ea04c | 820 | h->dequant4_coeff[i] = h->dequant4_buffer[i]; |
e5d40372 DB |
821 | for (j = 0; j < i; j++) |
822 | if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], | |
823 | 16 * sizeof(uint8_t))) { | |
239ea04c LM |
824 | h->dequant4_coeff[i] = h->dequant4_buffer[j]; |
825 | break; | |
826 | } | |
e5d40372 | 827 | if (j < i) |
239ea04c LM |
828 | continue; |
829 | ||
e5d40372 | 830 | for (q = 0; q < max_qp + 1; q++) { |
d9ec210b | 831 | int shift = div6[q] + 2; |
e5d40372 DB |
832 | int idx = rem6[q]; |
833 | for (x = 0; x < 16; x++) | |
834 | h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] = | |
835 | ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * | |
836 | h->pps.scaling_matrix4[i][x]) << shift; | |
239ea04c LM |
837 | } |
838 | } | |
839 | } | |
840 | ||
e5d40372 DB |
841 | static void init_dequant_tables(H264Context *h) |
842 | { | |
843 | int i, x; | |
239ea04c | 844 | init_dequant4_coeff_table(h); |
e5d40372 | 845 | if (h->pps.transform_8x8_mode) |
239ea04c | 846 | init_dequant8_coeff_table(h); |
e5d40372 DB |
847 | if (h->sps.transform_bypass) { |
848 | for (i = 0; i < 6; i++) | |
849 | for (x = 0; x < 16; x++) | |
850 | h->dequant4_coeff[i][0][x] = 1 << 6; | |
851 | if (h->pps.transform_8x8_mode) | |
852 | for (i = 0; i < 6; i++) | |
853 | for (x = 0; x < 64; x++) | |
854 | h->dequant8_coeff[i][0][x] = 1 << 6; | |
239ea04c LM |
855 | } |
856 | } | |
857 | ||
e5d40372 DB |
858 | int ff_h264_alloc_tables(H264Context *h) |
859 | { | |
860 | MpegEncContext *const s = &h->s; | |
861 | const int big_mb_num = s->mb_stride * (s->mb_height + 1); | |
862 | const int row_mb_num = s->mb_stride * 2 * s->avctx->thread_count; | |
863 | int x, y; | |
864 | ||
865 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, | |
866 | row_mb_num * 8 * sizeof(uint8_t), fail) | |
867 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count, | |
868 | big_mb_num * 48 * sizeof(uint8_t), fail) | |
869 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base, | |
870 | (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base), fail) | |
871 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, | |
872 | big_mb_num * sizeof(uint16_t), fail) | |
873 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, | |
874 | big_mb_num * sizeof(uint8_t), fail) | |
875 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], | |
876 | 16 * row_mb_num * sizeof(uint8_t), fail); | |
877 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], | |
878 | 16 * row_mb_num * sizeof(uint8_t), fail); | |
879 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, | |
880 | 4 * big_mb_num * sizeof(uint8_t), fail); | |
881 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, | |
882 | big_mb_num * sizeof(uint8_t), fail) | |
883 | ||
884 | memset(h->slice_table_base, -1, | |
885 | (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base)); | |
886 | h->slice_table = h->slice_table_base + s->mb_stride * 2 + 1; | |
887 | ||
888 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy, | |
889 | big_mb_num * sizeof(uint32_t), fail); | |
890 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy, | |
891 | big_mb_num * sizeof(uint32_t), fail); | |
892 | for (y = 0; y < s->mb_height; y++) | |
893 | for (x = 0; x < s->mb_width; x++) { | |
894 | const int mb_xy = x + y * s->mb_stride; | |
895 | const int b_xy = 4 * x + 4 * y * h->b_stride; | |
896 | ||
897 | h->mb2b_xy[mb_xy] = b_xy; | |
898 | h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * s->mb_stride))); | |
0da71265 | 899 | } |
9f2d1b4f | 900 | |
e5d40372 | 901 | if (!h->dequant4_coeff[0]) |
56edbd81 LM |
902 | init_dequant_tables(h); |
903 | ||
0da71265 | 904 | return 0; |
e5d40372 | 905 | |
0da71265 | 906 | fail: |
91078926 | 907 | free_tables(h, 1); |
0da71265 MN |
908 | return -1; |
909 | } | |
910 | ||
afebe2f7 AÖ |
911 | /** |
912 | * Mimic alloc_tables(), but for every context thread. | |
913 | */ | |
e5d40372 DB |
914 | static void clone_tables(H264Context *dst, H264Context *src, int i) |
915 | { | |
916 | MpegEncContext *const s = &src->s; | |
917 | dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * s->mb_stride; | |
918 | dst->non_zero_count = src->non_zero_count; | |
919 | dst->slice_table = src->slice_table; | |
920 | dst->cbp_table = src->cbp_table; | |
921 | dst->mb2b_xy = src->mb2b_xy; | |
922 | dst->mb2br_xy = src->mb2br_xy; | |
923 | dst->chroma_pred_mode_table = src->chroma_pred_mode_table; | |
924 | dst->mvd_table[0] = src->mvd_table[0] + i * 8 * 2 * s->mb_stride; | |
925 | dst->mvd_table[1] = src->mvd_table[1] + i * 8 * 2 * s->mb_stride; | |
926 | dst->direct_table = src->direct_table; | |
927 | dst->list_counts = src->list_counts; | |
a394959b | 928 | dst->bipred_scratchpad = NULL; |
e5d40372 DB |
929 | ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, |
930 | src->sps.chroma_format_idc); | |
afebe2f7 AÖ |
931 | } |
932 | ||
933 | /** | |
934 | * Init context | |
935 | * Allocate buffers which are not shared amongst multiple threads. | |
936 | */ | |
e5d40372 DB |
937 | static int context_init(H264Context *h) |
938 | { | |
939 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], | |
940 | h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) | |
941 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], | |
942 | h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) | |
943 | ||
944 | h->ref_cache[0][scan8[5] + 1] = | |
945 | h->ref_cache[0][scan8[7] + 1] = | |
946 | h->ref_cache[0][scan8[13] + 1] = | |
947 | h->ref_cache[1][scan8[5] + 1] = | |
948 | h->ref_cache[1][scan8[7] + 1] = | |
949 | h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE; | |
145061a1 | 950 | |
afebe2f7 | 951 | return 0; |
e5d40372 | 952 | |
afebe2f7 AÖ |
953 | fail: |
954 | return -1; // free_tables will clean up for us | |
955 | } | |
956 | ||
61c6eef5 JG |
957 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, |
958 | int parse_extradata); | |
9855b2e3 | 959 | |
e5d40372 DB |
960 | static av_cold void common_init(H264Context *h) |
961 | { | |
962 | MpegEncContext *const s = &h->s; | |
0da71265 | 963 | |
e5d40372 DB |
964 | s->width = s->avctx->width; |
965 | s->height = s->avctx->height; | |
966 | s->codec_id = s->avctx->codec->id; | |
115329f1 | 967 | |
76741b0e BC |
968 | ff_h264dsp_init(&h->h264dsp, 8, 1); |
969 | ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1); | |
0da71265 | 970 | |
e5d40372 DB |
971 | h->dequant_coeff_pps = -1; |
972 | s->unrestricted_mv = 1; | |
56edbd81 | 973 | |
e5d40372 DB |
974 | /* needed so that IDCT permutation is known early */ |
975 | ff_dsputil_init(&s->dsp, s->avctx); | |
a5805aa9 | 976 | |
e5d40372 DB |
977 | memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t)); |
978 | memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t)); | |
0da71265 MN |
979 | } |
980 | ||
05e95319 HC |
981 | int ff_h264_decode_extradata(H264Context *h) |
982 | { | |
983 | AVCodecContext *avctx = h->s.avctx; | |
9855b2e3 | 984 | |
e5d40372 | 985 | if (avctx->extradata[0] == 1) { |
9855b2e3 MN |
986 | int i, cnt, nalsize; |
987 | unsigned char *p = avctx->extradata; | |
988 | ||
989 | h->is_avc = 1; | |
990 | ||
e5d40372 | 991 | if (avctx->extradata_size < 7) { |
9855b2e3 MN |
992 | av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); |
993 | return -1; | |
994 | } | |
995 | /* sps and pps in the avcC always have length coded with 2 bytes, | |
e5d40372 | 996 | * so put a fake nal_length_size = 2 while parsing them */ |
9855b2e3 MN |
997 | h->nal_length_size = 2; |
998 | // Decode sps from avcC | |
e5d40372 DB |
999 | cnt = *(p + 5) & 0x1f; // Number of sps |
1000 | p += 6; | |
9855b2e3 MN |
1001 | for (i = 0; i < cnt; i++) { |
1002 | nalsize = AV_RB16(p) + 2; | |
d1186ff7 LA |
1003 | if (p - avctx->extradata + nalsize > avctx->extradata_size) |
1004 | return -1; | |
61c6eef5 | 1005 | if (decode_nal_units(h, p, nalsize, 1) < 0) { |
e5d40372 DB |
1006 | av_log(avctx, AV_LOG_ERROR, |
1007 | "Decoding sps %d from avcC failed\n", i); | |
9855b2e3 MN |
1008 | return -1; |
1009 | } | |
1010 | p += nalsize; | |
1011 | } | |
1012 | // Decode pps from avcC | |
1013 | cnt = *(p++); // Number of pps | |
1014 | for (i = 0; i < cnt; i++) { | |
1015 | nalsize = AV_RB16(p) + 2; | |
d1186ff7 LA |
1016 | if (p - avctx->extradata + nalsize > avctx->extradata_size) |
1017 | return -1; | |
61c6eef5 | 1018 | if (decode_nal_units(h, p, nalsize, 1) < 0) { |
e5d40372 DB |
1019 | av_log(avctx, AV_LOG_ERROR, |
1020 | "Decoding pps %d from avcC failed\n", i); | |
9855b2e3 MN |
1021 | return -1; |
1022 | } | |
1023 | p += nalsize; | |
1024 | } | |
e5d40372 | 1025 | // Now store right nal length size, that will be used to parse all other nals |
0c17beba | 1026 | h->nal_length_size = (avctx->extradata[4] & 0x03) + 1; |
9855b2e3 MN |
1027 | } else { |
1028 | h->is_avc = 0; | |
61c6eef5 | 1029 | if (decode_nal_units(h, avctx->extradata, avctx->extradata_size, 1) < 0) |
9855b2e3 MN |
1030 | return -1; |
1031 | } | |
05e95319 HC |
1032 | return 0; |
1033 | } | |
1034 | ||
e5d40372 DB |
1035 | av_cold int ff_h264_decode_init(AVCodecContext *avctx) |
1036 | { | |
1037 | H264Context *h = avctx->priv_data; | |
1038 | MpegEncContext *const s = &h->s; | |
ea2bb12e | 1039 | int i; |
05e95319 | 1040 | |
efd29844 | 1041 | ff_MPV_decode_defaults(s); |
05e95319 HC |
1042 | |
1043 | s->avctx = avctx; | |
1044 | common_init(h); | |
1045 | ||
e5d40372 DB |
1046 | s->out_format = FMT_H264; |
1047 | s->workaround_bugs = avctx->workaround_bugs; | |
05e95319 | 1048 | |
e5d40372 DB |
1049 | /* set defaults */ |
1050 | // s->decode_mb = ff_h263_decode_mb; | |
05e95319 | 1051 | s->quarter_sample = 1; |
e5d40372 DB |
1052 | if (!avctx->has_b_frames) |
1053 | s->low_delay = 1; | |
05e95319 HC |
1054 | |
1055 | avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; | |
1056 | ||
1057 | ff_h264_decode_init_vlc(); | |
1058 | ||
6e3ef511 | 1059 | h->pixel_shift = 0; |
19a0729b | 1060 | h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; |
6e3ef511 | 1061 | |
05e95319 | 1062 | h->thread_context[0] = h; |
e5d40372 | 1063 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
ea2bb12e RB |
1064 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
1065 | h->last_pocs[i] = INT_MIN; | |
e5d40372 DB |
1066 | h->prev_poc_msb = 1 << 16; |
1067 | h->x264_build = -1; | |
05e95319 | 1068 | ff_h264_reset_sei(h); |
36ef5369 | 1069 | if (avctx->codec_id == AV_CODEC_ID_H264) { |
e5d40372 DB |
1070 | if (avctx->ticks_per_frame == 1) |
1071 | s->avctx->time_base.den *= 2; | |
05e95319 HC |
1072 | avctx->ticks_per_frame = 2; |
1073 | } | |
1074 | ||
e5d40372 | 1075 | if (avctx->extradata_size > 0 && avctx->extradata && |
05e95319 HC |
1076 | ff_h264_decode_extradata(h)) |
1077 | return -1; | |
1078 | ||
e5d40372 DB |
1079 | if (h->sps.bitstream_restriction_flag && |
1080 | s->avctx->has_b_frames < h->sps.num_reorder_frames) { | |
db8cb47d | 1081 | s->avctx->has_b_frames = h->sps.num_reorder_frames; |
e5d40372 | 1082 | s->low_delay = 0; |
db8cb47d | 1083 | } |
9855b2e3 | 1084 | |
0da71265 MN |
1085 | return 0; |
1086 | } | |
1087 | ||
e5d40372 DB |
1088 | #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size)))) |
1089 | ||
1090 | static void copy_picture_range(Picture **to, Picture **from, int count, | |
1091 | MpegEncContext *new_base, | |
1092 | MpegEncContext *old_base) | |
6a9c8594 AS |
1093 | { |
1094 | int i; | |
1095 | ||
e5d40372 | 1096 | for (i = 0; i < count; i++) { |
6a9c8594 | 1097 | assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) || |
e5d40372 DB |
1098 | IN_RANGE(from[i], old_base->picture, |
1099 | sizeof(Picture) * old_base->picture_count) || | |
6a9c8594 AS |
1100 | !from[i])); |
1101 | to[i] = REBASE_PICTURE(from[i], new_base, old_base); | |
1102 | } | |
1103 | } | |
1104 | ||
/**
 * Synchronise an array of parameter-set buffers with another one.
 *
 * For every index: if the source entry is NULL the destination entry is
 * freed; otherwise the destination entry is allocated if necessary and the
 * source contents are copied into it.
 *
 * If an allocation fails the corresponding destination entry is left NULL
 * and processing continues with the next index, rather than copying
 * through a NULL pointer.
 *
 * @param to    destination array of count buffers (owned by the caller)
 * @param from  source array of count buffers
 * @param count number of entries in both arrays
 * @param size  size in bytes of one parameter set
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (!from[i]) {
            /* source has no such set: drop ours too, if any */
            if (to[i])
                av_freep(&to[i]);
            continue;
        }

        if (!to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue; /* out of memory: leave this slot empty */
        }

        memcpy(to[i], from[i], size);
    }
}
1119 | ||
e5d40372 DB |
1120 | static int decode_init_thread_copy(AVCodecContext *avctx) |
1121 | { | |
1122 | H264Context *h = avctx->priv_data; | |
6a9c8594 | 1123 | |
f3a29b75 JR |
1124 | if (!avctx->internal->is_copy) |
1125 | return 0; | |
6a9c8594 AS |
1126 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
1127 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); | |
1128 | ||
0eae920c JG |
1129 | h->s.context_initialized = 0; |
1130 | ||
6a9c8594 AS |
1131 | return 0; |
1132 | } | |
1133 | ||
e5d40372 DB |
1134 | #define copy_fields(to, from, start_field, end_field) \ |
1135 | memcpy(&to->start_field, &from->start_field, \ | |
1136 | (char *)&to->end_field - (char *)&to->start_field) | |
1137 | ||
1138 | static int decode_update_thread_context(AVCodecContext *dst, | |
1139 | const AVCodecContext *src) | |
1140 | { | |
1141 | H264Context *h = dst->priv_data, *h1 = src->priv_data; | |
1142 | MpegEncContext *const s = &h->s, *const s1 = &h1->s; | |
6a9c8594 AS |
1143 | int inited = s->context_initialized, err; |
1144 | int i; | |
1145 | ||
e5d40372 DB |
1146 | if (dst == src || !s1->context_initialized) |
1147 | return 0; | |
6a9c8594 AS |
1148 | |
1149 | err = ff_mpeg_update_thread_context(dst, src); | |
e5d40372 DB |
1150 | if (err) |
1151 | return err; | |
6a9c8594 | 1152 | |
e5d40372 DB |
1153 | // FIXME handle width/height changing |
1154 | if (!inited) { | |
1155 | for (i = 0; i < MAX_SPS_COUNT; i++) | |
6a9c8594 AS |
1156 | av_freep(h->sps_buffers + i); |
1157 | ||
e5d40372 | 1158 | for (i = 0; i < MAX_PPS_COUNT; i++) |
6a9c8594 AS |
1159 | av_freep(h->pps_buffers + i); |
1160 | ||
e5d40372 DB |
1161 | // copy all fields after MpegEnc |
1162 | memcpy(&h->s + 1, &h1->s + 1, | |
1163 | sizeof(H264Context) - sizeof(MpegEncContext)); | |
6a9c8594 AS |
1164 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
1165 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); | |
bac3ab13 DB |
1166 | if (ff_h264_alloc_tables(h) < 0) { |
1167 | av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n"); | |
1168 | return AVERROR(ENOMEM); | |
1169 | } | |
6a9c8594 AS |
1170 | context_init(h); |
1171 | ||
e5d40372 DB |
1172 | for (i = 0; i < 2; i++) { |
1173 | h->rbsp_buffer[i] = NULL; | |
6a9c8594 AS |
1174 | h->rbsp_buffer_size[i] = 0; |
1175 | } | |
a394959b | 1176 | h->bipred_scratchpad = NULL; |
6a9c8594 AS |
1177 | |
1178 | h->thread_context[0] = h; | |
1179 | ||
6a9c8594 | 1180 | s->dsp.clear_blocks(h->mb); |
e5d40372 | 1181 | s->dsp.clear_blocks(h->mb + (24 * 16 << h->pixel_shift)); |
6a9c8594 AS |
1182 | } |
1183 | ||
a394959b JG |
1184 | /* frame_start may not be called for the next thread (if it's decoding |
1185 | * a bottom field) so this has to be allocated here */ | |
1186 | if (!h->bipred_scratchpad) | |
1187 | h->bipred_scratchpad = av_malloc(16 * 6 * s->linesize); | |
1188 | ||
e5d40372 DB |
1189 | // extradata/NAL handling |
1190 | h->is_avc = h1->is_avc; | |
6a9c8594 | 1191 | |
e5d40372 DB |
1192 | // SPS/PPS |
1193 | copy_parameter_set((void **)h->sps_buffers, (void **)h1->sps_buffers, | |
1194 | MAX_SPS_COUNT, sizeof(SPS)); | |
1195 | h->sps = h1->sps; | |
1196 | copy_parameter_set((void **)h->pps_buffers, (void **)h1->pps_buffers, | |
1197 | MAX_PPS_COUNT, sizeof(PPS)); | |
1198 | h->pps = h1->pps; | |
6a9c8594 | 1199 | |
e5d40372 DB |
1200 | // Dequantization matrices |
1201 | // FIXME these are big - can they be only copied when PPS changes? | |
6a9c8594 AS |
1202 | copy_fields(h, h1, dequant4_buffer, dequant4_coeff); |
1203 | ||
e5d40372 DB |
1204 | for (i = 0; i < 6; i++) |
1205 | h->dequant4_coeff[i] = h->dequant4_buffer[0] + | |
1206 | (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]); | |
6a9c8594 | 1207 | |
e5d40372 DB |
1208 | for (i = 0; i < 6; i++) |
1209 | h->dequant8_coeff[i] = h->dequant8_buffer[0] + | |
1210 | (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]); | |
6a9c8594 AS |
1211 | |
1212 | h->dequant_coeff_pps = h1->dequant_coeff_pps; | |
1213 | ||
e5d40372 | 1214 | // POC timing |
6a9c8594 AS |
1215 | copy_fields(h, h1, poc_lsb, redundant_pic_count); |
1216 | ||
e5d40372 | 1217 | // reference lists |
6a9c8594 | 1218 | copy_fields(h, h1, ref_count, list_count); |
e5d40372 | 1219 | copy_fields(h, h1, ref_list, intra_gb); |
6a9c8594 AS |
1220 | copy_fields(h, h1, short_ref, cabac_init_idc); |
1221 | ||
e5d40372 DB |
1222 | copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1); |
1223 | copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1); | |
1224 | copy_picture_range(h->delayed_pic, h1->delayed_pic, | |
1225 | MAX_DELAYED_PIC_COUNT + 2, s, s1); | |
6a9c8594 AS |
1226 | |
1227 | h->last_slice_type = h1->last_slice_type; | |
1228 | ||
e5d40372 DB |
1229 | if (!s->current_picture_ptr) |
1230 | return 0; | |
6a9c8594 | 1231 | |
ba0c8981 | 1232 | if (!s->droppable) { |
12fe7594 | 1233 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
e5d40372 DB |
1234 | h->prev_poc_msb = h->poc_msb; |
1235 | h->prev_poc_lsb = h->poc_lsb; | |
6a9c8594 | 1236 | } |
e5d40372 DB |
1237 | h->prev_frame_num_offset = h->frame_num_offset; |
1238 | h->prev_frame_num = h->frame_num; | |
1239 | h->outputed_poc = h->next_outputed_poc; | |
6a9c8594 | 1240 | |
12fe7594 | 1241 | return err; |
6a9c8594 AS |
1242 | } |
1243 | ||
e5d40372 DB |
1244 | int ff_h264_frame_start(H264Context *h) |
1245 | { | |
1246 | MpegEncContext *const s = &h->s; | |
0da71265 | 1247 | int i; |
6e3ef511 | 1248 | const int pixel_shift = h->pixel_shift; |
0da71265 | 1249 | |
e5d40372 | 1250 | if (ff_MPV_frame_start(s, s->avctx) < 0) |
af8aa846 | 1251 | return -1; |
0da71265 | 1252 | ff_er_frame_start(s); |
3a22d7fa | 1253 | /* |
efd29844 | 1254 | * ff_MPV_frame_start uses pict_type to derive key_frame. |
3a22d7fa | 1255 | * This is incorrect for H.264; IDR markings must be used. |
1412060e | 1256 | * Zero here; IDR markings per slice in frame or fields are ORed in later. |
3a22d7fa JD |
1257 | * See decode_nal_units(). |
1258 | */ | |
657ccb5a | 1259 | s->current_picture_ptr->f.key_frame = 0; |
e5d40372 | 1260 | s->current_picture_ptr->mmco_reset = 0; |
0da71265 MN |
1261 | |
1262 | assert(s->linesize && s->uvlinesize); | |
1263 | ||
e5d40372 DB |
1264 | for (i = 0; i < 16; i++) { |
1265 | h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->linesize * ((scan8[i] - scan8[0]) >> 3); | |
1266 | h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->linesize * ((scan8[i] - scan8[0]) >> 3); | |
0da71265 | 1267 | } |
e5d40372 DB |
1268 | for (i = 0; i < 16; i++) { |
1269 | h->block_offset[16 + i] = | |
1270 | h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3); | |
1271 | h->block_offset[48 + 16 + i] = | |
1272 | h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3); | |
0da71265 MN |
1273 | } |
1274 | ||
934b0821 LM |
1275 | /* can't be in alloc_tables because linesize isn't known there. |
1276 | * FIXME: redo bipred weight to not require extra buffer? */ | |
e5d40372 | 1277 | for (i = 0; i < s->slice_context_count; i++) |
a394959b JG |
1278 | if (h->thread_context[i] && !h->thread_context[i]->bipred_scratchpad) |
1279 | h->thread_context[i]->bipred_scratchpad = av_malloc(16 * 6 * s->linesize); | |
e5d40372 DB |
1280 | |
1281 | /* Some macroblocks can be accessed before they're available in case | |
1282 | * of lost slices, MBAFF or threading. */ | |
1283 | memset(h->slice_table, -1, | |
1284 | (s->mb_height * s->mb_stride - 1) * sizeof(*h->slice_table)); | |
1285 | ||
1286 | // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding || | |
1287 | // s->current_picture.f.reference /* || h->contains_intra */ || 1; | |
1288 | ||
1289 | /* We mark the current picture as non-reference after allocating it, so | |
1290 | * that if we break out due to an error it can be released automatically | |
1291 | * in the next ff_MPV_frame_start(). | |
1292 | * SVQ3 as well as most other codecs have only last/next/current and thus | |
1293 | * get released even with set reference, besides SVQ3 and others do not | |
1294 | * mark frames as reference later "naturally". */ | |
36ef5369 | 1295 | if (s->codec_id != AV_CODEC_ID_SVQ3) |
657ccb5a | 1296 | s->current_picture_ptr->f.reference = 0; |
357282c6 | 1297 | |
e5d40372 DB |
1298 | s->current_picture_ptr->field_poc[0] = |
1299 | s->current_picture_ptr->field_poc[1] = INT_MAX; | |
6a9c8594 AS |
1300 | |
1301 | h->next_output_pic = NULL; | |
1302 | ||
e5d40372 | 1303 | assert(s->current_picture_ptr->long_ref == 0); |
357282c6 | 1304 | |
af8aa846 | 1305 | return 0; |
0da71265 MN |
1306 | } |
1307 | ||
6a9c8594 | 1308 | /** |
e5d40372 DB |
1309 | * Run setup operations that must be run after slice header decoding. |
1310 | * This includes finding the next displayed frame. | |
1311 | * | |
1312 | * @param h h264 master context | |
1313 | * @param setup_finished enough NALs have been read that we can call | |
1314 | * ff_thread_finish_setup() | |
1315 | */ | |
1316 | static void decode_postinit(H264Context *h, int setup_finished) | |
1317 | { | |
1318 | MpegEncContext *const s = &h->s; | |
6a9c8594 AS |
1319 | Picture *out = s->current_picture_ptr; |
1320 | Picture *cur = s->current_picture_ptr; | |
1321 | int i, pics, out_of_order, out_idx; | |
adedd840 | 1322 | int invalid = 0, cnt = 0; |
6a9c8594 | 1323 | |
657ccb5a DB |
1324 | s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264; |
1325 | s->current_picture_ptr->f.pict_type = s->pict_type; | |
6a9c8594 | 1326 | |
e5d40372 DB |
1327 | if (h->next_output_pic) |
1328 | return; | |
6a9c8594 | 1329 | |
e5d40372 DB |
1330 | if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) { |
1331 | /* FIXME: if we have two PAFF fields in one packet, we can't start | |
1332 | * the next thread here. If we have one field per packet, we can. | |
1333 | * The check in decode_nal_units() is not good enough to find this | |
1334 | * yet, so we assume the worst for now. */ | |
1335 | // if (setup_finished) | |
6a9c8594 AS |
1336 | // ff_thread_finish_setup(s->avctx); |
1337 | return; | |
1338 | } | |
1339 | ||
657ccb5a DB |
1340 | cur->f.interlaced_frame = 0; |
1341 | cur->f.repeat_pict = 0; | |
6a9c8594 AS |
1342 | |
1343 | /* Signal interlacing information externally. */ | |
e5d40372 DB |
1344 | /* Prioritize picture timing SEI information over used |
1345 | * decoding process if it exists. */ | |
6a9c8594 | 1346 | |
e5d40372 DB |
1347 | if (h->sps.pic_struct_present_flag) { |
1348 | switch (h->sei_pic_struct) { | |
6a9c8594 AS |
1349 | case SEI_PIC_STRUCT_FRAME: |
1350 | break; | |
1351 | case SEI_PIC_STRUCT_TOP_FIELD: | |
1352 | case SEI_PIC_STRUCT_BOTTOM_FIELD: | |
657ccb5a | 1353 | cur->f.interlaced_frame = 1; |
6a9c8594 AS |
1354 | break; |
1355 | case SEI_PIC_STRUCT_TOP_BOTTOM: | |
1356 | case SEI_PIC_STRUCT_BOTTOM_TOP: | |
1357 | if (FIELD_OR_MBAFF_PICTURE) | |
657ccb5a | 1358 | cur->f.interlaced_frame = 1; |
6a9c8594 AS |
1359 | else |
1360 | // try to flag soft telecine progressive | |
657ccb5a | 1361 | cur->f.interlaced_frame = h->prev_interlaced_frame; |
6a9c8594 AS |
1362 | break; |
1363 | case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: | |
1364 | case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: | |
e5d40372 DB |
1365 | /* Signal the possibility of telecined film externally |
1366 | * (pic_struct 5,6). From these hints, let the applications | |
1367 | * decide if they apply deinterlacing. */ | |
657ccb5a | 1368 | cur->f.repeat_pict = 1; |
6a9c8594 AS |
1369 | break; |
1370 | case SEI_PIC_STRUCT_FRAME_DOUBLING: | |
e5d40372 | 1371 | // Force progressive here, doubling interlaced frame is a bad idea. |
657ccb5a | 1372 | cur->f.repeat_pict = 2; |
6a9c8594 AS |
1373 | break; |
1374 | case SEI_PIC_STRUCT_FRAME_TRIPLING: | |
657ccb5a | 1375 | cur->f.repeat_pict = 4; |
6a9c8594 AS |
1376 | break; |
1377 | } | |
1378 | ||
e5d40372 DB |
1379 | if ((h->sei_ct_type & 3) && |
1380 | h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) | |
657ccb5a | 1381 | cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0; |
e5d40372 | 1382 | } else { |
6a9c8594 | 1383 | /* Derive interlacing flag from used decoding process. */ |
657ccb5a | 1384 | cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE; |
6a9c8594 | 1385 | } |
657ccb5a | 1386 | h->prev_interlaced_frame = cur->f.interlaced_frame; |
6a9c8594 | 1387 | |
e5d40372 | 1388 | if (cur->field_poc[0] != cur->field_poc[1]) { |
6a9c8594 | 1389 | /* Derive top_field_first from field pocs. */ |
657ccb5a | 1390 | cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1]; |
e5d40372 | 1391 | } else { |
657ccb5a | 1392 | if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) { |
e5d40372 DB |
1393 | /* Use picture timing SEI information. Even if it is a |
1394 | * information of a past frame, better than nothing. */ | |
1395 | if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM || | |
1396 | h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) | |
657ccb5a | 1397 | cur->f.top_field_first = 1; |
6a9c8594 | 1398 | else |
657ccb5a | 1399 | cur->f.top_field_first = 0; |
e5d40372 | 1400 | } else { |
6a9c8594 | 1401 | /* Most likely progressive */ |
657ccb5a | 1402 | cur->f.top_field_first = 0; |
6a9c8594 AS |
1403 | } |
1404 | } | |
1405 | ||
e5d40372 | 1406 | // FIXME do something with unavailable reference frames |
6a9c8594 AS |
1407 | |
1408 | /* Sort B-frames into display order */ | |
1409 | ||
e5d40372 DB |
1410 | if (h->sps.bitstream_restriction_flag && |
1411 | s->avctx->has_b_frames < h->sps.num_reorder_frames) { | |
6a9c8594 | 1412 | s->avctx->has_b_frames = h->sps.num_reorder_frames; |
e5d40372 | 1413 | s->low_delay = 0; |
6a9c8594 AS |
1414 | } |
1415 | ||
e5d40372 DB |
1416 | if (s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT && |
1417 | !h->sps.bitstream_restriction_flag) { | |
2574f08d | 1418 | s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1; |
e5d40372 | 1419 | s->low_delay = 0; |
6a9c8594 AS |
1420 | } |
1421 | ||
1422 | pics = 0; | |
e5d40372 DB |
1423 | while (h->delayed_pic[pics]) |
1424 | pics++; | |
6a9c8594 AS |
1425 | |
1426 | assert(pics <= MAX_DELAYED_PIC_COUNT); | |
1427 | ||
1428 | h->delayed_pic[pics++] = cur; | |
657ccb5a DB |
1429 | if (cur->f.reference == 0) |
1430 | cur->f.reference = DELAYED_PIC_REF; | |
6a9c8594 | 1431 | |
adedd840 RB |
1432 | /* Frame reordering. This code takes pictures from coding order and sorts |
1433 | * them by their incremental POC value into display order. It supports POC | |
1434 | * gaps, MMCO reset codes and random resets. | |
1435 | * A "display group" can start either with a IDR frame (f.key_frame = 1), | |
1436 | * and/or can be closed down with a MMCO reset code. In sequences where | |
1437 | * there is no delay, we can't detect that (since the frame was already | |
1438 | * output to the user), so we also set h->mmco_reset to detect the MMCO | |
1439 | * reset code. | |
1440 | * FIXME: if we detect insufficient delays (as per s->avctx->has_b_frames), | |
1441 | * we increase the delay between input and output. All frames affected by | |
1442 | * the lag (e.g. those that should have been output before another frame | |
1443 | * that we already returned to the user) will be dropped. This is a bug | |
1444 | * that we will fix later. */ | |
1445 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) { | |
1446 | cnt += out->poc < h->last_pocs[i]; | |
1447 | invalid += out->poc == INT_MIN; | |
1448 | } | |
e5d40372 DB |
1449 | if (!h->mmco_reset && !cur->f.key_frame && |
1450 | cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) { | |
adedd840 RB |
1451 | h->mmco_reset = 2; |
1452 | if (pics > 1) | |
1453 | h->delayed_pic[pics - 2]->mmco_reset = 2; | |
1454 | } | |
1455 | if (h->mmco_reset || cur->f.key_frame) { | |
1456 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) | |
1457 | h->last_pocs[i] = INT_MIN; | |
1458 | cnt = 0; | |
1459 | invalid = MAX_DELAYED_PIC_COUNT; | |
1460 | } | |
e5d40372 | 1461 | out = h->delayed_pic[0]; |
6a9c8594 | 1462 | out_idx = 0; |
e5d40372 DB |
1463 | for (i = 1; i < MAX_DELAYED_PIC_COUNT && |
1464 | h->delayed_pic[i] && | |
1465 | !h->delayed_pic[i - 1]->mmco_reset && | |
1466 | !h->delayed_pic[i]->f.key_frame; | |
1467 | i++) | |
1468 | if (h->delayed_pic[i]->poc < out->poc) { | |
1469 | out = h->delayed_pic[i]; | |
6a9c8594 AS |
1470 | out_idx = i; |
1471 | } | |
e5d40372 DB |
1472 | if (s->avctx->has_b_frames == 0 && |
1473 | (h->delayed_pic[0]->f.key_frame || h->mmco_reset)) | |
adedd840 | 1474 | h->next_outputed_poc = INT_MIN; |
e5d40372 DB |
1475 | out_of_order = !out->f.key_frame && !h->mmco_reset && |
1476 | (out->poc < h->next_outputed_poc); | |
6a9c8594 | 1477 | |
e5d40372 DB |
1478 | if (h->sps.bitstream_restriction_flag && |
1479 | s->avctx->has_b_frames >= h->sps.num_reorder_frames) { | |
1480 | } else if (out_of_order && pics - 1 == s->avctx->has_b_frames && | |
1481 | s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) { | |
ea2bb12e RB |
1482 | if (invalid + cnt < MAX_DELAYED_PIC_COUNT) { |
1483 | s->avctx->has_b_frames = FFMAX(s->avctx->has_b_frames, cnt); | |
ea2bb12e RB |
1484 | } |
1485 | s->low_delay = 0; | |
1486 | } else if (s->low_delay && | |
e5d40372 DB |
1487 | ((h->next_outputed_poc != INT_MIN && |
1488 | out->poc > h->next_outputed_poc + 2) || | |
ea2bb12e | 1489 | cur->f.pict_type == AV_PICTURE_TYPE_B)) { |
6a9c8594 AS |
1490 | s->low_delay = 0; |
1491 | s->avctx->has_b_frames++; | |
1492 | } | |
1493 | ||
e5d40372 | 1494 | if (pics > s->avctx->has_b_frames) { |
657ccb5a | 1495 | out->f.reference &= ~DELAYED_PIC_REF; |
e5d40372 DB |
1496 | // for frame threading, the owner must be the second field's thread or |
1497 | // else the first thread can release the picture and reuse it unsafely | |
1498 | out->owner2 = s; | |
1499 | for (i = out_idx; h->delayed_pic[i]; i++) | |
1500 | h->delayed_pic[i] = h->delayed_pic[i + 1]; | |
1501 | } | |
1502 | memmove(h->last_pocs, &h->last_pocs[1], | |
1503 | sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1)); | |
adedd840 | 1504 | h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc; |
e5d40372 | 1505 | if (!out_of_order && pics > s->avctx->has_b_frames) { |
6a9c8594 | 1506 | h->next_output_pic = out; |
adedd840 RB |
1507 | if (out->mmco_reset) { |
1508 | if (out_idx > 0) { | |
e5d40372 | 1509 | h->next_outputed_poc = out->poc; |
adedd840 RB |
1510 | h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset; |
1511 | } else { | |
1512 | h->next_outputed_poc = INT_MIN; | |
1513 | } | |
1514 | } else { | |
0b4c3232 RB |
1515 | if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) { |
1516 | h->next_outputed_poc = INT_MIN; | |
1517 | } else { | |
1518 | h->next_outputed_poc = out->poc; | |
1519 | } | |
adedd840 RB |
1520 | } |
1521 | h->mmco_reset = 0; | |
e5d40372 | 1522 | } else { |
6a9c8594 AS |
1523 | av_log(s->avctx, AV_LOG_DEBUG, "no picture\n"); |
1524 | } | |
1525 | ||
1526 | if (setup_finished) | |
1527 | ff_thread_finish_setup(s->avctx); | |
1528 | } | |
1529 | ||
76741b0e BC |
1530 | static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, |
1531 | uint8_t *src_cb, uint8_t *src_cr, | |
e5d40372 DB |
1532 | int linesize, int uvlinesize, |
1533 | int simple) | |
76741b0e | 1534 | { |
e5d40372 | 1535 | MpegEncContext *const s = &h->s; |
0b69d625 | 1536 | uint8_t *top_border; |
5f7f9719 | 1537 | int top_idx = 1; |
6e3ef511 | 1538 | const int pixel_shift = h->pixel_shift; |
76741b0e BC |
1539 | int chroma444 = CHROMA444; |
1540 | int chroma422 = CHROMA422; | |
115329f1 | 1541 | |
e5d40372 | 1542 | src_y -= linesize; |
53c05b1e MN |
1543 | src_cb -= uvlinesize; |
1544 | src_cr -= uvlinesize; | |
1545 | ||
e5d40372 DB |
1546 | if (!simple && FRAME_MBAFF) { |
1547 | if (s->mb_y & 1) { | |
1548 | if (!MB_MBAFF) { | |
0b69d625 | 1549 | top_border = h->top_borders[0][s->mb_x]; |
e5d40372 | 1550 | AV_COPY128(top_border, src_y + 15 * linesize); |
6e3ef511 | 1551 | if (pixel_shift) |
e5d40372 DB |
1552 | AV_COPY128(top_border + 16, src_y + 15 * linesize + 16); |
1553 | if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) { | |
1554 | if (chroma444) { | |
1555 | if (pixel_shift) { | |
1556 | AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize); | |
1557 | AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16); | |
1558 | AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize); | |
1559 | AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16); | |
c90b9442 | 1560 | } else { |
e5d40372 DB |
1561 | AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize); |
1562 | AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize); | |
c90b9442 | 1563 | } |
e5d40372 | 1564 | } else if (chroma422) { |
76741b0e | 1565 | if (pixel_shift) { |
e5d40372 DB |
1566 | AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize); |
1567 | AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize); | |
76741b0e | 1568 | } else { |
e5d40372 DB |
1569 | AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize); |
1570 | AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize); | |
76741b0e | 1571 | } |
6e3ef511 | 1572 | } else { |
c90b9442 | 1573 | if (pixel_shift) { |
e5d40372 DB |
1574 | AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize); |
1575 | AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize); | |
c90b9442 | 1576 | } else { |
e5d40372 DB |
1577 | AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); |
1578 | AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); | |
c90b9442 | 1579 | } |
6e3ef511 | 1580 | } |
5f7f9719 MN |
1581 | } |
1582 | } | |
e5d40372 | 1583 | } else if (MB_MBAFF) { |
c988f975 | 1584 | top_idx = 0; |
e5d40372 | 1585 | } else |
c988f975 | 1586 | return; |
5f7f9719 MN |
1587 | } |
1588 | ||
0b69d625 | 1589 | top_border = h->top_borders[top_idx][s->mb_x]; |
09f21198 | 1590 | /* There are two lines saved, the line above the top macroblock |
e5d40372 DB |
1591 | * of a pair, and the line above the bottom macroblock. */ |
1592 | AV_COPY128(top_border, src_y + 16 * linesize); | |
6e3ef511 | 1593 | if (pixel_shift) |
e5d40372 DB |
1594 | AV_COPY128(top_border + 16, src_y + 16 * linesize + 16); |
1595 | ||
1596 | if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) { | |
1597 | if (chroma444) { | |
1598 | if (pixel_shift) { | |
1599 | AV_COPY128(top_border + 32, src_cb + 16 * linesize); | |
1600 | AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16); | |
1601 | AV_COPY128(top_border + 64, src_cr + 16 * linesize); | |
1602 | AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16); | |
c90b9442 | 1603 | } else { |
e5d40372 DB |
1604 | AV_COPY128(top_border + 16, src_cb + 16 * linesize); |
1605 | AV_COPY128(top_border + 32, src_cr + 16 * linesize); | |
c90b9442 | 1606 | } |
e5d40372 | 1607 | } else if (chroma422) { |
76741b0e | 1608 | if (pixel_shift) { |
e5d40372 DB |
1609 | AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize); |
1610 | AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize); | |
76741b0e | 1611 | } else { |
e5d40372 DB |
1612 | AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize); |
1613 | AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize); | |
76741b0e | 1614 | } |
6e3ef511 | 1615 | } else { |
c90b9442 | 1616 | if (pixel_shift) { |
e5d40372 DB |
1617 | AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize); |
1618 | AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize); | |
c90b9442 | 1619 | } else { |
e5d40372 DB |
1620 | AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize); |
1621 | AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize); | |
c90b9442 | 1622 | } |
6e3ef511 | 1623 | } |
53c05b1e MN |
1624 | } |
1625 | } | |
1626 | ||
bbdd52ed | 1627 | static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y, |
e5d40372 DB |
1628 | uint8_t *src_cb, uint8_t *src_cr, |
1629 | int linesize, int uvlinesize, | |
1630 | int xchg, int chroma444, | |
1631 | int simple, int pixel_shift) | |
1632 | { | |
1633 | MpegEncContext *const s = &h->s; | |
4e987f82 | 1634 | int deblock_topleft; |
b69378e2 | 1635 | int deblock_top; |
5f7f9719 | 1636 | int top_idx = 1; |
1e4f1c56 AS |
1637 | uint8_t *top_border_m1; |
1638 | uint8_t *top_border; | |
5f7f9719 | 1639 | |
e5d40372 DB |
1640 | if (!simple && FRAME_MBAFF) { |
1641 | if (s->mb_y & 1) { | |
1642 | if (!MB_MBAFF) | |
c988f975 | 1643 | return; |
e5d40372 | 1644 | } else { |
5f7f9719 MN |
1645 | top_idx = MB_MBAFF ? 0 : 1; |
1646 | } | |
5f7f9719 | 1647 | } |
b69378e2 | 1648 | |
e5d40372 | 1649 | if (h->deblocking_filter == 2) { |
4e987f82 RB |
1650 | deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num; |
1651 | deblock_top = h->top_type; | |
b69378e2 | 1652 | } else { |
4e987f82 RB |
1653 | deblock_topleft = (s->mb_x > 0); |
1654 | deblock_top = (s->mb_y > !!MB_FIELD); | |
b69378e2 | 1655 | } |
53c05b1e | 1656 | |
e5d40372 | 1657 | src_y -= linesize + 1 + pixel_shift; |
6e3ef511 OA |
1658 | src_cb -= uvlinesize + 1 + pixel_shift; |
1659 | src_cr -= uvlinesize + 1 + pixel_shift; | |
53c05b1e | 1660 | |
e5d40372 | 1661 | top_border_m1 = h->top_borders[top_idx][s->mb_x - 1]; |
1e4f1c56 AS |
1662 | top_border = h->top_borders[top_idx][s->mb_x]; |
1663 | ||
e5d40372 DB |
1664 | #define XCHG(a, b, xchg) \ |
1665 | if (pixel_shift) { \ | |
1666 | if (xchg) { \ | |
1667 | AV_SWAP64(b + 0, a + 0); \ | |
1668 | AV_SWAP64(b + 8, a + 8); \ | |
1669 | } else { \ | |
1670 | AV_COPY128(b, a); \ | |
1671 | } \ | |
1672 | } else if (xchg) \ | |
1673 | AV_SWAP64(b, a); \ | |
1674 | else \ | |
1675 | AV_COPY64(b, a); | |
1676 | ||
1677 | if (deblock_top) { | |
1678 | if (deblock_topleft) { | |
1679 | XCHG(top_border_m1 + (8 << pixel_shift), | |
1680 | src_y - (7 << pixel_shift), 1); | |
c988f975 | 1681 | } |
6e3ef511 OA |
1682 | XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); |
1683 | XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); | |
e5d40372 DB |
1684 | if (s->mb_x + 1 < s->mb_width) { |
1685 | XCHG(h->top_borders[top_idx][s->mb_x + 1], | |
1686 | src_y + (17 << pixel_shift), 1); | |
43efd19a | 1687 | } |
53c05b1e | 1688 | } |
e5d40372 DB |
1689 | if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) { |
1690 | if (chroma444) { | |
1691 | if (deblock_topleft) { | |
c90b9442 JGG |
1692 | XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
1693 | XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); | |
1694 | } | |
1695 | XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); | |
1696 | XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); | |
1697 | XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); | |
1698 | XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); | |
e5d40372 DB |
1699 | if (s->mb_x + 1 < s->mb_width) { |
1700 | XCHG(h->top_borders[top_idx][s->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); | |
1701 | XCHG(h->top_borders[top_idx][s->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); | |
c90b9442 JGG |
1702 | } |
1703 | } else { | |
e5d40372 DB |
1704 | if (deblock_top) { |
1705 | if (deblock_topleft) { | |
c90b9442 JGG |
1706 | XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
1707 | XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); | |
1708 | } | |
e5d40372 DB |
1709 | XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1); |
1710 | XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1); | |
c988f975 | 1711 | } |
53c05b1e | 1712 | } |
53c05b1e MN |
1713 | } |
1714 | } | |
1715 | ||
e5d40372 DB |
1716 | static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, |
1717 | int index) | |
1718 | { | |
6e3ef511 | 1719 | if (high_bit_depth) { |
e5d40372 | 1720 | return AV_RN32A(((int32_t *)mb) + index); |
6e3ef511 OA |
1721 | } else |
1722 | return AV_RN16A(mb + index); | |
1723 | } | |
1724 | ||
e5d40372 DB |
1725 | static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, |
1726 | int index, int value) | |
1727 | { | |
6e3ef511 | 1728 | if (high_bit_depth) { |
e5d40372 | 1729 | AV_WN32A(((int32_t *)mb) + index, value); |
6e3ef511 OA |
1730 | } else |
1731 | AV_WN16A(mb + index, value); | |
1732 | } | |
1733 | ||
e5d40372 DB |
1734 | static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, |
1735 | int mb_type, int is_h264, | |
1736 | int simple, | |
1737 | int transform_bypass, | |
1738 | int pixel_shift, | |
1739 | int *block_offset, | |
1740 | int linesize, | |
1741 | uint8_t *dest_y, int p) | |
c90b9442 | 1742 | { |
e5d40372 | 1743 | MpegEncContext *const s = &h->s; |
c90b9442 JGG |
1744 | void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
1745 | void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); | |
1746 | int i; | |
e5d40372 DB |
1747 | int qscale = p == 0 ? s->qscale : h->chroma_qp[p - 1]; |
1748 | block_offset += 16 * p; | |
1749 | if (IS_INTRA4x4(mb_type)) { | |
1750 | if (simple || !s->encoding) { | |
1751 | if (IS_8x8DCT(mb_type)) { | |
1752 | if (transform_bypass) { | |
1753 | idct_dc_add = | |
1754 | idct_add = s->dsp.add_pixels8; | |
1755 | } else { | |
c90b9442 JGG |
1756 | idct_dc_add = h->h264dsp.h264_idct8_dc_add; |
1757 | idct_add = h->h264dsp.h264_idct8_add; | |
1758 | } | |
e5d40372 DB |
1759 | for (i = 0; i < 16; i += 4) { |
1760 | uint8_t *const ptr = dest_y + block_offset[i]; | |
1761 | const int dir = h->intra4x4_pred_mode_cache[scan8[i]]; | |
1762 | if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) { | |
1763 | h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
1764 | } else { | |
1765 | const int nnz = h->non_zero_count_cache[scan8[i + p * 16]]; | |
1766 | h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000, | |
1767 | (h->topright_samples_available << i) & 0x4000, linesize); | |
1768 | if (nnz) { | |
1769 | if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) | |
1770 | idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
c90b9442 | 1771 | else |
e5d40372 | 1772 | idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
c90b9442 JGG |
1773 | } |
1774 | } | |
1775 | } | |
e5d40372 DB |
1776 | } else { |
1777 | if (transform_bypass) { | |
1778 | idct_dc_add = | |
1779 | idct_add = s->dsp.add_pixels4; | |
1780 | } else { | |
c90b9442 JGG |
1781 | idct_dc_add = h->h264dsp.h264_idct_dc_add; |
1782 | idct_add = h->h264dsp.h264_idct_add; | |
1783 | } | |
e5d40372 DB |
1784 | for (i = 0; i < 16; i++) { |
1785 | uint8_t *const ptr = dest_y + block_offset[i]; | |
1786 | const int dir = h->intra4x4_pred_mode_cache[scan8[i]]; | |
c90b9442 | 1787 | |
e5d40372 DB |
1788 | if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) { |
1789 | h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
1790 | } else { | |
c90b9442 JGG |
1791 | uint8_t *topright; |
1792 | int nnz, tr; | |
1793 | uint64_t tr_high; | |
e5d40372 DB |
1794 | if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) { |
1795 | const int topright_avail = (h->topright_samples_available << i) & 0x8000; | |
6371ce4b | 1796 | assert(s->mb_y || linesize <= block_offset[i]); |
e5d40372 | 1797 | if (!topright_avail) { |
c90b9442 | 1798 | if (pixel_shift) { |
e5d40372 DB |
1799 | tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL; |
1800 | topright = (uint8_t *)&tr_high; | |
c90b9442 | 1801 | } else { |
e5d40372 DB |
1802 | tr = ptr[3 - linesize] * 0x01010101u; |
1803 | topright = (uint8_t *)&tr; | |
c90b9442 | 1804 | } |
e5d40372 DB |
1805 | } else |
1806 | topright = ptr + (4 << pixel_shift) - linesize; | |
1807 | } else | |
1808 | topright = NULL; | |
1809 | ||
1810 | h->hpc.pred4x4[dir](ptr, topright, linesize); | |
1811 | nnz = h->non_zero_count_cache[scan8[i + p * 16]]; | |
1812 | if (nnz) { | |
1813 | if (is_h264) { | |
1814 | if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) | |
1815 | idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
c90b9442 | 1816 | else |
e5d40372 | 1817 | idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
301fb921 | 1818 | } else if (CONFIG_SVQ3_DECODER) |
e5d40372 | 1819 | ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0); |
c90b9442 JGG |
1820 | } |
1821 | } | |
1822 | } | |
1823 | } | |
1824 | } | |
e5d40372 DB |
1825 | } else { |
1826 | h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize); | |
1827 | if (is_h264) { | |
1828 | if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) { | |
1829 | if (!transform_bypass) | |
1830 | h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift), | |
1831 | h->mb_luma_dc[p], | |
1832 | h->dequant4_coeff[p][qscale][0]); | |
1833 | else { | |
1834 | static const uint8_t dc_mapping[16] = { | |
1835 | 0 * 16, 1 * 16, 4 * 16, 5 * 16, | |
1836 | 2 * 16, 3 * 16, 6 * 16, 7 * 16, | |
1837 | 8 * 16, 9 * 16, 12 * 16, 13 * 16, | |
1838 | 10 * 16, 11 * 16, 14 * 16, 15 * 16 }; | |
1839 | for (i = 0; i < 16; i++) | |
1840 | dctcoef_set(h->mb + (p * 256 << pixel_shift), | |
1841 | pixel_shift, dc_mapping[i], | |
1842 | dctcoef_get(h->mb_luma_dc[p], | |
1843 | pixel_shift, i)); | |
c90b9442 JGG |
1844 | } |
1845 | } | |
301fb921 | 1846 | } else if (CONFIG_SVQ3_DECODER) |
e5d40372 DB |
1847 | ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256, |
1848 | h->mb_luma_dc[p], qscale); | |
c90b9442 JGG |
1849 | } |
1850 | } | |
1851 | ||
e5d40372 DB |
1852 | static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, |
1853 | int is_h264, int simple, | |
1854 | int transform_bypass, | |
1855 | int pixel_shift, | |
1856 | int *block_offset, | |
1857 | int linesize, | |
1858 | uint8_t *dest_y, int p) | |
c90b9442 | 1859 | { |
e5d40372 | 1860 | MpegEncContext *const s = &h->s; |
c90b9442 JGG |
1861 | void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
1862 | int i; | |
e5d40372 DB |
1863 | block_offset += 16 * p; |
1864 | if (!IS_INTRA4x4(mb_type)) { | |
1865 | if (is_h264) { | |
1866 | if (IS_INTRA16x16(mb_type)) { | |
1867 | if (transform_bypass) { | |
1868 | if (h->sps.profile_idc == 244 && | |
1869 | (h->intra16x16_pred_mode == VERT_PRED8x8 || | |
1870 | h->intra16x16_pred_mode == HOR_PRED8x8)) { | |
1871 | h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, | |
1872 | h->mb + (p * 256 << pixel_shift), | |
1873 | linesize); | |
1874 | } else { | |
1875 | for (i = 0; i < 16; i++) | |
1876 | if (h->non_zero_count_cache[scan8[i + p * 16]] || | |
1877 | dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) | |
1878 | s->dsp.add_pixels4(dest_y + block_offset[i], | |
1879 | h->mb + (i * 16 + p * 256 << pixel_shift), | |
1880 | linesize); | |
c90b9442 | 1881 | } |
e5d40372 DB |
1882 | } else { |
1883 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, | |
1884 | h->mb + (p * 256 << pixel_shift), | |
1885 | linesize, | |
1886 | h->non_zero_count_cache + p * 5 * 8); | |
c90b9442 | 1887 | } |
e5d40372 DB |
1888 | } else if (h->cbp & 15) { |
1889 | if (transform_bypass) { | |
c90b9442 | 1890 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
e5d40372 DB |
1891 | idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 |
1892 | : s->dsp.add_pixels4; | |
1893 | for (i = 0; i < 16; i += di) | |
1894 | if (h->non_zero_count_cache[scan8[i + p * 16]]) | |
1895 | idct_add(dest_y + block_offset[i], | |
1896 | h->mb + (i * 16 + p * 256 << pixel_shift), | |
1897 | linesize); | |
1898 | } else { | |
1899 | if (IS_8x8DCT(mb_type)) | |
1900 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, | |
1901 | h->mb + (p * 256 << pixel_shift), | |
1902 | linesize, | |
1903 | h->non_zero_count_cache + p * 5 * 8); | |
1904 | else | |
1905 | h->h264dsp.h264_idct_add16(dest_y, block_offset, | |
1906 | h->mb + (p * 256 << pixel_shift), | |
1907 | linesize, | |
1908 | h->non_zero_count_cache + p * 5 * 8); | |
c90b9442 JGG |
1909 | } |
1910 | } | |
301fb921 | 1911 | } else if (CONFIG_SVQ3_DECODER) { |
e5d40372 DB |
1912 | for (i = 0; i < 16; i++) |
1913 | if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) { | |
1914 | // FIXME benchmark weird rule, & below | |
1915 | uint8_t *const ptr = dest_y + block_offset[i]; | |
1916 | ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, | |
1917 | s->qscale, IS_INTRA(mb_type) ? 1 : 0); | |
c90b9442 | 1918 | } |
c90b9442 JGG |
1919 | } |
1920 | } | |
1921 | } | |
1922 | ||
28fff0d9 MR |
1923 | #define BITS 8 |
1924 | #define SIMPLE 1 | |
1925 | #include "h264_mb_template.c" | |
e5d40372 | 1926 | |
28fff0d9 MR |
1927 | #undef BITS |
1928 | #define BITS 16 | |
1929 | #include "h264_mb_template.c" | |
bd91fee3 | 1930 | |
28fff0d9 MR |
1931 | #undef SIMPLE |
1932 | #define SIMPLE 0 | |
1933 | #include "h264_mb_template.c" | |
c90b9442 | 1934 | |
e5d40372 DB |
1935 | void ff_h264_hl_decode_mb(H264Context *h) |
1936 | { | |
1937 | MpegEncContext *const s = &h->s; | |
1938 | const int mb_xy = h->mb_xy; | |
657ccb5a | 1939 | const int mb_type = s->current_picture.f.mb_type[mb_xy]; |
e5d40372 | 1940 | int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; |
bd91fee3 | 1941 | |
c90b9442 | 1942 | if (CHROMA444) { |
e5d40372 | 1943 | if (is_complex || h->pixel_shift) |
c90b9442 JGG |
1944 | hl_decode_mb_444_complex(h); |
1945 | else | |
28fff0d9 | 1946 | hl_decode_mb_444_simple_8(h); |
c90b9442 | 1947 | } else if (is_complex) { |
bd91fee3 | 1948 | hl_decode_mb_complex(h); |
6e3ef511 OA |
1949 | } else if (h->pixel_shift) { |
1950 | hl_decode_mb_simple_16(h); | |
1951 | } else | |
1952 | hl_decode_mb_simple_8(h); | |
bd91fee3 AS |
1953 | } |
1954 | ||
e5d40372 DB |
1955 | static int pred_weight_table(H264Context *h) |
1956 | { | |
1957 | MpegEncContext *const s = &h->s; | |
0da71265 | 1958 | int list, i; |
9f2d1b4f | 1959 | int luma_def, chroma_def; |
115329f1 | 1960 | |
e5d40372 DB |
1961 | h->use_weight = 0; |
1962 | h->use_weight_chroma = 0; | |
1963 | h->luma_log2_weight_denom = get_ue_golomb(&s->gb); | |
1964 | if (h->sps.chroma_format_idc) | |
1965 | h->chroma_log2_weight_denom = get_ue_golomb(&s->gb); | |
1966 | luma_def = 1 << h->luma_log2_weight_denom; | |
1967 | chroma_def = 1 << h->chroma_log2_weight_denom; | |
0da71265 | 1968 | |
e5d40372 | 1969 | for (list = 0; list < 2; list++) { |
cb99c652 GB |
1970 | h->luma_weight_flag[list] = 0; |
1971 | h->chroma_weight_flag[list] = 0; | |
e5d40372 | 1972 | for (i = 0; i < h->ref_count[list]; i++) { |
0da71265 | 1973 | int luma_weight_flag, chroma_weight_flag; |
115329f1 | 1974 | |
e5d40372 DB |
1975 | luma_weight_flag = get_bits1(&s->gb); |
1976 | if (luma_weight_flag) { | |
1977 | h->luma_weight[i][list][0] = get_se_golomb(&s->gb); | |
1978 | h->luma_weight[i][list][1] = get_se_golomb(&s->gb); | |
1979 | if (h->luma_weight[i][list][0] != luma_def || | |
1980 | h->luma_weight[i][list][1] != 0) { | |
1981 | h->use_weight = 1; | |
1982 | h->luma_weight_flag[list] = 1; | |
cb99c652 | 1983 | } |
e5d40372 DB |
1984 | } else { |
1985 | h->luma_weight[i][list][0] = luma_def; | |
1986 | h->luma_weight[i][list][1] = 0; | |
0da71265 MN |
1987 | } |
1988 | ||
e5d40372 DB |
1989 | if (h->sps.chroma_format_idc) { |
1990 | chroma_weight_flag = get_bits1(&s->gb); | |
1991 | if (chroma_weight_flag) { | |
fef744d4 | 1992 | int j; |
e5d40372 DB |
1993 | for (j = 0; j < 2; j++) { |
1994 | h->chroma_weight[i][list][j][0] = get_se_golomb(&s->gb); | |
1995 | h->chroma_weight[i][list][j][1] = get_se_golomb(&s->gb); | |
1996 | if (h->chroma_weight[i][list][j][0] != chroma_def || | |
1997 | h->chroma_weight[i][list][j][1] != 0) { | |
1998 | h->use_weight_chroma = 1; | |
1999 | h->chroma_weight_flag[list] = 1; | |
cb99c652 | 2000 | } |
fef744d4 | 2001 | } |
e5d40372 | 2002 | } else { |
fef744d4 | 2003 | int j; |
e5d40372 DB |
2004 | for (j = 0; j < 2; j++) { |
2005 | h->chroma_weight[i][list][j][0] = chroma_def; | |
2006 | h->chroma_weight[i][list][j][1] = 0; | |
fef744d4 | 2007 | } |
0da71265 MN |
2008 | } |
2009 | } | |
2010 | } | |
e5d40372 DB |
2011 | if (h->slice_type_nos != AV_PICTURE_TYPE_B) |
2012 | break; | |
0da71265 | 2013 | } |
e5d40372 | 2014 | h->use_weight = h->use_weight || h->use_weight_chroma; |
0da71265 MN |
2015 | return 0; |
2016 | } | |
2017 | ||
1052b76f MN |
2018 | /** |
2019 | * Initialize implicit_weight table. | |
6da88bd3 | 2020 | * @param field 0/1 initialize the weight for interlaced MBAFF |
1052b76f MN |
2021 | * -1 initializes the rest |
2022 | */ | |
e5d40372 DB |
2023 | static void implicit_weight_table(H264Context *h, int field) |
2024 | { | |
2025 | MpegEncContext *const s = &h->s; | |
1052b76f | 2026 | int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; |
9f2d1b4f | 2027 | |
ce09f927 GB |
2028 | for (i = 0; i < 2; i++) { |
2029 | h->luma_weight_flag[i] = 0; | |
2030 | h->chroma_weight_flag[i] = 0; | |
2031 | } | |
2032 | ||
e5d40372 | 2033 | if (field < 0) { |
4418aa9c RB |
2034 | if (s->picture_structure == PICT_FRAME) { |
2035 | cur_poc = s->current_picture_ptr->poc; | |
2036 | } else { | |
2037 | cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1]; | |
2038 | } | |
e5d40372 DB |
2039 | if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF && |
2040 | h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) { | |
2041 | h->use_weight = 0; | |
2042 | h->use_weight_chroma = 0; | |
2043 | return; | |
2044 | } | |
2045 | ref_start = 0; | |
2046 | ref_count0 = h->ref_count[0]; | |
2047 | ref_count1 = h->ref_count[1]; | |
2048 | } else { | |
2049 | cur_poc = s->current_picture_ptr->field_poc[field]; | |
2050 | ref_start = 16; | |
2051 | ref_count0 = 16 + 2 * h->ref_count[0]; | |
2052 | ref_count1 = 16 + 2 * h->ref_count[1]; | |
1052b76f | 2053 | } |
9f2d1b4f | 2054 | |
e5d40372 DB |
2055 | h->use_weight = 2; |
2056 | h->use_weight_chroma = 2; | |
2057 | h->luma_log2_weight_denom = 5; | |
2058 | h->chroma_log2_weight_denom = 5; | |
9f2d1b4f | 2059 | |
e5d40372 | 2060 | for (ref0 = ref_start; ref0 < ref_count0; ref0++) { |
9f2d1b4f | 2061 | int poc0 = h->ref_list[0][ref0].poc; |
e5d40372 | 2062 | for (ref1 = ref_start; ref1 < ref_count1; ref1++) { |
87cf70eb JD |
2063 | int w = 32; |
2064 | if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) { | |
2065 | int poc1 = h->ref_list[1][ref1].poc; | |
e5d40372 DB |
2066 | int td = av_clip(poc1 - poc0, -128, 127); |
2067 | if (td) { | |
87cf70eb JD |
2068 | int tb = av_clip(cur_poc - poc0, -128, 127); |
2069 | int tx = (16384 + (FFABS(td) >> 1)) / td; | |
e5d40372 DB |
2070 | int dist_scale_factor = (tb * tx + 32) >> 8; |
2071 | if (dist_scale_factor >= -64 && dist_scale_factor <= 128) | |
87cf70eb JD |
2072 | w = 64 - dist_scale_factor; |
2073 | } | |
1052b76f | 2074 | } |
e5d40372 DB |
2075 | if (field < 0) { |
2076 | h->implicit_weight[ref0][ref1][0] = | |
2077 | h->implicit_weight[ref0][ref1][1] = w; | |
2078 | } else { | |
2079 | h->implicit_weight[ref0][ref1][field] = w; | |
72f86ec0 | 2080 | } |
9f2d1b4f LM |
2081 | } |
2082 | } | |
2083 | } | |
2084 | ||
8fd57a66 | 2085 | /** |
5175b937 | 2086 | * instantaneous decoder refresh. |
0da71265 | 2087 | */ |
e5d40372 DB |
2088 | static void idr(H264Context *h) |
2089 | { | |
ea6f00c4 | 2090 | ff_h264_remove_all_refs(h); |
e5d40372 DB |
2091 | h->prev_frame_num = 0; |
2092 | h->prev_frame_num_offset = 0; | |
2093 | h->prev_poc_msb = | |
2094 | h->prev_poc_lsb = 0; | |
0da71265 MN |
2095 | } |
2096 | ||
7c33ad19 | 2097 | /* forget old pics after a seek */ |
e5d40372 DB |
2098 | static void flush_dpb(AVCodecContext *avctx) |
2099 | { | |
2100 | H264Context *h = avctx->priv_data; | |
7c33ad19 | 2101 | int i; |
e5d40372 DB |
2102 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) { |
2103 | if (h->delayed_pic[i]) | |
657ccb5a | 2104 | h->delayed_pic[i]->f.reference = 0; |
e5d40372 | 2105 | h->delayed_pic[i] = NULL; |
285b570f | 2106 | } |
adedd840 RB |
2107 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
2108 | h->last_pocs[i] = INT_MIN; | |
e5d40372 | 2109 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
b19d493f | 2110 | h->prev_interlaced_frame = 1; |
7c33ad19 | 2111 | idr(h); |
e5d40372 | 2112 | if (h->s.current_picture_ptr) |
657ccb5a | 2113 | h->s.current_picture_ptr->f.reference = 0; |
e5d40372 | 2114 | h->s.first_field = 0; |
9c095463 | 2115 | ff_h264_reset_sei(h); |
e240f898 | 2116 | ff_mpeg_flush(avctx); |
7c33ad19 LM |
2117 | } |
2118 | ||
e5d40372 DB |
2119 | static int init_poc(H264Context *h) |
2120 | { | |
2121 | MpegEncContext *const s = &h->s; | |
2122 | const int max_frame_num = 1 << h->sps.log2_max_frame_num; | |
0da71265 | 2123 | int field_poc[2]; |
357282c6 | 2124 | Picture *cur = s->current_picture_ptr; |
0da71265 | 2125 | |
e5d40372 DB |
2126 | h->frame_num_offset = h->prev_frame_num_offset; |
2127 | if (h->frame_num < h->prev_frame_num) | |
b78a6baa | 2128 | h->frame_num_offset += max_frame_num; |
0da71265 | 2129 | |
e5d40372 DB |
2130 | if (h->sps.poc_type == 0) { |
2131 | const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb; | |
0da71265 | 2132 | |
e5d40372 | 2133 | if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2) |
0da71265 | 2134 | h->poc_msb = h->prev_poc_msb + max_poc_lsb; |
e5d40372 | 2135 | else if (h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2) |
0da71265 MN |
2136 | h->poc_msb = h->prev_poc_msb - max_poc_lsb; |
2137 | else | |
2138 | h->poc_msb = h->prev_poc_msb; | |
115329f1 | 2139 | field_poc[0] = |
0da71265 | 2140 | field_poc[1] = h->poc_msb + h->poc_lsb; |
e5d40372 | 2141 | if (s->picture_structure == PICT_FRAME) |
0da71265 | 2142 | field_poc[1] += h->delta_poc_bottom; |
e5d40372 | 2143 | } else if (h->sps.poc_type == 1) { |
0da71265 MN |
2144 | int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; |
2145 | int i; | |
2146 | ||
e5d40372 | 2147 | if (h->sps.poc_cycle_length != 0) |
0da71265 MN |
2148 | abs_frame_num = h->frame_num_offset + h->frame_num; |
2149 | else | |
2150 | abs_frame_num = 0; | |
2151 | ||
e5d40372 | 2152 | if (h->nal_ref_idc == 0 && abs_frame_num > 0) |
0da71265 | 2153 | abs_frame_num--; |
115329f1 | 2154 | |
0da71265 | 2155 | expected_delta_per_poc_cycle = 0; |
e5d40372 DB |
2156 | for (i = 0; i < h->sps.poc_cycle_length; i++) |
2157 | // FIXME integrate during sps parse | |
2158 | expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i]; | |
0da71265 | 2159 | |
e5d40372 | 2160 | if (abs_frame_num > 0) { |
0da71265 MN |
2161 | int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; |
2162 | int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; | |
2163 | ||
2164 | expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; | |
e5d40372 DB |
2165 | for (i = 0; i <= frame_num_in_poc_cycle; i++) |
2166 | expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i]; | |
0da71265 MN |
2167 | } else |
2168 | expectedpoc = 0; | |
2169 | ||
e5d40372 | 2170 | if (h->nal_ref_idc == 0) |
0da71265 | 2171 | expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; |
115329f1 | 2172 | |
0da71265 MN |
2173 | field_poc[0] = expectedpoc + h->delta_poc[0]; |
2174 | field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; | |
2175 | ||
e5d40372 | 2176 | if (s->picture_structure == PICT_FRAME) |
0da71265 | 2177 | field_poc[1] += h->delta_poc[1]; |
e5d40372 DB |
2178 | } else { |
2179 | int poc = 2 * (h->frame_num_offset + h->frame_num); | |
5710b371 | 2180 | |
e5d40372 | 2181 | if (!h->nal_ref_idc) |
b78a6baa | 2182 | poc--; |
5710b371 | 2183 | |
e5d40372 DB |
2184 | field_poc[0] = poc; |
2185 | field_poc[1] = poc; | |
0da71265 | 2186 | } |
115329f1 | 2187 | |
e5d40372 DB |
2188 | if (s->picture_structure != PICT_BOTTOM_FIELD) |
2189 | s->current_picture_ptr->field_poc[0] = field_poc[0]; | |
2190 | if (s->picture_structure != PICT_TOP_FIELD) | |
2191 | s->current_picture_ptr->field_poc[1] = field_poc[1]; | |
2192 | cur->poc = FFMIN(cur->field_poc[0], cur->field_poc[1]); | |
0da71265 MN |
2193 | |
2194 | return 0; | |
2195 | } | |
2196 | ||
b41c1db3 AÖ |
2197 | /** |
2198 | * initialize scan tables | |
2199 | */ | |
e5d40372 DB |
2200 | static void init_scan_tables(H264Context *h) |
2201 | { | |
b41c1db3 | 2202 | int i; |
e5d40372 DB |
2203 | for (i = 0; i < 16; i++) { |
2204 | #define T(x) (x >> 2) | ((x << 2) & 0xF) | |
ca32f7f2 | 2205 | h->zigzag_scan[i] = T(zigzag_scan[i]); |
e5d40372 | 2206 | h->field_scan[i] = T(field_scan[i]); |
b41c1db3 | 2207 | #undef T |
b41c1db3 | 2208 | } |
e5d40372 DB |
2209 | for (i = 0; i < 64; i++) { |
2210 | #define T(x) (x >> 3) | ((x & 7) << 3) | |
ca32f7f2 JGG |
2211 | h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); |
2212 | h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); | |
2213 | h->field_scan8x8[i] = T(field_scan8x8[i]); | |
2214 | h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); | |
b41c1db3 | 2215 | #undef T |
b41c1db3 | 2216 | } |
e5d40372 | 2217 | if (h->sps.transform_bypass) { // FIXME same ugly |
b41c1db3 | 2218 | h->zigzag_scan_q0 = zigzag_scan; |
45beb850 | 2219 | h->zigzag_scan8x8_q0 = ff_zigzag_direct; |
b41c1db3 AÖ |
2220 | h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; |
2221 | h->field_scan_q0 = field_scan; | |
2222 | h->field_scan8x8_q0 = field_scan8x8; | |
2223 | h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; | |
e5d40372 | 2224 | } else { |
b41c1db3 AÖ |
2225 | h->zigzag_scan_q0 = h->zigzag_scan; |
2226 | h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; | |
2227 | h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; | |
2228 | h->field_scan_q0 = h->field_scan; | |
2229 | h->field_scan8x8_q0 = h->field_scan8x8; | |
2230 | h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; | |
2231 | } | |
2232 | } | |
afebe2f7 | 2233 | |
e5d40372 DB |
2234 | static int field_end(H264Context *h, int in_setup) |
2235 | { | |
2236 | MpegEncContext *const s = &h->s; | |
2237 | AVCodecContext *const avctx = s->avctx; | |
12fe7594 | 2238 | int err = 0; |
e5d40372 | 2239 | s->mb_y = 0; |
256299d3 | 2240 | |
ba0c8981 | 2241 | if (!in_setup && !s->droppable) |
1e26a48f | 2242 | ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, |
47c0ac96 | 2243 | s->picture_structure == PICT_BOTTOM_FIELD); |
256299d3 | 2244 | |
e5d40372 DB |
2245 | if (CONFIG_H264_VDPAU_DECODER && |
2246 | s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) | |
256299d3 MN |
2247 | ff_vdpau_h264_set_reference_frames(s); |
2248 | ||
e5d40372 | 2249 | if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) { |
ba0c8981 | 2250 | if (!s->droppable) { |
12fe7594 | 2251 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
e5d40372 DB |
2252 | h->prev_poc_msb = h->poc_msb; |
2253 | h->prev_poc_lsb = h->poc_lsb; | |
6a9c8594 | 2254 | } |
e5d40372 DB |
2255 | h->prev_frame_num_offset = h->frame_num_offset; |
2256 | h->prev_frame_num = h->frame_num; | |
2257 | h->outputed_poc = h->next_outputed_poc; | |
256299d3 | 2258 | } |
256299d3 MN |
2259 | |
2260 | if (avctx->hwaccel) { | |
2261 | if (avctx->hwaccel->end_frame(avctx) < 0) | |
e5d40372 DB |
2262 | av_log(avctx, AV_LOG_ERROR, |
2263 | "hardware accelerator failed to decode picture\n"); | |
256299d3 MN |
2264 | } |
2265 | ||
e5d40372 DB |
2266 | if (CONFIG_H264_VDPAU_DECODER && |
2267 | s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) | |
256299d3 MN |
2268 | ff_vdpau_h264_picture_complete(s); |
2269 | ||
2270 | /* | |
2271 | * FIXME: Error handling code does not seem to support interlaced | |
2272 | * when slices span multiple rows | |
2273 | * The ff_er_add_slice calls don't work right for bottom | |
2274 | * fields; they cause massive erroneous error concealing | |
2275 | * Error marking covers both fields (top and bottom). | |
2276 | * This causes a mismatched s->error_count | |
2277 | * and a bad error table. Further, the error count goes to | |
2278 | * INT_MAX when called for bottom field, because mb_y is | |
2279 | * past end by one (callers fault) and resync_mb_y != 0 | |
2280 | * causes problems for the first MB line, too. | |
2281 | */ | |
2282 | if (!FIELD_PICTURE) | |
2283 | ff_er_frame_end(s); | |
2284 | ||
efd29844 | 2285 | ff_MPV_frame_end(s); |
d225a1e2 | 2286 | |
e5d40372 | 2287 | h->current_slice = 0; |
12fe7594 DB |
2288 | |
2289 | return err; | |
256299d3 MN |
2290 | } |
2291 | ||
afebe2f7 | 2292 | /** |
49bd8e4b | 2293 | * Replicate H264 "master" context to thread contexts. |
afebe2f7 | 2294 | */ |
f1d8763a | 2295 | static int clone_slice(H264Context *dst, H264Context *src) |
afebe2f7 | 2296 | { |
f1d8763a JG |
2297 | int ret; |
2298 | ||
e5d40372 DB |
2299 | memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); |
2300 | dst->s.current_picture_ptr = src->s.current_picture_ptr; | |
2301 | dst->s.current_picture = src->s.current_picture; | |
2302 | dst->s.linesize = src->s.linesize; | |
2303 | dst->s.uvlinesize = src->s.uvlinesize; | |
2304 | dst->s.first_field = src->s.first_field; | |
2305 | ||
f1d8763a JG |
2306 | if (!dst->s.edge_emu_buffer && |
2307 | (ret = ff_mpv_frame_size_alloc(&dst->s, dst->s.linesize))) { | |
2308 | av_log(dst->s.avctx, AV_LOG_ERROR, | |
2309 | "Failed to allocate scratch buffers\n"); | |
2310 | return ret; | |
2311 | } | |
2312 | ||
e5d40372 DB |
2313 | dst->prev_poc_msb = src->prev_poc_msb; |
2314 | dst->prev_poc_lsb = src->prev_poc_lsb; | |
2315 | dst->prev_frame_num_offset = src->prev_frame_num_offset; | |
2316 | dst->prev_frame_num = src->prev_frame_num; | |
2317 | dst->short_ref_count = src->short_ref_count; | |
afebe2f7 AÖ |
2318 | |
2319 | memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); | |
2320 | memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); | |
2321 | memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); | |
2322 | memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list)); | |
50c21814 AÖ |
2323 | |
2324 | memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); | |
2325 | memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); | |
f1d8763a JG |
2326 | |
2327 | return 0; | |
afebe2f7 AÖ |
2328 | } |
2329 | ||
0da71265 | 2330 | /** |
58c42af7 | 2331 | * Compute profile from profile_idc and constraint_set?_flags. |
fe9a3fbe JG |
2332 | * |
2333 | * @param sps SPS | |
2334 | * | |
2335 | * @return profile as defined by FF_PROFILE_H264_* | |
2336 | */ | |
2337 | int ff_h264_get_profile(SPS *sps) | |
2338 | { | |
2339 | int profile = sps->profile_idc; | |
2340 | ||
e5d40372 | 2341 | switch (sps->profile_idc) { |
fe9a3fbe JG |
2342 | case FF_PROFILE_H264_BASELINE: |
2343 | // constraint_set1_flag set to 1 | |
e5d40372 | 2344 | profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0; |
fe9a3fbe JG |
2345 | break; |
2346 | case FF_PROFILE_H264_HIGH_10: | |
2347 | case FF_PROFILE_H264_HIGH_422: | |
2348 | case FF_PROFILE_H264_HIGH_444_PREDICTIVE: | |
2349 | // constraint_set3_flag set to 1 | |
e5d40372 | 2350 | profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0; |
fe9a3fbe JG |
2351 | break; |
2352 | } | |
2353 | ||
2354 | return profile; | |
2355 | } | |
2356 | ||
072be3e8 JG |
2357 | static int h264_set_parameter_from_sps(H264Context *h) |
2358 | { | |
2359 | MpegEncContext *s = &h->s; | |
2360 | ||
2361 | if (s->flags & CODEC_FLAG_LOW_DELAY || | |
2362 | (h->sps.bitstream_restriction_flag && | |
2363 | !h->sps.num_reorder_frames)) { | |
2364 | if (s->avctx->has_b_frames > 1 || h->delayed_pic[0]) | |
2365 | av_log(h->s.avctx, AV_LOG_WARNING, "Delayed frames seen. " | |
2366 | "Reenabling low delay requires a codec flush.\n"); | |
2367 | else | |
2368 | s->low_delay = 1; | |
2369 | } | |
2370 | ||
2371 | if (s->avctx->has_b_frames < 2) | |
2372 | s->avctx->has_b_frames = !s->low_delay; | |
2373 | ||
2374 | if (s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma || | |
2375 | h->cur_chroma_format_idc != h->sps.chroma_format_idc) { | |
2376 | if (s->avctx->codec && | |
2377 | s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU && | |
2378 | (h->sps.bit_depth_luma != 8 || h->sps.chroma_format_idc > 1)) { | |
2379 | av_log(s->avctx, AV_LOG_ERROR, | |
2380 | "VDPAU decoding does not support video colorspace.\n"); | |
2381 | return AVERROR_INVALIDDATA; | |
2382 | } | |
2383 | if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { | |
2384 | s->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; | |
2385 | h->cur_chroma_format_idc = h->sps.chroma_format_idc; | |
2386 | h->pixel_shift = h->sps.bit_depth_luma > 8; | |
2387 | ||
2388 | ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, | |
2389 | h->sps.chroma_format_idc); | |
2390 | ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, | |
2391 | h->sps.chroma_format_idc); | |
2392 | s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; | |
2393 | ff_dsputil_init(&s->dsp, s->avctx); | |
2394 | } else { | |
2395 | av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", | |
2396 | h->sps.bit_depth_luma); | |
2397 | return AVERROR_INVALIDDATA; | |
2398 | } | |
2399 | } | |
2400 | return 0; | |
2401 | } | |
2402 | ||
fe9a3fbe | 2403 | /** |
58c42af7 | 2404 | * Decode a slice header. |
efd29844 | 2405 | * This will also call ff_MPV_common_init() and frame_start() as needed. |
afebe2f7 AÖ |
2406 | * |
2407 | * @param h h264context | |
e5d40372 DB |
2408 | * @param h0 h264 master context (differs from 'h' when doing slice-based |
2409 | * parallel decoding) | |
afebe2f7 | 2410 | * |
d9526386 | 2411 | * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded |
0da71265 | 2412 | */ |
e5d40372 DB |
2413 | static int decode_slice_header(H264Context *h, H264Context *h0) |
2414 | { | |
2415 | MpegEncContext *const s = &h->s; | |
2416 | MpegEncContext *const s0 = &h0->s; | |
88e7a4d1 | 2417 | unsigned int first_mb_in_slice; |
ac658be5 | 2418 | unsigned int pps_id; |
072be3e8 | 2419 | int num_ref_idx_active_override_flag, max_refs, ret; |
41f5c62f | 2420 | unsigned int slice_type, tmp, i, j; |
0bf79634 | 2421 | int default_ref_list_done = 0; |
ba0c8981 | 2422 | int last_pic_structure, last_pic_droppable; |
0da71265 | 2423 | |
9a0dda8b | 2424 | /* FIXME: 2tap qpel isn't implemented for high bit depth. */ |
e5d40372 DB |
2425 | if ((s->avctx->flags2 & CODEC_FLAG2_FAST) && |
2426 | !h->nal_ref_idc && !h->pixel_shift) { | |
2427 | s->me.qpel_put = s->dsp.put_2tap_qpel_pixels_tab; | |
2428 | s->me.qpel_avg = s->dsp.avg_2tap_qpel_pixels_tab; | |
2429 | } else { | |
2430 | s->me.qpel_put = s->dsp.put_h264_qpel_pixels_tab; | |
2431 | s->me.qpel_avg = s->dsp.avg_h264_qpel_pixels_tab; | |
cf653d08 JD |
2432 | } |
2433 | ||
e5d40372 | 2434 | first_mb_in_slice = get_ue_golomb(&s->gb); |
0da71265 | 2435 | |
e5d40372 DB |
2436 | if (first_mb_in_slice == 0) { // FIXME better field boundary detection |
2437 | if (h0->current_slice && FIELD_PICTURE) { | |
6a9c8594 | 2438 | field_end(h, 1); |
d225a1e2 MN |
2439 | } |
2440 | ||
afebe2f7 | 2441 | h0->current_slice = 0; |
1e26a48f | 2442 | if (!s0->first_field) { |
ba0c8981 | 2443 | if (s->current_picture_ptr && !s->droppable && |
1e26a48f RB |
2444 | s->current_picture_ptr->owner2 == s) { |
2445 | ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, | |
2446 | s->picture_structure == PICT_BOTTOM_FIELD); | |
2447 | } | |
e5d40372 | 2448 | s->current_picture_ptr = NULL; |
1e26a48f | 2449 | } |
66a4b2c1 MN |
2450 | } |
2451 | ||
e5d40372 DB |
2452 | slice_type = get_ue_golomb_31(&s->gb); |
2453 | if (slice_type > 9) { | |
2454 | av_log(h->s.avctx, AV_LOG_ERROR, | |
2455 | "slice type too large (%d) at %d %d\n", | |
2456 | h->slice_type, s->mb_x, s->mb_y); | |
5175b937 | 2457 | return -1; |
0da71265 | 2458 | } |
e5d40372 | 2459 | if (slice_type > 4) { |
0bf79634 | 2460 | slice_type -= 5; |
e5d40372 DB |
2461 | h->slice_type_fixed = 1; |
2462 | } else | |
2463 | h->slice_type_fixed = 0; | |
115329f1 | 2464 | |
e5d40372 DB |
2465 | slice_type = golomb_to_pict_type[slice_type]; |
2466 | if (slice_type == AV_PICTURE_TYPE_I || | |
2467 | (h0->current_slice != 0 && slice_type == h0->last_slice_type)) { | |
0bf79634 LLL |
2468 | default_ref_list_done = 1; |
2469 | } | |
e5d40372 DB |
2470 | h->slice_type = slice_type; |
2471 | h->slice_type_nos = slice_type & 3; | |
0bf79634 | 2472 | |
e5d40372 DB |
2473 | // to make a few old functions happy, it's wrong though |
2474 | s->pict_type = h->slice_type; | |
115329f1 | 2475 | |
e5d40372 DB |
2476 | pps_id = get_ue_golomb(&s->gb); |
2477 | if (pps_id >= MAX_PPS_COUNT) { | |
9b879566 | 2478 | av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); |
0da71265 MN |
2479 | return -1; |
2480 | } | |
e5d40372 DB |
2481 | if (!h0->pps_buffers[pps_id]) { |
2482 | av_log(h->s.avctx, AV_LOG_ERROR, | |
2483 | "non-existing PPS %u referenced\n", | |
2484 | pps_id); | |
8b92b792 MN |
2485 | return -1; |
2486 | } | |
e5d40372 | 2487 | h->pps = *h0->pps_buffers[pps_id]; |
8b92b792 | 2488 | |
e5d40372 DB |
2489 | if (!h0->sps_buffers[h->pps.sps_id]) { |
2490 | av_log(h->s.avctx, AV_LOG_ERROR, | |
2491 | "non-existing SPS %u referenced\n", | |
2492 | h->pps.sps_id); | |
8b92b792 MN |
2493 | return -1; |
2494 | } | |
072be3e8 JG |
2495 | |
2496 | if (h->pps.sps_id != h->current_sps_id || | |
2497 | h0->sps_buffers[h->pps.sps_id]->new) { | |
2498 | h0->sps_buffers[h->pps.sps_id]->new = 0; | |
2499 | ||
2500 | h->current_sps_id = h->pps.sps_id; | |
2501 | h->sps = *h0->sps_buffers[h->pps.sps_id]; | |
2502 | ||
2503 | if ((ret = h264_set_parameter_from_sps(h)) < 0) | |
2504 | return ret; | |
2505 | } | |
239ea04c | 2506 | |
fe9a3fbe | 2507 | s->avctx->profile = ff_h264_get_profile(&h->sps); |
fa37cf0d | 2508 | s->avctx->level = h->sps.level_idc; |
6752a3cc | 2509 | s->avctx->refs = h->sps.ref_frame_count; |
b08e38e8 | 2510 | |
e5d40372 DB |
2511 | s->mb_width = h->sps.mb_width; |
2512 | s->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); | |
115329f1 | 2513 | |
e5d40372 | 2514 | h->b_stride = s->mb_width * 4; |
0da71265 | 2515 | |
76741b0e BC |
2516 | s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p |
2517 | ||
e5d40372 DB |
2518 | s->width = 16 * s->mb_width - (2 >> CHROMA444) * FFMIN(h->sps.crop_right, (8 << CHROMA444) - 1); |
2519 | if (h->sps.frame_mbs_only_flag) | |
2520 | s->height = 16 * s->mb_height - (1 << s->chroma_y_shift) * FFMIN(h->sps.crop_bottom, (16 >> s->chroma_y_shift) - 1); | |
0da71265 | 2521 | else |
e5d40372 | 2522 | s->height = 16 * s->mb_height - (2 << s->chroma_y_shift) * FFMIN(h->sps.crop_bottom, (16 >> s->chroma_y_shift) - 1); |
115329f1 | 2523 | |
30f51509 MR |
2524 | if (FFALIGN(s->avctx->width, 16) == s->width && |
2525 | FFALIGN(s->avctx->height, 16) == s->height) { | |
2526 | s->width = s->avctx->width; | |
2527 | s->height = s->avctx->height; | |
2528 | } | |
2529 | ||
e5d40372 DB |
2530 | if (s->context_initialized && |
2531 | (s->width != s->avctx->width || s->height != s->avctx->height || | |
2532 | av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) { | |
732f9fcf | 2533 | if (h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) { |
e5d40372 | 2534 | av_log_missing_feature(s->avctx, |
f75f4194 | 2535 | "Width/height changing with threads", 0); |
732f9fcf | 2536 | return AVERROR_PATCHWELCOME; // width / height changed during parallelized decoding |
6a9c8594 | 2537 | } |
91078926 | 2538 | free_tables(h, 0); |
ff7f75e1 | 2539 | flush_dpb(s->avctx); |
efd29844 | 2540 | ff_MPV_common_end(s); |
0da71265 MN |
2541 | } |
2542 | if (!s->context_initialized) { | |
33aec3f4 | 2543 | if (h != h0) { |
e5d40372 DB |
2544 | av_log(h->s.avctx, AV_LOG_ERROR, |
2545 | "Cannot (re-)initialize context during parallel decoding.\n"); | |
33aec3f4 MN |
2546 | return -1; |
2547 | } | |
f3bdc3da RD |
2548 | |
2549 | avcodec_set_dimensions(s->avctx, s->width, s->height); | |
e5d40372 | 2550 | s->avctx->sample_aspect_ratio = h->sps.sar; |
cfa5a81e | 2551 | av_assert0(s->avctx->sample_aspect_ratio.den); |
f3bdc3da | 2552 | |
e5d40372 DB |
2553 | if (h->sps.video_signal_type_present_flag) { |
2554 | s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG | |
2555 | : AVCOL_RANGE_MPEG; | |
2556 | if (h->sps.colour_description_present_flag) { | |
c4dffe7e DC |
2557 | s->avctx->color_primaries = h->sps.color_primaries; |
2558 | s->avctx->color_trc = h->sps.color_trc; | |
2559 | s->avctx->colorspace = h->sps.colorspace; | |
2560 | } | |
2561 | } | |
2562 | ||
e5d40372 DB |
2563 | if (h->sps.timing_info_present_flag) { |
2564 | int64_t den = h->sps.time_scale; | |
2565 | if (h->x264_build < 44U) | |
3102d180 | 2566 | den *= 2; |
f3bdc3da | 2567 | av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, |
e5d40372 | 2568 | h->sps.num_units_in_tick, den, 1 << 30); |
f3bdc3da | 2569 | } |
d545cf80 OA |
2570 | |
2571 | switch (h->sps.bit_depth_luma) { | |
e5d40372 DB |
2572 | case 9: |
2573 | if (CHROMA444) { | |
2574 | if (s->avctx->colorspace == AVCOL_SPC_RGB) { | |
716d413c | 2575 | s->avctx->pix_fmt = AV_PIX_FMT_GBRP9; |
e5d40372 | 2576 | } else |
716d413c | 2577 | s->avctx->pix_fmt = AV_PIX_FMT_YUV444P9; |
e5d40372 | 2578 | } else if (CHROMA422) |
716d413c | 2579 | s->avctx->pix_fmt = AV_PIX_FMT_YUV422P9; |
e5d40372 | 2580 | else |
716d413c | 2581 | s->avctx->pix_fmt = AV_PIX_FMT_YUV420P9; |
e5d40372 DB |
2582 | break; |
2583 | case 10: | |
2584 | if (CHROMA444) { | |
2585 | if (s->avctx->colorspace == AVCOL_SPC_RGB) { | |
716d413c | 2586 | s->avctx->pix_fmt = AV_PIX_FMT_GBRP10; |
e5d40372 | 2587 | } else |
716d413c | 2588 | s->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; |
e5d40372 | 2589 | } else if (CHROMA422) |
716d413c | 2590 | s->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; |
e5d40372 | 2591 | else |
716d413c | 2592 | s->avctx->pix_fmt = AV_PIX_FMT_YUV420P10; |
e5d40372 DB |
2593 | break; |
2594 | case 8: | |
2595 | if (CHROMA444) { | |
2596 | if (s->avctx->colorspace == AVCOL_SPC_RGB) { | |
716d413c | 2597 | s->avctx->pix_fmt = AV_PIX_FMT_GBRP; |
e5d40372 | 2598 | } else |
716d413c AK |
2599 | s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P |
2600 | : AV_PIX_FMT_YUV444P; | |
e5d40372 | 2601 | } else if (CHROMA422) { |
716d413c AK |
2602 | s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P |
2603 | : AV_PIX_FMT_YUV422P; | |
e5d40372 DB |
2604 | } else { |
2605 | s->avctx->pix_fmt = s->avctx->get_format(s->avctx, | |
2606 | s->avctx->codec->pix_fmts ? | |
2607 | s->avctx->codec->pix_fmts : | |
2608 | s->avctx->color_range == AVCOL_RANGE_JPEG ? | |
2609 | hwaccel_pixfmt_list_h264_jpeg_420 : | |
2610 | ff_hwaccel_pixfmt_list_420); | |
2611 | } | |
2612 | break; | |
2613 | default: | |
2614 | av_log(s->avctx, AV_LOG_ERROR, | |
2615 | "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); | |
2616 | return AVERROR_INVALIDDATA; | |
d545cf80 OA |
2617 | } |
2618 | ||
e5d40372 DB |
2619 | s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, |
2620 | s->avctx->pix_fmt); | |
f3bdc3da | 2621 | |
efd29844 MS |
2622 | if (ff_MPV_common_init(s) < 0) { |
2623 | av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_init() failed.\n"); | |
0da71265 | 2624 | return -1; |
33aec3f4 | 2625 | } |
12d96de3 | 2626 | s->first_field = 0; |
b19d493f | 2627 | h->prev_interlaced_frame = 1; |
115329f1 | 2628 | |
b41c1db3 | 2629 | init_scan_tables(h); |
bac3ab13 | 2630 | if (ff_h264_alloc_tables(h) < 0) { |
e5d40372 DB |
2631 | av_log(h->s.avctx, AV_LOG_ERROR, |
2632 | "Could not allocate memory for h264\n"); | |
bac3ab13 DB |
2633 | return AVERROR(ENOMEM); |
2634 | } | |
0da71265 | 2635 | |
e5d40372 | 2636 | if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_SLICE)) { |
33aec3f4 MN |
2637 | if (context_init(h) < 0) { |
2638 | av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n"); | |
afebe2f7 | 2639 | return -1; |
33aec3f4 | 2640 | } |
6a9c8594 | 2641 | } else { |
e5d40372 | 2642 | for (i = 1; i < s->slice_context_count; i++) { |
6a9c8594 AS |
2643 | H264Context *c; |
2644 | c = h->thread_context[i] = av_malloc(sizeof(H264Context)); | |
2645 | memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); | |
2646 | memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); | |
e5d40372 DB |
2647 | c->h264dsp = h->h264dsp; |
2648 | c->sps = h->sps; | |
2649 | c->pps = h->pps; | |
6a9c8594 AS |
2650 | c->pixel_shift = h->pixel_shift; |
2651 | init_scan_tables(c); | |
2652 | clone_tables(c, h, i); | |
2653 | } | |
2654 | ||
e5d40372 | 2655 | for (i = 0; i < s->slice_context_count; i++) |
33aec3f4 | 2656 | if (context_init(h->thread_context[i]) < 0) { |
e5d40372 DB |
2657 | av_log(h->s.avctx, AV_LOG_ERROR, |
2658 | "context_init() failed.\n"); | |
6a9c8594 | 2659 | return -1; |
33aec3f4 | 2660 | } |
6a9c8594 | 2661 | } |
0da71265 MN |
2662 | } |
2663 | ||
e5d40372 | 2664 | if (h == h0 && h->dequant_coeff_pps != pps_id) { |
0ce4fe48 RB |
2665 | h->dequant_coeff_pps = pps_id; |
2666 | init_dequant_tables(h); | |
2667 | } | |
2668 | ||
e5d40372 | 2669 | h->frame_num = get_bits(&s->gb, h->sps.log2_max_frame_num); |
0da71265 | 2670 | |
e5d40372 DB |
2671 | h->mb_mbaff = 0; |
2672 | h->mb_aff_frame = 0; | |
12d96de3 | 2673 | last_pic_structure = s0->picture_structure; |
ba0c8981 DB |
2674 | last_pic_droppable = s0->droppable; |
2675 | s->droppable = h->nal_ref_idc == 0; | |
e5d40372 DB |
2676 | if (h->sps.frame_mbs_only_flag) { |
2677 | s->picture_structure = PICT_FRAME; | |
2678 | } else { | |
2679 | if (get_bits1(&s->gb)) { // field_pic_flag | |
2680 | s->picture_structure = PICT_TOP_FIELD + get_bits1(&s->gb); // bottom_field_flag | |
6ba71fc4 | 2681 | } else { |
e5d40372 DB |
2682 | s->picture_structure = PICT_FRAME; |
2683 | h->mb_aff_frame = h->sps.mb_aff; | |
6867a90b | 2684 | } |
0da71265 | 2685 | } |
e5d40372 | 2686 | h->mb_field_decoding_flag = s->picture_structure != PICT_FRAME; |
2ddcf84b | 2687 | |
1e26a48f RB |
2688 | if (h0->current_slice != 0) { |
2689 | if (last_pic_structure != s->picture_structure || | |
ba0c8981 | 2690 | last_pic_droppable != s->droppable) { |
1e26a48f RB |
2691 | av_log(h->s.avctx, AV_LOG_ERROR, |
2692 | "Changing field mode (%d -> %d) between slices is not allowed\n", | |
2693 | last_pic_structure, s->picture_structure); | |
2694 | s->picture_structure = last_pic_structure; | |
ba0c8981 | 2695 | s->droppable = last_pic_droppable; |
1e26a48f | 2696 | return AVERROR_INVALIDDATA; |
5945c7b3 | 2697 | } else if (!s0->current_picture_ptr) { |
0b300daa JG |
2698 | av_log(s->avctx, AV_LOG_ERROR, |
2699 | "unset current_picture_ptr on %d. slice\n", | |
2700 | h0->current_slice + 1); | |
2701 | return AVERROR_INVALIDDATA; | |
1e26a48f RB |
2702 | } |
2703 | } else { | |
e5d40372 DB |
2704 | /* Shorten frame num gaps so we don't have to allocate reference |
2705 | * frames just to throw them away */ | |
2706 | if (h->frame_num != h->prev_frame_num) { | |
2707 | int unwrap_prev_frame_num = h->prev_frame_num; | |
2708 | int max_frame_num = 1 << h->sps.log2_max_frame_num; | |
3803af22 | 2709 | |
e5d40372 DB |
2710 | if (unwrap_prev_frame_num > h->frame_num) |
2711 | unwrap_prev_frame_num -= max_frame_num; | |
3803af22 AS |
2712 | |
2713 | if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) { | |
2714 | unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1; | |
2715 | if (unwrap_prev_frame_num < 0) | |
2716 | unwrap_prev_frame_num += max_frame_num; | |
2717 | ||
2718 | h->prev_frame_num = unwrap_prev_frame_num; | |
2719 | } | |
2720 | } | |
6a9c8594 | 2721 | |
1e26a48f RB |
2722 | /* See if we have a decoded first field looking for a pair... |
2723 | * Here, we're using that to see if we should mark previously | |
2724 | * decode frames as "finished". | |
2725 | * We have to do that before the "dummy" in-between frame allocation, | |
2726 | * since that can modify s->current_picture_ptr. */ | |
2727 | if (s0->first_field) { | |
2728 | assert(s0->current_picture_ptr); | |
2729 | assert(s0->current_picture_ptr->f.data[0]); | |
2730 | assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF); | |
2731 | ||
2732 | /* Mark old field/frame as completed */ | |
ba0c8981 | 2733 | if (!last_pic_droppable && s0->current_picture_ptr->owner2 == s0) { |
1e26a48f RB |
2734 | ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX, |
2735 | last_pic_structure == PICT_BOTTOM_FIELD); | |
2736 | } | |
2737 | ||
2738 | /* figure out if we have a complementary field pair */ | |
2739 | if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { | |
2740 | /* Previous field is unmatched. Don't display it, but let it | |
2741 | * remain for reference if marked as such. */ | |
ba0c8981 | 2742 | if (!last_pic_droppable && last_pic_structure != PICT_FRAME) { |
1e26a48f RB |
2743 | ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX, |
2744 | last_pic_structure == PICT_TOP_FIELD); | |
2745 | } | |
2746 | } else { | |
2747 | if (s0->current_picture_ptr->frame_num != h->frame_num) { | |
2748 | /* This and previous field were reference, but had | |
2749 | * different frame_nums. Consider this field first in | |
2750 | * pair. Throw away previous field except for reference | |
2751 | * purposes. */ | |
ba0c8981 | 2752 | if (!last_pic_droppable && last_pic_structure != PICT_FRAME) { |
1e26a48f RB |
2753 | ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX, |
2754 | last_pic_structure == PICT_TOP_FIELD); | |
2755 | } | |
2756 | } else { | |
2757 | /* Second field in complementary pair */ | |
2758 | if (!((last_pic_structure == PICT_TOP_FIELD && | |
2759 | s->picture_structure == PICT_BOTTOM_FIELD) || | |
2760 | (last_pic_structure == PICT_BOTTOM_FIELD && | |
2761 | s->picture_structure == PICT_TOP_FIELD))) { | |
2762 | av_log(s->avctx, AV_LOG_ERROR, | |
2763 | "Invalid field mode combination %d/%d\n", | |
2764 | last_pic_structure, s->picture_structure); | |
2765 | s->picture_structure = last_pic_structure; | |
ba0c8981 | 2766 | s->droppable = last_pic_droppable; |
1e26a48f | 2767 | return AVERROR_INVALIDDATA; |
ba0c8981 | 2768 | } else if (last_pic_droppable != s->droppable) { |
1e26a48f RB |
2769 | av_log(s->avctx, AV_LOG_ERROR, |
2770 | "Cannot combine reference and non-reference fields in the same frame\n"); | |
2771 | av_log_ask_for_sample(s->avctx, NULL); | |
2772 | s->picture_structure = last_pic_structure; | |
ba0c8981 | 2773 | s->droppable = last_pic_droppable; |
1e26a48f RB |
2774 | return AVERROR_INVALIDDATA; |
2775 | } | |
2776 | ||
2777 | /* Take ownership of this buffer. Note that if another thread owned | |
2778 | * the first field of this buffer, we're not operating on that pointer, | |
2779 | * so the original thread is still responsible for reporting progress | |
2780 | * on that first field (or if that was us, we just did that above). | |
2781 | * By taking ownership, we assign responsibility to ourselves to | |
2782 | * report progress on the second field. */ | |
2783 | s0->current_picture_ptr->owner2 = s0; | |
2784 | } | |
2785 | } | |
2786 | } | |
2787 | ||
e5d40372 DB |
2788 | while (h->frame_num != h->prev_frame_num && |
2789 | h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) { | |
4dece8c7 | 2790 | Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL; |
e5d40372 DB |
2791 | av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", |
2792 | h->frame_num, h->prev_frame_num); | |
903d58f6 | 2793 | if (ff_h264_frame_start(h) < 0) |
66e6038c | 2794 | return -1; |
26b86e47 | 2795 | h->prev_frame_num++; |
e5d40372 DB |
2796 | h->prev_frame_num %= 1 << h->sps.log2_max_frame_num; |
2797 | s->current_picture_ptr->frame_num = h->prev_frame_num; | |
47c0ac96 DB |
2798 | ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0); |
2799 | ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 1); | |
3d542120 | 2800 | ff_generate_sliding_window_mmcos(h); |
12fe7594 | 2801 | if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 && |
9abc9873 | 2802 | (s->avctx->err_recognition & AV_EF_EXPLODE)) |
12fe7594 | 2803 | return AVERROR_INVALIDDATA; |
e2983d6e JGG |
2804 | /* Error concealment: if a ref is missing, copy the previous ref in its place. |
2805 | * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions | |
2806 | * about there being no actual duplicates. | |
2807 | * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're | |
da9cea77 | 2808 | * concealing a lost frame, this probably isn't noticeable by comparison, but it should |
e2983d6e | 2809 | * be fixed. */ |
4dece8c7 JGG |
2810 | if (h->short_ref_count) { |
2811 | if (prev) { | |
657ccb5a | 2812 | av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize, |
e5d40372 DB |
2813 | (const uint8_t **)prev->f.data, prev->f.linesize, |
2814 | s->avctx->pix_fmt, s->mb_width * 16, s->mb_height * 16); | |
2815 | h->short_ref[0]->poc = prev->poc + 2; | |
4dece8c7 JGG |
2816 | } |
2817 | h->short_ref[0]->frame_num = h->prev_frame_num; | |
2818 | } | |
26b86e47 MN |
2819 | } |
2820 | ||
1e26a48f RB |
2821 | /* See if we have a decoded first field looking for a pair... |
2822 | * We're using that to see whether to continue decoding in that | |
2823 | * frame, or to allocate a new one. */ | |
12d96de3 JD |
2824 | if (s0->first_field) { |
2825 | assert(s0->current_picture_ptr); | |
657ccb5a | 2826 | assert(s0->current_picture_ptr->f.data[0]); |
95a06eb4 | 2827 | assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF); |
12d96de3 JD |
2828 | |
2829 | /* figure out if we have a complementary field pair */ | |
2830 | if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { | |
e5d40372 DB |
2831 | /* Previous field is unmatched. Don't display it, but let it |
2832 | * remain for reference if marked as such. */ | |
12d96de3 | 2833 | s0->current_picture_ptr = NULL; |
e5d40372 | 2834 | s0->first_field = FIELD_PICTURE; |
12d96de3 | 2835 | } else { |
1e26a48f RB |
2836 | if (s0->current_picture_ptr->frame_num != h->frame_num) { |
2837 | /* This and the previous field had different frame_nums. | |
2838 | * Consider this field first in pair. Throw away previous | |
2839 | * one except for reference purposes. */ | |
e5d40372 | 2840 | s0->first_field = 1; |
12d96de3 | 2841 | s0->current_picture_ptr = NULL; |
12d96de3 JD |
2842 | } else { |
2843 | /* Second field in complementary pair */ | |
2844 | s0->first_field = 0; | |
2845 | } | |
2846 | } | |
12d96de3 JD |
2847 | } else { |
2848 | /* Frame or first field in a potentially complementary pair */ | |
12d96de3 JD |
2849 | s0->first_field = FIELD_PICTURE; |
2850 | } | |
2851 | ||
e5d40372 | 2852 | if (!FIELD_PICTURE || s0->first_field) { |
6a9c8594 AS |
2853 | if (ff_h264_frame_start(h) < 0) { |
2854 | s0->first_field = 0; | |
2855 | return -1; | |
2856 | } | |
2857 | } else { | |
2858 | ff_release_unused_pictures(s, 0); | |
12d96de3 | 2859 | } |
2ddcf84b | 2860 | } |
f1d8763a JG |
2861 | if (h != h0 && (ret = clone_slice(h, h0)) < 0) |
2862 | return ret; | |
2ddcf84b | 2863 | |
e5d40372 | 2864 | s->current_picture_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup |
2ddcf84b | 2865 | |
88e7a4d1 | 2866 | assert(s->mb_num == s->mb_width * s->mb_height); |
e5d40372 DB |
2867 | if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num || |
2868 | first_mb_in_slice >= s->mb_num) { | |
88e7a4d1 | 2869 | av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); |
6b53b87e MN |
2870 | return -1; |
2871 | } | |
e5d40372 | 2872 | s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; |
f3e53d9f JD |
2873 | s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE; |
2874 | if (s->picture_structure == PICT_BOTTOM_FIELD) | |
2875 | s->resync_mb_y = s->mb_y = s->mb_y + 1; | |
88e7a4d1 | 2876 | assert(s->mb_y < s->mb_height); |
115329f1 | 2877 | |
e5d40372 DB |
2878 | if (s->picture_structure == PICT_FRAME) { |
2879 | h->curr_pic_num = h->frame_num; | |
2880 | h->max_pic_num = 1 << h->sps.log2_max_frame_num; | |
2881 | } else { | |
2882 | h->curr_pic_num = 2 * h->frame_num + 1; | |
2883 | h->max_pic_num = 1 << (h->sps.log2_max_frame_num + 1); | |
0da71265 | 2884 | } |
115329f1 | 2885 | |
e5d40372 | 2886 | if (h->nal_unit_type == NAL_IDR_SLICE) |
1df1df0b | 2887 | get_ue_golomb(&s->gb); /* idr_pic_id */ |
115329f1 | 2888 | |
e5d40372 DB |
2889 | if (h->sps.poc_type == 0) { |
2890 | h->poc_lsb = get_bits(&s->gb, h->sps.log2_max_poc_lsb); | |
115329f1 | 2891 | |
e5d40372 DB |
2892 | if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME) |
2893 | h->delta_poc_bottom = get_se_golomb(&s->gb); | |
0da71265 | 2894 | } |
115329f1 | 2895 | |
e5d40372 DB |
2896 | if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) { |
2897 | h->delta_poc[0] = get_se_golomb(&s->gb); | |
115329f1 | 2898 | |
e5d40372 DB |
2899 | if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME) |
2900 | h->delta_poc[1] = get_se_golomb(&s->gb); | |
0da71265 | 2901 | } |
115329f1 | 2902 | |
0da71265 | 2903 | init_poc(h); |
115329f1 | 2904 | |
e5d40372 DB |
2905 | if (h->pps.redundant_pic_cnt_present) |
2906 | h->redundant_pic_count = get_ue_golomb(&s->gb); | |
0da71265 | 2907 | |
e5d40372 DB |
2908 | // set defaults, might be overridden a few lines later |
2909 | h->ref_count[0] = h->pps.ref_count[0]; | |
2910 | h->ref_count[1] = h->pps.ref_count[1]; | |
0da71265 | 2911 | |
e5d40372 | 2912 | if (h->slice_type_nos != AV_PICTURE_TYPE_I) { |
e5d40372 DB |
2913 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) |
2914 | h->direct_spatial_mv_pred = get_bits1(&s->gb); | |
2915 | num_ref_idx_active_override_flag = get_bits1(&s->gb); | |
115329f1 | 2916 | |
e5d40372 DB |
2917 | if (num_ref_idx_active_override_flag) { |
2918 | h->ref_count[0] = get_ue_golomb(&s->gb) + 1; | |
6e5cdf26 JG |
2919 | if (h->ref_count[0] < 1) |
2920 | return AVERROR_INVALIDDATA; | |
2921 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) { | |
e5d40372 | 2922 | h->ref_count[1] = get_ue_golomb(&s->gb) + 1; |
6e5cdf26 JG |
2923 | if (h->ref_count[1] < 1) |
2924 | return AVERROR_INVALIDDATA; | |
2925 | } | |
e0febda2 | 2926 | } |
0da71265 | 2927 | |
e5d40372 DB |
2928 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) |
2929 | h->list_count = 2; | |
187696fa | 2930 | else |
e5d40372 DB |
2931 | h->list_count = 1; |
2932 | } else | |
2933 | h->list_count = 0; | |
0da71265 | 2934 | |
60b6b8c0 JG |
2935 | max_refs = s->picture_structure == PICT_FRAME ? 16 : 32; |
2936 | ||
2937 | if (h->ref_count[0] > max_refs || h->ref_count[1] > max_refs) { | |
2938 | av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); | |
2939 | h->ref_count[0] = h->ref_count[1] = 1; | |
2940 | return AVERROR_INVALIDDATA; | |
2941 | } | |
2942 | ||
e5d40372 | 2943 | if (!default_ref_list_done) |
ea6f00c4 | 2944 | ff_h264_fill_default_ref_list(h); |
0da71265 | 2945 | |
e5d40372 DB |
2946 | if (h->slice_type_nos != AV_PICTURE_TYPE_I && |
2947 | ff_h264_decode_ref_pic_list_reordering(h) < 0) { | |
2948 | h->ref_count[1] = h->ref_count[0] = 0; | |
806bb93f | 2949 | return -1; |
4c7a232f | 2950 | } |
0da71265 | 2951 | |
e5d40372 DB |
2952 | if (h->slice_type_nos != AV_PICTURE_TYPE_I) { |
2953 | s->last_picture_ptr = &h->ref_list[0][0]; | |
d5e83122 | 2954 | s->last_picture_ptr->owner2 = s; |
8d2fc163 | 2955 | ff_copy_picture(&s->last_picture, s->last_picture_ptr); |
07dff5c7 | 2956 | } |
e5d40372 DB |
2957 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) { |
2958 | s->next_picture_ptr = &h->ref_list[1][0]; | |
d5e83122 | 2959 | s->next_picture_ptr->owner2 = s; |
8d2fc163 | 2960 | ff_copy_picture(&s->next_picture, s->next_picture_ptr); |
07dff5c7 MN |
2961 | } |
2962 | ||
e5d40372 DB |
2963 | if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) || |
2964 | (h->pps.weighted_bipred_idc == 1 && | |
2965 | h->slice_type_nos == AV_PICTURE_TYPE_B)) | |
0da71265 | 2966 | pred_weight_table(h); |
e5d40372 DB |
2967 | else if (h->pps.weighted_bipred_idc == 2 && |
2968 | h->slice_type_nos == AV_PICTURE_TYPE_B) { | |
1052b76f | 2969 | implicit_weight_table(h, -1); |
e5d40372 | 2970 | } else { |
9f2d1b4f | 2971 | h->use_weight = 0; |
cb99c652 GB |
2972 | for (i = 0; i < 2; i++) { |
2973 | h->luma_weight_flag[i] = 0; | |
2974 | h->chroma_weight_flag[i] = 0; | |
2975 | } | |
2976 | } | |
115329f1 | 2977 | |
e5d40372 DB |
2978 | if (h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 && |
2979 | (s->avctx->err_recognition & AV_EF_EXPLODE)) | |
12fe7594 | 2980 | return AVERROR_INVALIDDATA; |
0da71265 | 2981 | |
e5d40372 | 2982 | if (FRAME_MBAFF) { |
ea6f00c4 | 2983 | ff_h264_fill_mbaff_ref_list(h); |
5d18eaad | 2984 | |
e5d40372 | 2985 | if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) { |
1052b76f MN |
2986 | implicit_weight_table(h, 0); |
2987 | implicit_weight_table(h, 1); | |
2988 | } | |
2989 | } | |
2990 | ||
e5d40372 | 2991 | if (h->slice_type_nos == AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred) |
943f69a6 MN |
2992 | ff_h264_direct_dist_scale_factor(h); |
2993 | ff_h264_direct_ref_list_init(h); | |
8f56e219 | 2994 | |
e5d40372 | 2995 | if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) { |
9963b332 | 2996 | tmp = get_ue_golomb_31(&s->gb); |
e5d40372 | 2997 | if (tmp > 2) { |
88e7a4d1 MN |
2998 | av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); |
2999 | return -1; | |
3000 | } | |
e5d40372 | 3001 | h->cabac_init_idc = tmp; |
88e7a4d1 | 3002 | } |
e5017ab8 LA |
3003 | |
3004 | h->last_qscale_diff = 0; | |
88e7a4d1 | 3005 | tmp = h->pps.init_qp + get_se_golomb(&s->gb); |
e5d40372 | 3006 | if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) { |
88e7a4d1 | 3007 | av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); |
3ebc7e04 MN |
3008 | return -1; |
3009 | } | |
e5d40372 | 3010 | s->qscale = tmp; |
4691a77d AÖ |
3011 | h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); |
3012 | h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); | |
e5d40372 DB |
3013 | // FIXME qscale / qp ... stuff |
3014 | if (h->slice_type == AV_PICTURE_TYPE_SP) | |
1df1df0b | 3015 | get_bits1(&s->gb); /* sp_for_switch_flag */ |
e5d40372 DB |
3016 | if (h->slice_type == AV_PICTURE_TYPE_SP || |
3017 | h->slice_type == AV_PICTURE_TYPE_SI) | |
1df1df0b | 3018 | get_se_golomb(&s->gb); /* slice_qs_delta */ |
0da71265 | 3019 | |
e5d40372 | 3020 | h->deblocking_filter = 1; |
0c32e19d | 3021 | h->slice_alpha_c0_offset = 52; |
e5d40372 DB |
3022 | h->slice_beta_offset = 52; |
3023 | if (h->pps.deblocking_filter_parameters_present) { | |
3024 | tmp = get_ue_golomb_31(&s->gb); | |
3025 | if (tmp > 2) { | |
3026 | av_log(s->avctx, AV_LOG_ERROR, | |
3027 | "deblocking_filter_idc %u out of range\n", tmp); | |
88e7a4d1 MN |
3028 | return -1; |
3029 | } | |
e5d40372 DB |
3030 | h->deblocking_filter = tmp; |
3031 | if (h->deblocking_filter < 2) | |
3032 | h->deblocking_filter ^= 1; // 1<->0 | |
53c05b1e | 3033 | |
e5d40372 | 3034 | if (h->deblocking_filter) { |
0c32e19d MN |
3035 | h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1; |
3036 | h->slice_beta_offset += get_se_golomb(&s->gb) << 1; | |
e5d40372 DB |
3037 | if (h->slice_alpha_c0_offset > 104U || |
3038 | h->slice_beta_offset > 104U) { | |
3039 | av_log(s->avctx, AV_LOG_ERROR, | |
3040 | "deblocking filter parameters %d %d out of range\n", | |
3041 | h->slice_alpha_c0_offset, h->slice_beta_offset); | |
0c32e19d MN |
3042 | return -1; |
3043 | } | |
0da71265 | 3044 | } |
980a82b7 | 3045 | } |
afebe2f7 | 3046 | |
e5d40372 DB |
3047 | if (s->avctx->skip_loop_filter >= AVDISCARD_ALL || |
3048 | (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && | |
3049 | h->slice_type_nos != AV_PICTURE_TYPE_I) || | |
3050 | (s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && | |
3051 | h->slice_type_nos == AV_PICTURE_TYPE_B) || | |
3052 | (s->avctx->skip_loop_filter >= AVDISCARD_NONREF && | |
3053 | h->nal_ref_idc == 0)) | |
3054 | h->deblocking_filter = 0; | |
61858a76 | 3055 | |
e5d40372 DB |
3056 | if (h->deblocking_filter == 1 && h0->max_contexts > 1) { |
3057 | if (s->avctx->flags2 & CODEC_FLAG2_FAST) { | |
ec970c21 | 3058 | /* Cheat slightly for speed: |
e5d40372 | 3059 | * Do not bother to deblock across slices. */ |
ec970c21 AÖ |
3060 | h->deblocking_filter = 2; |
3061 | } else { | |
7ae94d52 | 3062 | h0->max_contexts = 1; |
e5d40372 DB |
3063 | if (!h0->single_decode_warning) { |
3064 | av_log(s->avctx, AV_LOG_INFO, | |
3065 | "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); | |
7ae94d52 AÖ |
3066 | h0->single_decode_warning = 1; |
3067 | } | |
33aec3f4 | 3068 | if (h != h0) { |
e5d40372 DB |
3069 | av_log(h->s.avctx, AV_LOG_ERROR, |
3070 | "Deblocking switched inside frame.\n"); | |
33aec3f4 MN |
3071 | return 1; |
3072 | } | |
ec970c21 | 3073 | } |
afebe2f7 | 3074 | } |
e5d40372 DB |
3075 | h->qp_thresh = 15 + 52 - |
3076 | FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - | |
3077 | FFMAX3(0, | |
3078 | h->pps.chroma_qp_index_offset[0], | |
3079 | h->pps.chroma_qp_index_offset[1]) + | |
3080 | 6 * (h->sps.bit_depth_luma - 8); | |
0da71265 | 3081 | |
afebe2f7 AÖ |
3082 | h0->last_slice_type = slice_type; |
3083 | h->slice_num = ++h0->current_slice; | |
e5d40372 DB |
3084 | if (h->slice_num >= MAX_SLICES) { |
3085 | av_log(s->avctx, AV_LOG_ERROR, | |
3086 | "Too many slices, increase MAX_SLICES and recompile\n"); | |
b735aeea | 3087 | } |
5175b937 | 3088 | |
e5d40372 | 3089 | for (j = 0; j < 2; j++) { |
6d7e6b26 | 3090 | int id_list[16]; |
e5d40372 DB |
3091 | int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j]; |
3092 | for (i = 0; i < 16; i++) { | |
3093 | id_list[i] = 60; | |
657ccb5a | 3094 | if (h->ref_list[j][i].f.data[0]) { |
6d7e6b26 | 3095 | int k; |
657ccb5a | 3096 | uint8_t *base = h->ref_list[j][i].f.base[0]; |
e5d40372 | 3097 | for (k = 0; k < h->short_ref_count; k++) |
657ccb5a | 3098 | if (h->short_ref[k]->f.base[0] == base) { |
e5d40372 | 3099 | id_list[i] = k; |
6d7e6b26 MN |
3100 | break; |
3101 | } | |
e5d40372 | 3102 | for (k = 0; k < h->long_ref_count; k++) |
657ccb5a | 3103 | if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) { |
e5d40372 | 3104 | id_list[i] = h->short_ref_count + k; |
6d7e6b26 MN |
3105 | break; |
3106 | } | |
3107 | } | |
3108 | } | |
3109 | ||
e5d40372 DB |
3110 | ref2frm[0] = |
3111 | ref2frm[1] = -1; | |
3112 | for (i = 0; i < 16; i++) | |
3113 | ref2frm[i + 2] = 4 * id_list[i] + | |
3114 | (h->ref_list[j][i].f.reference & 3); | |
3115 | ref2frm[18 + 0] = | |
3116 | ref2frm[18 + 1] = -1; | |
3117 | for (i = 16; i < 48; i++) | |
3118 | ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] + | |
3119 | (h->ref_list[j][i].f.reference & 3); | |
3120 | } | |
3121 | ||
3122 | // FIXME: fix draw_edges + PAFF + frame threads | |
3123 | h->emu_edge_width = (s->flags & CODEC_FLAG_EMU_EDGE || | |
3124 | (!h->sps.frame_mbs_only_flag && | |
3125 | s->avctx->active_thread_type)) | |
3126 | ? 0 : 16; | |
3127 | h->emu_edge_height = (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; | |
3128 | ||
3129 | if (s->avctx->debug & FF_DEBUG_PICT_INFO) { | |
3130 | av_log(h->s.avctx, AV_LOG_DEBUG, | |
3131 | "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", | |
6867a90b | 3132 | h->slice_num, |
e5d40372 | 3133 | (s->picture_structure == PICT_FRAME ? "F" : s->picture_structure == PICT_TOP_FIELD ? "T" : "B"), |
115329f1 | 3134 | first_mb_in_slice, |
e5d40372 DB |
3135 | av_get_picture_type_char(h->slice_type), |
3136 | h->slice_type_fixed ? " fix" : "", | |
3137 | h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", | |
0da71265 | 3138 | pps_id, h->frame_num, |
e5d40372 DB |
3139 | s->current_picture_ptr->field_poc[0], |
3140 | s->current_picture_ptr->field_poc[1], | |
0da71265 MN |
3141 | h->ref_count[0], h->ref_count[1], |
3142 | s->qscale, | |
e5d40372 DB |
3143 | h->deblocking_filter, |
3144 | h->slice_alpha_c0_offset / 2 - 26, h->slice_beta_offset / 2 - 26, | |
9f2d1b4f | 3145 | h->use_weight, |
e5d40372 DB |
3146 | h->use_weight == 1 && h->use_weight_chroma ? "c" : "", |
3147 | h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_ |