Commit | Line | Data |
---|---|---|
0da71265 | 1 | /* |
ff3d4310 | 2 | * H.26L/H.264/AVC/JVT/14496-10/... decoder |
0da71265 MN |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 | * | |
2912e87a | 5 | * This file is part of Libav. |
b78e7197 | 6 | * |
2912e87a | 7 | * Libav is free software; you can redistribute it and/or |
0da71265 MN |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either | |
b78e7197 | 10 | * version 2.1 of the License, or (at your option) any later version. |
0da71265 | 11 | * |
2912e87a | 12 | * Libav is distributed in the hope that it will be useful, |
0da71265 MN |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
2912e87a | 18 | * License along with Libav; if not, write to the Free Software |
5509bffa | 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0da71265 | 20 | */ |
115329f1 | 21 | |
0da71265 | 22 | /** |
ba87f080 | 23 | * @file |
0da71265 MN |
24 | * H.264 / AVC / MPEG4 part10 codec. |
25 | * @author Michael Niedermayer <michaelni@gmx.at> | |
26 | */ | |
27 | ||
16c22122 | 28 | #include "libavutil/avassert.h" |
737eb597 | 29 | #include "libavutil/imgutils.h" |
5b10ef72 | 30 | #include "libavutil/stereo3d.h" |
40e5d31b | 31 | #include "internal.h" |
55b9ef18 DB |
32 | #include "cabac.h" |
33 | #include "cabac_functions.h" | |
0da71265 | 34 | #include "dsputil.h" |
5f401b7b | 35 | #include "error_resilience.h" |
0da71265 MN |
36 | #include "avcodec.h" |
37 | #include "mpegvideo.h" | |
26b4fe82 | 38 | #include "h264.h" |
0da71265 | 39 | #include "h264data.h" |
79dad2a9 | 40 | #include "h264chroma.h" |
188d3c51 | 41 | #include "h264_mvpred.h" |
0da71265 | 42 | #include "golomb.h" |
199436b9 | 43 | #include "mathops.h" |
626464fb | 44 | #include "rectangle.h" |
75d5156a | 45 | #include "svq3.h" |
6a9c8594 | 46 | #include "thread.h" |
0da71265 | 47 | |
0da71265 MN |
48 | #include <assert.h> |
49 | ||
/* Four per-macroblock sizes; presumably indexed by chroma format
 * (TODO confirm against the users of this table). */
const uint16_t ff_h264_mb_sizes[4] = {
    256,
    384,
    512,
    768,
};
51 | ||
e5d40372 DB |
52 | static const uint8_t rem6[QP_MAX_NUM + 1] = { |
53 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, | |
54 | 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, | |
55 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, | |
acd8d10f PI |
56 | }; |
57 | ||
e5d40372 DB |
58 | static const uint8_t div6[QP_MAX_NUM + 1] = { |
59 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, | |
60 | 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, | |
61 | 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, | |
acd8d10f PI |
62 | }; |
63 | ||
/* Scan order for 4x4 blocks in field mode; entries are written as
 * column + row * 4. */
static const uint8_t field_scan[16] = {
    0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
    0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
    2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
    3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
};
70 | ||
/* Scan order for 8x8 blocks in field mode; entries are written as
 * column + row * 8 and together form a permutation of 0..63. */
static const uint8_t field_scan8x8[64] = {
    0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8,
    1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8,
    2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8,
    0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8,
    2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8,
    2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8,
    2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8,
    3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8,
    3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8,
    4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8,
    4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8,
    5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8,
    5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8,
    7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8,
    6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8,
    7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
};
89 | ||
/* CAVLC variant of the 8x8 field scan.
 * field_scan8x8_cavlc[i] == field_scan8x8[4 * (i % 16) + i / 16], i.e. each
 * run of 16 entries takes every fourth position of field_scan8x8. */
static const uint8_t field_scan8x8_cavlc[64] = {
    /* entries 0..15 */
    0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8,
    2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8,
    3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8,
    5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8,
    /* entries 16..31 */
    0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8,
    1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8,
    3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8,
    5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8,
    /* entries 32..47 */
    0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8,
    1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8,
    3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8,
    5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8,
    /* entries 48..63 */
    1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8,
    1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8,
    3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8,
    6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8,
};
108 | ||
// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[4 * (i % 16) + i / 16]
// (equivalently zigzag_scan8x8[j] = zigzag_scan8x8_cavlc[j / 4 + 16 * (j % 4)];
//  zigzag_scan8x8 is defined outside this file -- verify against it)
static const uint8_t zigzag_scan8x8_cavlc[64] = {
    /* entries 0..15 */
    0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
    4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
    3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
    2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8,
    /* entries 16..31 */
    1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8,
    3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8,
    2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8,
    3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8,
    /* entries 32..47 */
    0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8,
    2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8,
    1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8,
    4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8,
    /* entries 48..63 */
    0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8,
    1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
    0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
    5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
};
128 | ||
/* Base dequantisation values for 4x4 blocks: six rows of three values
 * (presumably one row per qp % 6 -- confirm against the table builder). */
static const uint8_t dequant4_coeff_init[6][3] = {
    { 10, 13, 16 },     /* row 0 */
    { 11, 14, 18 },     /* row 1 */
    { 13, 16, 20 },     /* row 2 */
    { 14, 18, 23 },     /* row 3 */
    { 16, 20, 25 },     /* row 4 */
    { 18, 23, 29 },     /* row 5 */
};
137 | ||
/* 4x4 pattern of column indices (0..5), presumably selecting entries of a
 * dequant8_coeff_init row -- confirm against the table builder. */
static const uint8_t dequant8_coeff_init_scan[16] = {
    0, 3, 4, 3,
    3, 1, 5, 1,
    4, 5, 2, 5,
    3, 1, 5, 1,
};
141 | ||
/* Base dequantisation values for 8x8 blocks: six rows of six values
 * (presumably one row per qp % 6 -- confirm against the table builder). */
static const uint8_t dequant8_coeff_init[6][6] = {
    { 20, 18, 32, 19, 25, 24 },     /* row 0 */
    { 22, 19, 35, 21, 28, 26 },     /* row 1 */
    { 26, 23, 42, 24, 33, 31 },     /* row 2 */
    { 28, 25, 45, 26, 35, 33 },     /* row 3 */
    { 32, 28, 51, 30, 40, 38 },     /* row 4 */
    { 36, 32, 58, 34, 46, 43 },     /* row 5 */
};
150 | ||
8d061989 RB |
151 | static const enum AVPixelFormat h264_hwaccel_pixfmt_list_420[] = { |
152 | #if CONFIG_H264_DXVA2_HWACCEL | |
153 | AV_PIX_FMT_DXVA2_VLD, | |
154 | #endif | |
155 | #if CONFIG_H264_VAAPI_HWACCEL | |
156 | AV_PIX_FMT_VAAPI_VLD, | |
157 | #endif | |
158 | #if CONFIG_H264_VDA_HWACCEL | |
159 | AV_PIX_FMT_VDA_VLD, | |
160 | #endif | |
161 | #if CONFIG_H264_VDPAU_HWACCEL | |
162 | AV_PIX_FMT_VDPAU, | |
163 | #endif | |
164 | AV_PIX_FMT_YUV420P, | |
165 | AV_PIX_FMT_NONE | |
166 | }; | |
167 | ||
d65522e8 | 168 | static const enum AVPixelFormat h264_hwaccel_pixfmt_list_jpeg_420[] = { |
78bc4d69 | 169 | #if CONFIG_H264_DXVA2_HWACCEL |
716d413c | 170 | AV_PIX_FMT_DXVA2_VLD, |
78bc4d69 RDC |
171 | #endif |
172 | #if CONFIG_H264_VAAPI_HWACCEL | |
716d413c | 173 | AV_PIX_FMT_VAAPI_VLD, |
78bc4d69 RDC |
174 | #endif |
175 | #if CONFIG_H264_VDA_HWACCEL | |
716d413c | 176 | AV_PIX_FMT_VDA_VLD, |
78bc4d69 RDC |
177 | #endif |
178 | #if CONFIG_H264_VDPAU_HWACCEL | |
ec0e9200 | 179 | AV_PIX_FMT_VDPAU, |
78bc4d69 | 180 | #endif |
716d413c AK |
181 | AV_PIX_FMT_YUVJ420P, |
182 | AV_PIX_FMT_NONE | |
0435fb16 BC |
183 | }; |
184 | ||
54974c62 AK |
185 | static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, |
186 | int (*mv)[2][4][2], | |
187 | int mb_x, int mb_y, int mb_intra, int mb_skipped) | |
188 | { | |
16c22122 | 189 | H264Context *h = opaque; |
54974c62 | 190 | |
2c541554 AK |
191 | h->mb_x = mb_x; |
192 | h->mb_y = mb_y; | |
193 | h->mb_xy = mb_x + mb_y * h->mb_stride; | |
54974c62 AK |
194 | memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache)); |
195 | assert(ref >= 0); | |
196 | /* FIXME: It is possible albeit uncommon that slice references | |
197 | * differ between slices. We take the easy approach and ignore | |
198 | * it for now. If this turns out to have any relevance in | |
199 | * practice then correct remapping should be added. */ | |
200 | if (ref >= h->ref_count[0]) | |
201 | ref = 0; | |
759001c5 | 202 | fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy], |
54974c62 AK |
203 | 2, 2, 2, ref, 1); |
204 | fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1); | |
205 | fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, | |
2c541554 | 206 | pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4); |
7bece9b2 | 207 | assert(!FRAME_MBAFF(h)); |
54974c62 AK |
208 | ff_h264_hl_decode_mb(h); |
209 | } | |
210 | ||
2c541554 AK |
211 | void ff_h264_draw_horiz_band(H264Context *h, int y, int height) |
212 | { | |
64e43869 RB |
213 | AVCodecContext *avctx = h->avctx; |
214 | Picture *cur = &h->cur_pic; | |
215 | Picture *last = h->ref_list[0][0].f.data[0] ? &h->ref_list[0][0] : NULL; | |
216 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); | |
217 | int vshift = desc->log2_chroma_h; | |
218 | const int field_pic = h->picture_structure != PICT_FRAME; | |
219 | if (field_pic) { | |
220 | height <<= 1; | |
16c22122 | 221 | y <<= 1; |
64e43869 RB |
222 | } |
223 | ||
224 | height = FFMIN(height, avctx->height - y); | |
225 | ||
226 | if (field_pic && h->first_field && !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD)) | |
227 | return; | |
228 | ||
229 | if (avctx->draw_horiz_band) { | |
230 | AVFrame *src; | |
231 | int offset[AV_NUM_DATA_POINTERS]; | |
232 | int i; | |
233 | ||
234 | if (cur->f.pict_type == AV_PICTURE_TYPE_B || h->low_delay || | |
16c22122 | 235 | (avctx->slice_flags & SLICE_FLAG_CODED_ORDER)) |
64e43869 RB |
236 | src = &cur->f; |
237 | else if (last) | |
238 | src = &last->f; | |
239 | else | |
240 | return; | |
241 | ||
242 | offset[0] = y * src->linesize[0]; | |
243 | offset[1] = | |
244 | offset[2] = (y >> vshift) * src->linesize[1]; | |
245 | for (i = 3; i < AV_NUM_DATA_POINTERS; i++) | |
246 | offset[i] = 0; | |
247 | ||
248 | emms_c(); | |
249 | ||
250 | avctx->draw_horiz_band(avctx, src, offset, | |
251 | y, h->picture_structure, height); | |
252 | } | |
2c541554 AK |
253 | } |
254 | ||
759001c5 | 255 | static void unref_picture(H264Context *h, Picture *pic) |
2c541554 | 256 | { |
759001c5 | 257 | int off = offsetof(Picture, tf) + sizeof(pic->tf); |
2c541554 AK |
258 | int i; |
259 | ||
a553c6a3 | 260 | if (!pic->f.buf[0]) |
759001c5 AK |
261 | return; |
262 | ||
263 | ff_thread_release_buffer(h->avctx, &pic->tf); | |
264 | av_buffer_unref(&pic->hwaccel_priv_buf); | |
2c541554 | 265 | |
759001c5 AK |
266 | av_buffer_unref(&pic->qscale_table_buf); |
267 | av_buffer_unref(&pic->mb_type_buf); | |
2c541554 | 268 | for (i = 0; i < 2; i++) { |
759001c5 AK |
269 | av_buffer_unref(&pic->motion_val_buf[i]); |
270 | av_buffer_unref(&pic->ref_index_buf[i]); | |
2c541554 | 271 | } |
759001c5 AK |
272 | |
273 | memset((uint8_t*)pic + off, 0, sizeof(*pic) - off); | |
2c541554 AK |
274 | } |
275 | ||
276 | static void release_unused_pictures(H264Context *h, int remove_current) | |
277 | { | |
278 | int i; | |
279 | ||
280 | /* release non reference frames */ | |
759001c5 | 281 | for (i = 0; i < MAX_PICTURE_COUNT; i++) { |
a553c6a3 | 282 | if (h->DPB[i].f.buf[0] && !h->DPB[i].reference && |
2c541554 | 283 | (remove_current || &h->DPB[i] != h->cur_pic_ptr)) { |
759001c5 | 284 | unref_picture(h, &h->DPB[i]); |
2c541554 AK |
285 | } |
286 | } | |
287 | } | |
288 | ||
759001c5 AK |
289 | static int ref_picture(H264Context *h, Picture *dst, Picture *src) |
290 | { | |
291 | int ret, i; | |
292 | ||
293 | av_assert0(!dst->f.buf[0]); | |
294 | av_assert0(src->f.buf[0]); | |
295 | ||
296 | src->tf.f = &src->f; | |
297 | dst->tf.f = &dst->f; | |
298 | ret = ff_thread_ref_frame(&dst->tf, &src->tf); | |
299 | if (ret < 0) | |
300 | goto fail; | |
301 | ||
759001c5 AK |
302 | dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf); |
303 | dst->mb_type_buf = av_buffer_ref(src->mb_type_buf); | |
304 | if (!dst->qscale_table_buf || !dst->mb_type_buf) | |
305 | goto fail; | |
306 | dst->qscale_table = src->qscale_table; | |
307 | dst->mb_type = src->mb_type; | |
308 | ||
16c22122 | 309 | for (i = 0; i < 2; i++) { |
759001c5 AK |
310 | dst->motion_val_buf[i] = av_buffer_ref(src->motion_val_buf[i]); |
311 | dst->ref_index_buf[i] = av_buffer_ref(src->ref_index_buf[i]); | |
312 | if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i]) | |
313 | goto fail; | |
314 | dst->motion_val[i] = src->motion_val[i]; | |
315 | dst->ref_index[i] = src->ref_index[i]; | |
316 | } | |
317 | ||
318 | if (src->hwaccel_picture_private) { | |
319 | dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf); | |
320 | if (!dst->hwaccel_priv_buf) | |
321 | goto fail; | |
322 | dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; | |
323 | } | |
324 | ||
325 | for (i = 0; i < 2; i++) | |
326 | dst->field_poc[i] = src->field_poc[i]; | |
327 | ||
328 | memcpy(dst->ref_poc, src->ref_poc, sizeof(src->ref_poc)); | |
329 | memcpy(dst->ref_count, src->ref_count, sizeof(src->ref_count)); | |
330 | ||
16c22122 DB |
331 | dst->poc = src->poc; |
332 | dst->frame_num = src->frame_num; | |
333 | dst->mmco_reset = src->mmco_reset; | |
334 | dst->pic_id = src->pic_id; | |
335 | dst->long_ref = src->long_ref; | |
336 | dst->mbaff = src->mbaff; | |
337 | dst->field_picture = src->field_picture; | |
338 | dst->needs_realloc = src->needs_realloc; | |
339 | dst->reference = src->reference; | |
28096e0a | 340 | dst->recovered = src->recovered; |
759001c5 AK |
341 | |
342 | return 0; | |
343 | fail: | |
344 | unref_picture(h, dst); | |
345 | return ret; | |
346 | } | |
347 | ||
2c541554 AK |
348 | static int alloc_scratch_buffers(H264Context *h, int linesize) |
349 | { | |
350 | int alloc_size = FFALIGN(FFABS(linesize) + 32, 32); | |
351 | ||
352 | if (h->bipred_scratchpad) | |
353 | return 0; | |
354 | ||
355 | h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size); | |
356 | // edge emu needs blocksize + filter length - 1 | |
357 | // (= 21x21 for h264) | |
358 | h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21); | |
359 | h->me.scratchpad = av_mallocz(alloc_size * 2 * 16 * 2); | |
360 | ||
361 | if (!h->bipred_scratchpad || !h->edge_emu_buffer || !h->me.scratchpad) { | |
362 | av_freep(&h->bipred_scratchpad); | |
363 | av_freep(&h->edge_emu_buffer); | |
364 | av_freep(&h->me.scratchpad); | |
365 | return AVERROR(ENOMEM); | |
366 | } | |
367 | ||
368 | h->me.temp = h->me.scratchpad; | |
369 | ||
370 | return 0; | |
371 | } | |
372 | ||
759001c5 | 373 | static int init_table_pools(H264Context *h) |
2c541554 AK |
374 | { |
375 | const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1; | |
376 | const int mb_array_size = h->mb_stride * h->mb_height; | |
377 | const int b4_stride = h->mb_width * 4 + 1; | |
378 | const int b4_array_size = b4_stride * h->mb_height * 4; | |
759001c5 AK |
379 | |
380 | h->qscale_table_pool = av_buffer_pool_init(big_mb_num + h->mb_stride, | |
381 | av_buffer_allocz); | |
382 | h->mb_type_pool = av_buffer_pool_init((big_mb_num + h->mb_stride) * | |
383 | sizeof(uint32_t), av_buffer_allocz); | |
384 | h->motion_val_pool = av_buffer_pool_init(2 * (b4_array_size + 4) * | |
385 | sizeof(int16_t), av_buffer_allocz); | |
386 | h->ref_index_pool = av_buffer_pool_init(4 * mb_array_size, av_buffer_allocz); | |
387 | ||
388 | if (!h->qscale_table_pool || !h->mb_type_pool || !h->motion_val_pool || | |
389 | !h->ref_index_pool) { | |
390 | av_buffer_pool_uninit(&h->qscale_table_pool); | |
391 | av_buffer_pool_uninit(&h->mb_type_pool); | |
392 | av_buffer_pool_uninit(&h->motion_val_pool); | |
393 | av_buffer_pool_uninit(&h->ref_index_pool); | |
394 | return AVERROR(ENOMEM); | |
395 | } | |
396 | ||
397 | return 0; | |
398 | } | |
399 | ||
400 | static int alloc_picture(H264Context *h, Picture *pic) | |
401 | { | |
2c541554 AK |
402 | int i, ret = 0; |
403 | ||
404 | av_assert0(!pic->f.data[0]); | |
405 | ||
9c9ede44 HL |
406 | pic->tf.f = &pic->f; |
407 | ret = ff_thread_get_buffer(h->avctx, &pic->tf, pic->reference ? | |
408 | AV_GET_BUFFER_FLAG_REF : 0); | |
409 | if (ret < 0) | |
410 | goto fail; | |
411 | ||
412 | h->linesize = pic->f.linesize[0]; | |
413 | h->uvlinesize = pic->f.linesize[1]; | |
414 | ||
2c541554 AK |
415 | if (h->avctx->hwaccel) { |
416 | const AVHWAccel *hwaccel = h->avctx->hwaccel; | |
759001c5 | 417 | av_assert0(!pic->hwaccel_picture_private); |
2c541554 | 418 | if (hwaccel->priv_data_size) { |
759001c5 AK |
419 | pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->priv_data_size); |
420 | if (!pic->hwaccel_priv_buf) | |
2c541554 | 421 | return AVERROR(ENOMEM); |
759001c5 | 422 | pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data; |
2c541554 AK |
423 | } |
424 | } | |
2c541554 | 425 | |
759001c5 AK |
426 | if (!h->qscale_table_pool) { |
427 | ret = init_table_pools(h); | |
428 | if (ret < 0) | |
429 | goto fail; | |
430 | } | |
2c541554 | 431 | |
759001c5 AK |
432 | pic->qscale_table_buf = av_buffer_pool_get(h->qscale_table_pool); |
433 | pic->mb_type_buf = av_buffer_pool_get(h->mb_type_pool); | |
434 | if (!pic->qscale_table_buf || !pic->mb_type_buf) | |
435 | goto fail; | |
2c541554 | 436 | |
759001c5 AK |
437 | pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1; |
438 | pic->qscale_table = pic->qscale_table_buf->data + 2 * h->mb_stride + 1; | |
439 | ||
440 | for (i = 0; i < 2; i++) { | |
441 | pic->motion_val_buf[i] = av_buffer_pool_get(h->motion_val_pool); | |
442 | pic->ref_index_buf[i] = av_buffer_pool_get(h->ref_index_pool); | |
443 | if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) | |
444 | goto fail; | |
2c541554 | 445 | |
759001c5 AK |
446 | pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4; |
447 | pic->ref_index[i] = pic->ref_index_buf[i]->data; | |
448 | } | |
2c541554 AK |
449 | |
450 | return 0; | |
451 | fail: | |
759001c5 | 452 | unref_picture(h, pic); |
2c541554 AK |
453 | return (ret < 0) ? ret : AVERROR(ENOMEM); |
454 | } | |
455 | ||
456 | static inline int pic_is_unused(H264Context *h, Picture *pic) | |
457 | { | |
a553c6a3 | 458 | if (!pic->f.buf[0]) |
2c541554 | 459 | return 1; |
759001c5 AK |
460 | if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF)) |
461 | return 1; | |
2c541554 AK |
462 | return 0; |
463 | } | |
464 | ||
465 | static int find_unused_picture(H264Context *h) | |
466 | { | |
467 | int i; | |
468 | ||
759001c5 | 469 | for (i = 0; i < MAX_PICTURE_COUNT; i++) { |
2c541554 AK |
470 | if (pic_is_unused(h, &h->DPB[i])) |
471 | break; | |
472 | } | |
759001c5 | 473 | if (i == MAX_PICTURE_COUNT) |
2c541554 AK |
474 | return AVERROR_INVALIDDATA; |
475 | ||
476 | if (h->DPB[i].needs_realloc) { | |
477 | h->DPB[i].needs_realloc = 0; | |
759001c5 | 478 | unref_picture(h, &h->DPB[i]); |
2c541554 AK |
479 | } |
480 | ||
481 | return i; | |
482 | } | |
483 | ||
0da71265 | 484 | /** |
58c42af7 DB |
485 | * Check if the top & left blocks are available if needed and |
486 | * change the dc mode so it only uses the available blocks. | |
0da71265 | 487 | */ |
e5d40372 DB |
488 | int ff_h264_check_intra4x4_pred_mode(H264Context *h) |
489 | { | |
e5d40372 DB |
490 | static const int8_t top[12] = { |
491 | -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0 | |
492 | }; | |
493 | static const int8_t left[12] = { | |
494 | 0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED | |
495 | }; | |
2bedc0e8 MN |
496 | int i; |
497 | ||
e5d40372 DB |
498 | if (!(h->top_samples_available & 0x8000)) { |
499 | for (i = 0; i < 4; i++) { | |
500 | int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]]; | |
501 | if (status < 0) { | |
2c541554 | 502 | av_log(h->avctx, AV_LOG_ERROR, |
e5d40372 | 503 | "top block unavailable for requested intra4x4 mode %d at %d %d\n", |
2c541554 | 504 | status, h->mb_x, h->mb_y); |
b3dc260e | 505 | return AVERROR_INVALIDDATA; |
e5d40372 DB |
506 | } else if (status) { |
507 | h->intra4x4_pred_mode_cache[scan8[0] + i] = status; | |
2bedc0e8 MN |
508 | } |
509 | } | |
510 | } | |
511 | ||
e5d40372 DB |
512 | if ((h->left_samples_available & 0x8888) != 0x8888) { |
513 | static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 }; | |
514 | for (i = 0; i < 4; i++) | |
515 | if (!(h->left_samples_available & mask[i])) { | |
516 | int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]]; | |
517 | if (status < 0) { | |
2c541554 | 518 | av_log(h->avctx, AV_LOG_ERROR, |
e5d40372 | 519 | "left block unavailable for requested intra4x4 mode %d at %d %d\n", |
2c541554 | 520 | status, h->mb_x, h->mb_y); |
b3dc260e | 521 | return AVERROR_INVALIDDATA; |
e5d40372 DB |
522 | } else if (status) { |
523 | h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status; | |
2bedc0e8 MN |
524 | } |
525 | } | |
2bedc0e8 MN |
526 | } |
527 | ||
528 | return 0; | |
e5d40372 | 529 | } // FIXME cleanup like ff_h264_check_intra_pred_mode |
2bedc0e8 MN |
530 | |
531 | /** | |
58c42af7 DB |
532 | * Check if the top & left blocks are available if needed and |
533 | * change the dc mode so it only uses the available blocks. | |
2bedc0e8 | 534 | */ |
e5d40372 DB |
535 | int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma) |
536 | { | |
e5d40372 DB |
537 | static const int8_t top[7] = { LEFT_DC_PRED8x8, 1, -1, -1 }; |
538 | static const int8_t left[7] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 }; | |
539 | ||
540 | if (mode > 6U) { | |
2c541554 | 541 | av_log(h->avctx, AV_LOG_ERROR, |
e5d40372 | 542 | "out of range intra chroma pred mode at %d %d\n", |
2c541554 | 543 | h->mb_x, h->mb_y); |
b3dc260e | 544 | return AVERROR_INVALIDDATA; |
5175b937 | 545 | } |
115329f1 | 546 | |
e5d40372 DB |
547 | if (!(h->top_samples_available & 0x8000)) { |
548 | mode = top[mode]; | |
549 | if (mode < 0) { | |
2c541554 | 550 | av_log(h->avctx, AV_LOG_ERROR, |
e5d40372 | 551 | "top block unavailable for requested intra mode at %d %d\n", |
2c541554 | 552 | h->mb_x, h->mb_y); |
b3dc260e | 553 | return AVERROR_INVALIDDATA; |
0da71265 MN |
554 | } |
555 | } | |
115329f1 | 556 | |
e5d40372 DB |
557 | if ((h->left_samples_available & 0x8080) != 0x8080) { |
558 | mode = left[mode]; | |
559 | if (is_chroma && (h->left_samples_available & 0x8080)) { | |
560 | // mad cow disease mode, aka MBAFF + constrained_intra_pred | |
561 | mode = ALZHEIMER_DC_L0T_PRED8x8 + | |
562 | (!(h->left_samples_available & 0x8000)) + | |
563 | 2 * (mode == DC_128_PRED8x8); | |
d1d10e91 | 564 | } |
e5d40372 | 565 | if (mode < 0) { |
2c541554 | 566 | av_log(h->avctx, AV_LOG_ERROR, |
e5d40372 | 567 | "left block unavailable for requested intra mode at %d %d\n", |
2c541554 | 568 | h->mb_x, h->mb_y); |
b3dc260e | 569 | return AVERROR_INVALIDDATA; |
115329f1 | 570 | } |
0da71265 MN |
571 | } |
572 | ||
573 | return mode; | |
574 | } | |
575 | ||
e5d40372 DB |
576 | const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, |
577 | int *dst_length, int *consumed, int length) | |
578 | { | |
0da71265 MN |
579 | int i, si, di; |
580 | uint8_t *dst; | |
24456882 | 581 | int bufidx; |
0da71265 | 582 | |
e5d40372 DB |
583 | // src[0]&0x80; // forbidden bit |
584 | h->nal_ref_idc = src[0] >> 5; | |
585 | h->nal_unit_type = src[0] & 0x1F; | |
0da71265 | 586 | |
e5d40372 DB |
587 | src++; |
588 | length--; | |
e08715d3 | 589 | |
58db34aa | 590 | #define STARTCODE_TEST \ |
16c22122 DB |
591 | if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \ |
592 | if (src[i + 2] != 3) { \ | |
593 | /* startcode, so we must be past the end */ \ | |
594 | length = i; \ | |
595 | } \ | |
596 | break; \ | |
597 | } | |
598 | ||
b250f9c6 | 599 | #if HAVE_FAST_UNALIGNED |
58db34aa | 600 | #define FIND_FIRST_ZERO \ |
16c22122 DB |
601 | if (i > 0 && !src[i]) \ |
602 | i--; \ | |
603 | while (src[i]) \ | |
604 | i++ | |
605 | ||
e5d40372 | 606 | #if HAVE_FAST_64BIT |
e5d40372 DB |
607 | for (i = 0; i + 1 < length; i += 9) { |
608 | if (!((~AV_RN64A(src + i) & | |
609 | (AV_RN64A(src + i) - 0x0100010001000101ULL)) & | |
610 | 0x8000800080008080ULL)) | |
58db34aa RB |
611 | continue; |
612 | FIND_FIRST_ZERO; | |
613 | STARTCODE_TEST; | |
614 | i -= 7; | |
615 | } | |
e5d40372 | 616 | #else |
e5d40372 DB |
617 | for (i = 0; i + 1 < length; i += 5) { |
618 | if (!((~AV_RN32A(src + i) & | |
619 | (AV_RN32A(src + i) - 0x01000101U)) & | |
620 | 0x80008080U)) | |
e08715d3 | 621 | continue; |
58db34aa RB |
622 | FIND_FIRST_ZERO; |
623 | STARTCODE_TEST; | |
624 | i -= 3; | |
625 | } | |
626 | #endif | |
e08715d3 | 627 | #else |
e5d40372 DB |
628 | for (i = 0; i + 1 < length; i += 2) { |
629 | if (src[i]) | |
630 | continue; | |
631 | if (i > 0 && src[i - 1] == 0) | |
632 | i--; | |
58db34aa | 633 | STARTCODE_TEST; |
0da71265 | 634 | } |
58db34aa | 635 | #endif |
0da71265 | 636 | |
e5d40372 DB |
637 | if (i >= length - 1) { // no escaped 0 |
638 | *dst_length = length; | |
639 | *consumed = length + 1; // +1 for the header | |
115329f1 | 640 | return src; |
0da71265 MN |
641 | } |
642 | ||
e5d40372 DB |
643 | // use second escape buffer for inter data |
644 | bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; | |
645 | av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], | |
646 | length + FF_INPUT_BUFFER_PADDING_SIZE); | |
647 | dst = h->rbsp_buffer[bufidx]; | |
0da71265 | 648 | |
e5d40372 | 649 | if (dst == NULL) |
ac658be5 | 650 | return NULL; |
ac658be5 | 651 | |
593af7cd | 652 | memcpy(dst, src, i); |
e5d40372 DB |
653 | si = di = i; |
654 | while (si + 2 < length) { | |
655 | // remove escapes (very rare 1:2^22) | |
656 | if (src[si + 2] > 3) { | |
657 | dst[di++] = src[si++]; | |
658 | dst[di++] = src[si++]; | |
659 | } else if (src[si] == 0 && src[si + 1] == 0) { | |
660 | if (src[si + 2] == 3) { // escape | |
661 | dst[di++] = 0; | |
662 | dst[di++] = 0; | |
663 | si += 3; | |
c8470cc1 | 664 | continue; |
e5d40372 | 665 | } else // next start code |
593af7cd | 666 | goto nsc; |
0da71265 MN |
667 | } |
668 | ||
e5d40372 | 669 | dst[di++] = src[si++]; |
0da71265 | 670 | } |
e5d40372 DB |
671 | while (si < length) |
672 | dst[di++] = src[si++]; | |
0da71265 | 673 | |
16c22122 | 674 | nsc: |
e5d40372 | 675 | memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE); |
d4369630 | 676 | |
e5d40372 DB |
677 | *dst_length = di; |
678 | *consumed = si + 1; // +1 for the header | |
679 | /* FIXME store exact number of bits in the getbitcontext | |
680 | * (it is needed for decoding) */ | |
0da71265 MN |
681 | return dst; |
682 | } | |
683 | ||
85297319 DEP |
684 | /** |
685 | * Identify the exact end of the bitstream | |
686 | * @return the length of the trailing, or 0 if damaged | |
687 | */ | |
b691fd7a | 688 | static int decode_rbsp_trailing(H264Context *h, const uint8_t *src) |
e5d40372 DB |
689 | { |
690 | int v = *src; | |
0da71265 MN |
691 | int r; |
692 | ||
2c541554 | 693 | tprintf(h->avctx, "rbsp trailing %X\n", v); |
0da71265 | 694 | |
e5d40372 DB |
695 | for (r = 1; r < 9; r++) { |
696 | if (v & 1) | |
697 | return r; | |
698 | v >>= 1; | |
0da71265 MN |
699 | } |
700 | return 0; | |
701 | } | |
702 | ||
e5d40372 DB |
703 | static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, |
704 | int height, int y_offset, int list) | |
705 | { | |
16c22122 | 706 | int raw_my = h->mv_cache[list][scan8[n]][1]; |
fb845ffd RB |
707 | int filter_height_up = (raw_my & 3) ? 2 : 0; |
708 | int filter_height_down = (raw_my & 3) ? 3 : 0; | |
16c22122 DB |
709 | int full_my = (raw_my >> 2) + y_offset; |
710 | int top = full_my - filter_height_up; | |
711 | int bottom = full_my + filter_height_down + height; | |
6a9c8594 AS |
712 | |
713 | return FFMAX(abs(top), bottom); | |
714 | } | |
715 | ||
e5d40372 DB |
716 | static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, |
717 | int height, int y_offset, int list0, | |
718 | int list1, int *nrefs) | |
719 | { | |
6a9c8594 AS |
720 | int my; |
721 | ||
da6be8fc | 722 | y_offset += 16 * (h->mb_y >> MB_FIELD(h)); |
6a9c8594 | 723 | |
e5d40372 DB |
724 | if (list0) { |
725 | int ref_n = h->ref_cache[0][scan8[n]]; | |
726 | Picture *ref = &h->ref_list[0][ref_n]; | |
6a9c8594 AS |
727 | |
728 | // Error resilience puts the current picture in the ref list. | |
729 | // Don't try to wait on these as it will cause a deadlock. | |
730 | // Fields can wait on each other, though. | |
16c22122 | 731 | if (ref->tf.progress->data != h->cur_pic.tf.progress->data || |
759001c5 | 732 | (ref->reference & 3) != h->picture_structure) { |
6a9c8594 | 733 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0); |
e5d40372 DB |
734 | if (refs[0][ref_n] < 0) |
735 | nrefs[0] += 1; | |
6a9c8594 AS |
736 | refs[0][ref_n] = FFMAX(refs[0][ref_n], my); |
737 | } | |
738 | } | |
739 | ||
e5d40372 DB |
740 | if (list1) { |
741 | int ref_n = h->ref_cache[1][scan8[n]]; | |
742 | Picture *ref = &h->ref_list[1][ref_n]; | |
6a9c8594 | 743 | |
759001c5 AK |
744 | if (ref->tf.progress->data != h->cur_pic.tf.progress->data || |
745 | (ref->reference & 3) != h->picture_structure) { | |
6a9c8594 | 746 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1); |
e5d40372 DB |
747 | if (refs[1][ref_n] < 0) |
748 | nrefs[1] += 1; | |
6a9c8594 AS |
749 | refs[1][ref_n] = FFMAX(refs[1][ref_n], my); |
750 | } | |
751 | } | |
752 | } | |
753 | ||
754 | /** | |
755 | * Wait until all reference frames are available for MC operations. | |
756 | * | |
757 | * @param h the H264 context | |
758 | */ | |
e5d40372 DB |
759 | static void await_references(H264Context *h) |
760 | { | |
e5d40372 | 761 | const int mb_xy = h->mb_xy; |
759001c5 | 762 | const int mb_type = h->cur_pic.mb_type[mb_xy]; |
6a9c8594 | 763 | int refs[2][48]; |
e5d40372 | 764 | int nrefs[2] = { 0 }; |
6a9c8594 AS |
765 | int ref, list; |
766 | ||
767 | memset(refs, -1, sizeof(refs)); | |
768 | ||
e5d40372 | 769 | if (IS_16X16(mb_type)) { |
6a9c8594 | 770 | get_lowest_part_y(h, refs, 0, 16, 0, |
e5d40372 DB |
771 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
772 | } else if (IS_16X8(mb_type)) { | |
6a9c8594 | 773 | get_lowest_part_y(h, refs, 0, 8, 0, |
e5d40372 | 774 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
6a9c8594 | 775 | get_lowest_part_y(h, refs, 8, 8, 8, |
e5d40372 DB |
776 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); |
777 | } else if (IS_8X16(mb_type)) { | |
6a9c8594 | 778 | get_lowest_part_y(h, refs, 0, 16, 0, |
e5d40372 | 779 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); |
6a9c8594 | 780 | get_lowest_part_y(h, refs, 4, 16, 0, |
e5d40372 DB |
781 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); |
782 | } else { | |
6a9c8594 AS |
783 | int i; |
784 | ||
785 | assert(IS_8X8(mb_type)); | |
786 | ||
e5d40372 DB |
787 | for (i = 0; i < 4; i++) { |
788 | const int sub_mb_type = h->sub_mb_type[i]; | |
789 | const int n = 4 * i; | |
790 | int y_offset = (i & 2) << 2; | |
791 | ||
792 | if (IS_SUB_8X8(sub_mb_type)) { | |
793 | get_lowest_part_y(h, refs, n, 8, y_offset, | |
794 | IS_DIR(sub_mb_type, 0, 0), | |
795 | IS_DIR(sub_mb_type, 0, 1), | |
796 | nrefs); | |
797 | } else if (IS_SUB_8X4(sub_mb_type)) { | |
798 | get_lowest_part_y(h, refs, n, 4, y_offset, | |
799 | IS_DIR(sub_mb_type, 0, 0), | |
800 | IS_DIR(sub_mb_type, 0, 1), | |
801 | nrefs); | |
802 | get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4, | |
803 | IS_DIR(sub_mb_type, 0, 0), | |
804 | IS_DIR(sub_mb_type, 0, 1), | |
805 | nrefs); | |
806 | } else if (IS_SUB_4X8(sub_mb_type)) { | |
807 | get_lowest_part_y(h, refs, n, 8, y_offset, | |
808 | IS_DIR(sub_mb_type, 0, 0), | |
809 | IS_DIR(sub_mb_type, 0, 1), | |
810 | nrefs); | |
811 | get_lowest_part_y(h, refs, n + 1, 8, y_offset, | |
812 | IS_DIR(sub_mb_type, 0, 0), | |
813 | IS_DIR(sub_mb_type, 0, 1), | |
814 | nrefs); | |
815 | } else { | |
6a9c8594 AS |
816 | int j; |
817 | assert(IS_SUB_4X4(sub_mb_type)); | |
e5d40372 DB |
818 | for (j = 0; j < 4; j++) { |
819 | int sub_y_offset = y_offset + 2 * (j & 2); | |
820 | get_lowest_part_y(h, refs, n + j, 4, sub_y_offset, | |
821 | IS_DIR(sub_mb_type, 0, 0), | |
822 | IS_DIR(sub_mb_type, 0, 1), | |
823 | nrefs); | |
6a9c8594 AS |
824 | } |
825 | } | |
826 | } | |
827 | } | |
828 | ||
e5d40372 DB |
829 | for (list = h->list_count - 1; list >= 0; list--) |
830 | for (ref = 0; ref < 48 && nrefs[list]; ref++) { | |
6a9c8594 | 831 | int row = refs[list][ref]; |
e5d40372 DB |
832 | if (row >= 0) { |
833 | Picture *ref_pic = &h->ref_list[list][ref]; | |
759001c5 | 834 | int ref_field = ref_pic->reference - 1; |
6a9c8594 | 835 | int ref_field_picture = ref_pic->field_picture; |
2c541554 | 836 | int pic_height = 16 * h->mb_height >> ref_field_picture; |
6a9c8594 | 837 | |
82313eaa | 838 | row <<= MB_MBAFF(h); |
6a9c8594 AS |
839 | nrefs[list]--; |
840 | ||
7fa00653 | 841 | if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields |
759001c5 | 842 | ff_thread_await_progress(&ref_pic->tf, |
e5d40372 DB |
843 | FFMIN((row >> 1) - !(row & 1), |
844 | pic_height - 1), | |
845 | 1); | |
759001c5 | 846 | ff_thread_await_progress(&ref_pic->tf, |
e5d40372 DB |
847 | FFMIN((row >> 1), pic_height - 1), |
848 | 0); | |
7fa00653 | 849 | } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame |
759001c5 | 850 | ff_thread_await_progress(&ref_pic->tf, |
e5d40372 DB |
851 | FFMIN(row * 2 + ref_field, |
852 | pic_height - 1), | |
853 | 0); | |
7fa00653 | 854 | } else if (FIELD_PICTURE(h)) { |
759001c5 | 855 | ff_thread_await_progress(&ref_pic->tf, |
e5d40372 DB |
856 | FFMIN(row, pic_height - 1), |
857 | ref_field); | |
858 | } else { | |
759001c5 | 859 | ff_thread_await_progress(&ref_pic->tf, |
e5d40372 DB |
860 | FFMIN(row, pic_height - 1), |
861 | 0); | |
6a9c8594 AS |
862 | } |
863 | } | |
864 | } | |
66c6b5e2 | 865 | } |
66c6b5e2 | 866 | |
e5d40372 DB |
867 | static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, |
868 | int n, int square, int height, | |
869 | int delta, int list, | |
870 | uint8_t *dest_y, uint8_t *dest_cb, | |
871 | uint8_t *dest_cr, | |
872 | int src_x_offset, int src_y_offset, | |
873 | qpel_mc_func *qpix_op, | |
874 | h264_chroma_mc_func chroma_op, | |
875 | int pixel_shift, int chroma_idc) | |
05fb63f5 | 876 | { |
e5d40372 DB |
877 | const int mx = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8; |
878 | int my = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8; | |
879 | const int luma_xy = (mx & 3) + ((my & 3) << 2); | |
93f30547 | 880 | ptrdiff_t offset = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize; |
e5d40372 DB |
881 | uint8_t *src_y = pic->f.data[0] + offset; |
882 | uint8_t *src_cb, *src_cr; | |
fae6fd5b RB |
883 | int extra_width = 0; |
884 | int extra_height = 0; | |
e5d40372 DB |
885 | int emu = 0; |
886 | const int full_mx = mx >> 2; | |
887 | const int full_my = my >> 2; | |
2c541554 | 888 | const int pic_width = 16 * h->mb_width; |
da6be8fc | 889 | const int pic_height = 16 * h->mb_height >> MB_FIELD(h); |
229d263c | 890 | int ysh; |
115329f1 | 891 | |
e5d40372 DB |
892 | if (mx & 7) |
893 | extra_width -= 3; | |
894 | if (my & 7) | |
895 | extra_height -= 3; | |
896 | ||
897 | if (full_mx < 0 - extra_width || | |
898 | full_my < 0 - extra_height || | |
899 | full_mx + 16 /*FIXME*/ > pic_width + extra_width || | |
900 | full_my + 16 /*FIXME*/ > pic_height + extra_height) { | |
2c541554 | 901 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, |
8c53d39e | 902 | src_y - (2 << pixel_shift) - 2 * h->mb_linesize, |
458446ac | 903 | h->mb_linesize, h->mb_linesize, |
8c53d39e RB |
904 | 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, |
905 | full_my - 2, pic_width, pic_height); | |
2c541554 | 906 | src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; |
e5d40372 DB |
907 | emu = 1; |
908 | } | |
909 | ||
910 | qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps? | |
911 | if (!square) | |
5d18eaad | 912 | qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); |
115329f1 | 913 | |
2c541554 | 914 | if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY) |
e5d40372 | 915 | return; |
7b442ad9 | 916 | |
e5d40372 | 917 | if (chroma_idc == 3 /* yuv444 */) { |
657ccb5a | 918 | src_cb = pic->f.data[1] + offset; |
e5d40372 | 919 | if (emu) { |
2c541554 | 920 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, |
8c53d39e | 921 | src_cb - (2 << pixel_shift) - 2 * h->mb_linesize, |
458446ac | 922 | h->mb_linesize, h->mb_linesize, |
8c53d39e RB |
923 | 16 + 5, 16 + 5 /*FIXME*/, |
924 | full_mx - 2, full_my - 2, | |
925 | pic_width, pic_height); | |
2c541554 | 926 | src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; |
c90b9442 | 927 | } |
e5d40372 DB |
928 | qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps? |
929 | if (!square) | |
c90b9442 | 930 | qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize); |
c90b9442 | 931 | |
657ccb5a | 932 | src_cr = pic->f.data[2] + offset; |
e5d40372 | 933 | if (emu) { |
2c541554 | 934 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, |
8c53d39e | 935 | src_cr - (2 << pixel_shift) - 2 * h->mb_linesize, |
458446ac | 936 | h->mb_linesize, h->mb_linesize, |
8c53d39e RB |
937 | 16 + 5, 16 + 5 /*FIXME*/, |
938 | full_mx - 2, full_my - 2, | |
939 | pic_width, pic_height); | |
2c541554 | 940 | src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; |
c90b9442 | 941 | } |
e5d40372 DB |
942 | qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps? |
943 | if (!square) | |
c90b9442 | 944 | qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize); |
c90b9442 JGG |
945 | return; |
946 | } | |
947 | ||
05fb63f5 | 948 | ysh = 3 - (chroma_idc == 2 /* yuv422 */); |
da6be8fc | 949 | if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(h)) { |
5d18eaad | 950 | // chroma offset when predicting from a field of opposite parity |
759001c5 | 951 | my += 2 * ((h->mb_y & 1) - (pic->reference - 1)); |
e5d40372 | 952 | emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1); |
5d18eaad | 953 | } |
229d263c | 954 | |
e5d40372 DB |
955 | src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + |
956 | (my >> ysh) * h->mb_uvlinesize; | |
957 | src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + | |
958 | (my >> ysh) * h->mb_uvlinesize; | |
5d18eaad | 959 | |
e5d40372 | 960 | if (emu) { |
458446ac RB |
961 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb, |
962 | h->mb_uvlinesize, h->mb_uvlinesize, | |
8c53d39e RB |
963 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), |
964 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
2c541554 | 965 | src_cb = h->edge_emu_buffer; |
0da71265 | 966 | } |
e5d40372 DB |
967 | chroma_op(dest_cb, src_cb, h->mb_uvlinesize, |
968 | height >> (chroma_idc == 1 /* yuv420 */), | |
969 | mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
0da71265 | 970 | |
e5d40372 | 971 | if (emu) { |
458446ac RB |
972 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr, |
973 | h->mb_uvlinesize, h->mb_uvlinesize, | |
8c53d39e RB |
974 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), |
975 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
2c541554 | 976 | src_cr = h->edge_emu_buffer; |
0da71265 | 977 | } |
05fb63f5 | 978 | chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), |
e5d40372 | 979 | mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); |
0da71265 MN |
980 | } |
981 | ||
e5d40372 DB |
982 | static av_always_inline void mc_part_std(H264Context *h, int n, int square, |
983 | int height, int delta, | |
984 | uint8_t *dest_y, uint8_t *dest_cb, | |
985 | uint8_t *dest_cr, | |
986 | int x_offset, int y_offset, | |
987 | qpel_mc_func *qpix_put, | |
988 | h264_chroma_mc_func chroma_put, | |
989 | qpel_mc_func *qpix_avg, | |
990 | h264_chroma_mc_func chroma_avg, | |
991 | int list0, int list1, | |
992 | int pixel_shift, int chroma_idc) | |
05fb63f5 | 993 | { |
e5d40372 DB |
994 | qpel_mc_func *qpix_op = qpix_put; |
995 | h264_chroma_mc_func chroma_op = chroma_put; | |
115329f1 | 996 | |
e5d40372 | 997 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
05fb63f5 | 998 | if (chroma_idc == 3 /* yuv444 */) { |
e5d40372 DB |
999 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
1000 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; | |
05fb63f5 | 1001 | } else if (chroma_idc == 2 /* yuv422 */) { |
e5d40372 DB |
1002 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
1003 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; | |
1004 | } else { /* yuv420 */ | |
1005 | dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; | |
1006 | dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; | |
c90b9442 | 1007 | } |
2c541554 | 1008 | x_offset += 8 * h->mb_x; |
da6be8fc | 1009 | y_offset += 8 * (h->mb_y >> MB_FIELD(h)); |
115329f1 | 1010 | |
e5d40372 DB |
1011 | if (list0) { |
1012 | Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]]; | |
c2d33742 | 1013 | mc_dir_part(h, ref, n, square, height, delta, 0, |
e5d40372 DB |
1014 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
1015 | qpix_op, chroma_op, pixel_shift, chroma_idc); | |
0da71265 | 1016 | |
e5d40372 DB |
1017 | qpix_op = qpix_avg; |
1018 | chroma_op = chroma_avg; | |
0da71265 MN |
1019 | } |
1020 | ||
e5d40372 DB |
1021 | if (list1) { |
1022 | Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]]; | |
c2d33742 | 1023 | mc_dir_part(h, ref, n, square, height, delta, 1, |
e5d40372 DB |
1024 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
1025 | qpix_op, chroma_op, pixel_shift, chroma_idc); | |
0da71265 MN |
1026 | } |
1027 | } | |
1028 | ||
e5d40372 DB |
1029 | static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, |
1030 | int height, int delta, | |
1031 | uint8_t *dest_y, uint8_t *dest_cb, | |
1032 | uint8_t *dest_cr, | |
1033 | int x_offset, int y_offset, | |
1034 | qpel_mc_func *qpix_put, | |
1035 | h264_chroma_mc_func chroma_put, | |
1036 | h264_weight_func luma_weight_op, | |
1037 | h264_weight_func chroma_weight_op, | |
1038 | h264_biweight_func luma_weight_avg, | |
1039 | h264_biweight_func chroma_weight_avg, | |
1040 | int list0, int list1, | |
1041 | int pixel_shift, int chroma_idc) | |
1042 | { | |
c2d33742 | 1043 | int chroma_height; |
9f2d1b4f | 1044 | |
e5d40372 | 1045 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
05fb63f5 | 1046 | if (chroma_idc == 3 /* yuv444 */) { |
e5d40372 | 1047 | chroma_height = height; |
c90b9442 | 1048 | chroma_weight_avg = luma_weight_avg; |
e5d40372 DB |
1049 | chroma_weight_op = luma_weight_op; |
1050 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; | |
1051 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; | |
05fb63f5 | 1052 | } else if (chroma_idc == 2 /* yuv422 */) { |
c2d33742 | 1053 | chroma_height = height; |
e5d40372 DB |
1054 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
1055 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; | |
1056 | } else { /* yuv420 */ | |
c2d33742 | 1057 | chroma_height = height >> 1; |
e5d40372 DB |
1058 | dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
1059 | dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; | |
c90b9442 | 1060 | } |
2c541554 | 1061 | x_offset += 8 * h->mb_x; |
da6be8fc | 1062 | y_offset += 8 * (h->mb_y >> MB_FIELD(h)); |
115329f1 | 1063 | |
e5d40372 | 1064 | if (list0 && list1) { |
9f2d1b4f LM |
1065 | /* don't optimize for luma-only case, since B-frames usually |
1066 | * use implicit weights => chroma too. */ | |
a394959b JG |
1067 | uint8_t *tmp_cb = h->bipred_scratchpad; |
1068 | uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift); | |
1069 | uint8_t *tmp_y = h->bipred_scratchpad + 16 * h->mb_uvlinesize; | |
e5d40372 DB |
1070 | int refn0 = h->ref_cache[0][scan8[n]]; |
1071 | int refn1 = h->ref_cache[1][scan8[n]]; | |
9f2d1b4f | 1072 | |
c2d33742 | 1073 | mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0, |
9f2d1b4f | 1074 | dest_y, dest_cb, dest_cr, |
05fb63f5 RB |
1075 | x_offset, y_offset, qpix_put, chroma_put, |
1076 | pixel_shift, chroma_idc); | |
c2d33742 | 1077 | mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1, |
9f2d1b4f | 1078 | tmp_y, tmp_cb, tmp_cr, |
05fb63f5 RB |
1079 | x_offset, y_offset, qpix_put, chroma_put, |
1080 | pixel_shift, chroma_idc); | |
9f2d1b4f | 1081 | |
e5d40372 | 1082 | if (h->use_weight == 2) { |
2c541554 | 1083 | int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1]; |
9f2d1b4f | 1084 | int weight1 = 64 - weight0; |
e5d40372 DB |
1085 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, |
1086 | height, 5, weight0, weight1, 0); | |
c2d33742 RB |
1087 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, |
1088 | chroma_height, 5, weight0, weight1, 0); | |
1089 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, | |
1090 | chroma_height, 5, weight0, weight1, 0); | |
e5d40372 DB |
1091 | } else { |
1092 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, | |
1093 | h->luma_log2_weight_denom, | |
1094 | h->luma_weight[refn0][0][0], | |
1095 | h->luma_weight[refn1][1][0], | |
1096 | h->luma_weight[refn0][0][1] + | |
1097 | h->luma_weight[refn1][1][1]); | |
1098 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, | |
1099 | h->chroma_log2_weight_denom, | |
1100 | h->chroma_weight[refn0][0][0][0], | |
1101 | h->chroma_weight[refn1][1][0][0], | |
1102 | h->chroma_weight[refn0][0][0][1] + | |
1103 | h->chroma_weight[refn1][1][0][1]); | |
1104 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, | |
1105 | h->chroma_log2_weight_denom, | |
1106 | h->chroma_weight[refn0][0][1][0], | |
1107 | h->chroma_weight[refn1][1][1][0], | |
1108 | h->chroma_weight[refn0][0][1][1] + | |
1109 | h->chroma_weight[refn1][1][1][1]); | |
9f2d1b4f | 1110 | } |
e5d40372 DB |
1111 | } else { |
1112 | int list = list1 ? 1 : 0; | |
1113 | int refn = h->ref_cache[list][scan8[n]]; | |
1114 | Picture *ref = &h->ref_list[list][refn]; | |
c2d33742 | 1115 | mc_dir_part(h, ref, n, square, height, delta, list, |
9f2d1b4f | 1116 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
05fb63f5 | 1117 | qpix_put, chroma_put, pixel_shift, chroma_idc); |
9f2d1b4f | 1118 | |
e5d40372 DB |
1119 | luma_weight_op(dest_y, h->mb_linesize, height, |
1120 | h->luma_log2_weight_denom, | |
1121 | h->luma_weight[refn][list][0], | |
1122 | h->luma_weight[refn][list][1]); | |
1123 | if (h->use_weight_chroma) { | |
1124 | chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, | |
1125 | h->chroma_log2_weight_denom, | |
1126 | h->chroma_weight[refn][list][0][0], | |
1127 | h->chroma_weight[refn][list][0][1]); | |
1128 | chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, | |
1129 | h->chroma_log2_weight_denom, | |
1130 | h->chroma_weight[refn][list][1][0], | |
1131 | h->chroma_weight[refn][list][1][1]); | |
9f2d1b4f LM |
1132 | } |
1133 | } | |
1134 | } | |
1135 | ||
e5d40372 DB |
1136 | static av_always_inline void prefetch_motion(H264Context *h, int list, |
1137 | int pixel_shift, int chroma_idc) | |
27209bb1 | 1138 | { |
513fbd8e LM |
1139 | /* fetch pixels for estimated mv 4 macroblocks ahead |
1140 | * optimized for 64byte cache lines */ | |
513fbd8e | 1141 | const int refn = h->ref_cache[list][scan8[0]]; |
e5d40372 | 1142 | if (refn >= 0) { |
2c541554 AK |
1143 | const int mx = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8; |
1144 | const int my = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y; | |
657ccb5a | 1145 | uint8_t **src = h->ref_list[list][refn].f.data; |
e5d40372 | 1146 | int off = (mx << pixel_shift) + |
2c541554 | 1147 | (my + (h->mb_x & 3) * 4) * h->mb_linesize + |
e5d40372 | 1148 | (64 << pixel_shift); |
2c541554 | 1149 | h->vdsp.prefetch(src[0] + off, h->linesize, 4); |
05fb63f5 | 1150 | if (chroma_idc == 3 /* yuv444 */) { |
2c541554 AK |
1151 | h->vdsp.prefetch(src[1] + off, h->linesize, 4); |
1152 | h->vdsp.prefetch(src[2] + off, h->linesize, 4); | |
e5d40372 DB |
1153 | } else { |
1154 | off = ((mx >> 1) << pixel_shift) + | |
2c541554 | 1155 | ((my >> 1) + (h->mb_x & 7)) * h->uvlinesize + |
e5d40372 | 1156 | (64 << pixel_shift); |
2c541554 | 1157 | h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); |
c90b9442 | 1158 | } |
513fbd8e LM |
1159 | } |
1160 | } | |
1161 | ||
e5d40372 DB |
1162 | static void free_tables(H264Context *h, int free_rbsp) |
1163 | { | |
7978debd | 1164 | int i; |
afebe2f7 | 1165 | H264Context *hx; |
6a9c8594 | 1166 | |
0da71265 | 1167 | av_freep(&h->intra4x4_pred_mode); |
e5017ab8 LA |
1168 | av_freep(&h->chroma_pred_mode_table); |
1169 | av_freep(&h->cbp_table); | |
9e528114 LA |
1170 | av_freep(&h->mvd_table[0]); |
1171 | av_freep(&h->mvd_table[1]); | |
5ad984c9 | 1172 | av_freep(&h->direct_table); |
0da71265 MN |
1173 | av_freep(&h->non_zero_count); |
1174 | av_freep(&h->slice_table_base); | |
e5d40372 | 1175 | h->slice_table = NULL; |
c988f975 | 1176 | av_freep(&h->list_counts); |
e5017ab8 | 1177 | |
0da71265 | 1178 | av_freep(&h->mb2b_xy); |
d43c1922 | 1179 | av_freep(&h->mb2br_xy); |
9f2d1b4f | 1180 | |
759001c5 AK |
1181 | av_buffer_pool_uninit(&h->qscale_table_pool); |
1182 | av_buffer_pool_uninit(&h->mb_type_pool); | |
1183 | av_buffer_pool_uninit(&h->motion_val_pool); | |
1184 | av_buffer_pool_uninit(&h->ref_index_pool); | |
1185 | ||
1186 | if (free_rbsp && h->DPB) { | |
1187 | for (i = 0; i < MAX_PICTURE_COUNT; i++) | |
1188 | unref_picture(h, &h->DPB[i]); | |
2c541554 | 1189 | av_freep(&h->DPB); |
2c541554 | 1190 | } else if (h->DPB) { |
759001c5 | 1191 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
2c541554 AK |
1192 | h->DPB[i].needs_realloc = 1; |
1193 | } | |
1194 | ||
1195 | h->cur_pic_ptr = NULL; | |
1196 | ||
e5d40372 | 1197 | for (i = 0; i < MAX_THREADS; i++) { |
afebe2f7 | 1198 | hx = h->thread_context[i]; |
e5d40372 DB |
1199 | if (!hx) |
1200 | continue; | |
afebe2f7 AÖ |
1201 | av_freep(&hx->top_borders[1]); |
1202 | av_freep(&hx->top_borders[0]); | |
a394959b | 1203 | av_freep(&hx->bipred_scratchpad); |
2c541554 AK |
1204 | av_freep(&hx->edge_emu_buffer); |
1205 | av_freep(&hx->dc_val_base); | |
1206 | av_freep(&hx->me.scratchpad); | |
1207 | av_freep(&hx->er.mb_index2xy); | |
1208 | av_freep(&hx->er.error_status_table); | |
1209 | av_freep(&hx->er.er_temp_buffer); | |
1210 | av_freep(&hx->er.mbintra_table); | |
1211 | av_freep(&hx->er.mbskip_table); | |
1212 | ||
e5d40372 | 1213 | if (free_rbsp) { |
fcb7e535 RB |
1214 | av_freep(&hx->rbsp_buffer[1]); |
1215 | av_freep(&hx->rbsp_buffer[0]); | |
1216 | hx->rbsp_buffer_size[0] = 0; | |
1217 | hx->rbsp_buffer_size[1] = 0; | |
91078926 | 1218 | } |
e5d40372 DB |
1219 | if (i) |
1220 | av_freep(&h->thread_context[i]); | |
afebe2f7 | 1221 | } |
0da71265 MN |
1222 | } |
1223 | ||
e5d40372 DB |
1224 | static void init_dequant8_coeff_table(H264Context *h) |
1225 | { | |
1226 | int i, j, q, x; | |
1227 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); | |
239ea04c | 1228 | |
e5d40372 | 1229 | for (i = 0; i < 6; i++) { |
c90b9442 | 1230 | h->dequant8_coeff[i] = h->dequant8_buffer[i]; |
e5d40372 DB |
1231 | for (j = 0; j < i; j++) |
1232 | if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], | |
1233 | 64 * sizeof(uint8_t))) { | |
c90b9442 JGG |
1234 | h->dequant8_coeff[i] = h->dequant8_buffer[j]; |
1235 | break; | |
1236 | } | |
e5d40372 | 1237 | if (j < i) |
c90b9442 | 1238 | continue; |
239ea04c | 1239 | |
e5d40372 | 1240 | for (q = 0; q < max_qp + 1; q++) { |
d9ec210b | 1241 | int shift = div6[q]; |
e5d40372 DB |
1242 | int idx = rem6[q]; |
1243 | for (x = 0; x < 64; x++) | |
1244 | h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] = | |
1245 | ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] * | |
1246 | h->pps.scaling_matrix8[i][x]) << shift; | |
239ea04c LM |
1247 | } |
1248 | } | |
1249 | } | |
1250 | ||
e5d40372 DB |
1251 | static void init_dequant4_coeff_table(H264Context *h) |
1252 | { | |
1253 | int i, j, q, x; | |
1254 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); | |
1255 | for (i = 0; i < 6; i++) { | |
239ea04c | 1256 | h->dequant4_coeff[i] = h->dequant4_buffer[i]; |
e5d40372 DB |
1257 | for (j = 0; j < i; j++) |
1258 | if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], | |
1259 | 16 * sizeof(uint8_t))) { | |
239ea04c LM |
1260 | h->dequant4_coeff[i] = h->dequant4_buffer[j]; |
1261 | break; | |
1262 | } | |
e5d40372 | 1263 | if (j < i) |
239ea04c LM |
1264 | continue; |
1265 | ||
e5d40372 | 1266 | for (q = 0; q < max_qp + 1; q++) { |
d9ec210b | 1267 | int shift = div6[q] + 2; |
e5d40372 DB |
1268 | int idx = rem6[q]; |
1269 | for (x = 0; x < 16; x++) | |
1270 | h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] = | |
1271 | ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * | |
1272 | h->pps.scaling_matrix4[i][x]) << shift; | |
239ea04c LM |
1273 | } |
1274 | } | |
1275 | } | |
1276 | ||
e5d40372 DB |
1277 | static void init_dequant_tables(H264Context *h) |
1278 | { | |
1279 | int i, x; | |
239ea04c | 1280 | init_dequant4_coeff_table(h); |
e5d40372 | 1281 | if (h->pps.transform_8x8_mode) |
239ea04c | 1282 | init_dequant8_coeff_table(h); |
e5d40372 DB |
1283 | if (h->sps.transform_bypass) { |
1284 | for (i = 0; i < 6; i++) | |
1285 | for (x = 0; x < 16; x++) | |
1286 | h->dequant4_coeff[i][0][x] = 1 << 6; | |
1287 | if (h->pps.transform_8x8_mode) | |
1288 | for (i = 0; i < 6; i++) | |
1289 | for (x = 0; x < 64; x++) | |
1290 | h->dequant8_coeff[i][0][x] = 1 << 6; | |
239ea04c LM |
1291 | } |
1292 | } | |
1293 | ||
e5d40372 DB |
1294 | int ff_h264_alloc_tables(H264Context *h) |
1295 | { | |
16c22122 DB |
1296 | const int big_mb_num = h->mb_stride * (h->mb_height + 1); |
1297 | const int row_mb_num = h->mb_stride * 2 * h->avctx->thread_count; | |
2c541554 | 1298 | int x, y, i; |
e5d40372 | 1299 | |
2c541554 | 1300 | FF_ALLOCZ_OR_GOTO(h->avctx, h->intra4x4_pred_mode, |
e5d40372 | 1301 | row_mb_num * 8 * sizeof(uint8_t), fail) |
2c541554 | 1302 | FF_ALLOCZ_OR_GOTO(h->avctx, h->non_zero_count, |
e5d40372 | 1303 | big_mb_num * 48 * sizeof(uint8_t), fail) |
2c541554 AK |
1304 | FF_ALLOCZ_OR_GOTO(h->avctx, h->slice_table_base, |
1305 | (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base), fail) | |
1306 | FF_ALLOCZ_OR_GOTO(h->avctx, h->cbp_table, | |
e5d40372 | 1307 | big_mb_num * sizeof(uint16_t), fail) |
2c541554 | 1308 | FF_ALLOCZ_OR_GOTO(h->avctx, h->chroma_pred_mode_table, |
e5d40372 | 1309 | big_mb_num * sizeof(uint8_t), fail) |
2c541554 | 1310 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[0], |
e5d40372 | 1311 | 16 * row_mb_num * sizeof(uint8_t), fail); |
2c541554 | 1312 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[1], |
e5d40372 | 1313 | 16 * row_mb_num * sizeof(uint8_t), fail); |
2c541554 | 1314 | FF_ALLOCZ_OR_GOTO(h->avctx, h->direct_table, |
e5d40372 | 1315 | 4 * big_mb_num * sizeof(uint8_t), fail); |
2c541554 | 1316 | FF_ALLOCZ_OR_GOTO(h->avctx, h->list_counts, |
e5d40372 DB |
1317 | big_mb_num * sizeof(uint8_t), fail) |
1318 | ||
1319 | memset(h->slice_table_base, -1, | |
2c541554 AK |
1320 | (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base)); |
1321 | h->slice_table = h->slice_table_base + h->mb_stride * 2 + 1; | |
e5d40372 | 1322 | |
2c541554 | 1323 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2b_xy, |
e5d40372 | 1324 | big_mb_num * sizeof(uint32_t), fail); |
2c541554 | 1325 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2br_xy, |
e5d40372 | 1326 | big_mb_num * sizeof(uint32_t), fail); |
2c541554 AK |
1327 | for (y = 0; y < h->mb_height; y++) |
1328 | for (x = 0; x < h->mb_width; x++) { | |
1329 | const int mb_xy = x + y * h->mb_stride; | |
e5d40372 DB |
1330 | const int b_xy = 4 * x + 4 * y * h->b_stride; |
1331 | ||
1332 | h->mb2b_xy[mb_xy] = b_xy; | |
2c541554 | 1333 | h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * h->mb_stride))); |
0da71265 | 1334 | } |
9f2d1b4f | 1335 | |
e5d40372 | 1336 | if (!h->dequant4_coeff[0]) |
56edbd81 LM |
1337 | init_dequant_tables(h); |
1338 | ||
2c541554 | 1339 | if (!h->DPB) { |
759001c5 | 1340 | h->DPB = av_mallocz_array(MAX_PICTURE_COUNT, sizeof(*h->DPB)); |
2c541554 AK |
1341 | if (!h->DPB) |
1342 | return AVERROR(ENOMEM); | |
759001c5 | 1343 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
598ce4ab AK |
1344 | av_frame_unref(&h->DPB[i].f); |
1345 | av_frame_unref(&h->cur_pic.f); | |
2c541554 AK |
1346 | } |
1347 | ||
0da71265 | 1348 | return 0; |
e5d40372 | 1349 | |
0da71265 | 1350 | fail: |
91078926 | 1351 | free_tables(h, 1); |
b3dc260e | 1352 | return AVERROR(ENOMEM); |
0da71265 MN |
1353 | } |
1354 | ||
afebe2f7 AÖ |
1355 | /** |
1356 | * Mimic alloc_tables(), but for every context thread. | |
1357 | */ | |
e5d40372 DB |
1358 | static void clone_tables(H264Context *dst, H264Context *src, int i) |
1359 | { | |
2c541554 | 1360 | dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride; |
e5d40372 DB |
1361 | dst->non_zero_count = src->non_zero_count; |
1362 | dst->slice_table = src->slice_table; | |
1363 | dst->cbp_table = src->cbp_table; | |
1364 | dst->mb2b_xy = src->mb2b_xy; | |
1365 | dst->mb2br_xy = src->mb2br_xy; | |
1366 | dst->chroma_pred_mode_table = src->chroma_pred_mode_table; | |
2c541554 AK |
1367 | dst->mvd_table[0] = src->mvd_table[0] + i * 8 * 2 * src->mb_stride; |
1368 | dst->mvd_table[1] = src->mvd_table[1] + i * 8 * 2 * src->mb_stride; | |
e5d40372 DB |
1369 | dst->direct_table = src->direct_table; |
1370 | dst->list_counts = src->list_counts; | |
2c541554 AK |
1371 | dst->DPB = src->DPB; |
1372 | dst->cur_pic_ptr = src->cur_pic_ptr; | |
1373 | dst->cur_pic = src->cur_pic; | |
a394959b | 1374 | dst->bipred_scratchpad = NULL; |
2c541554 AK |
1375 | dst->edge_emu_buffer = NULL; |
1376 | dst->me.scratchpad = NULL; | |
1377 | ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma, | |
e5d40372 | 1378 | src->sps.chroma_format_idc); |
afebe2f7 AÖ |
1379 | } |
1380 | ||
1381 | /** | |
1382 | * Init context | |
1383 | * Allocate buffers which are not shared amongst multiple threads. | |
1384 | */ | |
e5d40372 DB |
1385 | static int context_init(H264Context *h) |
1386 | { | |
2c541554 AK |
1387 | ERContext *er = &h->er; |
1388 | int mb_array_size = h->mb_height * h->mb_stride; | |
1389 | int y_size = (2 * h->mb_width + 1) * (2 * h->mb_height + 1); | |
1390 | int c_size = h->mb_stride * (h->mb_height + 1); | |
1391 | int yc_size = y_size + 2 * c_size; | |
1392 | int x, y, i; | |
1393 | ||
1394 | FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[0], | |
1395 | h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) | |
1396 | FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[1], | |
1397 | h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) | |
e5d40372 DB |
1398 | |
1399 | h->ref_cache[0][scan8[5] + 1] = | |
1400 | h->ref_cache[0][scan8[7] + 1] = | |
1401 | h->ref_cache[0][scan8[13] + 1] = | |
1402 | h->ref_cache[1][scan8[5] + 1] = | |
1403 | h->ref_cache[1][scan8[7] + 1] = | |
1404 | h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE; | |
145061a1 | 1405 | |
0b499c9b RB |
1406 | if (CONFIG_ERROR_RESILIENCE) { |
1407 | /* init ER */ | |
1408 | er->avctx = h->avctx; | |
1409 | er->dsp = &h->dsp; | |
1410 | er->decode_mb = h264_er_decode_mb; | |
1411 | er->opaque = h; | |
1412 | er->quarter_sample = 1; | |
1413 | ||
1414 | er->mb_num = h->mb_num; | |
1415 | er->mb_width = h->mb_width; | |
1416 | er->mb_height = h->mb_height; | |
1417 | er->mb_stride = h->mb_stride; | |
1418 | er->b8_stride = h->mb_width * 2 + 1; | |
2c541554 | 1419 | |
0b499c9b RB |
1420 | FF_ALLOCZ_OR_GOTO(h->avctx, er->mb_index2xy, (h->mb_num + 1) * sizeof(int), |
1421 | fail); // error ressilience code looks cleaner with this | |
1422 | for (y = 0; y < h->mb_height; y++) | |
1423 | for (x = 0; x < h->mb_width; x++) | |
1424 | er->mb_index2xy[x + y * h->mb_width] = x + y * h->mb_stride; | |
2c541554 | 1425 | |
0b499c9b | 1426 | er->mb_index2xy[h->mb_height * h->mb_width] = (h->mb_height - 1) * |
16c22122 | 1427 | h->mb_stride + h->mb_width; |
2c541554 | 1428 | |
0b499c9b RB |
1429 | FF_ALLOCZ_OR_GOTO(h->avctx, er->error_status_table, |
1430 | mb_array_size * sizeof(uint8_t), fail); | |
2c541554 | 1431 | |
0b499c9b RB |
1432 | FF_ALLOC_OR_GOTO(h->avctx, er->mbintra_table, mb_array_size, fail); |
1433 | memset(er->mbintra_table, 1, mb_array_size); | |
2c541554 | 1434 | |
0b499c9b | 1435 | FF_ALLOCZ_OR_GOTO(h->avctx, er->mbskip_table, mb_array_size + 2, fail); |
2c541554 | 1436 | |
0b499c9b RB |
1437 | FF_ALLOC_OR_GOTO(h->avctx, er->er_temp_buffer, h->mb_height * h->mb_stride, |
1438 | fail); | |
1439 | ||
1440 | FF_ALLOCZ_OR_GOTO(h->avctx, h->dc_val_base, yc_size * sizeof(int16_t), fail); | |
1441 | er->dc_val[0] = h->dc_val_base + h->mb_width * 2 + 2; | |
1442 | er->dc_val[1] = h->dc_val_base + y_size + h->mb_stride + 1; | |
1443 | er->dc_val[2] = er->dc_val[1] + c_size; | |
1444 | for (i = 0; i < yc_size; i++) | |
1445 | h->dc_val_base[i] = 1024; | |
1446 | } | |
54974c62 | 1447 | |
afebe2f7 | 1448 | return 0; |
e5d40372 | 1449 | |
afebe2f7 | 1450 | fail: |
b3dc260e | 1451 | return AVERROR(ENOMEM); // free_tables will clean up for us |
afebe2f7 AÖ |
1452 | } |
1453 | ||
61c6eef5 JG |
1454 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, |
1455 | int parse_extradata); | |
9855b2e3 | 1456 | |
05e95319 HC |
1457 | int ff_h264_decode_extradata(H264Context *h) |
1458 | { | |
2c541554 | 1459 | AVCodecContext *avctx = h->avctx; |
b3dc260e | 1460 | int ret; |
9855b2e3 | 1461 | |
e5d40372 | 1462 | if (avctx->extradata[0] == 1) { |
9855b2e3 MN |
1463 | int i, cnt, nalsize; |
1464 | unsigned char *p = avctx->extradata; | |
1465 | ||
1466 | h->is_avc = 1; | |
1467 | ||
e5d40372 | 1468 | if (avctx->extradata_size < 7) { |
9855b2e3 | 1469 | av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); |
b3dc260e | 1470 | return AVERROR_INVALIDDATA; |
9855b2e3 MN |
1471 | } |
1472 | /* sps and pps in the avcC always have length coded with 2 bytes, | |
e5d40372 | 1473 | * so put a fake nal_length_size = 2 while parsing them */ |
9855b2e3 MN |
1474 | h->nal_length_size = 2; |
1475 | // Decode sps from avcC | |
e5d40372 DB |
1476 | cnt = *(p + 5) & 0x1f; // Number of sps |
1477 | p += 6; | |
9855b2e3 MN |
1478 | for (i = 0; i < cnt; i++) { |
1479 | nalsize = AV_RB16(p) + 2; | |
d1186ff7 | 1480 | if (p - avctx->extradata + nalsize > avctx->extradata_size) |
b3dc260e VG |
1481 | return AVERROR_INVALIDDATA; |
1482 | ret = decode_nal_units(h, p, nalsize, 1); | |
1483 | if (ret < 0) { | |
e5d40372 DB |
1484 | av_log(avctx, AV_LOG_ERROR, |
1485 | "Decoding sps %d from avcC failed\n", i); | |
b3dc260e | 1486 | return ret; |
9855b2e3 MN |
1487 | } |
1488 | p += nalsize; | |
1489 | } | |
1490 | // Decode pps from avcC | |
1491 | cnt = *(p++); // Number of pps | |
1492 | for (i = 0; i < cnt; i++) { | |
1493 | nalsize = AV_RB16(p) + 2; | |
d1186ff7 | 1494 | if (p - avctx->extradata + nalsize > avctx->extradata_size) |
b3dc260e VG |
1495 | return AVERROR_INVALIDDATA; |
1496 | ret = decode_nal_units(h, p, nalsize, 1); | |
1497 | if (ret < 0) { | |
e5d40372 DB |
1498 | av_log(avctx, AV_LOG_ERROR, |
1499 | "Decoding pps %d from avcC failed\n", i); | |
b3dc260e | 1500 | return ret; |
9855b2e3 MN |
1501 | } |
1502 | p += nalsize; | |
1503 | } | |
e5d40372 | 1504 | // Now store right nal length size, that will be used to parse all other nals |
0c17beba | 1505 | h->nal_length_size = (avctx->extradata[4] & 0x03) + 1; |
9855b2e3 MN |
1506 | } else { |
1507 | h->is_avc = 0; | |
b3dc260e VG |
1508 | ret = decode_nal_units(h, avctx->extradata, avctx->extradata_size, 1); |
1509 | if (ret < 0) | |
1510 | return ret; | |
9855b2e3 | 1511 | } |
05e95319 HC |
1512 | return 0; |
1513 | } | |
1514 | ||
e5d40372 DB |
1515 | av_cold int ff_h264_decode_init(AVCodecContext *avctx) |
1516 | { | |
1517 | H264Context *h = avctx->priv_data; | |
ea2bb12e | 1518 | int i; |
b3dc260e | 1519 | int ret; |
05e95319 | 1520 | |
2c541554 | 1521 | h->avctx = avctx; |
48d0fd2d | 1522 | |
48d0fd2d AK |
1523 | h->bit_depth_luma = 8; |
1524 | h->chroma_format_idc = 1; | |
1525 | ||
1526 | ff_h264dsp_init(&h->h264dsp, 8, 1); | |
1527 | ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); | |
1528 | ff_h264qpel_init(&h->h264qpel, 8); | |
1529 | ff_h264_pred_init(&h->hpc, h->avctx->codec_id, 8, 1); | |
1530 | ||
1531 | h->dequant_coeff_pps = -1; | |
1532 | ||
1533 | /* needed so that IDCT permutation is known early */ | |
85deb51a RB |
1534 | if (CONFIG_ERROR_RESILIENCE) |
1535 | ff_dsputil_init(&h->dsp, h->avctx); | |
48d0fd2d AK |
1536 | ff_videodsp_init(&h->vdsp, 8); |
1537 | ||
1538 | memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t)); | |
1539 | memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t)); | |
05e95319 | 1540 | |
2c541554 | 1541 | h->picture_structure = PICT_FRAME; |
2c541554 AK |
1542 | h->slice_context_count = 1; |
1543 | h->workaround_bugs = avctx->workaround_bugs; | |
1544 | h->flags = avctx->flags; | |
05e95319 | 1545 | |
e5d40372 DB |
1546 | /* set defaults */ |
1547 | // s->decode_mb = ff_h263_decode_mb; | |
e5d40372 | 1548 | if (!avctx->has_b_frames) |
2c541554 | 1549 | h->low_delay = 1; |
05e95319 HC |
1550 | |
1551 | avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; | |
1552 | ||
1553 | ff_h264_decode_init_vlc(); | |
1554 | ||
cab8c5f8 AK |
1555 | ff_init_cabac_states(); |
1556 | ||
16c22122 | 1557 | h->pixel_shift = 0; |
19a0729b | 1558 | h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; |
6e3ef511 | 1559 | |
05e95319 | 1560 | h->thread_context[0] = h; |
e5d40372 | 1561 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
ea2bb12e RB |
1562 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
1563 | h->last_pocs[i] = INT_MIN; | |
e5d40372 DB |
1564 | h->prev_poc_msb = 1 << 16; |
1565 | h->x264_build = -1; | |
05e95319 | 1566 | ff_h264_reset_sei(h); |
28096e0a JS |
1567 | h->recovery_frame = -1; |
1568 | h->frame_recovered = 0; | |
36ef5369 | 1569 | if (avctx->codec_id == AV_CODEC_ID_H264) { |
e5d40372 | 1570 | if (avctx->ticks_per_frame == 1) |
2c541554 | 1571 | h->avctx->time_base.den *= 2; |
05e95319 HC |
1572 | avctx->ticks_per_frame = 2; |
1573 | } | |
1574 | ||
b3dc260e VG |
1575 | if (avctx->extradata_size > 0 && avctx->extradata) { |
1576 | ret = ff_h264_decode_extradata(h); | |
1577 | if (ret < 0) | |
1578 | return ret; | |
1579 | } | |
05e95319 | 1580 | |
e5d40372 | 1581 | if (h->sps.bitstream_restriction_flag && |
2c541554 AK |
1582 | h->avctx->has_b_frames < h->sps.num_reorder_frames) { |
1583 | h->avctx->has_b_frames = h->sps.num_reorder_frames; | |
1584 | h->low_delay = 0; | |
db8cb47d | 1585 | } |
9855b2e3 | 1586 | |
759001c5 AK |
1587 | avctx->internal->allocate_progress = 1; |
1588 | ||
0da71265 MN |
1589 | return 0; |
1590 | } | |
1591 | ||
/* Nonzero when a lies in the half-open range [b, b + size).
 * When b is a pointer, size counts elements of b's pointee type, not
 * bytes, because (b) + (size) is ordinary pointer arithmetic. */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
#undef REBASE_PICTURE
/* Translate pic, a pointer into old_ctx->DPB, into the entry with the same
 * index in new_ctx->DPB. Evaluates to NULL when pic is NULL or lies outside
 * the MAX_PICTURE_COUNT entries of old_ctx->DPB.
 * Every argument is parenthesised so that expressions such as &ctx can be
 * passed; pic and old_ctx are evaluated more than once. */
#define REBASE_PICTURE(pic, new_ctx, old_ctx)                         \
    (((pic) && (pic) >= (old_ctx)->DPB &&                             \
      (pic) < (old_ctx)->DPB + MAX_PICTURE_COUNT) ?                   \
     &(new_ctx)->DPB[(pic) - (old_ctx)->DPB] : NULL)
e5d40372 DB |
1598 | |
1599 | static void copy_picture_range(Picture **to, Picture **from, int count, | |
2c541554 AK |
1600 | H264Context *new_base, |
1601 | H264Context *old_base) | |
6a9c8594 AS |
1602 | { |
1603 | int i; | |
1604 | ||
e5d40372 | 1605 | for (i = 0; i < count; i++) { |
6a9c8594 | 1606 | assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) || |
2c541554 | 1607 | IN_RANGE(from[i], old_base->DPB, |
759001c5 | 1608 | sizeof(Picture) * MAX_PICTURE_COUNT) || |
6a9c8594 AS |
1609 | !from[i])); |
1610 | to[i] = REBASE_PICTURE(from[i], new_base, old_base); | |
1611 | } | |
1612 | } | |
1613 | ||
/**
 * Mirror an array of heap-allocated parameter sets.
 *
 * For every index: if the source slot is empty the destination slot is
 * freed; otherwise the destination slot is allocated when empty and the
 * source contents are copied into it.
 *
 * @param to    destination pointer array (count entries)
 * @param from  source pointer array (count entries)
 * @param count number of slots to process
 * @param size  size in bytes of one parameter set
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static int copy_parameter_set(void **to, void **from, int count, int size)
{
    int slot;

    for (slot = 0; slot < count; slot++) {
        void *src = from[slot];

        if (!src) {
            /* source slot is empty: drop a stale destination entry */
            if (to[slot])
                av_freep(&to[slot]);
            continue;
        }

        if (!to[slot]) {
            to[slot] = av_malloc(size);
            if (!to[slot])
                return AVERROR(ENOMEM);
        }
        memcpy(to[slot], src, size);
    }

    return 0;
}
1633 | ||
e5d40372 DB |
1634 | static int decode_init_thread_copy(AVCodecContext *avctx) |
1635 | { | |
1636 | H264Context *h = avctx->priv_data; | |
6a9c8594 | 1637 | |
f3a29b75 JR |
1638 | if (!avctx->internal->is_copy) |
1639 | return 0; | |
6a9c8594 AS |
1640 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
1641 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); | |
1642 | ||
2c541554 | 1643 | h->context_initialized = 0; |
0eae920c | 1644 | |
6a9c8594 AS |
1645 | return 0; |
1646 | } | |
1647 | ||
e5d40372 DB |
/* Copy the run of struct members starting at start_field and ending just
 * before end_field from *from to *to with a single memcpy(). The byte count
 * is the distance between the two members in *to, so end_field itself is
 * not copied. The pointer arguments are parenthesised so that expressions
 * such as &ctx can be passed. */
#define copy_fields(to, from, start_field, end_field)                   \
    memcpy(&(to)->start_field, &(from)->start_field,                    \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1651 | ||
9e696d2e JG |
1652 | static int h264_slice_header_init(H264Context *, int); |
1653 | ||
1654 | static int h264_set_parameter_from_sps(H264Context *h); | |
1655 | ||
e5d40372 DB |
1656 | static int decode_update_thread_context(AVCodecContext *dst, |
1657 | const AVCodecContext *src) | |
1658 | { | |
1659 | H264Context *h = dst->priv_data, *h1 = src->priv_data; | |
2c541554 AK |
1660 | int inited = h->context_initialized, err = 0; |
1661 | int context_reinitialized = 0; | |
759001c5 | 1662 | int i, ret; |
6a9c8594 | 1663 | |
2c541554 | 1664 | if (dst == src || !h1->context_initialized) |
e5d40372 | 1665 | return 0; |
6a9c8594 | 1666 | |
9e696d2e | 1667 | if (inited && |
16c22122 DB |
1668 | (h->width != h1->width || |
1669 | h->height != h1->height || | |
1670 | h->mb_width != h1->mb_width || | |
1671 | h->mb_height != h1->mb_height || | |
9e696d2e JG |
1672 | h->sps.bit_depth_luma != h1->sps.bit_depth_luma || |
1673 | h->sps.chroma_format_idc != h1->sps.chroma_format_idc || | |
1674 | h->sps.colorspace != h1->sps.colorspace)) { | |
1675 | ||
a157c7f2 JG |
1676 | /* set bits_per_raw_sample to the previous value. the check for changed |
1677 | * bit depth in h264_set_parameter_from_sps() uses it and sets it to | |
1678 | * the current value */ | |
1679 | h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; | |
1680 | ||
9e696d2e JG |
1681 | av_freep(&h->bipred_scratchpad); |
1682 | ||
2c541554 AK |
1683 | h->width = h1->width; |
1684 | h->height = h1->height; | |
1685 | h->mb_height = h1->mb_height; | |
1686 | h->mb_width = h1->mb_width; | |
1687 | h->mb_num = h1->mb_num; | |
1688 | h->mb_stride = h1->mb_stride; | |
9e696d2e JG |
1689 | h->b_stride = h1->b_stride; |
1690 | ||
1691 | if ((err = h264_slice_header_init(h, 1)) < 0) { | |
2c541554 | 1692 | av_log(h->avctx, AV_LOG_ERROR, "h264_slice_header_init() failed"); |
9e696d2e JG |
1693 | return err; |
1694 | } | |
2c541554 | 1695 | context_reinitialized = 1; |
9e696d2e | 1696 | |
2c541554 | 1697 | /* update linesize on resize. The decoder doesn't |
25408b2a | 1698 | * necessarily call h264_frame_start in the new thread */ |
2c541554 AK |
1699 | h->linesize = h1->linesize; |
1700 | h->uvlinesize = h1->uvlinesize; | |
9e696d2e JG |
1701 | |
1702 | /* copy block_offset since frame_start may not be called */ | |
1703 | memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset)); | |
9e696d2e JG |
1704 | } |
1705 | ||
e5d40372 DB |
1706 | if (!inited) { |
1707 | for (i = 0; i < MAX_SPS_COUNT; i++) | |
6a9c8594 AS |
1708 | av_freep(h->sps_buffers + i); |
1709 | ||
e5d40372 | 1710 | for (i = 0; i < MAX_PPS_COUNT; i++) |
6a9c8594 AS |
1711 | av_freep(h->pps_buffers + i); |
1712 | ||
2c541554 | 1713 | memcpy(h, h1, sizeof(*h1)); |
6a9c8594 AS |
1714 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
1715 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); | |
2c541554 AK |
1716 | memset(&h->er, 0, sizeof(h->er)); |
1717 | memset(&h->me, 0, sizeof(h->me)); | |
e8cafd27 MS |
1718 | memset(&h->mb, 0, sizeof(h->mb)); |
1719 | memset(&h->mb_luma_dc, 0, sizeof(h->mb_luma_dc)); | |
1720 | memset(&h->mb_padding, 0, sizeof(h->mb_padding)); | |
2c541554 AK |
1721 | h->context_initialized = 0; |
1722 | ||
759001c5 | 1723 | memset(&h->cur_pic, 0, sizeof(h->cur_pic)); |
598ce4ab | 1724 | av_frame_unref(&h->cur_pic.f); |
759001c5 | 1725 | h->cur_pic.tf.f = &h->cur_pic.f; |
2c541554 | 1726 | |
16c22122 DB |
1727 | h->avctx = dst; |
1728 | h->DPB = NULL; | |
759001c5 | 1729 | h->qscale_table_pool = NULL; |
16c22122 DB |
1730 | h->mb_type_pool = NULL; |
1731 | h->ref_index_pool = NULL; | |
1732 | h->motion_val_pool = NULL; | |
2c541554 | 1733 | |
b3dc260e VG |
1734 | ret = ff_h264_alloc_tables(h); |
1735 | if (ret < 0) { | |
bac3ab13 | 1736 | av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n"); |
b3dc260e | 1737 | return ret; |
bac3ab13 | 1738 | } |
c1076d84 VG |
1739 | ret = context_init(h); |
1740 | if (ret < 0) { | |
1741 | av_log(dst, AV_LOG_ERROR, "context_init() failed.\n"); | |
1742 | return ret; | |
1743 | } | |
6a9c8594 | 1744 | |
e5d40372 DB |
1745 | for (i = 0; i < 2; i++) { |
1746 | h->rbsp_buffer[i] = NULL; | |
6a9c8594 AS |
1747 | h->rbsp_buffer_size[i] = 0; |
1748 | } | |
a394959b | 1749 | h->bipred_scratchpad = NULL; |
2c541554 | 1750 | h->edge_emu_buffer = NULL; |
6a9c8594 AS |
1751 | |
1752 | h->thread_context[0] = h; | |
1753 | ||
2c541554 | 1754 | h->context_initialized = 1; |
6a9c8594 AS |
1755 | } |
1756 | ||
2c541554 AK |
1757 | h->avctx->coded_height = h1->avctx->coded_height; |
1758 | h->avctx->coded_width = h1->avctx->coded_width; | |
1759 | h->avctx->width = h1->avctx->width; | |
1760 | h->avctx->height = h1->avctx->height; | |
1761 | h->coded_picture_number = h1->coded_picture_number; | |
1762 | h->first_field = h1->first_field; | |
1763 | h->picture_structure = h1->picture_structure; | |
1764 | h->qscale = h1->qscale; | |
1765 | h->droppable = h1->droppable; | |
1766 | h->data_partitioning = h1->data_partitioning; | |
1767 | h->low_delay = h1->low_delay; | |
1768 | ||
759001c5 AK |
1769 | for (i = 0; i < MAX_PICTURE_COUNT; i++) { |
1770 | unref_picture(h, &h->DPB[i]); | |
a553c6a3 | 1771 | if (h1->DPB[i].f.buf[0] && |
759001c5 AK |
1772 | (ret = ref_picture(h, &h->DPB[i], &h1->DPB[i])) < 0) |
1773 | return ret; | |
1774 | } | |
2c541554 | 1775 | |
16c22122 | 1776 | h->cur_pic_ptr = REBASE_PICTURE(h1->cur_pic_ptr, h, h1); |
759001c5 AK |
1777 | unref_picture(h, &h->cur_pic); |
1778 | if ((ret = ref_picture(h, &h->cur_pic, &h1->cur_pic)) < 0) | |
1779 | return ret; | |
2c541554 AK |
1780 | |
1781 | h->workaround_bugs = h1->workaround_bugs; | |
1782 | h->low_delay = h1->low_delay; | |
1783 | h->droppable = h1->droppable; | |
1784 | ||
a394959b JG |
1785 | /* frame_start may not be called for the next thread (if it's decoding |
1786 | * a bottom field) so this has to be allocated here */ | |
2c541554 AK |
1787 | err = alloc_scratch_buffers(h, h1->linesize); |
1788 | if (err < 0) | |
1789 | return err; | |
a394959b | 1790 | |
e5d40372 DB |
1791 | // extradata/NAL handling |
1792 | h->is_avc = h1->is_avc; | |
6a9c8594 | 1793 | |
e5d40372 | 1794 | // SPS/PPS |
58d13cea DB |
1795 | if ((ret = copy_parameter_set((void **)h->sps_buffers, |
1796 | (void **)h1->sps_buffers, | |
1797 | MAX_SPS_COUNT, sizeof(SPS))) < 0) | |
1798 | return ret; | |
e5d40372 | 1799 | h->sps = h1->sps; |
58d13cea DB |
1800 | if ((ret = copy_parameter_set((void **)h->pps_buffers, |
1801 | (void **)h1->pps_buffers, | |
1802 | MAX_PPS_COUNT, sizeof(PPS))) < 0) | |
1803 | return ret; | |
e5d40372 | 1804 | h->pps = h1->pps; |
6a9c8594 | 1805 | |
e5d40372 DB |
1806 | // Dequantization matrices |
1807 | // FIXME these are big - can they be only copied when PPS changes? | |
6a9c8594 AS |
1808 | copy_fields(h, h1, dequant4_buffer, dequant4_coeff); |
1809 | ||
e5d40372 DB |
1810 | for (i = 0; i < 6; i++) |
1811 | h->dequant4_coeff[i] = h->dequant4_buffer[0] + | |
1812 | (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]); | |
6a9c8594 | 1813 | |
e5d40372 DB |
1814 | for (i = 0; i < 6; i++) |
1815 | h->dequant8_coeff[i] = h->dequant8_buffer[0] + | |
1816 | (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]); | |
6a9c8594 AS |
1817 | |
1818 | h->dequant_coeff_pps = h1->dequant_coeff_pps; | |
1819 | ||
e5d40372 | 1820 | // POC timing |
6a9c8594 AS |
1821 | copy_fields(h, h1, poc_lsb, redundant_pic_count); |
1822 | ||
e5d40372 | 1823 | // reference lists |
6a9c8594 AS |
1824 | copy_fields(h, h1, short_ref, cabac_init_idc); |
1825 | ||
2c541554 AK |
1826 | copy_picture_range(h->short_ref, h1->short_ref, 32, h, h1); |
1827 | copy_picture_range(h->long_ref, h1->long_ref, 32, h, h1); | |
e5d40372 | 1828 | copy_picture_range(h->delayed_pic, h1->delayed_pic, |
2c541554 | 1829 | MAX_DELAYED_PIC_COUNT + 2, h, h1); |
6a9c8594 AS |
1830 | |
1831 | h->last_slice_type = h1->last_slice_type; | |
1832 | ||
2c541554 AK |
1833 | if (context_reinitialized) |
1834 | h264_set_parameter_from_sps(h); | |
1835 | ||
1836 | if (!h->cur_pic_ptr) | |
e5d40372 | 1837 | return 0; |
6a9c8594 | 1838 | |
2c541554 | 1839 | if (!h->droppable) { |
12fe7594 | 1840 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
e5d40372 DB |
1841 | h->prev_poc_msb = h->poc_msb; |
1842 | h->prev_poc_lsb = h->poc_lsb; | |
6a9c8594 | 1843 | } |
e5d40372 DB |
1844 | h->prev_frame_num_offset = h->frame_num_offset; |
1845 | h->prev_frame_num = h->frame_num; | |
1846 | h->outputed_poc = h->next_outputed_poc; | |
6a9c8594 | 1847 | |
28096e0a JS |
1848 | h->recovery_frame = h1->recovery_frame; |
1849 | h->frame_recovered = h1->frame_recovered; | |
1850 | ||
12fe7594 | 1851 | return err; |
6a9c8594 AS |
1852 | } |
1853 | ||
25408b2a | 1854 | static int h264_frame_start(H264Context *h) |
e5d40372 | 1855 | { |
2c541554 AK |
1856 | Picture *pic; |
1857 | int i, ret; | |
6e3ef511 | 1858 | const int pixel_shift = h->pixel_shift; |
0da71265 | 1859 | |
2c541554 AK |
1860 | release_unused_pictures(h, 1); |
1861 | h->cur_pic_ptr = NULL; | |
1862 | ||
1863 | i = find_unused_picture(h); | |
1864 | if (i < 0) { | |
1865 | av_log(h->avctx, AV_LOG_ERROR, "no frame buffer available\n"); | |
1866 | return i; | |
1867 | } | |
1868 | pic = &h->DPB[i]; | |
1869 | ||
16c22122 | 1870 | pic->reference = h->droppable ? 0 : h->picture_structure; |
2c541554 AK |
1871 | pic->f.coded_picture_number = h->coded_picture_number++; |
1872 | pic->field_picture = h->picture_structure != PICT_FRAME; | |
3a22d7fa | 1873 | /* |
2c541554 AK |
1874 | * Zero key_frame here; IDR markings per slice in frame or fields are ORed |
1875 | * in later. | |
3a22d7fa JD |
1876 | * See decode_nal_units(). |
1877 | */ | |
2c541554 AK |
1878 | pic->f.key_frame = 0; |
1879 | pic->mmco_reset = 0; | |
28096e0a | 1880 | pic->recovered = 0; |
2c541554 AK |
1881 | |
1882 | if ((ret = alloc_picture(h, pic)) < 0) | |
1883 | return ret; | |
1884 | ||
1885 | h->cur_pic_ptr = pic; | |
759001c5 AK |
1886 | unref_picture(h, &h->cur_pic); |
1887 | if ((ret = ref_picture(h, &h->cur_pic, h->cur_pic_ptr)) < 0) | |
1888 | return ret; | |
2c541554 | 1889 | |
0b499c9b RB |
1890 | if (CONFIG_ERROR_RESILIENCE) |
1891 | ff_er_frame_start(&h->er); | |
0da71265 | 1892 | |
2c541554 | 1893 | assert(h->linesize && h->uvlinesize); |
0da71265 | 1894 | |
e5d40372 | 1895 | for (i = 0; i < 16; i++) { |
2c541554 AK |
1896 | h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3); |
1897 | h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3); | |
0da71265 | 1898 | } |
e5d40372 DB |
1899 | for (i = 0; i < 16; i++) { |
1900 | h->block_offset[16 + i] = | |
2c541554 | 1901 | h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3); |
e5d40372 | 1902 | h->block_offset[48 + 16 + i] = |
2c541554 | 1903 | h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3); |
0da71265 MN |
1904 | } |
1905 | ||
934b0821 LM |
1906 | /* can't be in alloc_tables because linesize isn't known there. |
1907 | * FIXME: redo bipred weight to not require extra buffer? */ | |
2c541554 AK |
1908 | for (i = 0; i < h->slice_context_count; i++) |
1909 | if (h->thread_context[i]) { | |
1910 | ret = alloc_scratch_buffers(h->thread_context[i], h->linesize); | |
1911 | if (ret < 0) | |
1912 | return ret; | |
1913 | } | |
e5d40372 DB |
1914 | |
1915 | /* Some macroblocks can be accessed before they're available in case | |
1916 | * of lost slices, MBAFF or threading. */ | |
1917 | memset(h->slice_table, -1, | |
2c541554 | 1918 | (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table)); |
e5d40372 DB |
1919 | |
1920 | // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding || | |
1921 | // s->current_picture.f.reference /* || h->contains_intra */ || 1; | |
1922 | ||
1923 | /* We mark the current picture as non-reference after allocating it, so | |
1924 | * that if we break out due to an error it can be released automatically | |
1925 | * in the next ff_MPV_frame_start(). | |
f08fefc4 AK |
1926 | */ |
1927 | h->cur_pic_ptr->reference = 0; | |
357282c6 | 1928 | |
2c541554 | 1929 | h->cur_pic_ptr->field_poc[0] = h->cur_pic_ptr->field_poc[1] = INT_MAX; |
6a9c8594 AS |
1930 | |
1931 | h->next_output_pic = NULL; | |
1932 | ||
2c541554 | 1933 | assert(h->cur_pic_ptr->long_ref == 0); |
357282c6 | 1934 | |
af8aa846 | 1935 | return 0; |
0da71265 MN |
1936 | } |
1937 | ||
6a9c8594 | 1938 | /** |
e5d40372 DB |
1939 | * Run setup operations that must be run after slice header decoding. |
1940 | * This includes finding the next displayed frame. | |
1941 | * | |
1942 | * @param h h264 master context | |
1943 | * @param setup_finished enough NALs have been read that we can call | |
1944 | * ff_thread_finish_setup() | |
1945 | */ | |
1946 | static void decode_postinit(H264Context *h, int setup_finished) | |
1947 | { | |
2c541554 AK |
1948 | Picture *out = h->cur_pic_ptr; |
1949 | Picture *cur = h->cur_pic_ptr; | |
6a9c8594 | 1950 | int i, pics, out_of_order, out_idx; |
adedd840 | 1951 | int invalid = 0, cnt = 0; |
6a9c8594 | 1952 | |
16c22122 | 1953 | h->cur_pic_ptr->f.pict_type = h->pict_type; |
6a9c8594 | 1954 | |
e5d40372 DB |
1955 | if (h->next_output_pic) |
1956 | return; | |
6a9c8594 | 1957 | |
e5d40372 DB |
1958 | if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) { |
1959 | /* FIXME: if we have two PAFF fields in one packet, we can't start | |
1960 | * the next thread here. If we have one field per packet, we can. | |
1961 | * The check in decode_nal_units() is not good enough to find this | |
1962 | * yet, so we assume the worst for now. */ | |
1963 | // if (setup_finished) | |
2c541554 | 1964 | // ff_thread_finish_setup(h->avctx); |
6a9c8594 AS |
1965 | return; |
1966 | } | |
1967 | ||
657ccb5a DB |
1968 | cur->f.interlaced_frame = 0; |
1969 | cur->f.repeat_pict = 0; | |
6a9c8594 AS |
1970 | |
1971 | /* Signal interlacing information externally. */ | |
e5d40372 DB |
1972 | /* Prioritize picture timing SEI information over used |
1973 | * decoding process if it exists. */ | |
6a9c8594 | 1974 | |
e5d40372 DB |
1975 | if (h->sps.pic_struct_present_flag) { |
1976 | switch (h->sei_pic_struct) { | |
6a9c8594 AS |
1977 | case SEI_PIC_STRUCT_FRAME: |
1978 | break; | |
1979 | case SEI_PIC_STRUCT_TOP_FIELD: | |
1980 | case SEI_PIC_STRUCT_BOTTOM_FIELD: | |
657ccb5a | 1981 | cur->f.interlaced_frame = 1; |
6a9c8594 AS |
1982 | break; |
1983 | case SEI_PIC_STRUCT_TOP_BOTTOM: | |
1984 | case SEI_PIC_STRUCT_BOTTOM_TOP: | |
a6931d8e | 1985 | if (FIELD_OR_MBAFF_PICTURE(h)) |
657ccb5a | 1986 | cur->f.interlaced_frame = 1; |
6a9c8594 AS |
1987 | else |
1988 | // try to flag soft telecine progressive | |
657ccb5a | 1989 | cur->f.interlaced_frame = h->prev_interlaced_frame; |
6a9c8594 AS |
1990 | break; |
1991 | case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: | |
1992 | case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: | |
e5d40372 DB |
1993 | /* Signal the possibility of telecined film externally |
1994 | * (pic_struct 5,6). From these hints, let the applications | |
1995 | * decide if they apply deinterlacing. */ | |
657ccb5a | 1996 | cur->f.repeat_pict = 1; |
6a9c8594 AS |
1997 | break; |
1998 | case SEI_PIC_STRUCT_FRAME_DOUBLING: | |
657ccb5a | 1999 | cur->f.repeat_pict = 2; |
6a9c8594 AS |
2000 | break; |
2001 | case SEI_PIC_STRUCT_FRAME_TRIPLING: | |
657ccb5a | 2002 | cur->f.repeat_pict = 4; |
6a9c8594 AS |
2003 | break; |
2004 | } | |
2005 | ||
e5d40372 DB |
2006 | if ((h->sei_ct_type & 3) && |
2007 | h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) | |
657ccb5a | 2008 | cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0; |
e5d40372 | 2009 | } else { |
6a9c8594 | 2010 | /* Derive interlacing flag from used decoding process. */ |
a6931d8e | 2011 | cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE(h); |
6a9c8594 | 2012 | } |
657ccb5a | 2013 | h->prev_interlaced_frame = cur->f.interlaced_frame; |
6a9c8594 | 2014 | |
e5d40372 | 2015 | if (cur->field_poc[0] != cur->field_poc[1]) { |
6a9c8594 | 2016 | /* Derive top_field_first from field pocs. */ |
657ccb5a | 2017 | cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1]; |
e5d40372 | 2018 | } else { |
657ccb5a | 2019 | if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) { |
e5d40372 DB |
2020 | /* Use picture timing SEI information. Even if it is a |
2021 | * information of a past frame, better than nothing. */ | |
2022 | if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM || | |
2023 | h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) | |
657ccb5a | 2024 | cur->f.top_field_first = 1; |
6a9c8594 | 2025 | else |
657ccb5a | 2026 | cur->f.top_field_first = 0; |
e5d40372 | 2027 | } else { |
6a9c8594 | 2028 | /* Most likely progressive */ |
657ccb5a | 2029 | cur->f.top_field_first = 0; |
6a9c8594 AS |
2030 | } |
2031 | } | |
2032 | ||
5b10ef72 VG |
2033 | if (h->sei_frame_packing_present && |
2034 | h->frame_packing_arrangement_type >= 0 && | |
2035 | h->frame_packing_arrangement_type <= 6 && | |
2036 | h->content_interpretation_type > 0 && | |
2037 | h->content_interpretation_type < 3) { | |
2038 | AVStereo3D *stereo = av_stereo3d_create_side_data(&cur->f); | |
2039 | if (!stereo) | |
2040 | return; | |
2041 | ||
2042 | switch (h->frame_packing_arrangement_type) { | |
2043 | case 0: | |
2044 | stereo->type = AV_STEREO3D_CHECKERBOARD; | |
2045 | break; | |
2046 | case 1: | |
2047 | stereo->type = AV_STEREO3D_LINES; | |
2048 | break; | |
2049 | case 2: | |
2050 | stereo->type = AV_STEREO3D_COLUMNS; | |
2051 | break; | |
2052 | case 3: | |
2053 | if (h->quincunx_subsampling) | |
2054 | stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX; | |
2055 | else | |
2056 | stereo->type = AV_STEREO3D_SIDEBYSIDE; | |
2057 | break; | |
2058 | case 4: | |
2059 | stereo->type = AV_STEREO3D_TOPBOTTOM; | |
2060 | break; | |
2061 | case 5: | |
2062 | stereo->type = AV_STEREO3D_FRAMESEQUENCE; | |
2063 | break; | |
2064 | case 6: | |
2065 | stereo->type = AV_STEREO3D_2D; | |
2066 | break; | |
2067 | } | |
2068 | ||
2069 | if (h->content_interpretation_type == 2) | |
2070 | stereo->flags = AV_STEREO3D_FLAG_INVERT; | |
2071 | } | |
2072 | ||
e5d40372 | 2073 | // FIXME do something with unavailable reference frames |
6a9c8594 AS |
2074 | |
2075 | /* Sort B-frames into display order */ | |
2076 | ||
e5d40372 | 2077 | if (h->sps.bitstream_restriction_flag && |
2c541554 AK |
2078 | h->avctx->has_b_frames < h->sps.num_reorder_frames) { |
2079 | h->avctx->has_b_frames = h->sps.num_reorder_frames; | |
2080 | h->low_delay = 0; | |
6a9c8594 AS |
2081 | } |
2082 | ||
2c541554 | 2083 | if (h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT && |
e5d40372 | 2084 | !h->sps.bitstream_restriction_flag) { |
2c541554 AK |
2085 | h->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1; |
2086 | h->low_delay = 0; | |
6a9c8594 AS |
2087 | } |
2088 | ||
2089 | pics = 0; | |
e5d40372 DB |
2090 | while (h->delayed_pic[pics]) |
2091 | pics++; | |
6a9c8594 AS |
2092 | |
2093 | assert(pics <= MAX_DELAYED_PIC_COUNT); | |
2094 | ||
2095 | h->delayed_pic[pics++] = cur; | |
759001c5 AK |
2096 | if (cur->reference == 0) |
2097 | cur->reference = DELAYED_PIC_REF; | |
6a9c8594 | 2098 | |
adedd840 RB |
2099 | /* Frame reordering. This code takes pictures from coding order and sorts |
2100 | * them by their incremental POC value into display order. It supports POC | |
2101 | * gaps, MMCO reset codes and random resets. | |
2102 | * A "display group" can start either with a IDR frame (f.key_frame = 1), | |
2103 | * and/or can be closed down with a MMCO reset code. In sequences where | |
2104 | * there is no delay, we can't detect that (since the frame was already | |
2105 | * output to the user), so we also set h->mmco_reset to detect the MMCO | |
2106 | * reset code. | |
2c541554 | 2107 | * FIXME: if we detect insufficient delays (as per h->avctx->has_b_frames), |
adedd840 RB |
2108 | * we increase the delay between input and output. All frames affected by |
2109 | * the lag (e.g. those that should have been output before another frame | |
2110 | * that we already returned to the user) will be dropped. This is a bug | |
2111 | * that we will fix later. */ | |
2112 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) { | |
2113 | cnt += out->poc < h->last_pocs[i]; | |
2114 | invalid += out->poc == INT_MIN; | |
2115 | } | |
e5d40372 DB |
2116 | if (!h->mmco_reset && !cur->f.key_frame && |
2117 | cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) { | |
adedd840 RB |
2118 | h->mmco_reset = 2; |
2119 | if (pics > 1) | |
2120 | h->delayed_pic[pics - 2]->mmco_reset = 2; | |
2121 | } | |
2122 | if (h->mmco_reset || cur->f.key_frame) { | |
2123 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) | |
2124 | h->last_pocs[i] = INT_MIN; | |
2125 | cnt = 0; | |
2126 | invalid = MAX_DELAYED_PIC_COUNT; | |
2127 | } | |
e5d40372 | 2128 | out = h->delayed_pic[0]; |
6a9c8594 | 2129 | out_idx = 0; |
e5d40372 DB |
2130 | for (i = 1; i < MAX_DELAYED_PIC_COUNT && |
2131 | h->delayed_pic[i] && | |
2132 | !h->delayed_pic[i - 1]->mmco_reset && | |
2133 | !h->delayed_pic[i]->f.key_frame; | |
2134 | i++) | |
2135 | if (h->delayed_pic[i]->poc < out->poc) { | |
2136 | out = h->delayed_pic[i]; | |
6a9c8594 AS |
2137 | out_idx = i; |
2138 | } | |
2c541554 | 2139 | if (h->avctx->has_b_frames == 0 && |
e5d40372 | 2140 | (h->delayed_pic[0]->f.key_frame || h->mmco_reset)) |
adedd840 | 2141 | h->next_outputed_poc = INT_MIN; |
e5d40372 DB |
2142 | out_of_order = !out->f.key_frame && !h->mmco_reset && |
2143 | (out->poc < h->next_outputed_poc); | |
6a9c8594 | 2144 | |
e5d40372 | 2145 | if (h->sps.bitstream_restriction_flag && |
2c541554 AK |
2146 | h->avctx->has_b_frames >= h->sps.num_reorder_frames) { |
2147 | } else if (out_of_order && pics - 1 == h->avctx->has_b_frames && | |
2148 | h->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) { | |
ea2bb12e | 2149 | if (invalid + cnt < MAX_DELAYED_PIC_COUNT) { |
2c541554 | 2150 | h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, cnt); |
ea2bb12e | 2151 | } |
2c541554 AK |
2152 | h->low_delay = 0; |
2153 | } else if (h->low_delay && | |
e5d40372 DB |
2154 | ((h->next_outputed_poc != INT_MIN && |
2155 | out->poc > h->next_outputed_poc + 2) || | |
ea2bb12e | 2156 | cur->f.pict_type == AV_PICTURE_TYPE_B)) { |
2c541554 AK |
2157 | h->low_delay = 0; |
2158 | h->avctx->has_b_frames++; | |
6a9c8594 AS |
2159 | } |
2160 | ||
2c541554 | 2161 | if (pics > h->avctx->has_b_frames) { |
759001c5 | 2162 | out->reference &= ~DELAYED_PIC_REF; |
e5d40372 DB |
2163 | // for frame threading, the owner must be the second field's thread or |
2164 | // else the first thread can release the picture and reuse it unsafely | |
e5d40372 DB |
2165 | for (i = out_idx; h->delayed_pic[i]; i++) |
2166 | h->delayed_pic[i] = h->delayed_pic[i + 1]; | |
2167 | } | |
2168 | memmove(h->last_pocs, &h->last_pocs[1], | |
2169 | sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1)); | |
adedd840 | 2170 | h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc; |
2c541554 | 2171 | if (!out_of_order && pics > h->avctx->has_b_frames) { |
6a9c8594 | 2172 | h->next_output_pic = out; |
adedd840 RB |
2173 | if (out->mmco_reset) { |
2174 | if (out_idx > 0) { | |
e5d40372 | 2175 | h->next_outputed_poc = out->poc; |
adedd840 RB |
2176 | h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset; |
2177 | } else { | |
2178 | h->next_outputed_poc = INT_MIN; | |
2179 | } | |
2180 | } else { | |
0b4c3232 RB |
2181 | if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) { |
2182 | h->next_outputed_poc = INT_MIN; | |
2183 | } else { | |
2184 | h->next_outputed_poc = out->poc; | |
2185 | } | |
adedd840 RB |
2186 | } |
2187 | h->mmco_reset = 0; | |
e5d40372 | 2188 | } else { |
2c541554 | 2189 | av_log(h->avctx, AV_LOG_DEBUG, "no picture\n"); |
6a9c8594 AS |
2190 | } |
2191 | ||
28096e0a JS |
2192 | if (h->next_output_pic) { |
2193 | if (h->next_output_pic->recovered) { | |
2194 | // We have reached an recovery point and all frames after it in | |
2195 | // display order are "recovered". | |
2196 | h->frame_recovered |= FRAME_RECOVERED_SEI; | |
2197 | } | |
2198 | h->next_output_pic->recovered |= !!(h->frame_recovered & FRAME_RECOVERED_SEI); | |
2199 | } | |
2200 | ||
05fa79b8 | 2201 | if (setup_finished && !h->avctx->hwaccel) |
2c541554 | 2202 | ff_thread_finish_setup(h->avctx); |
6a9c8594 AS |
2203 | } |
2204 | ||
76741b0e BC |
2205 | static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, |
2206 | uint8_t *src_cb, uint8_t *src_cr, | |
e5d40372 DB |
2207 | int linesize, int uvlinesize, |
2208 | int simple) | |
76741b0e | 2209 | { |
0b69d625 | 2210 | uint8_t *top_border; |
5f7f9719 | 2211 | int top_idx = 1; |
6e3ef511 | 2212 | const int pixel_shift = h->pixel_shift; |
23e85be5 | 2213 | int chroma444 = CHROMA444(h); |
e962bd08 | 2214 | int chroma422 = CHROMA422(h); |
115329f1 | 2215 | |
e5d40372 | 2216 | src_y -= linesize; |
53c05b1e MN |
2217 | src_cb -= uvlinesize; |
2218 | src_cr -= uvlinesize; | |
2219 | ||
7bece9b2 | 2220 | if (!simple && FRAME_MBAFF(h)) { |
2c541554 | 2221 | if (h->mb_y & 1) { |
82313eaa | 2222 | if (!MB_MBAFF(h)) { |
2c541554 | 2223 | top_border = h->top_borders[0][h->mb_x]; |
e5d40372 | 2224 | AV_COPY128(top_border, src_y + 15 * linesize); |
6e3ef511 | 2225 | if (pixel_shift) |
e5d40372 | 2226 | AV_COPY128(top_border + 16, src_y + 15 * linesize + 16); |
2c541554 | 2227 | if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { |
e5d40372 DB |
2228 | if (chroma444) { |
2229 | if (pixel_shift) { | |
2230 | AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize); | |
2231 | AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16); | |
2232 | AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize); | |
2233 | AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16); | |
c90b9442 | 2234 | } else { |
e5d40372 DB |
2235 | AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize); |
2236 | AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize); | |
c90b9442 | 2237 | } |
e5d40372 | 2238 | } else if (chroma422) { |
76741b0e | 2239 | if (pixel_shift) { |
e5d40372 DB |
2240 | AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize); |
2241 | AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize); | |
76741b0e | 2242 | } else { |
e5d40372 DB |
2243 | AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize); |
2244 | AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize); | |
76741b0e | 2245 | } |
6e3ef511 | 2246 | } else { |
c90b9442 | 2247 | if (pixel_shift) { |
e5d40372 DB |
2248 | AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize); |
2249 | AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize); | |
c90b9442 | 2250 | } else { |
e5d40372 DB |
2251 | AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); |
2252 | AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); | |
c90b9442 | 2253 | } |
6e3ef511 | 2254 | } |
5f7f9719 MN |
2255 | } |
2256 | } | |
82313eaa | 2257 | } else if (MB_MBAFF(h)) { |
c988f975 | 2258 | top_idx = 0; |
e5d40372 | 2259 | } else |
c988f975 | 2260 | return; |
5f7f9719 MN |
2261 | } |
2262 | ||
2c541554 | 2263 | top_border = h->top_borders[top_idx][h->mb_x]; |
09f21198 | 2264 | /* There are two lines saved, the line above the top macroblock |
e5d40372 DB |
2265 | * of a pair, and the line above the bottom macroblock. */ |
2266 | AV_COPY128(top_border, src_y + 16 * linesize); | |
6e3ef511 | 2267 | if (pixel_shift) |
e5d40372 DB |
2268 | AV_COPY128(top_border + 16, src_y + 16 * linesize + 16); |
2269 | ||
2c541554 | 2270 | if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { |
e5d40372 DB |
2271 | if (chroma444) { |
2272 | if (pixel_shift) { | |
2273 | AV_COPY128(top_border + 32, src_cb + 16 * linesize); | |
2274 | AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16); | |
2275 | AV_COPY128(top_border + 64, src_cr + 16 * linesize); | |
2276 | AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16); | |
c90b9442 | 2277 | } else { |
e5d40372 DB |
2278 | AV_COPY128(top_border + 16, src_cb + 16 * linesize); |
2279 | AV_COPY128(top_border + 32, src_cr + 16 * linesize); | |
c90b9442 | 2280 | } |
e5d40372 | 2281 | } else if (chroma422) { |
76741b0e | 2282 | if (pixel_shift) { |
e5d40372 DB |
2283 | AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize); |
2284 | AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize); | |
76741b0e | 2285 | } else { |
e5d40372 DB |
2286 | AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize); |
2287 | AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize); | |
76741b0e | 2288 | } |
6e3ef511 | 2289 | } else { |
c90b9442 | 2290 | if (pixel_shift) { |
e5d40372 DB |
2291 | AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize); |
2292 | AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize); | |
c90b9442 | 2293 | } else { |
e5d40372 DB |
2294 | AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize); |
2295 | AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize); | |
c90b9442 | 2296 | } |
6e3ef511 | 2297 | } |
53c05b1e MN |
2298 | } |
2299 | } | |
2300 | ||
bbdd52ed | 2301 | static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y, |
e5d40372 DB |
2302 | uint8_t *src_cb, uint8_t *src_cr, |
2303 | int linesize, int uvlinesize, | |
2304 | int xchg, int chroma444, | |
2305 | int simple, int pixel_shift) | |
2306 | { | |
4e987f82 | 2307 | int deblock_topleft; |
b69378e2 | 2308 | int deblock_top; |
5f7f9719 | 2309 | int top_idx = 1; |
1e4f1c56 AS |
2310 | uint8_t *top_border_m1; |
2311 | uint8_t *top_border; | |
5f7f9719 | 2312 | |
7bece9b2 | 2313 | if (!simple && FRAME_MBAFF(h)) { |
2c541554 | 2314 | if (h->mb_y & 1) { |
82313eaa | 2315 | if (!MB_MBAFF(h)) |
c988f975 | 2316 | return; |
e5d40372 | 2317 | } else { |
82313eaa | 2318 | top_idx = MB_MBAFF(h) ? 0 : 1; |
5f7f9719 | 2319 | } |
5f7f9719 | 2320 | } |
b69378e2 | 2321 | |
e5d40372 | 2322 | if (h->deblocking_filter == 2) { |
2c541554 | 2323 | deblock_topleft = h->slice_table[h->mb_xy - 1 - h->mb_stride] == h->slice_num; |
4e987f82 | 2324 | deblock_top = h->top_type; |
b69378e2 | 2325 | } else { |
2c541554 | 2326 | deblock_topleft = (h->mb_x > 0); |
da6be8fc | 2327 | deblock_top = (h->mb_y > !!MB_FIELD(h)); |
b69378e2 | 2328 | } |
53c05b1e | 2329 | |
e5d40372 | 2330 | src_y -= linesize + 1 + pixel_shift; |
6e3ef511 OA |
2331 | src_cb -= uvlinesize + 1 + pixel_shift; |
2332 | src_cr -= uvlinesize + 1 + pixel_shift; | |
53c05b1e | 2333 | |
2c541554 AK |
2334 | top_border_m1 = h->top_borders[top_idx][h->mb_x - 1]; |
2335 | top_border = h->top_borders[top_idx][h->mb_x]; | |
1e4f1c56 | 2336 | |
e5d40372 DB |
2337 | #define XCHG(a, b, xchg) \ |
2338 | if (pixel_shift) { \ | |
2339 | if (xchg) { \ | |
2340 | AV_SWAP64(b + 0, a + 0); \ | |
2341 | AV_SWAP64(b + 8, a + 8); \ | |
2342 | } else { \ | |
2343 | AV_COPY128(b, a); \ | |
2344 | } \ | |
2345 | } else if (xchg) \ | |
2346 | AV_SWAP64(b, a); \ | |
2347 | else \ | |
2348 | AV_COPY64(b, a); | |
2349 | ||
2350 | if (deblock_top) { | |
2351 | if (deblock_topleft) { | |
2352 | XCHG(top_border_m1 + (8 << pixel_shift), | |
2353 | src_y - (7 << pixel_shift), 1); | |
c988f975 | 2354 | } |
6e3ef511 OA |
2355 | XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); |
2356 | XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); | |
2c541554 AK |
2357 | if (h->mb_x + 1 < h->mb_width) { |
2358 | XCHG(h->top_borders[top_idx][h->mb_x + 1], | |
e5d40372 | 2359 | src_y + (17 << pixel_shift), 1); |
43efd19a | 2360 | } |
53c05b1e | 2361 | } |
2c541554 | 2362 | if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { |
e5d40372 DB |
2363 | if (chroma444) { |
2364 | if (deblock_topleft) { | |
c90b9442 JGG |
2365 | XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
2366 | XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); | |
2367 | } | |
2368 | XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); | |
2369 | XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); | |
2370 | XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); | |
2371 | XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); | |
2c541554 AK |
2372 | if (h->mb_x + 1 < h->mb_width) { |
2373 | XCHG(h->top_borders[top_idx][h->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); | |
2374 | XCHG(h->top_borders[top_idx][h->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); | |
c90b9442 JGG |
2375 | } |
2376 | } else { | |
e5d40372 DB |
2377 | if (deblock_top) { |
2378 | if (deblock_topleft) { | |
c90b9442 JGG |
2379 | XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
2380 | XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); | |
2381 | } | |
e5d40372 DB |
2382 | XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1); |
2383 | XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1); | |
c988f975 | 2384 | } |
53c05b1e | 2385 | } |
53c05b1e MN |
2386 | } |
2387 | } | |
2388 | ||
88bd7fdc | 2389 | static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, |
e5d40372 DB |
2390 | int index) |
2391 | { | |
6e3ef511 | 2392 | if (high_bit_depth) { |
e5d40372 | 2393 | return AV_RN32A(((int32_t *)mb) + index); |
6e3ef511 OA |
2394 | } else |
2395 | return AV_RN16A(mb + index); | |
2396 | } | |
2397 | ||
88bd7fdc | 2398 | static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, |
e5d40372 DB |
2399 | int index, int value) |
2400 | { | |
6e3ef511 | 2401 | if (high_bit_depth) { |
e5d40372 | 2402 | AV_WN32A(((int32_t *)mb) + index, value); |
6e3ef511 OA |
2403 | } else |
2404 | AV_WN16A(mb + index, value); | |
2405 | } | |
2406 | ||
e5d40372 DB |
2407 | static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, |
2408 | int mb_type, int is_h264, | |
2409 | int simple, | |
2410 | int transform_bypass, | |
2411 | int pixel_shift, | |
2412 | int *block_offset, | |
2413 | int linesize, | |
2414 | uint8_t *dest_y, int p) | |
c90b9442 | 2415 | { |
88bd7fdc DB |
2416 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); |
2417 | void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride); | |
c90b9442 | 2418 | int i; |
2c541554 | 2419 | int qscale = p == 0 ? h->qscale : h->chroma_qp[p - 1]; |
e5d40372 DB |
2420 | block_offset += 16 * p; |
2421 | if (IS_INTRA4x4(mb_type)) { | |
2491f9ee AK |
2422 | if (IS_8x8DCT(mb_type)) { |
2423 | if (transform_bypass) { | |
16c22122 DB |
2424 | idct_dc_add = |
2425 | idct_add = h->h264dsp.h264_add_pixels8_clear; | |
2491f9ee AK |
2426 | } else { |
2427 | idct_dc_add = h->h264dsp.h264_idct8_dc_add; | |
2428 | idct_add = h->h264dsp.h264_idct8_add; | |
2429 | } | |
2430 | for (i = 0; i < 16; i += 4) { | |
2431 | uint8_t *const ptr = dest_y + block_offset[i]; | |
2432 | const int dir = h->intra4x4_pred_mode_cache[scan8[i]]; | |
2433 | if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) { | |
2434 | h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
e5d40372 | 2435 | } else { |
2491f9ee AK |
2436 | const int nnz = h->non_zero_count_cache[scan8[i + p * 16]]; |
2437 | h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000, | |
2438 | (h->topright_samples_available << i) & 0x4000, linesize); | |
2439 | if (nnz) { | |
2440 | if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) | |
2441 | idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
2442 | else | |
2443 | idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
c90b9442 JGG |
2444 | } |
2445 | } | |
2491f9ee AK |
2446 | } |
2447 | } else { | |
2448 | if (transform_bypass) { | |
2449 | idct_dc_add = | |
62844c3f | 2450 | idct_add = h->h264dsp.h264_add_pixels4_clear; |
e5d40372 | 2451 | } else { |
2491f9ee AK |
2452 | idct_dc_add = h->h264dsp.h264_idct_dc_add; |
2453 | idct_add = h->h264dsp.h264_idct_add; | |
2454 | } | |
2455 | for (i = 0; i < 16; i++) { | |
2456 | uint8_t *const ptr = dest_y + block_offset[i]; | |
2457 | const int dir = h->intra4x4_pred_mode_cache[scan8[i]]; | |
c90b9442 | 2458 | |
2491f9ee AK |
2459 | if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) { |
2460 | h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
2461 | } else { | |
2462 | uint8_t *topright; | |
2463 | int nnz, tr; | |
2464 | uint64_t tr_high; | |
2465 | if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) { | |
2466 | const int topright_avail = (h->topright_samples_available << i) & 0x8000; | |
2c541554 | 2467 | assert(h->mb_y || linesize <= block_offset[i]); |
2491f9ee AK |
2468 | if (!topright_avail) { |
2469 | if (pixel_shift) { | |
2470 | tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL; | |
2471 | topright = (uint8_t *)&tr_high; | |
2472 | } else { | |
2473 | tr = ptr[3 - linesize] * 0x01010101u; | |
2474 | topright = (uint8_t *)&tr; | |
2475 | } | |
e5d40372 | 2476 | } else |
2491f9ee AK |
2477 | topright = ptr + (4 << pixel_shift) - linesize; |
2478 | } else | |
2479 | topright = NULL; | |
2480 | ||
2481 | h->hpc.pred4x4[dir](ptr, topright, linesize); | |
2482 | nnz = h->non_zero_count_cache[scan8[i + p * 16]]; | |
2483 | if (nnz) { | |
2484 | if (is_h264) { | |
2485 | if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) | |
2486 | idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
2487 | else | |
2488 | idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
2489 | } else if (CONFIG_SVQ3_DECODER) | |
2490 | ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0); | |
c90b9442 JGG |
2491 | } |
2492 | } | |
2493 | } | |
2494 | } | |
e5d40372 DB |
2495 | } else { |
2496 | h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize); | |
2497 | if (is_h264) { | |
2498 | if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) { | |
2499 | if (!transform_bypass) | |
2500 | h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift), | |
2501 | h->mb_luma_dc[p], | |
2502 | h->dequant4_coeff[p][qscale][0]); | |
2503 | else { | |
2504 | static const uint8_t dc_mapping[16] = { | |
2505 | 0 * 16, 1 * 16, 4 * 16, 5 * 16, | |
2506 | 2 * 16, 3 * 16, 6 * 16, 7 * 16, | |
2507 | 8 * 16, 9 * 16, 12 * 16, 13 * 16, | |
16c22122 DB |
2508 | 10 * 16, 11 * 16, 14 * 16, 15 * 16 |
2509 | }; | |
e5d40372 DB |
2510 | for (i = 0; i < 16; i++) |
2511 | dctcoef_set(h->mb + (p * 256 << pixel_shift), | |
2512 | pixel_shift, dc_mapping[i], | |
2513 | dctcoef_get(h->mb_luma_dc[p], | |
2514 | pixel_shift, i)); | |
c90b9442 JGG |
2515 | } |
2516 | } | |
301fb921 | 2517 | } else if (CONFIG_SVQ3_DECODER) |
e5d40372 DB |
2518 | ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256, |
2519 | h->mb_luma_dc[p], qscale); | |
c90b9442 JGG |
2520 | } |
2521 | } | |
2522 | ||
e5d40372 DB |
2523 | static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, |
2524 | int is_h264, int simple, | |
2525 | int transform_bypass, | |
2526 | int pixel_shift, | |
2527 | int *block_offset, | |
2528 | int linesize, | |
2529 | uint8_t *dest_y, int p) | |
c90b9442 | 2530 | { |
88bd7fdc | 2531 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); |
c90b9442 | 2532 | int i; |
e5d40372 DB |
2533 | block_offset += 16 * p; |
2534 | if (!IS_INTRA4x4(mb_type)) { | |
2535 | if (is_h264) { | |
2536 | if (IS_INTRA16x16(mb_type)) { | |
2537 | if (transform_bypass) { | |
2538 | if (h->sps.profile_idc == 244 && | |
2539 | (h->intra16x16_pred_mode == VERT_PRED8x8 || | |
2540 | h->intra16x16_pred_mode == HOR_PRED8x8)) { | |
2541 | h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, | |
2542 | h->mb + (p * 256 << pixel_shift), | |
2543 | linesize); | |
2544 | } else { | |
2545 | for (i = 0; i < 16; i++) | |
2546 | if (h->non_zero_count_cache[scan8[i + p * 16]] || | |
2547 | dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) | |
62844c3f RB |
2548 | h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i], |
2549 | h->mb + (i * 16 + p * 256 << pixel_shift), | |
2550 | linesize); | |
c90b9442 | 2551 | } |
e5d40372 DB |
2552 | } else { |
2553 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, | |
2554 | h->mb + (p * 256 << pixel_shift), | |
2555 | linesize, | |
2556 | h->non_zero_count_cache + p * 5 * 8); | |
c90b9442 | 2557 | } |
e5d40372 DB |
2558 | } else if (h->cbp & 15) { |
2559 | if (transform_bypass) { | |
c90b9442 | 2560 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
62844c3f RB |
2561 | idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear |
2562 | : h->h264dsp.h264_add_pixels4_clear; | |
e5d40372 DB |
2563 | for (i = 0; i < 16; i += di) |
2564 | if (h->non_zero_count_cache[scan8[i + p * 16]]) | |
2565 | idct_add(dest_y + block_offset[i], | |
2566 | h->mb + (i * 16 + p * 256 << pixel_shift), | |
2567 | linesize); | |
2568 | } else { | |
2569 | if (IS_8x8DCT(mb_type)) | |
2570 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, | |
2571 | h->mb + (p * 256 << pixel_shift), | |
2572 | linesize, | |
2573 | h->non_zero_count_cache + p * 5 * 8); | |
2574 | else | |
2575 | h->h264dsp.h264_idct_add16(dest_y, block_offset, | |
2576 | h->mb + (p * 256 << pixel_shift), | |
2577 | linesize, | |
2578 | h->non_zero_count_cache + p * 5 * 8); | |
c90b9442 JGG |
2579 | } |
2580 | } | |
301fb921 | 2581 | } else if (CONFIG_SVQ3_DECODER) { |
e5d40372 DB |
2582 | for (i = 0; i < 16; i++) |
2583 | if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) { | |
2584 | // FIXME benchmark weird rule, & below | |
2585 | uint8_t *const ptr = dest_y + block_offset[i]; | |
2586 | ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, | |
2c541554 | 2587 | h->qscale, IS_INTRA(mb_type) ? 1 : 0); |
c90b9442 | 2588 | } |
c90b9442 JGG |
2589 | } |
2590 | } | |
2591 | } | |
2592 | ||
28fff0d9 MR |
2593 | #define BITS 8 |
2594 | #define SIMPLE 1 | |
2595 | #include "h264_mb_template.c" | |
e5d40372 | 2596 | |
28fff0d9 MR |
2597 | #undef BITS |
2598 | #define BITS 16 | |
2599 | #include "h264_mb_template.c" | |
bd91fee3 | 2600 | |
28fff0d9 MR |
2601 | #undef SIMPLE |
2602 | #define SIMPLE 0 | |
2603 | #include "h264_mb_template.c" | |
c90b9442 | 2604 | |
e5d40372 DB |
2605 | void ff_h264_hl_decode_mb(H264Context *h) |
2606 | { | |
e5d40372 | 2607 | const int mb_xy = h->mb_xy; |
759001c5 | 2608 | const int mb_type = h->cur_pic.mb_type[mb_xy]; |
16c22122 DB |
2609 | int is_complex = CONFIG_SMALL || h->is_complex || |
2610 | IS_INTRA_PCM(mb_type) || h->qscale == 0; | |
bd91fee3 | 2611 | |
23e85be5 | 2612 | if (CHROMA444(h)) { |
e5d40372 | 2613 | if (is_complex || h->pixel_shift) |
c90b9442 JGG |
2614 | hl_decode_mb_444_complex(h); |
2615 | else | |
28fff0d9 | 2616 | hl_decode_mb_444_simple_8(h); |
c90b9442 | 2617 | } else if (is_complex) { |
bd91fee3 | 2618 | hl_decode_mb_complex(h); |
6e3ef511 OA |
2619 | } else if (h->pixel_shift) { |
2620 | hl_decode_mb_simple_16(h); | |
2621 | } else | |
2622 | hl_decode_mb_simple_8(h); | |
bd91fee3 AS |
2623 | } |
2624 | ||
4baba6c8 | 2625 | int ff_pred_weight_table(H264Context *h) |
e5d40372 | 2626 | { |
0da71265 | 2627 | int list, i; |
9f2d1b4f | 2628 | int luma_def, chroma_def; |
115329f1 | 2629 | |
e5d40372 DB |
2630 | h->use_weight = 0; |
2631 | h->use_weight_chroma = 0; | |
2c541554 | 2632 | h->luma_log2_weight_denom = get_ue_golomb(&h->gb); |
e5d40372 | 2633 | if (h->sps.chroma_format_idc) |
2c541554 | 2634 | h->chroma_log2_weight_denom = get_ue_golomb(&h->gb); |
e5d40372 DB |
2635 | luma_def = 1 << h->luma_log2_weight_denom; |
2636 | chroma_def = 1 << h->chroma_log2_weight_denom; | |
0da71265 | 2637 | |
e5d40372 | 2638 | for (list = 0; list < 2; list++) { |
cb99c652 GB |
2639 | h->luma_weight_flag[list] = 0; |
2640 | h->chroma_weight_flag[list] = 0; | |
e5d40372 | 2641 | for (i = 0; i < h->ref_count[list]; i++) { |
0da71265 | 2642 | int luma_weight_flag, chroma_weight_flag; |
115329f1 | 2643 | |
2c541554 | 2644 | luma_weight_flag = get_bits1(&h->gb); |
e5d40372 | 2645 | if (luma_weight_flag) { |
2c541554 AK |
2646 | h->luma_weight[i][list][0] = get_se_golomb(&h->gb); |
2647 | h->luma_weight[i][list][1] = get_se_golomb(&h->gb); | |
e5d40372 DB |
2648 | if (h->luma_weight[i][list][0] != luma_def || |
2649 | h->luma_weight[i][list][1] != 0) { | |
2650 | h->use_weight = 1; | |
2651 | h->luma_weight_flag[list] = 1; | |
cb99c652 | 2652 | } |
e5d40372 DB |
2653 | } else { |
2654 | h->luma_weight[i][list][0] = luma_def; | |
2655 | h->luma_weight[i][list][1] = 0; | |
0da71265 MN |
2656 | } |
2657 | ||
e5d40372 | 2658 | if (h->sps.chroma_format_idc) { |
2c541554 | 2659 | chroma_weight_flag = get_bits1(&h->gb); |
e5d40372 | 2660 | if (chroma_weight_flag) { |
fef744d4 | 2661 | int j; |
e5d40372 | 2662 | for (j = 0; j < 2; j++) { |
2c541554 AK |
2663 | h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb); |
2664 | h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb); | |
e5d40372 DB |
2665 | if (h->chroma_weight[i][list][j][0] != chroma_def || |
2666 | h->chroma_weight[i][list][j][1] != 0) { | |
16c22122 | 2667 | h->use_weight_chroma = 1; |
e5d40372 | 2668 | h->chroma_weight_flag[list] = 1; |
cb99c652 | 2669 | } |
fef744d4 | 2670 | } |
e5d40372 | 2671 | } else { |
fef744d4 | 2672 | int j; |
e5d40372 DB |
2673 | for (j = 0; j < 2; j++) { |
2674 | h->chroma_weight[i][list][j][0] = chroma_def; | |
2675 | h->chroma_weight[i][list][j][1] = 0; | |
fef744d4 | 2676 | } |
0da71265 MN |
2677 | } |
2678 | } | |
2679 | } | |
e5d40372 DB |
2680 | if (h->slice_type_nos != AV_PICTURE_TYPE_B) |
2681 | break; | |
0da71265 | 2682 | } |
e5d40372 | 2683 | h->use_weight = h->use_weight || h->use_weight_chroma; |
0da71265 MN |
2684 | return 0; |
2685 | } | |
2686 | ||
1052b76f MN |
2687 | /** |
2688 | * Initialize implicit_weight table. | |
6da88bd3 | 2689 | * @param field 0/1 initialize the weight for interlaced MBAFF |
1052b76f MN |
2690 | * -1 initializes the rest |
2691 | */ | |
e5d40372 DB |
2692 | static void implicit_weight_table(H264Context *h, int field) |
2693 | { | |
1052b76f | 2694 | int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; |
9f2d1b4f | 2695 | |
ce09f927 GB |
2696 | for (i = 0; i < 2; i++) { |
2697 | h->luma_weight_flag[i] = 0; | |
2698 | h->chroma_weight_flag[i] = 0; | |
2699 | } | |
2700 | ||
e5d40372 | 2701 | if (field < 0) { |
2c541554 AK |
2702 | if (h->picture_structure == PICT_FRAME) { |
2703 | cur_poc = h->cur_pic_ptr->poc; | |
4418aa9c | 2704 | } else { |
2c541554 | 2705 | cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1]; |
4418aa9c | 2706 | } |
7bece9b2 | 2707 | if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) && |
e5d40372 | 2708 | h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) { |
16c22122 | 2709 | h->use_weight = 0; |
e5d40372 DB |
2710 | h->use_weight_chroma = 0; |
2711 | return; | |
2712 | } | |
2713 | ref_start = 0; | |
2714 | ref_count0 = h->ref_count[0]; | |
2715 | ref_count1 = h->ref_count[1]; | |
2716 | } else { | |
2c541554 | 2717 | cur_poc = h->cur_pic_ptr->field_poc[field]; |
e5d40372 DB |
2718 | ref_start = 16; |
2719 | ref_count0 = 16 + 2 * h->ref_count[0]; | |
2720 | ref_count1 = 16 + 2 * h->ref_count[1]; | |
1052b76f | 2721 | } |
9f2d1b4f | 2722 | |
e5d40372 DB |
2723 | h->use_weight = 2; |
2724 | h->use_weight_chroma = 2; | |
2725 | h->luma_log2_weight_denom = 5; | |
2726 | h->chroma_log2_weight_denom = 5; | |
9f2d1b4f | 2727 | |
e5d40372 | 2728 | for (ref0 = ref_start; ref0 < ref_count0; ref0++) { |
9f2d1b4f | 2729 | int poc0 = h->ref_list[0][ref0].poc; |
e5d40372 | 2730 | for (ref1 = ref_start; ref1 < ref_count1; ref1++) { |
87cf70eb JD |
2731 | int w = 32; |
2732 | if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) { | |
2733 | int poc1 = h->ref_list[1][ref1].poc; | |
e5d40372 DB |
2734 | int td = av_clip(poc1 - poc0, -128, 127); |
2735 | if (td) { | |
87cf70eb JD |
2736 | int tb = av_clip(cur_poc - poc0, -128, 127); |
2737 | int tx = (16384 + (FFABS(td) >> 1)) / td; | |
e5d40372 DB |
2738 | int dist_scale_factor = (tb * tx + 32) >> 8; |
2739 | if (dist_scale_factor >= -64 && dist_scale_factor <= 128) | |
87cf70eb JD |
2740 | w = 64 - dist_scale_factor; |
2741 | } | |
1052b76f | 2742 | } |
e5d40372 DB |
2743 | if (field < 0) { |
2744 | h->implicit_weight[ref0][ref1][0] = | |
2745 | h->implicit_weight[ref0][ref1][1] = w; | |
2746 | } else { | |
2747 | h->implicit_weight[ref0][ref1][field] = w; | |
72f86ec0 | 2748 | } |
9f2d1b4f LM |
2749 | } |
2750 | } | |
2751 | } | |
2752 | ||
8fd57a66 | 2753 | /** |
5175b937 | 2754 | * instantaneous decoder refresh. |
0da71265 | 2755 | */ |
e5d40372 DB |
2756 | static void idr(H264Context *h) |
2757 | { | |
ea6f00c4 | 2758 | ff_h264_remove_all_refs(h); |
e5d40372 DB |
2759 | h->prev_frame_num = 0; |
2760 | h->prev_frame_num_offset = 0; | |
2761 | h->prev_poc_msb = | |
2762 | h->prev_poc_lsb = 0; | |
0da71265 MN |
2763 | } |
2764 | ||
7c33ad19 | 2765 | /* forget old pics after a seek */ |
9e696d2e | 2766 | static void flush_change(H264Context *h) |
e5d40372 | 2767 | { |
7c33ad19 | 2768 | int i; |
adedd840 RB |
2769 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
2770 | h->last_pocs[i] = INT_MIN; | |
16c22122 | 2771 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
b19d493f | 2772 | h->prev_interlaced_frame = 1; |
7c33ad19 | 2773 | idr(h); |
2c541554 | 2774 | if (h->cur_pic_ptr) |
759001c5 | 2775 | h->cur_pic_ptr->reference = 0; |
2c541554 | 2776 | h->first_field = 0; |
9e696d2e JG |
2777 | memset(h->ref_list[0], 0, sizeof(h->ref_list[0])); |
2778 | memset(h->ref_list[1], 0, sizeof(h->ref_list[1])); | |
2779 | memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0])); | |
2780 | memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1])); | |
9c095463 | 2781 | ff_h264_reset_sei(h); |
28096e0a JS |
2782 | h->recovery_frame = -1; |
2783 | h->frame_recovered = 0; | |
9e696d2e JG |
2784 | } |
2785 | ||
2786 | /* forget old pics after a seek */ | |
2787 | static void flush_dpb(AVCodecContext *avctx) | |
2788 | { | |
2789 | H264Context *h = avctx->priv_data; | |
2790 | int i; | |
2791 | ||
2792 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) { | |
2793 | if (h->delayed_pic[i]) | |
759001c5 | 2794 | h->delayed_pic[i]->reference = 0; |
9e696d2e JG |
2795 | h->delayed_pic[i] = NULL; |
2796 | } | |
2797 | ||
2798 | flush_change(h); | |
2c541554 | 2799 | |
555000c7 AK |
2800 | if (h->DPB) |
2801 | for (i = 0; i < MAX_PICTURE_COUNT; i++) | |
2802 | unref_picture(h, &h->DPB[i]); | |
2c541554 | 2803 | h->cur_pic_ptr = NULL; |
759001c5 | 2804 | unref_picture(h, &h->cur_pic); |
2c541554 AK |
2805 | |
2806 | h->mb_x = h->mb_y = 0; | |
2807 | ||
2808 | h->parse_context.state = -1; | |
2809 | h->parse_context.frame_start_found = 0; | |
2810 | h->parse_context.overread = 0; | |
2811 | h->parse_context.overread_index = 0; | |
2812 | h->parse_context.index = 0; | |
2813 | h->parse_context.last_index = 0; | |
9eda9d33 AK |
2814 | |
2815 | free_tables(h, 1); | |
2816 | h->context_initialized = 0; | |
7c33ad19 LM |
2817 | } |
2818 | ||
3f1a7ceb | 2819 | int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc) |
e5d40372 | 2820 | { |
e5d40372 | 2821 | const int max_frame_num = 1 << h->sps.log2_max_frame_num; |
0da71265 MN |
2822 | int field_poc[2]; |
2823 | ||
e5d40372 DB |
2824 | h->frame_num_offset = h->prev_frame_num_offset; |
2825 | if (h->frame_num < h->prev_frame_num) | |
b78a6baa | 2826 | h->frame_num_offset += max_frame_num; |
0da71265 | 2827 | |
e5d40372 DB |
2828 | if (h->sps.poc_type == 0) { |
2829 | const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb; | |
0da71265 | 2830 | |
16c22122 DB |
2831 | if (h->poc_lsb < h->prev_poc_lsb && |
2832 | h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2) | |
0da71265 | 2833 | h->poc_msb = h->prev_poc_msb + max_poc_lsb; |
16c22122 DB |
2834 | else if (h->poc_lsb > h->prev_poc_lsb && |
2835 | h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2) | |
0da71265 MN |
2836 | h->poc_msb = h->prev_poc_msb - max_poc_lsb; |
2837 | else | |
2838 | h->poc_msb = h->prev_poc_msb; | |
115329f1 | 2839 | field_poc[0] = |
0da71265 | 2840 | field_poc[1] = h->poc_msb + h->poc_lsb; |
2c541554 | 2841 | if (h->picture_structure == PICT_FRAME) |
0da71265 | 2842 | field_poc[1] += h->delta_poc_bottom; |
e5d40372 | 2843 | } else if (h->sps.poc_type == 1) { |
0da71265 MN |
2844 | int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; |
2845 | int i; | |
2846 | ||
e5d40372 | 2847 | if (h->sps.poc_cycle_length != 0) |
0da71265 MN |
2848 | abs_frame_num = h->frame_num_offset + h->frame_num; |
2849 | else | |
2850 | abs_frame_num = 0; | |
2851 | ||
e5d40372 | 2852 | if (h->nal_ref_idc == 0 && abs_frame_num > 0) |
0da71265 | 2853 | abs_frame_num--; |
115329f1 | 2854 | |
0da71265 | 2855 | expected_delta_per_poc_cycle = 0; |
e5d40372 DB |
2856 | for (i = 0; i < h->sps.poc_cycle_length; i++) |
2857 | // FIXME integrate during sps parse | |
2858 | expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i]; | |
0da71265 | 2859 | |
e5d40372 | 2860 | if (abs_frame_num > 0) { |
0da71265 MN |
2861 | int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; |
2862 | int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; | |
2863 | ||
2864 | expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; | |
e5d40372 DB |
2865 | for (i = 0; i <= frame_num_in_poc_cycle; i++) |
2866 | expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i]; | |
0da71265 MN |
2867 | } else |
2868 | expectedpoc = 0; | |
2869 | ||
e5d40372 | 2870 | if (h->nal_ref_idc == 0) |
0da71265 | 2871 | expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; |
115329f1 | 2872 | |
0da71265 MN |
2873 | field_poc[0] = expectedpoc + h->delta_poc[0]; |
2874 | field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; | |
2875 | ||
2c541554 | 2876 | if (h->picture_structure == PICT_FRAME) |
0da71265 | 2877 | field_poc[1] += h->delta_poc[1]; |
e5d40372 DB |
2878 | } else { |
2879 | int poc = 2 * (h->frame_num_offset + h->frame_num); | |
5710b371 | 2880 | |
e5d40372 | 2881 | if (!h->nal_ref_idc) |
b78a6baa | 2882 | poc--; |
5710b371 | 2883 | |
e5d40372 DB |
2884 | field_poc[0] = poc; |
2885 | field_poc[1] = poc; | |
0da71265 | 2886 | } |
115329f1 | 2887 | |
2c541554 | 2888 | if (h->picture_structure != PICT_BOTTOM_FIELD) |
3f1a7ceb | 2889 | pic_field_poc[0] = field_poc[0]; |
2c541554 | 2890 | if (h->picture_structure != PICT_TOP_FIELD) |
3f1a7ceb | 2891 | pic_field_poc[1] = field_poc[1]; |
a8b19271 | 2892 | *pic_poc = FFMIN(pic_field_poc[0], pic_field_poc[1]); |
0da71265 MN |
2893 | |
2894 | return 0; | |
2895 | } | |
2896 | ||
b41c1db3 AÖ |
2897 | /** |
2898 | * initialize scan tables | |
2899 | */ | |
e5d40372 DB |
2900 | static void init_scan_tables(H264Context *h) |
2901 | { | |
b41c1db3 | 2902 | int i; |
e5d40372 DB |
2903 | for (i = 0; i < 16; i++) { |
2904 | #define T(x) (x >> 2) | ((x << 2) & 0xF) | |
ca32f7f2 | 2905 | h->zigzag_scan[i] = T(zigzag_scan[i]); |
e5d40372 | 2906 | h->field_scan[i] = T(field_scan[i]); |
b41c1db3 | 2907 | #undef T |
b41c1db3 | 2908 | } |
e5d40372 DB |
2909 | for (i = 0; i < 64; i++) { |
2910 | #define T(x) (x >> 3) | ((x & 7) << 3) | |
ca32f7f2 JGG |
2911 | h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); |
2912 | h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); | |
2913 | h->field_scan8x8[i] = T(field_scan8x8[i]); | |
2914 | h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); | |
b41c1db3 | 2915 | #undef T |
b41c1db3 | 2916 | } |
e5d40372 | 2917 | if (h->sps.transform_bypass) { // FIXME same ugly |
b41c1db3 | 2918 | h->zigzag_scan_q0 = zigzag_scan; |
45beb850 | 2919 | h->zigzag_scan8x8_q0 = ff_zigzag_direct; |
b41c1db3 AÖ |
2920 | h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; |
2921 | h->field_scan_q0 = field_scan; | |
2922 | h->field_scan8x8_q0 = field_scan8x8; | |
2923 | h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; | |
e5d40372 | 2924 | } else { |
b41c1db3 AÖ |
2925 | h->zigzag_scan_q0 = h->zigzag_scan; |
2926 | h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; | |
2927 | h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; | |
2928 | h->field_scan_q0 = h->field_scan; | |
2929 | h->field_scan8x8_q0 = h->field_scan8x8; | |
2930 | h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; | |
2931 | } | |
2932 | } | |
afebe2f7 | 2933 | |
e5d40372 DB |
2934 | static int field_end(H264Context *h, int in_setup) |
2935 | { | |
2c541554 | 2936 | AVCodecContext *const avctx = h->avctx; |
12fe7594 | 2937 | int err = 0; |
2c541554 | 2938 | h->mb_y = 0; |
256299d3 | 2939 | |
2c541554 | 2940 | if (!in_setup && !h->droppable) |
759001c5 | 2941 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, |
2c541554 | 2942 | h->picture_structure == PICT_BOTTOM_FIELD); |
256299d3 | 2943 | |
e5d40372 | 2944 | if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) { |
2c541554 | 2945 | if (!h->droppable) { |
12fe7594 | 2946 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
e5d40372 DB |
2947 | h->prev_poc_msb = h->poc_msb; |
2948 | h->prev_poc_lsb = h->poc_lsb; | |
6a9c8594 | 2949 | } |
e5d40372 DB |
2950 | h->prev_frame_num_offset = h->frame_num_offset; |
2951 | h->prev_frame_num = h->frame_num; | |
2952 | h->outputed_poc = h->next_outputed_poc; | |
256299d3 | 2953 | } |
256299d3 MN |
2954 | |
2955 | if (avctx->hwaccel) { | |
2956 | if (avctx->hwaccel->end_frame(avctx) < 0) | |
e5d40372 DB |
2957 | av_log(avctx, AV_LOG_ERROR, |
2958 | "hardware accelerator failed to decode picture\n"); | |
256299d3 MN |
2959 | } |
2960 | ||
256299d3 MN |
2961 | /* |
2962 | * FIXME: Error handling code does not seem to support interlaced | |
2963 | * when slices span multiple rows | |
2964 | * The ff_er_add_slice calls don't work right for bottom | |
2965 | * fields; they cause massive erroneous error concealing | |
2966 | * Error marking covers both fields (top and bottom). | |
2967 | * This causes a mismatched s->error_count | |
2968 | * and a bad error table. Further, the error count goes to | |
2969 | * INT_MAX when called for bottom field, because mb_y is | |
2970 | * past end by one (callers fault) and resync_mb_y != 0 | |
2971 | * causes problems for the first MB line, too. | |
2972 | */ | |
0b499c9b | 2973 | if (CONFIG_ERROR_RESILIENCE && !FIELD_PICTURE(h)) { |
2c541554 AK |
2974 | h->er.cur_pic = h->cur_pic_ptr; |
2975 | h->er.last_pic = h->ref_count[0] ? &h->ref_list[0][0] : NULL; | |
2976 | h->er.next_pic = h->ref_count[1] ? &h->ref_list[1][0] : NULL; | |
2977 | ff_er_frame_end(&h->er); | |
2978 | } | |
2c541554 | 2979 | emms_c(); |
d225a1e2 | 2980 | |
e5d40372 | 2981 | h->current_slice = 0; |
12fe7594 DB |
2982 | |
2983 | return err; | |
256299d3 MN |
2984 | } |
2985 | ||
afebe2f7 | 2986 | /** |
49bd8e4b | 2987 | * Replicate H264 "master" context to thread contexts. |
afebe2f7 | 2988 | */ |
f1d8763a | 2989 | static int clone_slice(H264Context *dst, H264Context *src) |
afebe2f7 | 2990 | { |
e5d40372 | 2991 | memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); |
2c541554 AK |
2992 | dst->cur_pic_ptr = src->cur_pic_ptr; |
2993 | dst->cur_pic = src->cur_pic; | |
2994 | dst->linesize = src->linesize; | |
2995 | dst->uvlinesize = src->uvlinesize; | |
2996 | dst->first_field = src->first_field; | |
f1d8763a | 2997 | |
e5d40372 DB |
2998 | dst->prev_poc_msb = src->prev_poc_msb; |
2999 | dst->prev_poc_lsb = src->prev_poc_lsb; | |
3000 | dst->prev_frame_num_offset = src->prev_frame_num_offset; | |
3001 | dst->prev_frame_num = src->prev_frame_num; | |
3002 | dst->short_ref_count = src->short_ref_count; | |
afebe2f7 AÖ |
3003 | |
3004 | memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); | |
3005 | memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); | |
3006 | memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); | |
50c21814 AÖ |
3007 | |
3008 | memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); | |
3009 | memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); | |
f1d8763a JG |
3010 | |
3011 | return 0; | |
afebe2f7 AÖ |
3012 | } |
3013 | ||
0da71265 | 3014 | /** |
58c42af7 | 3015 | * Compute profile from profile_idc and constraint_set?_flags. |
fe9a3fbe JG |
3016 | * |
3017 | * @param sps SPS | |
3018 | * | |
3019 | * @return profile as defined by FF_PROFILE_H264_* | |
3020 | */ | |
3021 | int ff_h264_get_profile(SPS *sps) | |
3022 | { | |
3023 | int profile = sps->profile_idc; | |
3024 | ||
e5d40372 | 3025 | switch (sps->profile_idc) { |
fe9a3fbe JG |
3026 | case FF_PROFILE_H264_BASELINE: |
3027 | // constraint_set1_flag set to 1 | |
e5d40372 | 3028 | profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0; |
fe9a3fbe JG |
3029 | break; |
3030 | case FF_PROFILE_H264_HIGH_10: | |
3031 | case FF_PROFILE_H264_HIGH_422: | |
3032 | case FF_PROFILE_H264_HIGH_444_PREDICTIVE: | |
3033 | // constraint_set3_flag set to 1 | |
e5d40372 | 3034 | profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0; |
fe9a3fbe JG |
3035 | break; |
3036 | } | |
3037 | ||
3038 | return profile; | |
3039 | } | |
3040 | ||
072be3e8 JG |
3041 | static int h264_set_parameter_from_sps(H264Context *h) |
3042 | { | |
2c541554 | 3043 | if (h->flags & CODEC_FLAG_LOW_DELAY || |
072be3e8 JG |
3044 | (h->sps.bitstream_restriction_flag && |
3045 | !h->sps.num_reorder_frames)) { | |
2c541554 AK |
3046 | if (h->avctx->has_b_frames > 1 || h->delayed_pic[0]) |
3047 | av_log(h->avctx, AV_LOG_WARNING, "Delayed frames seen. " | |
072be3e8 JG |
3048 | "Reenabling low delay requires a codec flush.\n"); |
3049 | else | |
2c541554 | 3050 | h->low_delay = 1; |
072be3e8 JG |
3051 | } |
3052 | ||
2c541554 AK |
3053 | if (h->avctx->has_b_frames < 2) |
3054 | h->avctx->has_b_frames = !h->low_delay; | |
072be3e8 | 3055 | |
2c541554 | 3056 | if (h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma || |
072be3e8 | 3057 | h->cur_chroma_format_idc != h->sps.chroma_format_idc) { |
072be3e8 | 3058 | if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { |
2c541554 | 3059 | h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; |
072be3e8 JG |
3060 | h->cur_chroma_format_idc = h->sps.chroma_format_idc; |
3061 | h->pixel_shift = h->sps.bit_depth_luma > 8; | |
3062 | ||
3063 | ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, | |
3064 | h->sps.chroma_format_idc); | |
79dad2a9 | 3065 | ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); |
e9d81735 | 3066 | ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma); |
2c541554 | 3067 | ff_h264_pred_init(&h->hpc, h->avctx->codec_id, h->sps.bit_depth_luma, |
072be3e8 | 3068 | h->sps.chroma_format_idc); |
85deb51a RB |
3069 | if (CONFIG_ERROR_RESILIENCE) |
3070 | ff_dsputil_init(&h->dsp, h->avctx); | |
2c541554 | 3071 | ff_videodsp_init(&h->vdsp, h->sps.bit_depth_luma); |
072be3e8 | 3072 | } else { |
2c541554 | 3073 | av_log(h->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", |
072be3e8 JG |
3074 | h->sps.bit_depth_luma); |
3075 | return AVERROR_INVALIDDATA; | |
3076 | } | |
3077 | } | |
3078 | return 0; | |
3079 | } | |
3080 | ||
542b83fc | 3081 | static enum AVPixelFormat get_pixel_format(H264Context *h) |
9e696d2e | 3082 | { |
9e696d2e JG |
3083 | switch (h->sps.bit_depth_luma) { |
3084 | case 9: | |
23e85be5 | 3085 | if (CHROMA444(h)) { |
2c541554 | 3086 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
9e696d2e JG |
3087 | return AV_PIX_FMT_GBRP9; |
3088 | } else | |
3089 | return AV_PIX_FMT_YUV444P9; | |
e962bd08 | 3090 | } else if (CHROMA422(h)) |
9e696d2e JG |
3091 | return AV_PIX_FMT_YUV422P9; |
3092 | else | |
3093 | return AV_PIX_FMT_YUV420P9; | |
3094 | break; | |
3095 | case 10: | |
23e85be5 | 3096 | if (CHROMA444(h)) { |
2c541554 | 3097 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
9e696d2e JG |
3098 | return AV_PIX_FMT_GBRP10; |
3099 | } else | |
3100 | return AV_PIX_FMT_YUV444P10; | |
e962bd08 | 3101 | } else if (CHROMA422(h)) |
9e696d2e JG |
3102 | return AV_PIX_FMT_YUV422P10; |
3103 | else | |
3104 | return AV_PIX_FMT_YUV420P10; | |
3105 | break; | |
3106 | case 8: | |
23e85be5 | 3107 | if (CHROMA444(h)) { |
2c541554 | 3108 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
9e696d2e JG |
3109 | return AV_PIX_FMT_GBRP; |
3110 | } else | |
2c541554 | 3111 | return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P |
9e696d2e | 3112 | : AV_PIX_FMT_YUV444P; |
e962bd08 | 3113 | } else if (CHROMA422(h)) { |
2c541554 | 3114 | return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P |
9e696d2e JG |
3115 | : AV_PIX_FMT_YUV422P; |
3116 | } else { | |
2c541554 AK |
3117 | return h->avctx->get_format(h->avctx, h->avctx->codec->pix_fmts ? |
3118 | h->avctx->codec->pix_fmts : | |
3119 | h->avctx->color_range == AVCOL_RANGE_JPEG ? | |
d65522e8 | 3120 | h264_hwaccel_pixfmt_list_jpeg_420 : |
8d061989 | 3121 | h264_hwaccel_pixfmt_list_420); |
9e696d2e JG |
3122 | } |
3123 | break; | |
3124 | default: | |
2c541554 | 3125 | av_log(h->avctx, AV_LOG_ERROR, |
9e696d2e JG |
3126 | "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); |
3127 | return AVERROR_INVALIDDATA; | |
3128 | } | |
3129 | } | |
3130 | ||
5e83d9ac AK |
3131 | /* export coded and cropped frame dimensions to AVCodecContext */ |
3132 | static int init_dimensions(H264Context *h) | |
3133 | { | |
3134 | int width = h->width - (h->sps.crop_right + h->sps.crop_left); | |
3135 | int height = h->height - (h->sps.crop_top + h->sps.crop_bottom); | |
3136 | ||
3137 | /* handle container cropping */ | |
3138 | if (!h->sps.crop && | |
3139 | FFALIGN(h->avctx->width, 16) == h->width && | |
3140 | FFALIGN(h->avctx->height, 16) == h->height) { | |
3141 | width = h->avctx->width; | |
3142 | height = h->avctx->height; | |
3143 | } | |
3144 | ||
3145 | if (width <= 0 || height <= 0) { | |
3146 | av_log(h->avctx, AV_LOG_ERROR, "Invalid cropped dimensions: %dx%d.\n", | |
3147 | width, height); | |
3148 | if (h->avctx->err_recognition & AV_EF_EXPLODE) | |
3149 | return AVERROR_INVALIDDATA; | |
3150 | ||
3151 | av_log(h->avctx, AV_LOG_WARNING, "Ignoring cropping information.\n"); | |
3152 | h->sps.crop_bottom = h->sps.crop_top = h->sps.crop_right = h->sps.crop_left = 0; | |
16c22122 | 3153 | h->sps.crop = 0; |
5e83d9ac AK |
3154 | |
3155 | width = h->width; | |
3156 | height = h->height; | |
3157 | } | |