2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "libavutil/avassert.h"
29 #include "libavutil/imgutils.h"
30 #include "libavutil/stereo3d.h"
33 #include "cabac_functions.h"
35 #include "error_resilience.h"
37 #include "mpegvideo.h"
40 #include "h264chroma.h"
41 #include "h264_mvpred.h"
44 #include "rectangle.h"
50 const uint16_t ff_h264_mb_sizes
[4] = { 256, 384, 512, 768 };
52 static const uint8_t rem6
[QP_MAX_NUM
+ 1] = {
53 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
54 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
55 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
58 static const uint8_t div6
[QP_MAX_NUM
+ 1] = {
59 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
60 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
61 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
64 static const uint8_t field_scan
[16] = {
65 0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
66 0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
67 2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
68 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
71 static const uint8_t field_scan8x8
[64] = {
72 0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8,
73 1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8,
74 2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8,
75 0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8,
76 2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8,
77 2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8,
78 2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8,
79 3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8,
80 3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8,
81 4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8,
82 4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8,
83 5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8,
84 5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8,
85 7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8,
86 6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8,
87 7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
90 static const uint8_t field_scan8x8_cavlc
[64] = {
91 0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8,
92 2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8,
93 3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8,
94 5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8,
95 0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8,
96 1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8,
97 3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8,
98 5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8,
99 0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8,
100 1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8,
101 3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8,
102 5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8,
103 1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8,
104 1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8,
105 3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8,
106 6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8,
109 // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
110 static const uint8_t zigzag_scan8x8_cavlc
[64] = {
111 0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
112 4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
113 3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
114 2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8,
115 1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8,
116 3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8,
117 2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8,
118 3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8,
119 0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8,
120 2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8,
121 1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8,
122 4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8,
123 0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8,
124 1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
125 0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
126 5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
129 static const uint8_t dequant4_coeff_init
[6][3] = {
138 static const uint8_t dequant8_coeff_init_scan
[16] = {
139 0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1
142 static const uint8_t dequant8_coeff_init
[6][6] = {
143 { 20, 18, 32, 19, 25, 24 },
144 { 22, 19, 35, 21, 28, 26 },
145 { 26, 23, 42, 24, 33, 31 },
146 { 28, 25, 45, 26, 35, 33 },
147 { 32, 28, 51, 30, 40, 38 },
148 { 36, 32, 58, 34, 46, 43 },
151 static const enum AVPixelFormat h264_hwaccel_pixfmt_list_420
[] = {
152 #if CONFIG_H264_DXVA2_HWACCEL
153 AV_PIX_FMT_DXVA2_VLD
,
155 #if CONFIG_H264_VAAPI_HWACCEL
156 AV_PIX_FMT_VAAPI_VLD
,
158 #if CONFIG_H264_VDA_HWACCEL
161 #if CONFIG_H264_VDPAU_HWACCEL
168 static const enum AVPixelFormat h264_hwaccel_pixfmt_list_jpeg_420
[] = {
169 #if CONFIG_H264_DXVA2_HWACCEL
170 AV_PIX_FMT_DXVA2_VLD
,
172 #if CONFIG_H264_VAAPI_HWACCEL
173 AV_PIX_FMT_VAAPI_VLD
,
175 #if CONFIG_H264_VDA_HWACCEL
178 #if CONFIG_H264_VDPAU_HWACCEL
185 static void h264_er_decode_mb(void *opaque
, int ref
, int mv_dir
, int mv_type
,
187 int mb_x
, int mb_y
, int mb_intra
, int mb_skipped
)
189 H264Context
*h
= opaque
;
193 h
->mb_xy
= mb_x
+ mb_y
* h
->mb_stride
;
194 memset(h
->non_zero_count_cache
, 0, sizeof(h
->non_zero_count_cache
));
196 /* FIXME: It is possible albeit uncommon that slice references
197 * differ between slices. We take the easy approach and ignore
198 * it for now. If this turns out to have any relevance in
199 * practice then correct remapping should be added. */
200 if (ref
>= h
->ref_count
[0])
202 fill_rectangle(&h
->cur_pic
.ref_index
[0][4 * h
->mb_xy
],
204 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref
, 1);
205 fill_rectangle(h
->mv_cache
[0][scan8
[0]], 4, 4, 8,
206 pack16to32((*mv
)[0][0][0], (*mv
)[0][0][1]), 4);
207 assert(!FRAME_MBAFF(h
));
208 ff_h264_hl_decode_mb(h
);
211 void ff_h264_draw_horiz_band(H264Context
*h
, int y
, int height
)
213 AVCodecContext
*avctx
= h
->avctx
;
214 Picture
*cur
= &h
->cur_pic
;
215 Picture
*last
= h
->ref_list
[0][0].f
.data
[0] ?
&h
->ref_list
[0][0] : NULL
;
216 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(avctx
->pix_fmt
);
217 int vshift
= desc
->log2_chroma_h
;
218 const int field_pic
= h
->picture_structure
!= PICT_FRAME
;
224 height
= FFMIN(height
, avctx
->height
- y
);
226 if (field_pic
&& h
->first_field
&& !(avctx
->slice_flags
& SLICE_FLAG_ALLOW_FIELD
))
229 if (avctx
->draw_horiz_band
) {
231 int offset
[AV_NUM_DATA_POINTERS
];
234 if (cur
->f
.pict_type
== AV_PICTURE_TYPE_B
|| h
->low_delay
||
235 (avctx
->slice_flags
& SLICE_FLAG_CODED_ORDER
))
242 offset
[0] = y
* src
->linesize
[0];
244 offset
[2] = (y
>> vshift
) * src
->linesize
[1];
245 for (i
= 3; i
< AV_NUM_DATA_POINTERS
; i
++)
250 avctx
->draw_horiz_band(avctx
, src
, offset
,
251 y
, h
->picture_structure
, height
);
255 static void unref_picture(H264Context
*h
, Picture
*pic
)
257 int off
= offsetof(Picture
, tf
) + sizeof(pic
->tf
);
263 ff_thread_release_buffer(h
->avctx
, &pic
->tf
);
264 av_buffer_unref(&pic
->hwaccel_priv_buf
);
266 av_buffer_unref(&pic
->qscale_table_buf
);
267 av_buffer_unref(&pic
->mb_type_buf
);
268 for (i
= 0; i
< 2; i
++) {
269 av_buffer_unref(&pic
->motion_val_buf
[i
]);
270 av_buffer_unref(&pic
->ref_index_buf
[i
]);
273 memset((uint8_t*)pic
+ off
, 0, sizeof(*pic
) - off
);
276 static void release_unused_pictures(H264Context
*h
, int remove_current
)
280 /* release non reference frames */
281 for (i
= 0; i
< MAX_PICTURE_COUNT
; i
++) {
282 if (h
->DPB
[i
].f
.buf
[0] && !h
->DPB
[i
].reference
&&
283 (remove_current
|| &h
->DPB
[i
] != h
->cur_pic_ptr
)) {
284 unref_picture(h
, &h
->DPB
[i
]);
289 static int ref_picture(H264Context
*h
, Picture
*dst
, Picture
*src
)
293 av_assert0(!dst
->f
.buf
[0]);
294 av_assert0(src
->f
.buf
[0]);
298 ret
= ff_thread_ref_frame(&dst
->tf
, &src
->tf
);
302 dst
->qscale_table_buf
= av_buffer_ref(src
->qscale_table_buf
);
303 dst
->mb_type_buf
= av_buffer_ref(src
->mb_type_buf
);
304 if (!dst
->qscale_table_buf
|| !dst
->mb_type_buf
)
306 dst
->qscale_table
= src
->qscale_table
;
307 dst
->mb_type
= src
->mb_type
;
309 for (i
= 0; i
< 2; i
++) {
310 dst
->motion_val_buf
[i
] = av_buffer_ref(src
->motion_val_buf
[i
]);
311 dst
->ref_index_buf
[i
] = av_buffer_ref(src
->ref_index_buf
[i
]);
312 if (!dst
->motion_val_buf
[i
] || !dst
->ref_index_buf
[i
])
314 dst
->motion_val
[i
] = src
->motion_val
[i
];
315 dst
->ref_index
[i
] = src
->ref_index
[i
];
318 if (src
->hwaccel_picture_private
) {
319 dst
->hwaccel_priv_buf
= av_buffer_ref(src
->hwaccel_priv_buf
);
320 if (!dst
->hwaccel_priv_buf
)
322 dst
->hwaccel_picture_private
= dst
->hwaccel_priv_buf
->data
;
325 for (i
= 0; i
< 2; i
++)
326 dst
->field_poc
[i
] = src
->field_poc
[i
];
328 memcpy(dst
->ref_poc
, src
->ref_poc
, sizeof(src
->ref_poc
));
329 memcpy(dst
->ref_count
, src
->ref_count
, sizeof(src
->ref_count
));
332 dst
->frame_num
= src
->frame_num
;
333 dst
->mmco_reset
= src
->mmco_reset
;
334 dst
->pic_id
= src
->pic_id
;
335 dst
->long_ref
= src
->long_ref
;
336 dst
->mbaff
= src
->mbaff
;
337 dst
->field_picture
= src
->field_picture
;
338 dst
->needs_realloc
= src
->needs_realloc
;
339 dst
->reference
= src
->reference
;
340 dst
->recovered
= src
->recovered
;
344 unref_picture(h
, dst
);
348 static int alloc_scratch_buffers(H264Context
*h
, int linesize
)
350 int alloc_size
= FFALIGN(FFABS(linesize
) + 32, 32);
352 if (h
->bipred_scratchpad
)
355 h
->bipred_scratchpad
= av_malloc(16 * 6 * alloc_size
);
356 // edge emu needs blocksize + filter length - 1
357 // (= 21x21 for h264)
358 h
->edge_emu_buffer
= av_mallocz(alloc_size
* 2 * 21);
359 h
->me
.scratchpad
= av_mallocz(alloc_size
* 2 * 16 * 2);
361 if (!h
->bipred_scratchpad
|| !h
->edge_emu_buffer
|| !h
->me
.scratchpad
) {
362 av_freep(&h
->bipred_scratchpad
);
363 av_freep(&h
->edge_emu_buffer
);
364 av_freep(&h
->me
.scratchpad
);
365 return AVERROR(ENOMEM
);
368 h
->me
.temp
= h
->me
.scratchpad
;
373 static int init_table_pools(H264Context
*h
)
375 const int big_mb_num
= h
->mb_stride
* (h
->mb_height
+ 1) + 1;
376 const int mb_array_size
= h
->mb_stride
* h
->mb_height
;
377 const int b4_stride
= h
->mb_width
* 4 + 1;
378 const int b4_array_size
= b4_stride
* h
->mb_height
* 4;
380 h
->qscale_table_pool
= av_buffer_pool_init(big_mb_num
+ h
->mb_stride
,
382 h
->mb_type_pool
= av_buffer_pool_init((big_mb_num
+ h
->mb_stride
) *
383 sizeof(uint32_t), av_buffer_allocz
);
384 h
->motion_val_pool
= av_buffer_pool_init(2 * (b4_array_size
+ 4) *
385 sizeof(int16_t), av_buffer_allocz
);
386 h
->ref_index_pool
= av_buffer_pool_init(4 * mb_array_size
, av_buffer_allocz
);
388 if (!h
->qscale_table_pool
|| !h
->mb_type_pool
|| !h
->motion_val_pool
||
389 !h
->ref_index_pool
) {
390 av_buffer_pool_uninit(&h
->qscale_table_pool
);
391 av_buffer_pool_uninit(&h
->mb_type_pool
);
392 av_buffer_pool_uninit(&h
->motion_val_pool
);
393 av_buffer_pool_uninit(&h
->ref_index_pool
);
394 return AVERROR(ENOMEM
);
400 static int alloc_picture(H264Context
*h
, Picture
*pic
)
404 av_assert0(!pic
->f
.data
[0]);
407 ret
= ff_thread_get_buffer(h
->avctx
, &pic
->tf
, pic
->reference ?
408 AV_GET_BUFFER_FLAG_REF
: 0);
412 h
->linesize
= pic
->f
.linesize
[0];
413 h
->uvlinesize
= pic
->f
.linesize
[1];
415 if (h
->avctx
->hwaccel
) {
416 const AVHWAccel
*hwaccel
= h
->avctx
->hwaccel
;
417 av_assert0(!pic
->hwaccel_picture_private
);
418 if (hwaccel
->priv_data_size
) {
419 pic
->hwaccel_priv_buf
= av_buffer_allocz(hwaccel
->priv_data_size
);
420 if (!pic
->hwaccel_priv_buf
)
421 return AVERROR(ENOMEM
);
422 pic
->hwaccel_picture_private
= pic
->hwaccel_priv_buf
->data
;
426 if (!h
->qscale_table_pool
) {
427 ret
= init_table_pools(h
);
432 pic
->qscale_table_buf
= av_buffer_pool_get(h
->qscale_table_pool
);
433 pic
->mb_type_buf
= av_buffer_pool_get(h
->mb_type_pool
);
434 if (!pic
->qscale_table_buf
|| !pic
->mb_type_buf
)
437 pic
->mb_type
= (uint32_t*)pic
->mb_type_buf
->data
+ 2 * h
->mb_stride
+ 1;
438 pic
->qscale_table
= pic
->qscale_table_buf
->data
+ 2 * h
->mb_stride
+ 1;
440 for (i
= 0; i
< 2; i
++) {
441 pic
->motion_val_buf
[i
] = av_buffer_pool_get(h
->motion_val_pool
);
442 pic
->ref_index_buf
[i
] = av_buffer_pool_get(h
->ref_index_pool
);
443 if (!pic
->motion_val_buf
[i
] || !pic
->ref_index_buf
[i
])
446 pic
->motion_val
[i
] = (int16_t (*)[2])pic
->motion_val_buf
[i
]->data
+ 4;
447 pic
->ref_index
[i
] = pic
->ref_index_buf
[i
]->data
;
452 unref_picture(h
, pic
);
453 return (ret
< 0) ? ret
: AVERROR(ENOMEM
);
456 static inline int pic_is_unused(H264Context
*h
, Picture
*pic
)
460 if (pic
->needs_realloc
&& !(pic
->reference
& DELAYED_PIC_REF
))
465 static int find_unused_picture(H264Context
*h
)
469 for (i
= 0; i
< MAX_PICTURE_COUNT
; i
++) {
470 if (pic_is_unused(h
, &h
->DPB
[i
]))
473 if (i
== MAX_PICTURE_COUNT
)
474 return AVERROR_INVALIDDATA
;
476 if (h
->DPB
[i
].needs_realloc
) {
477 h
->DPB
[i
].needs_realloc
= 0;
478 unref_picture(h
, &h
->DPB
[i
]);
485 * Check if the top & left blocks are available if needed and
486 * change the dc mode so it only uses the available blocks.
488 int ff_h264_check_intra4x4_pred_mode(H264Context
*h
)
490 static const int8_t top
[12] = {
491 -1, 0, LEFT_DC_PRED
, -1, -1, -1, -1, -1, 0
493 static const int8_t left
[12] = {
494 0, -1, TOP_DC_PRED
, 0, -1, -1, -1, 0, -1, DC_128_PRED
498 if (!(h
->top_samples_available
& 0x8000)) {
499 for (i
= 0; i
< 4; i
++) {
500 int status
= top
[h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]];
502 av_log(h
->avctx
, AV_LOG_ERROR
,
503 "top block unavailable for requested intra4x4 mode %d at %d %d\n",
504 status
, h
->mb_x
, h
->mb_y
);
505 return AVERROR_INVALIDDATA
;
507 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] = status
;
512 if ((h
->left_samples_available
& 0x8888) != 0x8888) {
513 static const int mask
[4] = { 0x8000, 0x2000, 0x80, 0x20 };
514 for (i
= 0; i
< 4; i
++)
515 if (!(h
->left_samples_available
& mask
[i
])) {
516 int status
= left
[h
->intra4x4_pred_mode_cache
[scan8
[0] + 8 * i
]];
518 av_log(h
->avctx
, AV_LOG_ERROR
,
519 "left block unavailable for requested intra4x4 mode %d at %d %d\n",
520 status
, h
->mb_x
, h
->mb_y
);
521 return AVERROR_INVALIDDATA
;
523 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8 * i
] = status
;
529 } // FIXME cleanup like ff_h264_check_intra_pred_mode
532 * Check if the top & left blocks are available if needed and
533 * change the dc mode so it only uses the available blocks.
535 int ff_h264_check_intra_pred_mode(H264Context
*h
, int mode
, int is_chroma
)
537 static const int8_t top
[4] = { LEFT_DC_PRED8x8
, 1, -1, -1 };
538 static const int8_t left
[5] = { TOP_DC_PRED8x8
, -1, 2, -1, DC_128_PRED8x8
};
541 av_log(h
->avctx
, AV_LOG_ERROR
,
542 "out of range intra chroma pred mode at %d %d\n",
544 return AVERROR_INVALIDDATA
;
547 if (!(h
->top_samples_available
& 0x8000)) {
550 av_log(h
->avctx
, AV_LOG_ERROR
,
551 "top block unavailable for requested intra mode at %d %d\n",
553 return AVERROR_INVALIDDATA
;
557 if ((h
->left_samples_available
& 0x8080) != 0x8080) {
559 if (is_chroma
&& (h
->left_samples_available
& 0x8080)) {
560 // mad cow disease mode, aka MBAFF + constrained_intra_pred
561 mode
= ALZHEIMER_DC_L0T_PRED8x8
+
562 (!(h
->left_samples_available
& 0x8000)) +
563 2 * (mode
== DC_128_PRED8x8
);
566 av_log(h
->avctx
, AV_LOG_ERROR
,
567 "left block unavailable for requested intra mode at %d %d\n",
569 return AVERROR_INVALIDDATA
;
576 const uint8_t *ff_h264_decode_nal(H264Context
*h
, const uint8_t *src
,
577 int *dst_length
, int *consumed
, int length
)
583 // src[0]&0x80; // forbidden bit
584 h
->nal_ref_idc
= src
[0] >> 5;
585 h
->nal_unit_type
= src
[0] & 0x1F;
590 #define STARTCODE_TEST \
591 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
592 if (src[i + 2] != 3) { \
593 /* startcode, so we must be past the end */ \
599 #if HAVE_FAST_UNALIGNED
600 #define FIND_FIRST_ZERO \
601 if (i > 0 && !src[i]) \
607 for (i
= 0; i
+ 1 < length
; i
+= 9) {
608 if (!((~AV_RN64A(src
+ i
) &
609 (AV_RN64A(src
+ i
) - 0x0100010001000101ULL
)) &
610 0x8000800080008080ULL
))
617 for (i
= 0; i
+ 1 < length
; i
+= 5) {
618 if (!((~AV_RN32A(src
+ i
) &
619 (AV_RN32A(src
+ i
) - 0x01000101U
)) &
628 for (i
= 0; i
+ 1 < length
; i
+= 2) {
631 if (i
> 0 && src
[i
- 1] == 0)
637 if (i
>= length
- 1) { // no escaped 0
638 *dst_length
= length
;
639 *consumed
= length
+ 1; // +1 for the header
643 // use second escape buffer for inter data
644 bufidx
= h
->nal_unit_type
== NAL_DPC ?
1 : 0;
645 av_fast_malloc(&h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
],
646 length
+ FF_INPUT_BUFFER_PADDING_SIZE
);
647 dst
= h
->rbsp_buffer
[bufidx
];
654 while (si
+ 2 < length
) {
655 // remove escapes (very rare 1:2^22)
656 if (src
[si
+ 2] > 3) {
657 dst
[di
++] = src
[si
++];
658 dst
[di
++] = src
[si
++];
659 } else if (src
[si
] == 0 && src
[si
+ 1] == 0) {
660 if (src
[si
+ 2] == 3) { // escape
665 } else // next start code
669 dst
[di
++] = src
[si
++];
672 dst
[di
++] = src
[si
++];
675 memset(dst
+ di
, 0, FF_INPUT_BUFFER_PADDING_SIZE
);
678 *consumed
= si
+ 1; // +1 for the header
679 /* FIXME store exact number of bits in the getbitcontext
680 * (it is needed for decoding) */
685 * Identify the exact end of the bitstream
686 * @return the length of the trailing, or 0 if damaged
688 static int decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
)
693 tprintf(h
->avctx
, "rbsp trailing %X\n", v
);
695 for (r
= 1; r
< 9; r
++) {
703 static inline int get_lowest_part_list_y(H264Context
*h
, Picture
*pic
, int n
,
704 int height
, int y_offset
, int list
)
706 int raw_my
= h
->mv_cache
[list
][scan8
[n
]][1];
707 int filter_height_up
= (raw_my
& 3) ?
2 : 0;
708 int filter_height_down
= (raw_my
& 3) ?
3 : 0;
709 int full_my
= (raw_my
>> 2) + y_offset
;
710 int top
= full_my
- filter_height_up
;
711 int bottom
= full_my
+ filter_height_down
+ height
;
713 return FFMAX(abs(top
), bottom
);
716 static inline void get_lowest_part_y(H264Context
*h
, int refs
[2][48], int n
,
717 int height
, int y_offset
, int list0
,
718 int list1
, int *nrefs
)
722 y_offset
+= 16 * (h
->mb_y
>> MB_FIELD(h
));
725 int ref_n
= h
->ref_cache
[0][scan8
[n
]];
726 Picture
*ref
= &h
->ref_list
[0][ref_n
];
728 // Error resilience puts the current picture in the ref list.
729 // Don't try to wait on these as it will cause a deadlock.
730 // Fields can wait on each other, though.
731 if (ref
->tf
.progress
->data
!= h
->cur_pic
.tf
.progress
->data
||
732 (ref
->reference
& 3) != h
->picture_structure
) {
733 my
= get_lowest_part_list_y(h
, ref
, n
, height
, y_offset
, 0);
734 if (refs
[0][ref_n
] < 0)
736 refs
[0][ref_n
] = FFMAX(refs
[0][ref_n
], my
);
741 int ref_n
= h
->ref_cache
[1][scan8
[n
]];
742 Picture
*ref
= &h
->ref_list
[1][ref_n
];
744 if (ref
->tf
.progress
->data
!= h
->cur_pic
.tf
.progress
->data
||
745 (ref
->reference
& 3) != h
->picture_structure
) {
746 my
= get_lowest_part_list_y(h
, ref
, n
, height
, y_offset
, 1);
747 if (refs
[1][ref_n
] < 0)
749 refs
[1][ref_n
] = FFMAX(refs
[1][ref_n
], my
);
755 * Wait until all reference frames are available for MC operations.
757 * @param h the H264 context
759 static void await_references(H264Context
*h
)
761 const int mb_xy
= h
->mb_xy
;
762 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
764 int nrefs
[2] = { 0 };
767 memset(refs
, -1, sizeof(refs
));
769 if (IS_16X16(mb_type
)) {
770 get_lowest_part_y(h
, refs
, 0, 16, 0,
771 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
772 } else if (IS_16X8(mb_type
)) {
773 get_lowest_part_y(h
, refs
, 0, 8, 0,
774 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
775 get_lowest_part_y(h
, refs
, 8, 8, 8,
776 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1), nrefs
);
777 } else if (IS_8X16(mb_type
)) {
778 get_lowest_part_y(h
, refs
, 0, 16, 0,
779 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
780 get_lowest_part_y(h
, refs
, 4, 16, 0,
781 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1), nrefs
);
785 assert(IS_8X8(mb_type
));
787 for (i
= 0; i
< 4; i
++) {
788 const int sub_mb_type
= h
->sub_mb_type
[i
];
790 int y_offset
= (i
& 2) << 2;
792 if (IS_SUB_8X8(sub_mb_type
)) {
793 get_lowest_part_y(h
, refs
, n
, 8, y_offset
,
794 IS_DIR(sub_mb_type
, 0, 0),
795 IS_DIR(sub_mb_type
, 0, 1),
797 } else if (IS_SUB_8X4(sub_mb_type
)) {
798 get_lowest_part_y(h
, refs
, n
, 4, y_offset
,
799 IS_DIR(sub_mb_type
, 0, 0),
800 IS_DIR(sub_mb_type
, 0, 1),
802 get_lowest_part_y(h
, refs
, n
+ 2, 4, y_offset
+ 4,
803 IS_DIR(sub_mb_type
, 0, 0),
804 IS_DIR(sub_mb_type
, 0, 1),
806 } else if (IS_SUB_4X8(sub_mb_type
)) {
807 get_lowest_part_y(h
, refs
, n
, 8, y_offset
,
808 IS_DIR(sub_mb_type
, 0, 0),
809 IS_DIR(sub_mb_type
, 0, 1),
811 get_lowest_part_y(h
, refs
, n
+ 1, 8, y_offset
,
812 IS_DIR(sub_mb_type
, 0, 0),
813 IS_DIR(sub_mb_type
, 0, 1),
817 assert(IS_SUB_4X4(sub_mb_type
));
818 for (j
= 0; j
< 4; j
++) {
819 int sub_y_offset
= y_offset
+ 2 * (j
& 2);
820 get_lowest_part_y(h
, refs
, n
+ j
, 4, sub_y_offset
,
821 IS_DIR(sub_mb_type
, 0, 0),
822 IS_DIR(sub_mb_type
, 0, 1),
829 for (list
= h
->list_count
- 1; list
>= 0; list
--)
830 for (ref
= 0; ref
< 48 && nrefs
[list
]; ref
++) {
831 int row
= refs
[list
][ref
];
833 Picture
*ref_pic
= &h
->ref_list
[list
][ref
];
834 int ref_field
= ref_pic
->reference
- 1;
835 int ref_field_picture
= ref_pic
->field_picture
;
836 int pic_height
= 16 * h
->mb_height
>> ref_field_picture
;
841 if (!FIELD_PICTURE(h
) && ref_field_picture
) { // frame referencing two fields
842 ff_thread_await_progress(&ref_pic
->tf
,
843 FFMIN((row
>> 1) - !(row
& 1),
846 ff_thread_await_progress(&ref_pic
->tf
,
847 FFMIN((row
>> 1), pic_height
- 1),
849 } else if (FIELD_PICTURE(h
) && !ref_field_picture
) { // field referencing one field of a frame
850 ff_thread_await_progress(&ref_pic
->tf
,
851 FFMIN(row
* 2 + ref_field
,
854 } else if (FIELD_PICTURE(h
)) {
855 ff_thread_await_progress(&ref_pic
->tf
,
856 FFMIN(row
, pic_height
- 1),
859 ff_thread_await_progress(&ref_pic
->tf
,
860 FFMIN(row
, pic_height
- 1),
867 static av_always_inline
void mc_dir_part(H264Context
*h
, Picture
*pic
,
868 int n
, int square
, int height
,
870 uint8_t *dest_y
, uint8_t *dest_cb
,
872 int src_x_offset
, int src_y_offset
,
873 qpel_mc_func
*qpix_op
,
874 h264_chroma_mc_func chroma_op
,
875 int pixel_shift
, int chroma_idc
)
877 const int mx
= h
->mv_cache
[list
][scan8
[n
]][0] + src_x_offset
* 8;
878 int my
= h
->mv_cache
[list
][scan8
[n
]][1] + src_y_offset
* 8;
879 const int luma_xy
= (mx
& 3) + ((my
& 3) << 2);
880 ptrdiff_t offset
= ((mx
>> 2) << pixel_shift
) + (my
>> 2) * h
->mb_linesize
;
881 uint8_t *src_y
= pic
->f
.data
[0] + offset
;
882 uint8_t *src_cb
, *src_cr
;
884 int extra_height
= 0;
886 const int full_mx
= mx
>> 2;
887 const int full_my
= my
>> 2;
888 const int pic_width
= 16 * h
->mb_width
;
889 const int pic_height
= 16 * h
->mb_height
>> MB_FIELD(h
);
897 if (full_mx
< 0 - extra_width
||
898 full_my
< 0 - extra_height
||
899 full_mx
+ 16 /*FIXME*/ > pic_width
+ extra_width
||
900 full_my
+ 16 /*FIXME*/ > pic_height
+ extra_height
) {
901 h
->vdsp
.emulated_edge_mc(h
->edge_emu_buffer
,
902 src_y
- (2 << pixel_shift
) - 2 * h
->mb_linesize
,
903 h
->mb_linesize
, h
->mb_linesize
,
904 16 + 5, 16 + 5 /*FIXME*/, full_mx
- 2,
905 full_my
- 2, pic_width
, pic_height
);
906 src_y
= h
->edge_emu_buffer
+ (2 << pixel_shift
) + 2 * h
->mb_linesize
;
910 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); // FIXME try variable height perhaps?
912 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
914 if (CONFIG_GRAY
&& h
->flags
& CODEC_FLAG_GRAY
)
917 if (chroma_idc
== 3 /* yuv444 */) {
918 src_cb
= pic
->f
.data
[1] + offset
;
920 h
->vdsp
.emulated_edge_mc(h
->edge_emu_buffer
,
921 src_cb
- (2 << pixel_shift
) - 2 * h
->mb_linesize
,
922 h
->mb_linesize
, h
->mb_linesize
,
923 16 + 5, 16 + 5 /*FIXME*/,
924 full_mx
- 2, full_my
- 2,
925 pic_width
, pic_height
);
926 src_cb
= h
->edge_emu_buffer
+ (2 << pixel_shift
) + 2 * h
->mb_linesize
;
928 qpix_op
[luma_xy
](dest_cb
, src_cb
, h
->mb_linesize
); // FIXME try variable height perhaps?
930 qpix_op
[luma_xy
](dest_cb
+ delta
, src_cb
+ delta
, h
->mb_linesize
);
932 src_cr
= pic
->f
.data
[2] + offset
;
934 h
->vdsp
.emulated_edge_mc(h
->edge_emu_buffer
,
935 src_cr
- (2 << pixel_shift
) - 2 * h
->mb_linesize
,
936 h
->mb_linesize
, h
->mb_linesize
,
937 16 + 5, 16 + 5 /*FIXME*/,
938 full_mx
- 2, full_my
- 2,
939 pic_width
, pic_height
);
940 src_cr
= h
->edge_emu_buffer
+ (2 << pixel_shift
) + 2 * h
->mb_linesize
;
942 qpix_op
[luma_xy
](dest_cr
, src_cr
, h
->mb_linesize
); // FIXME try variable height perhaps?
944 qpix_op
[luma_xy
](dest_cr
+ delta
, src_cr
+ delta
, h
->mb_linesize
);
948 ysh
= 3 - (chroma_idc
== 2 /* yuv422 */);
949 if (chroma_idc
== 1 /* yuv420 */ && MB_FIELD(h
)) {
950 // chroma offset when predicting from a field of opposite parity
951 my
+= 2 * ((h
->mb_y
& 1) - (pic
->reference
- 1));
952 emu
|= (my
>> 3) < 0 || (my
>> 3) + 8 >= (pic_height
>> 1);
955 src_cb
= pic
->f
.data
[1] + ((mx
>> 3) << pixel_shift
) +
956 (my
>> ysh
) * h
->mb_uvlinesize
;
957 src_cr
= pic
->f
.data
[2] + ((mx
>> 3) << pixel_shift
) +
958 (my
>> ysh
) * h
->mb_uvlinesize
;
961 h
->vdsp
.emulated_edge_mc(h
->edge_emu_buffer
, src_cb
,
962 h
->mb_uvlinesize
, h
->mb_uvlinesize
,
963 9, 8 * chroma_idc
+ 1, (mx
>> 3), (my
>> ysh
),
964 pic_width
>> 1, pic_height
>> (chroma_idc
== 1 /* yuv420 */));
965 src_cb
= h
->edge_emu_buffer
;
967 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
,
968 height
>> (chroma_idc
== 1 /* yuv420 */),
969 mx
& 7, (my
<< (chroma_idc
== 2 /* yuv422 */)) & 7);
972 h
->vdsp
.emulated_edge_mc(h
->edge_emu_buffer
, src_cr
,
973 h
->mb_uvlinesize
, h
->mb_uvlinesize
,
974 9, 8 * chroma_idc
+ 1, (mx
>> 3), (my
>> ysh
),
975 pic_width
>> 1, pic_height
>> (chroma_idc
== 1 /* yuv420 */));
976 src_cr
= h
->edge_emu_buffer
;
978 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, height
>> (chroma_idc
== 1 /* yuv420 */),
979 mx
& 7, (my
<< (chroma_idc
== 2 /* yuv422 */)) & 7);
982 static av_always_inline
void mc_part_std(H264Context
*h
, int n
, int square
,
983 int height
, int delta
,
984 uint8_t *dest_y
, uint8_t *dest_cb
,
986 int x_offset
, int y_offset
,
987 qpel_mc_func
*qpix_put
,
988 h264_chroma_mc_func chroma_put
,
989 qpel_mc_func
*qpix_avg
,
990 h264_chroma_mc_func chroma_avg
,
991 int list0
, int list1
,
992 int pixel_shift
, int chroma_idc
)
994 qpel_mc_func
*qpix_op
= qpix_put
;
995 h264_chroma_mc_func chroma_op
= chroma_put
;
997 dest_y
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
998 if (chroma_idc
== 3 /* yuv444 */) {
999 dest_cb
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
1000 dest_cr
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
1001 } else if (chroma_idc
== 2 /* yuv422 */) {
1002 dest_cb
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
1003 dest_cr
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
1004 } else { /* yuv420 */
1005 dest_cb
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
1006 dest_cr
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
1008 x_offset
+= 8 * h
->mb_x
;
1009 y_offset
+= 8 * (h
->mb_y
>> MB_FIELD(h
));
1012 Picture
*ref
= &h
->ref_list
[0][h
->ref_cache
[0][scan8
[n
]]];
1013 mc_dir_part(h
, ref
, n
, square
, height
, delta
, 0,
1014 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1015 qpix_op
, chroma_op
, pixel_shift
, chroma_idc
);
1018 chroma_op
= chroma_avg
;
1022 Picture
*ref
= &h
->ref_list
[1][h
->ref_cache
[1][scan8
[n
]]];
1023 mc_dir_part(h
, ref
, n
, square
, height
, delta
, 1,
1024 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1025 qpix_op
, chroma_op
, pixel_shift
, chroma_idc
);
1029 static av_always_inline
void mc_part_weighted(H264Context
*h
, int n
, int square
,
1030 int height
, int delta
,
1031 uint8_t *dest_y
, uint8_t *dest_cb
,
1033 int x_offset
, int y_offset
,
1034 qpel_mc_func
*qpix_put
,
1035 h264_chroma_mc_func chroma_put
,
1036 h264_weight_func luma_weight_op
,
1037 h264_weight_func chroma_weight_op
,
1038 h264_biweight_func luma_weight_avg
,
1039 h264_biweight_func chroma_weight_avg
,
1040 int list0
, int list1
,
1041 int pixel_shift
, int chroma_idc
)
1045 dest_y
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
1046 if (chroma_idc
== 3 /* yuv444 */) {
1047 chroma_height
= height
;
1048 chroma_weight_avg
= luma_weight_avg
;
1049 chroma_weight_op
= luma_weight_op
;
1050 dest_cb
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
1051 dest_cr
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
1052 } else if (chroma_idc
== 2 /* yuv422 */) {
1053 chroma_height
= height
;
1054 dest_cb
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
1055 dest_cr
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
1056 } else { /* yuv420 */
1057 chroma_height
= height
>> 1;
1058 dest_cb
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
1059 dest_cr
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
1061 x_offset
+= 8 * h
->mb_x
;
1062 y_offset
+= 8 * (h
->mb_y
>> MB_FIELD(h
));
1064 if (list0
&& list1
) {
1065 /* don't optimize for luma-only case, since B-frames usually
1066 * use implicit weights => chroma too. */
1067 uint8_t *tmp_cb
= h
->bipred_scratchpad
;
1068 uint8_t *tmp_cr
= h
->bipred_scratchpad
+ (16 << pixel_shift
);
1069 uint8_t *tmp_y
= h
->bipred_scratchpad
+ 16 * h
->mb_uvlinesize
;
1070 int refn0
= h
->ref_cache
[0][scan8
[n
]];
1071 int refn1
= h
->ref_cache
[1][scan8
[n
]];
1073 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, height
, delta
, 0,
1074 dest_y
, dest_cb
, dest_cr
,
1075 x_offset
, y_offset
, qpix_put
, chroma_put
,
1076 pixel_shift
, chroma_idc
);
1077 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, height
, delta
, 1,
1078 tmp_y
, tmp_cb
, tmp_cr
,
1079 x_offset
, y_offset
, qpix_put
, chroma_put
,
1080 pixel_shift
, chroma_idc
);
1082 if (h
->use_weight
== 2) {
1083 int weight0
= h
->implicit_weight
[refn0
][refn1
][h
->mb_y
& 1];
1084 int weight1
= 64 - weight0
;
1085 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
,
1086 height
, 5, weight0
, weight1
, 0);
1087 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
,
1088 chroma_height
, 5, weight0
, weight1
, 0);
1089 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
,
1090 chroma_height
, 5, weight0
, weight1
, 0);
1092 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, height
,
1093 h
->luma_log2_weight_denom
,
1094 h
->luma_weight
[refn0
][0][0],
1095 h
->luma_weight
[refn1
][1][0],
1096 h
->luma_weight
[refn0
][0][1] +
1097 h
->luma_weight
[refn1
][1][1]);
1098 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, chroma_height
,
1099 h
->chroma_log2_weight_denom
,
1100 h
->chroma_weight
[refn0
][0][0][0],
1101 h
->chroma_weight
[refn1
][1][0][0],
1102 h
->chroma_weight
[refn0
][0][0][1] +
1103 h
->chroma_weight
[refn1
][1][0][1]);
1104 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, chroma_height
,
1105 h
->chroma_log2_weight_denom
,
1106 h
->chroma_weight
[refn0
][0][1][0],
1107 h
->chroma_weight
[refn1
][1][1][0],
1108 h
->chroma_weight
[refn0
][0][1][1] +
1109 h
->chroma_weight
[refn1
][1][1][1]);
1112 int list
= list1 ?
1 : 0;
1113 int refn
= h
->ref_cache
[list
][scan8
[n
]];
1114 Picture
*ref
= &h
->ref_list
[list
][refn
];
1115 mc_dir_part(h
, ref
, n
, square
, height
, delta
, list
,
1116 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1117 qpix_put
, chroma_put
, pixel_shift
, chroma_idc
);
1119 luma_weight_op(dest_y
, h
->mb_linesize
, height
,
1120 h
->luma_log2_weight_denom
,
1121 h
->luma_weight
[refn
][list
][0],
1122 h
->luma_weight
[refn
][list
][1]);
1123 if (h
->use_weight_chroma
) {
1124 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, chroma_height
,
1125 h
->chroma_log2_weight_denom
,
1126 h
->chroma_weight
[refn
][list
][0][0],
1127 h
->chroma_weight
[refn
][list
][0][1]);
1128 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, chroma_height
,
1129 h
->chroma_log2_weight_denom
,
1130 h
->chroma_weight
[refn
][list
][1][0],
1131 h
->chroma_weight
[refn
][list
][1][1]);
1136 static av_always_inline
void prefetch_motion(H264Context
*h
, int list
,
1137 int pixel_shift
, int chroma_idc
)
1139 /* fetch pixels for estimated mv 4 macroblocks ahead
1140 * optimized for 64byte cache lines */
1141 const int refn
= h
->ref_cache
[list
][scan8
[0]];
1143 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0] >> 2) + 16 * h
->mb_x
+ 8;
1144 const int my
= (h
->mv_cache
[list
][scan8
[0]][1] >> 2) + 16 * h
->mb_y
;
1145 uint8_t **src
= h
->ref_list
[list
][refn
].f
.data
;
1146 int off
= (mx
<< pixel_shift
) +
1147 (my
+ (h
->mb_x
& 3) * 4) * h
->mb_linesize
+
1148 (64 << pixel_shift
);
1149 h
->vdsp
.prefetch(src
[0] + off
, h
->linesize
, 4);
1150 if (chroma_idc
== 3 /* yuv444 */) {
1151 h
->vdsp
.prefetch(src
[1] + off
, h
->linesize
, 4);
1152 h
->vdsp
.prefetch(src
[2] + off
, h
->linesize
, 4);
1154 off
= ((mx
>> 1) << pixel_shift
) +
1155 ((my
>> 1) + (h
->mb_x
& 7)) * h
->uvlinesize
+
1156 (64 << pixel_shift
);
1157 h
->vdsp
.prefetch(src
[1] + off
, src
[2] - src
[1], 2);
1162 static void free_tables(H264Context
*h
, int free_rbsp
)
1167 av_freep(&h
->intra4x4_pred_mode
);
1168 av_freep(&h
->chroma_pred_mode_table
);
1169 av_freep(&h
->cbp_table
);
1170 av_freep(&h
->mvd_table
[0]);
1171 av_freep(&h
->mvd_table
[1]);
1172 av_freep(&h
->direct_table
);
1173 av_freep(&h
->non_zero_count
);
1174 av_freep(&h
->slice_table_base
);
1175 h
->slice_table
= NULL
;
1176 av_freep(&h
->list_counts
);
1178 av_freep(&h
->mb2b_xy
);
1179 av_freep(&h
->mb2br_xy
);
1181 av_buffer_pool_uninit(&h
->qscale_table_pool
);
1182 av_buffer_pool_uninit(&h
->mb_type_pool
);
1183 av_buffer_pool_uninit(&h
->motion_val_pool
);
1184 av_buffer_pool_uninit(&h
->ref_index_pool
);
1186 if (free_rbsp
&& h
->DPB
) {
1187 for (i
= 0; i
< MAX_PICTURE_COUNT
; i
++)
1188 unref_picture(h
, &h
->DPB
[i
]);
1190 } else if (h
->DPB
) {
1191 for (i
= 0; i
< MAX_PICTURE_COUNT
; i
++)
1192 h
->DPB
[i
].needs_realloc
= 1;
1195 h
->cur_pic_ptr
= NULL
;
1197 for (i
= 0; i
< MAX_THREADS
; i
++) {
1198 hx
= h
->thread_context
[i
];
1201 av_freep(&hx
->top_borders
[1]);
1202 av_freep(&hx
->top_borders
[0]);
1203 av_freep(&hx
->bipred_scratchpad
);
1204 av_freep(&hx
->edge_emu_buffer
);
1205 av_freep(&hx
->dc_val_base
);
1206 av_freep(&hx
->me
.scratchpad
);
1207 av_freep(&hx
->er
.mb_index2xy
);
1208 av_freep(&hx
->er
.error_status_table
);
1209 av_freep(&hx
->er
.er_temp_buffer
);
1210 av_freep(&hx
->er
.mbintra_table
);
1211 av_freep(&hx
->er
.mbskip_table
);
1214 av_freep(&hx
->rbsp_buffer
[1]);
1215 av_freep(&hx
->rbsp_buffer
[0]);
1216 hx
->rbsp_buffer_size
[0] = 0;
1217 hx
->rbsp_buffer_size
[1] = 0;
1220 av_freep(&h
->thread_context
[i
]);
1224 static void init_dequant8_coeff_table(H264Context
*h
)
1227 const int max_qp
= 51 + 6 * (h
->sps
.bit_depth_luma
- 8);
1229 for (i
= 0; i
< 6; i
++) {
1230 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[i
];
1231 for (j
= 0; j
< i
; j
++)
1232 if (!memcmp(h
->pps
.scaling_matrix8
[j
], h
->pps
.scaling_matrix8
[i
],
1233 64 * sizeof(uint8_t))) {
1234 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[j
];
1240 for (q
= 0; q
< max_qp
+ 1; q
++) {
1241 int shift
= div6
[q
];
1243 for (x
= 0; x
< 64; x
++)
1244 h
->dequant8_coeff
[i
][q
][(x
>> 3) | ((x
& 7) << 3)] =
1245 ((uint32_t)dequant8_coeff_init
[idx
][dequant8_coeff_init_scan
[((x
>> 1) & 12) | (x
& 3)]] *
1246 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
1251 static void init_dequant4_coeff_table(H264Context
*h
)
1254 const int max_qp
= 51 + 6 * (h
->sps
.bit_depth_luma
- 8);
1255 for (i
= 0; i
< 6; i
++) {
1256 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
1257 for (j
= 0; j
< i
; j
++)
1258 if (!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
],
1259 16 * sizeof(uint8_t))) {
1260 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
1266 for (q
= 0; q
< max_qp
+ 1; q
++) {
1267 int shift
= div6
[q
] + 2;
1269 for (x
= 0; x
< 16; x
++)
1270 h
->dequant4_coeff
[i
][q
][(x
>> 2) | ((x
<< 2) & 0xF)] =
1271 ((uint32_t)dequant4_coeff_init
[idx
][(x
& 1) + ((x
>> 2) & 1)] *
1272 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
1277 static void init_dequant_tables(H264Context
*h
)
1280 init_dequant4_coeff_table(h
);
1281 if (h
->pps
.transform_8x8_mode
)
1282 init_dequant8_coeff_table(h
);
1283 if (h
->sps
.transform_bypass
) {
1284 for (i
= 0; i
< 6; i
++)
1285 for (x
= 0; x
< 16; x
++)
1286 h
->dequant4_coeff
[i
][0][x
] = 1 << 6;
1287 if (h
->pps
.transform_8x8_mode
)
1288 for (i
= 0; i
< 6; i
++)
1289 for (x
= 0; x
< 64; x
++)
1290 h
->dequant8_coeff
[i
][0][x
] = 1 << 6;
1294 int ff_h264_alloc_tables(H264Context
*h
)
1296 const int big_mb_num
= h
->mb_stride
* (h
->mb_height
+ 1);
1297 const int row_mb_num
= h
->mb_stride
* 2 * h
->avctx
->thread_count
;
1300 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->intra4x4_pred_mode
,
1301 row_mb_num
* 8 * sizeof(uint8_t), fail
)
1302 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->non_zero_count
,
1303 big_mb_num
* 48 * sizeof(uint8_t), fail
)
1304 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->slice_table_base
,
1305 (big_mb_num
+ h
->mb_stride
) * sizeof(*h
->slice_table_base
), fail
)
1306 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->cbp_table
,
1307 big_mb_num
* sizeof(uint16_t), fail
)
1308 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->chroma_pred_mode_table
,
1309 big_mb_num
* sizeof(uint8_t), fail
)
1310 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->mvd_table
[0],
1311 16 * row_mb_num
* sizeof(uint8_t), fail
);
1312 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->mvd_table
[1],
1313 16 * row_mb_num
* sizeof(uint8_t), fail
);
1314 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->direct_table
,
1315 4 * big_mb_num
* sizeof(uint8_t), fail
);
1316 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->list_counts
,
1317 big_mb_num
* sizeof(uint8_t), fail
)
1319 memset(h
->slice_table_base
, -1,
1320 (big_mb_num
+ h
->mb_stride
) * sizeof(*h
->slice_table_base
));
1321 h
->slice_table
= h
->slice_table_base
+ h
->mb_stride
* 2 + 1;
1323 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->mb2b_xy
,
1324 big_mb_num
* sizeof(uint32_t), fail
);
1325 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->mb2br_xy
,
1326 big_mb_num
* sizeof(uint32_t), fail
);
1327 for (y
= 0; y
< h
->mb_height
; y
++)
1328 for (x
= 0; x
< h
->mb_width
; x
++) {
1329 const int mb_xy
= x
+ y
* h
->mb_stride
;
1330 const int b_xy
= 4 * x
+ 4 * y
* h
->b_stride
;
1332 h
->mb2b_xy
[mb_xy
] = b_xy
;
1333 h
->mb2br_xy
[mb_xy
] = 8 * (FMO ? mb_xy
: (mb_xy
% (2 * h
->mb_stride
)));
1336 if (!h
->dequant4_coeff
[0])
1337 init_dequant_tables(h
);
1340 h
->DPB
= av_mallocz_array(MAX_PICTURE_COUNT
, sizeof(*h
->DPB
));
1342 return AVERROR(ENOMEM
);
1343 for (i
= 0; i
< MAX_PICTURE_COUNT
; i
++)
1344 av_frame_unref(&h
->DPB
[i
].f
);
1345 av_frame_unref(&h
->cur_pic
.f
);
1352 return AVERROR(ENOMEM
);
1356 * Mimic alloc_tables(), but for every context thread.
1358 static void clone_tables(H264Context
*dst
, H264Context
*src
, int i
)
1360 dst
->intra4x4_pred_mode
= src
->intra4x4_pred_mode
+ i
* 8 * 2 * src
->mb_stride
;
1361 dst
->non_zero_count
= src
->non_zero_count
;
1362 dst
->slice_table
= src
->slice_table
;
1363 dst
->cbp_table
= src
->cbp_table
;
1364 dst
->mb2b_xy
= src
->mb2b_xy
;
1365 dst
->mb2br_xy
= src
->mb2br_xy
;
1366 dst
->chroma_pred_mode_table
= src
->chroma_pred_mode_table
;
1367 dst
->mvd_table
[0] = src
->mvd_table
[0] + i
* 8 * 2 * src
->mb_stride
;
1368 dst
->mvd_table
[1] = src
->mvd_table
[1] + i
* 8 * 2 * src
->mb_stride
;
1369 dst
->direct_table
= src
->direct_table
;
1370 dst
->list_counts
= src
->list_counts
;
1371 dst
->DPB
= src
->DPB
;
1372 dst
->cur_pic_ptr
= src
->cur_pic_ptr
;
1373 dst
->cur_pic
= src
->cur_pic
;
1374 dst
->bipred_scratchpad
= NULL
;
1375 dst
->edge_emu_buffer
= NULL
;
1376 dst
->me
.scratchpad
= NULL
;
1377 ff_h264_pred_init(&dst
->hpc
, src
->avctx
->codec_id
, src
->sps
.bit_depth_luma
,
1378 src
->sps
.chroma_format_idc
);
1383 * Allocate buffers which are not shared amongst multiple threads.
1385 static int context_init(H264Context
*h
)
1387 ERContext
*er
= &h
->er
;
1388 int mb_array_size
= h
->mb_height
* h
->mb_stride
;
1389 int y_size
= (2 * h
->mb_width
+ 1) * (2 * h
->mb_height
+ 1);
1390 int c_size
= h
->mb_stride
* (h
->mb_height
+ 1);
1391 int yc_size
= y_size
+ 2 * c_size
;
1394 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->top_borders
[0],
1395 h
->mb_width
* 16 * 3 * sizeof(uint8_t) * 2, fail
)
1396 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->top_borders
[1],
1397 h
->mb_width
* 16 * 3 * sizeof(uint8_t) * 2, fail
)
1399 h
->ref_cache
[0][scan8
[5] + 1] =
1400 h
->ref_cache
[0][scan8
[7] + 1] =
1401 h
->ref_cache
[0][scan8
[13] + 1] =
1402 h
->ref_cache
[1][scan8
[5] + 1] =
1403 h
->ref_cache
[1][scan8
[7] + 1] =
1404 h
->ref_cache
[1][scan8
[13] + 1] = PART_NOT_AVAILABLE
;
1406 if (CONFIG_ERROR_RESILIENCE
) {
1408 er
->avctx
= h
->avctx
;
1410 er
->decode_mb
= h264_er_decode_mb
;
1412 er
->quarter_sample
= 1;
1414 er
->mb_num
= h
->mb_num
;
1415 er
->mb_width
= h
->mb_width
;
1416 er
->mb_height
= h
->mb_height
;
1417 er
->mb_stride
= h
->mb_stride
;
1418 er
->b8_stride
= h
->mb_width
* 2 + 1;
1420 FF_ALLOCZ_OR_GOTO(h
->avctx
, er
->mb_index2xy
, (h
->mb_num
+ 1) * sizeof(int),
1421 fail
); // error ressilience code looks cleaner with this
1422 for (y
= 0; y
< h
->mb_height
; y
++)
1423 for (x
= 0; x
< h
->mb_width
; x
++)
1424 er
->mb_index2xy
[x
+ y
* h
->mb_width
] = x
+ y
* h
->mb_stride
;
1426 er
->mb_index2xy
[h
->mb_height
* h
->mb_width
] = (h
->mb_height
- 1) *
1427 h
->mb_stride
+ h
->mb_width
;
1429 FF_ALLOCZ_OR_GOTO(h
->avctx
, er
->error_status_table
,
1430 mb_array_size
* sizeof(uint8_t), fail
);
1432 FF_ALLOC_OR_GOTO(h
->avctx
, er
->mbintra_table
, mb_array_size
, fail
);
1433 memset(er
->mbintra_table
, 1, mb_array_size
);
1435 FF_ALLOCZ_OR_GOTO(h
->avctx
, er
->mbskip_table
, mb_array_size
+ 2, fail
);
1437 FF_ALLOC_OR_GOTO(h
->avctx
, er
->er_temp_buffer
, h
->mb_height
* h
->mb_stride
,
1440 FF_ALLOCZ_OR_GOTO(h
->avctx
, h
->dc_val_base
, yc_size
* sizeof(int16_t), fail
);
1441 er
->dc_val
[0] = h
->dc_val_base
+ h
->mb_width
* 2 + 2;
1442 er
->dc_val
[1] = h
->dc_val_base
+ y_size
+ h
->mb_stride
+ 1;
1443 er
->dc_val
[2] = er
->dc_val
[1] + c_size
;
1444 for (i
= 0; i
< yc_size
; i
++)
1445 h
->dc_val_base
[i
] = 1024;
1451 return AVERROR(ENOMEM
); // free_tables will clean up for us
1454 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
,
1455 int parse_extradata
);
1457 int ff_h264_decode_extradata(H264Context
*h
)
1459 AVCodecContext
*avctx
= h
->avctx
;
1462 if (avctx
->extradata
[0] == 1) {
1463 int i
, cnt
, nalsize
;
1464 unsigned char *p
= avctx
->extradata
;
1468 if (avctx
->extradata_size
< 7) {
1469 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
1470 return AVERROR_INVALIDDATA
;
1472 /* sps and pps in the avcC always have length coded with 2 bytes,
1473 * so put a fake nal_length_size = 2 while parsing them */
1474 h
->nal_length_size
= 2;
1475 // Decode sps from avcC
1476 cnt
= *(p
+ 5) & 0x1f; // Number of sps
1478 for (i
= 0; i
< cnt
; i
++) {
1479 nalsize
= AV_RB16(p
) + 2;
1480 if (p
- avctx
->extradata
+ nalsize
> avctx
->extradata_size
)
1481 return AVERROR_INVALIDDATA
;
1482 ret
= decode_nal_units(h
, p
, nalsize
, 1);
1484 av_log(avctx
, AV_LOG_ERROR
,
1485 "Decoding sps %d from avcC failed\n", i
);
1490 // Decode pps from avcC
1491 cnt
= *(p
++); // Number of pps
1492 for (i
= 0; i
< cnt
; i
++) {
1493 nalsize
= AV_RB16(p
) + 2;
1494 if (p
- avctx
->extradata
+ nalsize
> avctx
->extradata_size
)
1495 return AVERROR_INVALIDDATA
;
1496 ret
= decode_nal_units(h
, p
, nalsize
, 1);
1498 av_log(avctx
, AV_LOG_ERROR
,
1499 "Decoding pps %d from avcC failed\n", i
);
1504 // Now store right nal length size, that will be used to parse all other nals
1505 h
->nal_length_size
= (avctx
->extradata
[4] & 0x03) + 1;
1508 ret
= decode_nal_units(h
, avctx
->extradata
, avctx
->extradata_size
, 1);
1515 av_cold
int ff_h264_decode_init(AVCodecContext
*avctx
)
1517 H264Context
*h
= avctx
->priv_data
;
1523 h
->bit_depth_luma
= 8;
1524 h
->chroma_format_idc
= 1;
1526 ff_h264dsp_init(&h
->h264dsp
, 8, 1);
1527 ff_h264chroma_init(&h
->h264chroma
, h
->sps
.bit_depth_chroma
);
1528 ff_h264qpel_init(&h
->h264qpel
, 8);
1529 ff_h264_pred_init(&h
->hpc
, h
->avctx
->codec_id
, 8, 1);
1531 h
->dequant_coeff_pps
= -1;
1533 /* needed so that IDCT permutation is known early */
1534 if (CONFIG_ERROR_RESILIENCE
)
1535 ff_dsputil_init(&h
->dsp
, h
->avctx
);
1536 ff_videodsp_init(&h
->vdsp
, 8);
1538 memset(h
->pps
.scaling_matrix4
, 16, 6 * 16 * sizeof(uint8_t));
1539 memset(h
->pps
.scaling_matrix8
, 16, 2 * 64 * sizeof(uint8_t));
1541 h
->picture_structure
= PICT_FRAME
;
1542 h
->slice_context_count
= 1;
1543 h
->workaround_bugs
= avctx
->workaround_bugs
;
1544 h
->flags
= avctx
->flags
;
1547 // s->decode_mb = ff_h263_decode_mb;
1548 if (!avctx
->has_b_frames
)
1551 avctx
->chroma_sample_location
= AVCHROMA_LOC_LEFT
;
1553 ff_h264_decode_init_vlc();
1555 ff_init_cabac_states();
1558 h
->sps
.bit_depth_luma
= avctx
->bits_per_raw_sample
= 8;
1560 h
->thread_context
[0] = h
;
1561 h
->outputed_poc
= h
->next_outputed_poc
= INT_MIN
;
1562 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++)
1563 h
->last_pocs
[i
] = INT_MIN
;
1564 h
->prev_poc_msb
= 1 << 16;
1566 ff_h264_reset_sei(h
);
1567 h
->recovery_frame
= -1;
1568 h
->frame_recovered
= 0;
1569 if (avctx
->codec_id
== AV_CODEC_ID_H264
) {
1570 if (avctx
->ticks_per_frame
== 1)
1571 h
->avctx
->time_base
.den
*= 2;
1572 avctx
->ticks_per_frame
= 2;
1575 if (avctx
->extradata_size
> 0 && avctx
->extradata
) {
1576 ret
= ff_h264_decode_extradata(h
);
1581 if (h
->sps
.bitstream_restriction_flag
&&
1582 h
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
) {
1583 h
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
1587 avctx
->internal
->allocate_progress
= 1;
/* True when a lies in the half-open range [b, b + size). */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))

#undef REBASE_PICTURE
/* Map a pointer into old_ctx->DPB to the entry with the same index in
 * new_ctx->DPB. Evaluates to NULL when pic is NULL or does not point into
 * old_ctx->DPB. All arguments are parenthesised so that expressions can be
 * passed safely. */
#define REBASE_PICTURE(pic, new_ctx, old_ctx)                     \
    (((pic) && (pic) >= (old_ctx)->DPB &&                         \
      (pic) < (old_ctx)->DPB + MAX_PICTURE_COUNT) ?               \
     &(new_ctx)->DPB[(pic) - (old_ctx)->DPB] : NULL)
1599 static void copy_picture_range(Picture
**to
, Picture
**from
, int count
,
1600 H264Context
*new_base
,
1601 H264Context
*old_base
)
1605 for (i
= 0; i
< count
; i
++) {
1606 assert((IN_RANGE(from
[i
], old_base
, sizeof(*old_base
)) ||
1607 IN_RANGE(from
[i
], old_base
->DPB
,
1608 sizeof(Picture
) * MAX_PICTURE_COUNT
) ||
1610 to
[i
] = REBASE_PICTURE(from
[i
], new_base
, old_base
);
1614 static int copy_parameter_set(void **to
, void **from
, int count
, int size
)
1618 for (i
= 0; i
< count
; i
++) {
1619 if (to
[i
] && !from
[i
]) {
1621 } else if (from
[i
] && !to
[i
]) {
1622 to
[i
] = av_malloc(size
);
1624 return AVERROR(ENOMEM
);
1628 memcpy(to
[i
], from
[i
], size
);
1634 static int decode_init_thread_copy(AVCodecContext
*avctx
)
1636 H264Context
*h
= avctx
->priv_data
;
1638 if (!avctx
->internal
->is_copy
)
1640 memset(h
->sps_buffers
, 0, sizeof(h
->sps_buffers
));
1641 memset(h
->pps_buffers
, 0, sizeof(h
->pps_buffers
));
1643 h
->context_initialized
= 0;
/* Copy the members of *from, starting at start_field and ending just before
 * end_field, into the same members of *to. The pointer arguments are
 * parenthesised so that expressions can be passed safely. */
#define copy_fields(to, from, start_field, end_field)                   \
    memcpy(&(to)->start_field, &(from)->start_field,                    \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1652 static int h264_slice_header_init(H264Context
*, int);
1654 static int h264_set_parameter_from_sps(H264Context
*h
);
1656 static int decode_update_thread_context(AVCodecContext
*dst
,
1657 const AVCodecContext
*src
)
1659 H264Context
*h
= dst
->priv_data
, *h1
= src
->priv_data
;
1660 int inited
= h
->context_initialized
, err
= 0;
1661 int context_reinitialized
= 0;
1664 if (dst
== src
|| !h1
->context_initialized
)
1668 (h
->width
!= h1
->width
||
1669 h
->height
!= h1
->height
||
1670 h
->mb_width
!= h1
->mb_width
||
1671 h
->mb_height
!= h1
->mb_height
||
1672 h
->sps
.bit_depth_luma
!= h1
->sps
.bit_depth_luma
||
1673 h
->sps
.chroma_format_idc
!= h1
->sps
.chroma_format_idc
||
1674 h
->sps
.colorspace
!= h1
->sps
.colorspace
)) {
1676 /* set bits_per_raw_sample to the previous value. the check for changed
1677 * bit depth in h264_set_parameter_from_sps() uses it and sets it to
1678 * the current value */
1679 h
->avctx
->bits_per_raw_sample
= h
->sps
.bit_depth_luma
;
1681 av_freep(&h
->bipred_scratchpad
);
1683 h
->width
= h1
->width
;
1684 h
->height
= h1
->height
;
1685 h
->mb_height
= h1
->mb_height
;
1686 h
->mb_width
= h1
->mb_width
;
1687 h
->mb_num
= h1
->mb_num
;
1688 h
->mb_stride
= h1
->mb_stride
;
1689 h
->b_stride
= h1
->b_stride
;
1691 if ((err
= h264_slice_header_init(h
, 1)) < 0) {
1692 av_log(h
->avctx
, AV_LOG_ERROR
, "h264_slice_header_init() failed");
1695 context_reinitialized
= 1;
1697 /* update linesize on resize. The decoder doesn't
1698 * necessarily call h264_frame_start in the new thread */
1699 h
->linesize
= h1
->linesize
;
1700 h
->uvlinesize
= h1
->uvlinesize
;
1702 /* copy block_offset since frame_start may not be called */
1703 memcpy(h
->block_offset
, h1
->block_offset
, sizeof(h
->block_offset
));
1707 for (i
= 0; i
< MAX_SPS_COUNT
; i
++)
1708 av_freep(h
->sps_buffers
+ i
);
1710 for (i
= 0; i
< MAX_PPS_COUNT
; i
++)
1711 av_freep(h
->pps_buffers
+ i
);
1713 memcpy(h
, h1
, sizeof(*h1
));
1714 memset(h
->sps_buffers
, 0, sizeof(h
->sps_buffers
));
1715 memset(h
->pps_buffers
, 0, sizeof(h
->pps_buffers
));
1716 memset(&h
->er
, 0, sizeof(h
->er
));
1717 memset(&h
->me
, 0, sizeof(h
->me
));
1718 memset(&h
->mb
, 0, sizeof(h
->mb
));
1719 memset(&h
->mb_luma_dc
, 0, sizeof(h
->mb_luma_dc
));
1720 memset(&h
->mb_padding
, 0, sizeof(h
->mb_padding
));
1721 h
->context_initialized
= 0;
1723 memset(&h
->cur_pic
, 0, sizeof(h
->cur_pic
));
1724 av_frame_unref(&h
->cur_pic
.f
);
1725 h
->cur_pic
.tf
.f
= &h
->cur_pic
.f
;
1729 h
->qscale_table_pool
= NULL
;
1730 h
->mb_type_pool
= NULL
;
1731 h
->ref_index_pool
= NULL
;
1732 h
->motion_val_pool
= NULL
;
1734 ret
= ff_h264_alloc_tables(h
);
1736 av_log(dst
, AV_LOG_ERROR
, "Could not allocate memory for h264\n");
1739 ret
= context_init(h
);
1741 av_log(dst
, AV_LOG_ERROR
, "context_init() failed.\n");
1745 for (i
= 0; i
< 2; i
++) {
1746 h
->rbsp_buffer
[i
] = NULL
;
1747 h
->rbsp_buffer_size
[i
] = 0;
1749 h
->bipred_scratchpad
= NULL
;
1750 h
->edge_emu_buffer
= NULL
;
1752 h
->thread_context
[0] = h
;
1754 h
->context_initialized
= 1;
1757 h
->avctx
->coded_height
= h1
->avctx
->coded_height
;
1758 h
->avctx
->coded_width
= h1
->avctx
->coded_width
;
1759 h
->avctx
->width
= h1
->avctx
->width
;
1760 h
->avctx
->height
= h1
->avctx
->height
;
1761 h
->coded_picture_number
= h1
->coded_picture_number
;
1762 h
->first_field
= h1
->first_field
;
1763 h
->picture_structure
= h1
->picture_structure
;
1764 h
->qscale
= h1
->qscale
;
1765 h
->droppable
= h1
->droppable
;
1766 h
->data_partitioning
= h1
->data_partitioning
;
1767 h
->low_delay
= h1
->low_delay
;
1769 for (i
= 0; i
< MAX_PICTURE_COUNT
; i
++) {
1770 unref_picture(h
, &h
->DPB
[i
]);
1771 if (h1
->DPB
[i
].f
.buf
[0] &&
1772 (ret
= ref_picture(h
, &h
->DPB
[i
], &h1
->DPB
[i
])) < 0)
1776 h
->cur_pic_ptr
= REBASE_PICTURE(h1
->cur_pic_ptr
, h
, h1
);
1777 unref_picture(h
, &h
->cur_pic
);
1778 if ((ret
= ref_picture(h
, &h
->cur_pic
, &h1
->cur_pic
)) < 0)
1781 h
->workaround_bugs
= h1
->workaround_bugs
;
1782 h
->low_delay
= h1
->low_delay
;
1783 h
->droppable
= h1
->droppable
;
1785 /* frame_start may not be called for the next thread (if it's decoding
1786 * a bottom field) so this has to be allocated here */
1787 err
= alloc_scratch_buffers(h
, h1
->linesize
);
1791 // extradata/NAL handling
1792 h
->is_avc
= h1
->is_avc
;
1795 if ((ret
= copy_parameter_set((void **)h
->sps_buffers
,
1796 (void **)h1
->sps_buffers
,
1797 MAX_SPS_COUNT
, sizeof(SPS
))) < 0)
1800 if ((ret
= copy_parameter_set((void **)h
->pps_buffers
,
1801 (void **)h1
->pps_buffers
,
1802 MAX_PPS_COUNT
, sizeof(PPS
))) < 0)
1806 // Dequantization matrices
1807 // FIXME these are big - can they be only copied when PPS changes?
1808 copy_fields(h
, h1
, dequant4_buffer
, dequant4_coeff
);
1810 for (i
= 0; i
< 6; i
++)
1811 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[0] +
1812 (h1
->dequant4_coeff
[i
] - h1
->dequant4_buffer
[0]);
1814 for (i
= 0; i
< 6; i
++)
1815 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[0] +
1816 (h1
->dequant8_coeff
[i
] - h1
->dequant8_buffer
[0]);
1818 h
->dequant_coeff_pps
= h1
->dequant_coeff_pps
;
1821 copy_fields(h
, h1
, poc_lsb
, redundant_pic_count
);
1824 copy_fields(h
, h1
, short_ref
, cabac_init_idc
);
1826 copy_picture_range(h
->short_ref
, h1
->short_ref
, 32, h
, h1
);
1827 copy_picture_range(h
->long_ref
, h1
->long_ref
, 32, h
, h1
);
1828 copy_picture_range(h
->delayed_pic
, h1
->delayed_pic
,
1829 MAX_DELAYED_PIC_COUNT
+ 2, h
, h1
);
1831 h
->last_slice_type
= h1
->last_slice_type
;
1833 if (context_reinitialized
)
1834 h264_set_parameter_from_sps(h
);
1836 if (!h
->cur_pic_ptr
)
1839 if (!h
->droppable
) {
1840 err
= ff_h264_execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
1841 h
->prev_poc_msb
= h
->poc_msb
;
1842 h
->prev_poc_lsb
= h
->poc_lsb
;
1844 h
->prev_frame_num_offset
= h
->frame_num_offset
;
1845 h
->prev_frame_num
= h
->frame_num
;
1846 h
->outputed_poc
= h
->next_outputed_poc
;
1848 h
->recovery_frame
= h1
->recovery_frame
;
1849 h
->frame_recovered
= h1
->frame_recovered
;
1854 static int h264_frame_start(H264Context
*h
)
1858 const int pixel_shift
= h
->pixel_shift
;
1860 release_unused_pictures(h
, 1);
1861 h
->cur_pic_ptr
= NULL
;
1863 i
= find_unused_picture(h
);
1865 av_log(h
->avctx
, AV_LOG_ERROR
, "no frame buffer available\n");
1870 pic
->reference
= h
->droppable ?
0 : h
->picture_structure
;
1871 pic
->f
.coded_picture_number
= h
->coded_picture_number
++;
1872 pic
->field_picture
= h
->picture_structure
!= PICT_FRAME
;
1874 * Zero key_frame here; IDR markings per slice in frame or fields are ORed
1876 * See decode_nal_units().
1878 pic
->f
.key_frame
= 0;
1879 pic
->mmco_reset
= 0;
1882 if ((ret
= alloc_picture(h
, pic
)) < 0)
1885 h
->cur_pic_ptr
= pic
;
1886 unref_picture(h
, &h
->cur_pic
);
1887 if ((ret
= ref_picture(h
, &h
->cur_pic
, h
->cur_pic_ptr
)) < 0)
1890 if (CONFIG_ERROR_RESILIENCE
)
1891 ff_er_frame_start(&h
->er
);
1893 assert(h
->linesize
&& h
->uvlinesize
);
1895 for (i
= 0; i
< 16; i
++) {
1896 h
->block_offset
[i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 4 * h
->linesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1897 h
->block_offset
[48 + i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 8 * h
->linesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1899 for (i
= 0; i
< 16; i
++) {
1900 h
->block_offset
[16 + i
] =
1901 h
->block_offset
[32 + i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 4 * h
->uvlinesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1902 h
->block_offset
[48 + 16 + i
] =
1903 h
->block_offset
[48 + 32 + i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 8 * h
->uvlinesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1906 /* can't be in alloc_tables because linesize isn't known there.
1907 * FIXME: redo bipred weight to not require extra buffer? */
1908 for (i
= 0; i
< h
->slice_context_count
; i
++)
1909 if (h
->thread_context
[i
]) {
1910 ret
= alloc_scratch_buffers(h
->thread_context
[i
], h
->linesize
);
1915 /* Some macroblocks can be accessed before they're available in case
1916 * of lost slices, MBAFF or threading. */
1917 memset(h
->slice_table
, -1,
1918 (h
->mb_height
* h
->mb_stride
- 1) * sizeof(*h
->slice_table
));
1920 // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
1921 // s->current_picture.f.reference /* || h->contains_intra */ || 1;
1923 /* We mark the current picture as non-reference after allocating it, so
1924 * that if we break out due to an error it can be released automatically
1925 * in the next ff_MPV_frame_start().
1927 h
->cur_pic_ptr
->reference
= 0;
1929 h
->cur_pic_ptr
->field_poc
[0] = h
->cur_pic_ptr
->field_poc
[1] = INT_MAX
;
1931 h
->next_output_pic
= NULL
;
1933 assert(h
->cur_pic_ptr
->long_ref
== 0);
1939 * Run setup operations that must be run after slice header decoding.
1940 * This includes finding the next displayed frame.
1942 * @param h h264 master context
1943 * @param setup_finished enough NALs have been read that we can call
1944 * ff_thread_finish_setup()
1946 static void decode_postinit(H264Context
*h
, int setup_finished
)
1948 Picture
*out
= h
->cur_pic_ptr
;
1949 Picture
*cur
= h
->cur_pic_ptr
;
1950 int i
, pics
, out_of_order
, out_idx
;
1951 int invalid
= 0, cnt
= 0;
1953 h
->cur_pic_ptr
->f
.pict_type
= h
->pict_type
;
1955 if (h
->next_output_pic
)
1958 if (cur
->field_poc
[0] == INT_MAX
|| cur
->field_poc
[1] == INT_MAX
) {
1959 /* FIXME: if we have two PAFF fields in one packet, we can't start
1960 * the next thread here. If we have one field per packet, we can.
1961 * The check in decode_nal_units() is not good enough to find this
1962 * yet, so we assume the worst for now. */
1963 // if (setup_finished)
1964 // ff_thread_finish_setup(h->avctx);
1968 cur
->f
.interlaced_frame
= 0;
1969 cur
->f
.repeat_pict
= 0;
1971 /* Signal interlacing information externally. */
1972 /* Prioritize picture timing SEI information over used
1973 * decoding process if it exists. */
1975 if (h
->sps
.pic_struct_present_flag
) {
1976 switch (h
->sei_pic_struct
) {
1977 case SEI_PIC_STRUCT_FRAME
:
1979 case SEI_PIC_STRUCT_TOP_FIELD
:
1980 case SEI_PIC_STRUCT_BOTTOM_FIELD
:
1981 cur
->f
.interlaced_frame
= 1;
1983 case SEI_PIC_STRUCT_TOP_BOTTOM
:
1984 case SEI_PIC_STRUCT_BOTTOM_TOP
:
1985 if (FIELD_OR_MBAFF_PICTURE(h
))
1986 cur
->f
.interlaced_frame
= 1;
1988 // try to flag soft telecine progressive
1989 cur
->f
.interlaced_frame
= h
->prev_interlaced_frame
;
1991 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP
:
1992 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM
:
1993 /* Signal the possibility of telecined film externally
1994 * (pic_struct 5,6). From these hints, let the applications
1995 * decide if they apply deinterlacing. */
1996 cur
->f
.repeat_pict
= 1;
1998 case SEI_PIC_STRUCT_FRAME_DOUBLING
:
1999 cur
->f
.repeat_pict
= 2;
2001 case SEI_PIC_STRUCT_FRAME_TRIPLING
:
2002 cur
->f
.repeat_pict
= 4;
2006 if ((h
->sei_ct_type
& 3) &&
2007 h
->sei_pic_struct
<= SEI_PIC_STRUCT_BOTTOM_TOP
)
2008 cur
->f
.interlaced_frame
= (h
->sei_ct_type
& (1 << 1)) != 0;
2010 /* Derive interlacing flag from used decoding process. */
2011 cur
->f
.interlaced_frame
= FIELD_OR_MBAFF_PICTURE(h
);
2013 h
->prev_interlaced_frame
= cur
->f
.interlaced_frame
;
2015 if (cur
->field_poc
[0] != cur
->field_poc
[1]) {
2016 /* Derive top_field_first from field pocs. */
2017 cur
->f
.top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
2019 if (cur
->f
.interlaced_frame
|| h
->sps
.pic_struct_present_flag
) {
2020 /* Use picture timing SEI information. Even if it is a
2021 * information of a past frame, better than nothing. */
2022 if (h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM
||
2023 h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM_TOP
)
2024 cur
->f
.top_field_first
= 1;
2026 cur
->f
.top_field_first
= 0;
2028 /* Most likely progressive */
2029 cur
->f
.top_field_first
= 0;
2033 if (h
->sei_frame_packing_present
&&
2034 h
->frame_packing_arrangement_type
>= 0 &&
2035 h
->frame_packing_arrangement_type
<= 6 &&
2036 h
->content_interpretation_type
> 0 &&
2037 h
->content_interpretation_type
< 3) {
2038 AVStereo3D
*stereo
= av_stereo3d_create_side_data(&cur
->f
);
2042 switch (h
->frame_packing_arrangement_type
) {
2044 stereo
->type
= AV_STEREO3D_CHECKERBOARD
;
2047 stereo
->type
= AV_STEREO3D_LINES
;
2050 stereo
->type
= AV_STEREO3D_COLUMNS
;
2053 if (h
->quincunx_subsampling
)
2054 stereo
->type
= AV_STEREO3D_SIDEBYSIDE_QUINCUNX
;
2056 stereo
->type
= AV_STEREO3D_SIDEBYSIDE
;
2059 stereo
->type
= AV_STEREO3D_TOPBOTTOM
;
2062 stereo
->type
= AV_STEREO3D_FRAMESEQUENCE
;
2065 stereo
->type
= AV_STEREO3D_2D
;
2069 if (h
->content_interpretation_type
== 2)
2070 stereo
->flags
= AV_STEREO3D_FLAG_INVERT
;
2073 // FIXME do something with unavailable reference frames
2075 /* Sort B-frames into display order */
2077 if (h
->sps
.bitstream_restriction_flag
&&
2078 h
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
) {
2079 h
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
2083 if (h
->avctx
->strict_std_compliance
>= FF_COMPLIANCE_STRICT
&&
2084 !h
->sps
.bitstream_restriction_flag
) {
2085 h
->avctx
->has_b_frames
= MAX_DELAYED_PIC_COUNT
- 1;
2090 while (h
->delayed_pic
[pics
])
2093 assert(pics
<= MAX_DELAYED_PIC_COUNT
);
2095 h
->delayed_pic
[pics
++] = cur
;
2096 if (cur
->reference
== 0)
2097 cur
->reference
= DELAYED_PIC_REF
;
2099 /* Frame reordering. This code takes pictures from coding order and sorts
2100 * them by their incremental POC value into display order. It supports POC
2101 * gaps, MMCO reset codes and random resets.
2102 * A "display group" can start either with a IDR frame (f.key_frame = 1),
2103 * and/or can be closed down with a MMCO reset code. In sequences where
2104 * there is no delay, we can't detect that (since the frame was already
2105 * output to the user), so we also set h->mmco_reset to detect the MMCO
2107 * FIXME: if we detect insufficient delays (as per h->avctx->has_b_frames),
2108 * we increase the delay between input and output. All frames affected by
2109 * the lag (e.g. those that should have been output before another frame
2110 * that we already returned to the user) will be dropped. This is a bug
2111 * that we will fix later. */
2112 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++) {
2113 cnt
+= out
->poc
< h
->last_pocs
[i
];
2114 invalid
+= out
->poc
== INT_MIN
;
2116 if (!h
->mmco_reset
&& !cur
->f
.key_frame
&&
2117 cnt
+ invalid
== MAX_DELAYED_PIC_COUNT
&& cnt
> 0) {
2120 h
->delayed_pic
[pics
- 2]->mmco_reset
= 2;
2122 if (h
->mmco_reset
|| cur
->f
.key_frame
) {
2123 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++)
2124 h
->last_pocs
[i
] = INT_MIN
;
2126 invalid
= MAX_DELAYED_PIC_COUNT
;
2128 out
= h
->delayed_pic
[0];
2130 for (i
= 1; i
< MAX_DELAYED_PIC_COUNT
&&
2131 h
->delayed_pic
[i
] &&
2132 !h
->delayed_pic
[i
- 1]->mmco_reset
&&
2133 !h
->delayed_pic
[i
]->f
.key_frame
;
2135 if (h
->delayed_pic
[i
]->poc
< out
->poc
) {
2136 out
= h
->delayed_pic
[i
];
2139 if (h
->avctx
->has_b_frames
== 0 &&
2140 (h
->delayed_pic
[0]->f
.key_frame
|| h
->mmco_reset
))
2141 h
->next_outputed_poc
= INT_MIN
;
2142 out_of_order
= !out
->f
.key_frame
&& !h
->mmco_reset
&&
2143 (out
->poc
< h
->next_outputed_poc
);
2145 if (h
->sps
.bitstream_restriction_flag
&&
2146 h
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
) {
2147 } else if (out_of_order
&& pics
- 1 == h
->avctx
->has_b_frames
&&
2148 h
->avctx
->has_b_frames
< MAX_DELAYED_PIC_COUNT
) {
2149 if (invalid
+ cnt
< MAX_DELAYED_PIC_COUNT
) {
2150 h
->avctx
->has_b_frames
= FFMAX(h
->avctx
->has_b_frames
, cnt
);
2153 } else if (h
->low_delay
&&
2154 ((h
->next_outputed_poc
!= INT_MIN
&&
2155 out
->poc
> h
->next_outputed_poc
+ 2) ||
2156 cur
->f
.pict_type
== AV_PICTURE_TYPE_B
)) {
2158 h
->avctx
->has_b_frames
++;
2161 if (pics
> h
->avctx
->has_b_frames
) {
2162 out
->reference
&= ~DELAYED_PIC_REF
;
2163 // for frame threading, the owner must be the second field's thread or
2164 // else the first thread can release the picture and reuse it unsafely
2165 for (i
= out_idx
; h
->delayed_pic
[i
]; i
++)
2166 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+ 1];
2168 memmove(h
->last_pocs
, &h
->last_pocs
[1],
2169 sizeof(*h
->last_pocs
) * (MAX_DELAYED_PIC_COUNT
- 1));
2170 h
->last_pocs
[MAX_DELAYED_PIC_COUNT
- 1] = cur
->poc
;
2171 if (!out_of_order
&& pics
> h
->avctx
->has_b_frames
) {
2172 h
->next_output_pic
= out
;
2173 if (out
->mmco_reset
) {
2175 h
->next_outputed_poc
= out
->poc
;
2176 h
->delayed_pic
[out_idx
- 1]->mmco_reset
= out
->mmco_reset
;
2178 h
->next_outputed_poc
= INT_MIN
;
2181 if (out_idx
== 0 && pics
> 1 && h
->delayed_pic
[0]->f
.key_frame
) {
2182 h
->next_outputed_poc
= INT_MIN
;
2184 h
->next_outputed_poc
= out
->poc
;
2189 av_log(h
->avctx
, AV_LOG_DEBUG
, "no picture\n");
2192 if (h
->next_output_pic
) {
2193 if (h
->next_output_pic
->recovered
) {
2194 // We have reached an recovery point and all frames after it in
2195 // display order are "recovered".
2196 h
->frame_recovered
|= FRAME_RECOVERED_SEI
;
2198 h
->next_output_pic
->recovered
|= !!(h
->frame_recovered
& FRAME_RECOVERED_SEI
);
2201 if (setup_finished
&& !h
->avctx
->hwaccel
)
2202 ff_thread_finish_setup(h
->avctx
);
2205 static av_always_inline
void backup_mb_border(H264Context
*h
, uint8_t *src_y
,
2206 uint8_t *src_cb
, uint8_t *src_cr
,
2207 int linesize
, int uvlinesize
,
2210 uint8_t *top_border
;
2212 const int pixel_shift
= h
->pixel_shift
;
2213 int chroma444
= CHROMA444(h
);
2214 int chroma422
= CHROMA422(h
);
2217 src_cb
-= uvlinesize
;
2218 src_cr
-= uvlinesize
;
2220 if (!simple
&& FRAME_MBAFF(h
)) {
2223 top_border
= h
->top_borders
[0][h
->mb_x
];
2224 AV_COPY128(top_border
, src_y
+ 15 * linesize
);
2226 AV_COPY128(top_border
+ 16, src_y
+ 15 * linesize
+ 16);
2227 if (simple
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
2230 AV_COPY128(top_border
+ 32, src_cb
+ 15 * uvlinesize
);
2231 AV_COPY128(top_border
+ 48, src_cb
+ 15 * uvlinesize
+ 16);
2232 AV_COPY128(top_border
+ 64, src_cr
+ 15 * uvlinesize
);
2233 AV_COPY128(top_border
+ 80, src_cr
+ 15 * uvlinesize
+ 16);
2235 AV_COPY128(top_border
+ 16, src_cb
+ 15 * uvlinesize
);
2236 AV_COPY128(top_border
+ 32, src_cr
+ 15 * uvlinesize
);
2238 } else if (chroma422
) {
2240 AV_COPY128(top_border
+ 32, src_cb
+ 15 * uvlinesize
);
2241 AV_COPY128(top_border
+ 48, src_cr
+ 15 * uvlinesize
);
2243 AV_COPY64(top_border
+ 16, src_cb
+ 15 * uvlinesize
);
2244 AV_COPY64(top_border
+ 24, src_cr
+ 15 * uvlinesize
);
2248 AV_COPY128(top_border
+ 32, src_cb
+ 7 * uvlinesize
);
2249 AV_COPY128(top_border
+ 48, src_cr
+ 7 * uvlinesize
);
2251 AV_COPY64(top_border
+ 16, src_cb
+ 7 * uvlinesize
);
2252 AV_COPY64(top_border
+ 24, src_cr
+ 7 * uvlinesize
);
2257 } else if (MB_MBAFF(h
)) {
2263 top_border
= h
->top_borders
[top_idx
][h
->mb_x
];
2264 /* There are two lines saved, the line above the top macroblock
2265 * of a pair, and the line above the bottom macroblock. */
2266 AV_COPY128(top_border
, src_y
+ 16 * linesize
);
2268 AV_COPY128(top_border
+ 16, src_y
+ 16 * linesize
+ 16);
2270 if (simple
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
2273 AV_COPY128(top_border
+ 32, src_cb
+ 16 * linesize
);
2274 AV_COPY128(top_border
+ 48, src_cb
+ 16 * linesize
+ 16);
2275 AV_COPY128(top_border
+ 64, src_cr
+ 16 * linesize
);
2276 AV_COPY128(top_border
+ 80, src_cr
+ 16 * linesize
+ 16);
2278 AV_COPY128(top_border
+ 16, src_cb
+ 16 * linesize
);
2279 AV_COPY128(top_border
+ 32, src_cr
+ 16 * linesize
);
2281 } else if (chroma422
) {
2283 AV_COPY128(top_border
+ 32, src_cb
+ 16 * uvlinesize
);
2284 AV_COPY128(top_border
+ 48, src_cr
+ 16 * uvlinesize
);
2286 AV_COPY64(top_border
+ 16, src_cb
+ 16 * uvlinesize
);
2287 AV_COPY64(top_border
+ 24, src_cr
+ 16 * uvlinesize
);
2291 AV_COPY128(top_border
+ 32, src_cb
+ 8 * uvlinesize
);
2292 AV_COPY128(top_border
+ 48, src_cr
+ 8 * uvlinesize
);
2294 AV_COPY64(top_border
+ 16, src_cb
+ 8 * uvlinesize
);
2295 AV_COPY64(top_border
+ 24, src_cr
+ 8 * uvlinesize
);
2301 static av_always_inline
void xchg_mb_border(H264Context
*h
, uint8_t *src_y
,
2302 uint8_t *src_cb
, uint8_t *src_cr
,
2303 int linesize
, int uvlinesize
,
2304 int xchg
, int chroma444
,
2305 int simple
, int pixel_shift
)
2307 int deblock_topleft
;
2310 uint8_t *top_border_m1
;
2311 uint8_t *top_border
;
2313 if (!simple
&& FRAME_MBAFF(h
)) {
2318 top_idx
= MB_MBAFF(h
) ?
0 : 1;
2322 if (h
->deblocking_filter
== 2) {
2323 deblock_topleft
= h
->slice_table
[h
->mb_xy
- 1 - h
->mb_stride
] == h
->slice_num
;
2324 deblock_top
= h
->top_type
;
2326 deblock_topleft
= (h
->mb_x
> 0);
2327 deblock_top
= (h
->mb_y
> !!MB_FIELD(h
));
2330 src_y
-= linesize
+ 1 + pixel_shift
;
2331 src_cb
-= uvlinesize
+ 1 + pixel_shift
;
2332 src_cr
-= uvlinesize
+ 1 + pixel_shift
;
2334 top_border_m1
= h
->top_borders
[top_idx
][h
->mb_x
- 1];
2335 top_border
= h
->top_borders
[top_idx
][h
->mb_x
];
2337 #define XCHG(a, b, xchg) \
2338 if (pixel_shift) { \
2340 AV_SWAP64(b + 0, a + 0); \
2341 AV_SWAP64(b + 8, a + 8); \
2351 if (deblock_topleft
) {
2352 XCHG(top_border_m1
+ (8 << pixel_shift
),
2353 src_y
- (7 << pixel_shift
), 1);
2355 XCHG(top_border
+ (0 << pixel_shift
), src_y
+ (1 << pixel_shift
), xchg
);
2356 XCHG(top_border
+ (8 << pixel_shift
), src_y
+ (9 << pixel_shift
), 1);
2357 if (h
->mb_x
+ 1 < h
->mb_width
) {
2358 XCHG(h
->top_borders
[top_idx
][h
->mb_x
+ 1],
2359 src_y
+ (17 << pixel_shift
), 1);
2362 if (simple
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
2364 if (deblock_topleft
) {
2365 XCHG(top_border_m1
+ (24 << pixel_shift
), src_cb
- (7 << pixel_shift
), 1);
2366 XCHG(top_border_m1
+ (40 << pixel_shift
), src_cr
- (7 << pixel_shift
), 1);
2368 XCHG(top_border
+ (16 << pixel_shift
), src_cb
+ (1 << pixel_shift
), xchg
);
2369 XCHG(top_border
+ (24 << pixel_shift
), src_cb
+ (9 << pixel_shift
), 1);
2370 XCHG(top_border
+ (32 << pixel_shift
), src_cr
+ (1 << pixel_shift
), xchg
);
2371 XCHG(top_border
+ (40 << pixel_shift
), src_cr
+ (9 << pixel_shift
), 1);
2372 if (h
->mb_x
+ 1 < h
->mb_width
) {
2373 XCHG(h
->top_borders
[top_idx
][h
->mb_x
+ 1] + (16 << pixel_shift
), src_cb
+ (17 << pixel_shift
), 1);
2374 XCHG(h
->top_borders
[top_idx
][h
->mb_x
+ 1] + (32 << pixel_shift
), src_cr
+ (17 << pixel_shift
), 1);
2378 if (deblock_topleft
) {
2379 XCHG(top_border_m1
+ (16 << pixel_shift
), src_cb
- (7 << pixel_shift
), 1);
2380 XCHG(top_border_m1
+ (24 << pixel_shift
), src_cr
- (7 << pixel_shift
), 1);
2382 XCHG(top_border
+ (16 << pixel_shift
), src_cb
+ 1 + pixel_shift
, 1);
2383 XCHG(top_border
+ (24 << pixel_shift
), src_cr
+ 1 + pixel_shift
, 1);
2389 static av_always_inline
int dctcoef_get(int16_t *mb
, int high_bit_depth
,
2392 if (high_bit_depth
) {
2393 return AV_RN32A(((int32_t *)mb
) + index
);
2395 return AV_RN16A(mb
+ index
);
2398 static av_always_inline
void dctcoef_set(int16_t *mb
, int high_bit_depth
,
2399 int index
, int value
)
2401 if (high_bit_depth
) {
2402 AV_WN32A(((int32_t *)mb
) + index
, value
);
2404 AV_WN16A(mb
+ index
, value
);
2407 static av_always_inline
void hl_decode_mb_predict_luma(H264Context
*h
,
2408 int mb_type
, int is_h264
,
2410 int transform_bypass
,
2414 uint8_t *dest_y
, int p
)
2416 void (*idct_add
)(uint8_t *dst
, int16_t *block
, int stride
);
2417 void (*idct_dc_add
)(uint8_t *dst
, int16_t *block
, int stride
);
2419 int qscale
= p
== 0 ? h
->qscale
: h
->chroma_qp
[p
- 1];
2420 block_offset
+= 16 * p
;
2421 if (IS_INTRA4x4(mb_type
)) {
2422 if (IS_8x8DCT(mb_type
)) {
2423 if (transform_bypass
) {
2425 idct_add
= h
->h264dsp
.h264_add_pixels8_clear
;
2427 idct_dc_add
= h
->h264dsp
.h264_idct8_dc_add
;
2428 idct_add
= h
->h264dsp
.h264_idct8_add
;
2430 for (i
= 0; i
< 16; i
+= 4) {
2431 uint8_t *const ptr
= dest_y
+ block_offset
[i
];
2432 const int dir
= h
->intra4x4_pred_mode_cache
[scan8
[i
]];
2433 if (transform_bypass
&& h
->sps
.profile_idc
== 244 && dir
<= 1) {
2434 h
->hpc
.pred8x8l_add
[dir
](ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
2436 const int nnz
= h
->non_zero_count_cache
[scan8
[i
+ p
* 16]];
2437 h
->hpc
.pred8x8l
[dir
](ptr
, (h
->topleft_samples_available
<< i
) & 0x8000,
2438 (h
->topright_samples_available
<< i
) & 0x4000, linesize
);
2440 if (nnz
== 1 && dctcoef_get(h
->mb
, pixel_shift
, i
* 16 + p
* 256))
2441 idct_dc_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
2443 idct_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
2448 if (transform_bypass
) {
2450 idct_add
= h
->h264dsp
.h264_add_pixels4_clear
;
2452 idct_dc_add
= h
->h264dsp
.h264_idct_dc_add
;
2453 idct_add
= h
->h264dsp
.h264_idct_add
;
2455 for (i
= 0; i
< 16; i
++) {
2456 uint8_t *const ptr
= dest_y
+ block_offset
[i
];
2457 const int dir
= h
->intra4x4_pred_mode_cache
[scan8
[i
]];
2459 if (transform_bypass
&& h
->sps
.profile_idc
== 244 && dir
<= 1) {
2460 h
->hpc
.pred4x4_add
[dir
](ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
2465 if (dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
) {
2466 const int topright_avail
= (h
->topright_samples_available
<< i
) & 0x8000;
2467 assert(h
->mb_y
|| linesize
<= block_offset
[i
]);
2468 if (!topright_avail
) {
2470 tr_high
= ((uint16_t *)ptr
)[3 - linesize
/ 2] * 0x0001000100010001ULL
;
2471 topright
= (uint8_t *)&tr_high
;
2473 tr
= ptr
[3 - linesize
] * 0x01010101u
;
2474 topright
= (uint8_t *)&tr
;
2477 topright
= ptr
+ (4 << pixel_shift
) - linesize
;
2481 h
->hpc
.pred4x4
[dir
](ptr
, topright
, linesize
);
2482 nnz
= h
->non_zero_count_cache
[scan8
[i
+ p
* 16]];
2485 if (nnz
== 1 && dctcoef_get(h
->mb
, pixel_shift
, i
* 16 + p
* 256))
2486 idct_dc_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
2488 idct_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
2489 } else if (CONFIG_SVQ3_DECODER
)
2490 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
* 16 + p
* 256, linesize
, qscale
, 0);
2496 h
->hpc
.pred16x16
[h
->intra16x16_pred_mode
](dest_y
, linesize
);
2498 if (h
->non_zero_count_cache
[scan8
[LUMA_DC_BLOCK_INDEX
+ p
]]) {
2499 if (!transform_bypass
)
2500 h
->h264dsp
.h264_luma_dc_dequant_idct(h
->mb
+ (p
* 256 << pixel_shift
),
2502 h
->dequant4_coeff
[p
][qscale
][0]);
2504 static const uint8_t dc_mapping
[16] = {
2505 0 * 16, 1 * 16, 4 * 16, 5 * 16,
2506 2 * 16, 3 * 16, 6 * 16, 7 * 16,
2507 8 * 16, 9 * 16, 12 * 16, 13 * 16,
2508 10 * 16, 11 * 16, 14 * 16, 15 * 16
2510 for (i
= 0; i
< 16; i
++)
2511 dctcoef_set(h
->mb
+ (p
* 256 << pixel_shift
),
2512 pixel_shift
, dc_mapping
[i
],
2513 dctcoef_get(h
->mb_luma_dc
[p
],
2517 } else if (CONFIG_SVQ3_DECODER
)
2518 ff_svq3_luma_dc_dequant_idct_c(h
->mb
+ p
* 256,
2519 h
->mb_luma_dc
[p
], qscale
);
2523 static av_always_inline
void hl_decode_mb_idct_luma(H264Context
*h
, int mb_type
,
2524 int is_h264
, int simple
,
2525 int transform_bypass
,
2529 uint8_t *dest_y
, int p
)
2531 void (*idct_add
)(uint8_t *dst
, int16_t *block
, int stride
);
2533 block_offset
+= 16 * p
;
2534 if (!IS_INTRA4x4(mb_type
)) {
2536 if (IS_INTRA16x16(mb_type
)) {
2537 if (transform_bypass
) {
2538 if (h
->sps
.profile_idc
== 244 &&
2539 (h
->intra16x16_pred_mode
== VERT_PRED8x8
||
2540 h
->intra16x16_pred_mode
== HOR_PRED8x8
)) {
2541 h
->hpc
.pred16x16_add
[h
->intra16x16_pred_mode
](dest_y
, block_offset
,
2542 h
->mb
+ (p
* 256 << pixel_shift
),
2545 for (i
= 0; i
< 16; i
++)
2546 if (h
->non_zero_count_cache
[scan8
[i
+ p
* 16]] ||
2547 dctcoef_get(h
->mb
, pixel_shift
, i
* 16 + p
* 256))
2548 h
->h264dsp
.h264_add_pixels4_clear(dest_y
+ block_offset
[i
],
2549 h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
),
2553 h
->h264dsp
.h264_idct_add16intra(dest_y
, block_offset
,
2554 h
->mb
+ (p
* 256 << pixel_shift
),
2556 h
->non_zero_count_cache
+ p
* 5 * 8);
2558 } else if (h
->cbp
& 15) {
2559 if (transform_bypass
) {
2560 const int di
= IS_8x8DCT(mb_type
) ?
4 : 1;
2561 idct_add
= IS_8x8DCT(mb_type
) ? h
->h264dsp
.h264_add_pixels8_clear
2562 : h
->h264dsp
.h264_add_pixels4_clear
;
2563 for (i
= 0; i
< 16; i
+= di
)
2564 if (h
->non_zero_count_cache
[scan8
[i
+ p
* 16]])
2565 idct_add(dest_y
+ block_offset
[i
],
2566 h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
),
2569 if (IS_8x8DCT(mb_type
))
2570 h
->h264dsp
.h264_idct8_add4(dest_y
, block_offset
,
2571 h
->mb
+ (p
* 256 << pixel_shift
),
2573 h
->non_zero_count_cache
+ p
* 5 * 8);
2575 h
->h264dsp
.h264_idct_add16(dest_y
, block_offset
,
2576 h
->mb
+ (p
* 256 << pixel_shift
),
2578 h
->non_zero_count_cache
+ p
* 5 * 8);
2581 } else if (CONFIG_SVQ3_DECODER
) {
2582 for (i
= 0; i
< 16; i
++)
2583 if (h
->non_zero_count_cache
[scan8
[i
+ p
* 16]] || h
->mb
[i
* 16 + p
* 256]) {
2584 // FIXME benchmark weird rule, & below
2585 uint8_t *const ptr
= dest_y
+ block_offset
[i
];
2586 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
* 16 + p
* 256, linesize
,
2587 h
->qscale
, IS_INTRA(mb_type
) ?
1 : 0);
2595 #include "h264_mb_template.c"
2599 #include "h264_mb_template.c"
2603 #include "h264_mb_template.c"
2605 void ff_h264_hl_decode_mb(H264Context
*h
)
2607 const int mb_xy
= h
->mb_xy
;
2608 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
2609 int is_complex
= CONFIG_SMALL
|| h
->is_complex
||
2610 IS_INTRA_PCM(mb_type
) || h
->qscale
== 0;
2613 if (is_complex
|| h
->pixel_shift
)
2614 hl_decode_mb_444_complex(h
);
2616 hl_decode_mb_444_simple_8(h
);
2617 } else if (is_complex
) {
2618 hl_decode_mb_complex(h
);
2619 } else if (h
->pixel_shift
) {
2620 hl_decode_mb_simple_16(h
);
2622 hl_decode_mb_simple_8(h
);
2625 int ff_pred_weight_table(H264Context
*h
)
2628 int luma_def
, chroma_def
;
2631 h
->use_weight_chroma
= 0;
2632 h
->luma_log2_weight_denom
= get_ue_golomb(&h
->gb
);
2633 if (h
->sps
.chroma_format_idc
)
2634 h
->chroma_log2_weight_denom
= get_ue_golomb(&h
->gb
);
2635 luma_def
= 1 << h
->luma_log2_weight_denom
;
2636 chroma_def
= 1 << h
->chroma_log2_weight_denom
;
2638 for (list
= 0; list
< 2; list
++) {
2639 h
->luma_weight_flag
[list
] = 0;
2640 h
->chroma_weight_flag
[list
] = 0;
2641 for (i
= 0; i
< h
->ref_count
[list
]; i
++) {
2642 int luma_weight_flag
, chroma_weight_flag
;
2644 luma_weight_flag
= get_bits1(&h
->gb
);
2645 if (luma_weight_flag
) {
2646 h
->luma_weight
[i
][list
][0] = get_se_golomb(&h
->gb
);
2647 h
->luma_weight
[i
][list
][1] = get_se_golomb(&h
->gb
);
2648 if (h
->luma_weight
[i
][list
][0] != luma_def
||
2649 h
->luma_weight
[i
][list
][1] != 0) {
2651 h
->luma_weight_flag
[list
] = 1;
2654 h
->luma_weight
[i
][list
][0] = luma_def
;
2655 h
->luma_weight
[i
][list
][1] = 0;
2658 if (h
->sps
.chroma_format_idc
) {
2659 chroma_weight_flag
= get_bits1(&h
->gb
);
2660 if (chroma_weight_flag
) {
2662 for (j
= 0; j
< 2; j
++) {
2663 h
->chroma_weight
[i
][list
][j
][0] = get_se_golomb(&h
->gb
);
2664 h
->chroma_weight
[i
][list
][j
][1] = get_se_golomb(&h
->gb
);
2665 if (h
->chroma_weight
[i
][list
][j
][0] != chroma_def
||
2666 h
->chroma_weight
[i
][list
][j
][1] != 0) {
2667 h
->use_weight_chroma
= 1;
2668 h
->chroma_weight_flag
[list
] = 1;
2673 for (j
= 0; j
< 2; j
++) {
2674 h
->chroma_weight
[i
][list
][j
][0] = chroma_def
;
2675 h
->chroma_weight
[i
][list
][j
][1] = 0;
2680 if (h
->slice_type_nos
!= AV_PICTURE_TYPE_B
)
2683 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
2688 * Initialize implicit_weight table.
2689 * @param field 0/1 initialize the weight for interlaced MBAFF
2690 * -1 initializes the rest
2692 static void implicit_weight_table(H264Context
*h
, int field
)
2694 int ref0
, ref1
, i
, cur_poc
, ref_start
, ref_count0
, ref_count1
;
2696 for (i
= 0; i
< 2; i
++) {
2697 h
->luma_weight_flag
[i
] = 0;
2698 h
->chroma_weight_flag
[i
] = 0;
2702 if (h
->picture_structure
== PICT_FRAME
) {
2703 cur_poc
= h
->cur_pic_ptr
->poc
;
2705 cur_poc
= h
->cur_pic_ptr
->field_poc
[h
->picture_structure
- 1];
2707 if (h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1 && !FRAME_MBAFF(h
) &&
2708 h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2 * cur_poc
) {
2710 h
->use_weight_chroma
= 0;
2714 ref_count0
= h
->ref_count
[0];
2715 ref_count1
= h
->ref_count
[1];
2717 cur_poc
= h
->cur_pic_ptr
->field_poc
[field
];
2719 ref_count0
= 16 + 2 * h
->ref_count
[0];
2720 ref_count1
= 16 + 2 * h
->ref_count
[1];
2724 h
->use_weight_chroma
= 2;
2725 h
->luma_log2_weight_denom
= 5;
2726 h
->chroma_log2_weight_denom
= 5;
2728 for (ref0
= ref_start
; ref0
< ref_count0
; ref0
++) {
2729 int poc0
= h
->ref_list
[0][ref0
].poc
;
2730 for (ref1
= ref_start
; ref1
< ref_count1
; ref1
++) {
2732 if (!h
->ref_list
[0][ref0
].long_ref
&& !h
->ref_list
[1][ref1
].long_ref
) {
2733 int poc1
= h
->ref_list
[1][ref1
].poc
;
2734 int td
= av_clip(poc1
- poc0
, -128, 127);
2736 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
2737 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
2738 int dist_scale_factor
= (tb
* tx
+ 32) >> 8;
2739 if (dist_scale_factor
>= -64 && dist_scale_factor
<= 128)
2740 w
= 64 - dist_scale_factor
;
2744 h
->implicit_weight
[ref0
][ref1
][0] =
2745 h
->implicit_weight
[ref0
][ref1
][1] = w
;
2747 h
->implicit_weight
[ref0
][ref1
][field
] = w
;
2754 * instantaneous decoder refresh.
2756 static void idr(H264Context
*h
)
2758 ff_h264_remove_all_refs(h
);
2759 h
->prev_frame_num
= 0;
2760 h
->prev_frame_num_offset
= 0;
2762 h
->prev_poc_lsb
= 0;
2765 /* forget old pics after a seek */
2766 static void flush_change(H264Context
*h
)
2769 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++)
2770 h
->last_pocs
[i
] = INT_MIN
;
2771 h
->outputed_poc
= h
->next_outputed_poc
= INT_MIN
;
2772 h
->prev_interlaced_frame
= 1;
2775 h
->cur_pic_ptr
->reference
= 0;
2777 memset(h
->ref_list
[0], 0, sizeof(h
->ref_list
[0]));
2778 memset(h
->ref_list
[1], 0, sizeof(h
->ref_list
[1]));
2779 memset(h
->default_ref_list
[0], 0, sizeof(h
->default_ref_list
[0]));
2780 memset(h
->default_ref_list
[1], 0, sizeof(h
->default_ref_list
[1]));
2781 ff_h264_reset_sei(h
);
2782 h
->recovery_frame
= -1;
2783 h
->frame_recovered
= 0;
2786 /* forget old pics after a seek */
2787 static void flush_dpb(AVCodecContext
*avctx
)
2789 H264Context
*h
= avctx
->priv_data
;
2792 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++) {
2793 if (h
->delayed_pic
[i
])
2794 h
->delayed_pic
[i
]->reference
= 0;
2795 h
->delayed_pic
[i
] = NULL
;
2801 for (i
= 0; i
< MAX_PICTURE_COUNT
; i
++)
2802 unref_picture(h
, &h
->DPB
[i
]);
2803 h
->cur_pic_ptr
= NULL
;
2804 unref_picture(h
, &h
->cur_pic
);
2806 h
->mb_x
= h
->mb_y
= 0;
2808 h
->parse_context
.state
= -1;
2809 h
->parse_context
.frame_start_found
= 0;
2810 h
->parse_context
.overread
= 0;
2811 h
->parse_context
.overread_index
= 0;
2812 h
->parse_context
.index
= 0;
2813 h
->parse_context
.last_index
= 0;
2816 h
->context_initialized
= 0;
2819 int ff_init_poc(H264Context
*h
, int pic_field_poc
[2], int *pic_poc
)