2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "libavutil/imgutils.h"
32 #include "mpegvideo.h"
35 #include "h264_mvpred.h"
38 #include "rectangle.h"
40 #include "vdpau_internal.h"
41 #include "libavutil/avassert.h"
/* Table with QP_MAX_NUM+1 entries, indexed by a QP value. The visible entries
 * repeat 0..5, i.e. entry q holds q % 6.
 * NOTE(review): only the first row of the initializer is visible in this
 * listing. */
48 static const uint8_t rem6
[QP_MAX_NUM
+1]={
49 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Table with QP_MAX_NUM+1 entries, indexed by a QP value. The visible entries
 * hold q / 6 (integer division); init_dequant4_coeff_table() uses div6[q] + 2
 * as a left-shift amount.
 * NOTE(review): only the first row of the initializer is visible in this
 * listing. */
52 static const uint8_t div6
[QP_MAX_NUM
+1]={
53 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
/* Array of enum PixelFormat values.
 * NOTE(review): the initializer entries are not visible in this listing;
 * judging by the name this is the pixel format list for full-range ("jpeg")
 * 4:2:0 H.264 with hwaccel formats - confirm against the full file. */
56 static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420
[] = {
64 * Checks if the top & left blocks are available if needed, and changes the DC mode so it only uses the available blocks.
/* Validates the cached intra 4x4 prediction modes against the availability of
 * the neighbouring samples.
 *
 * - If bit 0x8000 of h->top_samples_available is clear, the modes stored at
 *   intra4x4_pred_mode_cache[scan8[0] + i] are looked up in top[].
 * - If not all of the 0x8888 bits of h->left_samples_available are set, the
 *   modes at intra4x4_pred_mode_cache[scan8[0] + 8*i] whose mask[i] bit is
 *   clear are looked up in left[].
 * The looked-up value ("status") may be written back into the cache as the
 * replacement mode; an error is logged with the macroblock position when the
 * requested mode cannot be used.
 *
 * NOTE(review): the loop headers, the tests on status and the return
 * statements are missing from this listing. Presumably a negative table entry
 * is the error case and a non-zero entry is the replacement mode - confirm
 * against the full file. */
66 int ff_h264_check_intra4x4_pred_mode(H264Context
*h
){
67 MpegEncContext
* const s
= &h
->s
;
/* Remap tables indexed by the requested intra 4x4 mode. */
68 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
69 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
/* Top neighbour unavailable: check the modes along the top edge. */
72 if(!(h
->top_samples_available
&0x8000)){
74 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
76 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
79 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
/* Some left samples unavailable: check the modes along the left edge. */
84 if((h
->left_samples_available
&0x8888)!=0x8888){
/* One availability bit per entry i of the left column. */
85 static const int mask
[4]={0x8000,0x2000,0x80,0x20};
87 if(!(h
->left_samples_available
&mask
[i
])){
88 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
90 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
93 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
100 } //FIXME cleanup like ff_h264_check_intra_pred_mode
103 * Checks if the top & left blocks are available if needed, and changes the DC mode so it only uses the available blocks.
/* Checks a single intra prediction mode against neighbour availability.
 *
 * @param h    decoder context; top_samples_available and
 *             left_samples_available are read
 * @param mode requested prediction mode
 *
 * Logs "out of range intra chroma pred mode" for an invalid mode, and
 * "top/left block unavailable" when the mode needs samples that are missing.
 * When only part of the left samples are available (some, but not all, of the
 * 0x8080 bits are set) the mode is replaced by
 * ALZHEIMER_DC_L0T_PRED8x8 + (!(left & 0x8000)) + 2*(mode == DC_128_PRED8x8).
 *
 * NOTE(review): the range test, the table lookups through top[]/left[] and
 * the return statements are missing from this listing; presumably the
 * (possibly remapped) mode is returned on success and a negative value on
 * error - confirm against the full file. */
105 int ff_h264_check_intra_pred_mode(H264Context
*h
, int mode
){
106 MpegEncContext
* const s
= &h
->s
;
/* Remap tables indexed by the requested mode. */
107 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
108 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
111 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
115 if(!(h
->top_samples_available
&0x8000)){
118 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
123 if((h
->left_samples_available
&0x8080) != 0x8080){
125 if(h
->left_samples_available
&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
126 mode
= ALZHEIMER_DC_L0T_PRED8x8
+ (!(h
->left_samples_available
&0x8000)) + 2*(mode
== DC_128_PRED8x8
);
129 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
/* Decodes the header of one NAL unit and removes escape bytes from its payload.
 *
 * The first byte is split into h->nal_ref_idc (src[0]>>5) and
 * h->nal_unit_type (src[0]&0x1F). The payload is then scanned for 00 00 0x
 * sequences (x <= 3); three scan loops are shown, selected by
 * HAVE_FAST_UNALIGNED and word size, the fast ones testing 8 or 4 bytes at a
 * time for zero bytes.
 *
 * - No escape found (i >= length-1): *consumed is set to length+1, the +1
 *   accounting for the header byte.
 * - Otherwise h->rbsp_buffer[bufidx] is (re)allocated with av_fast_malloc to
 *   length + FF_INPUT_BUFFER_PADDING_SIZE bytes (bufidx is 1 for NAL_DPC,
 *   else 0), the payload is copied there without the escape bytes, the padding
 *   is zeroed, and *consumed is set to si+1.
 *
 * NOTE(review): declarations, the *dst_length stores, the allocation-failure
 * check and the return statements are missing from this listing; presumably
 * the function returns src in the first case and dst in the second - confirm
 * against the full file. */
137 const uint8_t *ff_h264_decode_nal(H264Context
*h
, const uint8_t *src
, int *dst_length
, int *consumed
, int length
){
142 // src[0]&0x80; //forbidden bit
143 h
->nal_ref_idc
= src
[0]>>5;
144 h
->nal_unit_type
= src
[0]&0x1F;
148 #if HAVE_FAST_UNALIGNED
/* 64-bit variant: the test is non-zero when one of the checked bytes of the
 * 8-byte word is zero. */
151 for(i
=0; i
+1<length
; i
+=9){
152 if(!((~AV_RN64A(src
+i
) & (AV_RN64A(src
+i
) - 0x0100010001000101ULL
)) & 0x8000800080008080ULL
))
/* 32-bit variant of the same zero-byte test. */
155 for(i
=0; i
+1<length
; i
+=5){
156 if(!((~AV_RN32A(src
+i
) & (AV_RN32A(src
+i
) - 0x01000101U
)) & 0x80008080U
))
159 if(i
>0 && !src
[i
]) i
--;
/* Plain byte-wise variant. */
163 for(i
=0; i
+1<length
; i
+=2){
165 if(i
>0 && src
[i
-1]==0) i
--;
167 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
169 /* startcode, so we must be past the end */
177 if(i
>=length
-1){ //no escaped 0
179 *consumed
= length
+1; //+1 for the header
183 bufidx
= h
->nal_unit_type
== NAL_DPC ?
1 : 0; // use second escape buffer for inter data
184 av_fast_malloc(&h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
], length
+FF_INPUT_BUFFER_PADDING_SIZE
);
185 dst
= h
->rbsp_buffer
[bufidx
];
191 //printf("decoding esc\n");
195 //remove escapes (very rare 1:2^22)
197 dst
[di
++]= src
[si
++];
198 dst
[di
++]= src
[si
++];
199 }else if(src
[si
]==0 && src
[si
+1]==0){
200 if(src
[si
+2]==3){ //escape
205 }else //next start code
209 dst
[di
++]= src
[si
++];
212 dst
[di
++]= src
[si
++];
/* Zero the padding after the unescaped payload. */
215 memset(dst
+di
, 0, FF_INPUT_BUFFER_PADDING_SIZE
);
218 *consumed
= si
+ 1;//+1 for the header
219 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
224 * Identify the exact end of the bitstream
225 * @return the length of the trailing bits, or 0 if damaged
/* Examines the end of an RBSP pointed to by src.
 * NOTE(review): nearly all of the body is missing from this listing; the only
 * visible statement traces a value v with tprintf. See the full file for how
 * the returned length is computed. */
227 static int ff_h264_decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
){
231 tprintf(h
->s
.avctx
, "rbsp trailing %X\n", v
);
/* Computes the vertical extent a partition needs from its reference picture.
 *
 * @param h        decoder context (mv_cache is read)
 * @param pic      reference picture (not used by the visible code)
 * @param n        block index, translated through scan8[]
 * @param height   partition height in rows
 * @param y_offset row of the partition top
 * @param list     reference list index
 * @return FFMAX(abs(top), bottom), where top/bottom are the first row and the
 *         row after the last one touched by motion compensation
 *
 * The vertical motion vector is in quarter-pel units: raw_my>>2 is the
 * full-pel part, and a non-zero fractional part (raw_my&3) adds 2 rows on each
 * side via filter_height. */
240 static inline int get_lowest_part_list_y(H264Context
*h
, Picture
*pic
, int n
, int height
,
241 int y_offset
, int list
){
242 int raw_my
= h
->mv_cache
[list
][ scan8
[n
] ][1];
243 int filter_height
= (raw_my
&3) ?
2 : 0;
244 int full_my
= (raw_my
>>2) + y_offset
;
245 int top
= full_my
- filter_height
, bottom
= full_my
+ height
+ filter_height
;
247 return FFMAX(abs(top
), bottom
);
/* Records, per reference picture, the lowest row one partition will read.
 *
 * @param refs   refs[list][ref_n] holds the largest row seen so far for that
 *               reference; a negative value means "not referenced yet"
 * @param nrefs  nrefs[list] is incremented the first time a reference is seen
 * @param n, height, y_offset  partition description passed on to
 *               get_lowest_part_list_y(); y_offset is first made absolute by
 *               adding 16*(s->mb_y >> MB_FIELD)
 * @param list0, list1  NOTE(review): presumably these select whether the
 *               list-0 and list-1 sections below run; the guarding
 *               conditions are missing from this listing.
 *
 * A reference is skipped when it is the current picture with the same picture
 * structure (see the comment inside the function). */
250 static inline void get_lowest_part_y(H264Context
*h
, int refs
[2][48], int n
, int height
,
251 int y_offset
, int list0
, int list1
, int *nrefs
){
252 MpegEncContext
* const s
= &h
->s
;
255 y_offset
+= 16*(s
->mb_y
>> MB_FIELD
);
/* List 0 reference of this partition. */
258 int ref_n
= h
->ref_cache
[0][ scan8
[n
] ];
259 Picture
*ref
= &h
->ref_list
[0][ref_n
];
261 // Error resilience puts the current picture in the ref list.
262 // Don't try to wait on these as it will cause a deadlock.
263 // Fields can wait on each other, though.
264 if (ref
->f
.thread_opaque
!= s
->current_picture
.f
.thread_opaque
||
265 (ref
->f
.reference
& 3) != s
->picture_structure
) {
266 my
= get_lowest_part_list_y(h
, ref
, n
, height
, y_offset
, 0);
267 if (refs
[0][ref_n
] < 0) nrefs
[0] += 1;
268 refs
[0][ref_n
] = FFMAX(refs
[0][ref_n
], my
);
/* List 1 reference of this partition; same handling as list 0. */
273 int ref_n
= h
->ref_cache
[1][ scan8
[n
] ];
274 Picture
*ref
= &h
->ref_list
[1][ref_n
];
276 if (ref
->f
.thread_opaque
!= s
->current_picture
.f
.thread_opaque
||
277 (ref
->f
.reference
& 3) != s
->picture_structure
) {
278 my
= get_lowest_part_list_y(h
, ref
, n
, height
, y_offset
, 1);
279 if (refs
[1][ref_n
] < 0) nrefs
[1] += 1;
280 refs
[1][ref_n
] = FFMAX(refs
[1][ref_n
], my
);
286 * Wait until all reference frames are available for MC operations.
288 * @param h the H264 context
/* Waits until every reference row needed by the current macroblock is decoded.
 *
 * Step 1: refs[][] is filled with -1 and get_lowest_part_y() is called once
 * per partition of the macroblock (16x16, 16x8, 8x16, or the 8x8
 * sub-partitions 8x8/8x4/4x8/4x4), which records the lowest row needed from
 * each reference picture.
 * Step 2: for each list and each referenced picture,
 * ff_thread_await_progress() is called with that row clamped to
 * pic_height-1. The row and field argument depend on whether the current
 * picture (FIELD_PICTURE) and the reference (ref_field_picture) are field or
 * frame pictures.
 *
 * NOTE(review): some declarations, loop headers and the statements that
 * adjust row and decrement nrefs are missing from this listing. */
290 static void await_references(H264Context
*h
){
291 MpegEncContext
* const s
= &h
->s
;
292 const int mb_xy
= h
->mb_xy
;
293 const int mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
/* -1 marks "no row requested from this reference". */
298 memset(refs
, -1, sizeof(refs
));
300 if(IS_16X16(mb_type
)){
301 get_lowest_part_y(h
, refs
, 0, 16, 0,
302 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
303 }else if(IS_16X8(mb_type
)){
304 get_lowest_part_y(h
, refs
, 0, 8, 0,
305 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
306 get_lowest_part_y(h
, refs
, 8, 8, 8,
307 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1), nrefs
);
308 }else if(IS_8X16(mb_type
)){
309 get_lowest_part_y(h
, refs
, 0, 16, 0,
310 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
311 get_lowest_part_y(h
, refs
, 4, 16, 0,
312 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1), nrefs
);
316 assert(IS_8X8(mb_type
));
319 const int sub_mb_type
= h
->sub_mb_type
[i
];
/* 8x8 blocks 2 and 3 (bit 1 of i set) start 8 rows down. */
321 int y_offset
= (i
&2)<<2;
323 if(IS_SUB_8X8(sub_mb_type
)){
324 get_lowest_part_y(h
, refs
, n
, 8, y_offset
,
325 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1), nrefs
);
326 }else if(IS_SUB_8X4(sub_mb_type
)){
327 get_lowest_part_y(h
, refs
, n
, 4, y_offset
,
328 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1), nrefs
);
329 get_lowest_part_y(h
, refs
, n
+2, 4, y_offset
+4,
330 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1), nrefs
);
331 }else if(IS_SUB_4X8(sub_mb_type
)){
332 get_lowest_part_y(h
, refs
, n
, 8, y_offset
,
333 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1), nrefs
);
334 get_lowest_part_y(h
, refs
, n
+1, 8, y_offset
,
335 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1), nrefs
);
338 assert(IS_SUB_4X4(sub_mb_type
));
340 int sub_y_offset
= y_offset
+ 2*(j
&2);
341 get_lowest_part_y(h
, refs
, n
+j
, 4, sub_y_offset
,
342 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1), nrefs
);
/* Wait on every reference recorded above; the inner loop stops as soon as
 * nrefs[list] reaches zero. */
348 for(list
=h
->list_count
-1; list
>=0; list
--){
349 for(ref
=0; ref
<48 && nrefs
[list
]; ref
++){
350 int row
= refs
[list
][ref
];
352 Picture
*ref_pic
= &h
->ref_list
[list
][ref
];
353 int ref_field
= ref_pic
->f
.reference
- 1;
354 int ref_field_picture
= ref_pic
->field_picture
;
355 int pic_height
= 16*s
->mb_height
>> ref_field_picture
;
360 if(!FIELD_PICTURE
&& ref_field_picture
){ // frame referencing two fields
361 ff_thread_await_progress((AVFrame
*)ref_pic
, FFMIN((row
>> 1) - !(row
&1), pic_height
-1), 1);
362 ff_thread_await_progress((AVFrame
*)ref_pic
, FFMIN((row
>> 1) , pic_height
-1), 0);
363 }else if(FIELD_PICTURE
&& !ref_field_picture
){ // field referencing one field of a frame
364 ff_thread_await_progress((AVFrame
*)ref_pic
, FFMIN(row
*2 + ref_field
, pic_height
-1), 0);
365 }else if(FIELD_PICTURE
){
366 ff_thread_await_progress((AVFrame
*)ref_pic
, FFMIN(row
, pic_height
-1), ref_field
);
368 ff_thread_await_progress((AVFrame
*)ref_pic
, FFMIN(row
, pic_height
-1), 0);
377 * DCT transforms the 16 dc values.
378 * @param qp quantization parameter ??? FIXME
/* Applies a 4x4 butterfly transform in place to the 16 luma DC coefficients
 * stored in block.
 *
 * The coefficients are addressed through x_offset[]/y_offset[] and multiples
 * of stride. The first pass forms sums/differences of coefficient pairs
 * (z0..z3) per y_offset[i]; the second pass combines the intermediate values
 * in temp[] per x_offset[i] and stores the results halved (>>1).
 *
 * NOTE(review): the definition of stride, both loop headers and the stores
 * into temp[] are missing from this listing; the qp parameter is commented
 * out in the signature. */
380 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
381 // const int qmul= dequant_coeff[qp][0];
383 int temp
[16]; //FIXME check if this is a good idea
384 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
385 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
/* First pass. */
388 const int offset
= y_offset
[i
];
389 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
390 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
391 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
392 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
/* Second pass, reading the intermediate values from temp[]. */
401 const int offset
= x_offset
[i
];
402 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
403 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
404 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
405 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
407 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
408 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
409 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
410 block
[stride
*10+offset
]= (z0
- z3
)>>1;
/* Applies a 2x2 butterfly transform in place to four chroma DC coefficients.
 *
 * The four values are read from block[] at offsets 0, xStride (16),
 * stride (32) and stride + xStride (48), combined, and written back to the
 * same positions without scaling.
 *
 * NOTE(review): the declarations and the intermediate butterfly step that
 * defines e (and updates a, b, c) are missing from this listing. */
419 static void chroma_dc_dct_c(DCTELEM
*block
){
420 const int stride
= 16*2;
421 const int xStride
= 16;
/* Load the 2x2 input. */
424 a
= block
[stride
*0 + xStride
*0];
425 b
= block
[stride
*0 + xStride
*1];
426 c
= block
[stride
*1 + xStride
*0];
427 d
= block
[stride
*1 + xStride
*1];
/* Store the 2x2 result. */
434 block
[stride
*0 + xStride
*0]= (a
+c
);
435 block
[stride
*0 + xStride
*1]= (e
+b
);
436 block
[stride
*1 + xStride
*0]= (a
-c
);
437 block
[stride
*1 + xStride
*1]= (e
-b
);
/* Motion-compensates one partition from a single reference picture.
 *
 * @param pic           reference picture to read from
 * @param n             block index (through scan8[]) whose motion vector is used
 * @param list          reference list the motion vector belongs to
 * @param delta         byte offset of the second call of qpix_op
 *                      NOTE(review): presumably only used when !square; the
 *                      guarding condition is missing from this listing.
 * @param src_x_offset, src_y_offset  partition position in units of 2 luma
 *                      pixels (multiplied by 8 to get quarter-pel units)
 * @param qpix_op       table of quarter-pel luma functions, indexed by luma_xy
 * @param chroma_op     chroma interpolation function
 *
 * mx/my are in quarter-pel units: mx>>2 / my>>2 is the full-pel position and
 * luma_xy = (mx&3) + ((my&3)<<2) selects the interpolation function. When the
 * referenced area extends outside the picture by more than the
 * emu_edge_width/height allowance, the source is first rebuilt in
 * s->edge_emu_buffer by s->dsp.emulated_edge_mc().
 *
 * After luma, the function returns early if CONFIG_GRAY && CODEC_FLAG_GRAY.
 * Two chroma paths are visible: one that runs qpix_op on data[1]/data[2] at
 * the luma offset (presumably the chroma444 case), and one that runs
 * chroma_op at the mx>>3 / my>>3 position with mb_uvlinesize.
 *
 * NOTE(review): the conditions selecting these paths and the emu flag
 * handling are missing from this listing. */
441 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
442 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
443 int src_x_offset
, int src_y_offset
,
444 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
,
445 int pixel_shift
, int chroma444
){
446 MpegEncContext
* const s
= &h
->s
;
447 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
448 int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
449 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
450 int offset
= ((mx
>>2) << pixel_shift
) + (my
>>2)*h
->mb_linesize
;
451 uint8_t * src_y
= pic
->f
.data
[0] + offset
;
452 uint8_t * src_cb
, * src_cr
;
453 int extra_width
= h
->emu_edge_width
;
454 int extra_height
= h
->emu_edge_height
;
456 const int full_mx
= mx
>>2;
457 const int full_my
= my
>>2;
458 const int pic_width
= 16*s
->mb_width
;
459 const int pic_height
= 16*s
->mb_height
>> MB_FIELD
;
/* Shrink the allowed overhang when the vector has a fractional part. */
461 if(mx
&7) extra_width
-= 3;
462 if(my
&7) extra_height
-= 3;
/* Referenced block leaves the usable picture area: emulate the edge. */
464 if( full_mx
< 0-extra_width
465 || full_my
< 0-extra_height
466 || full_mx
+ 16/*FIXME*/ > pic_width
+ extra_width
467 || full_my
+ 16/*FIXME*/ > pic_height
+ extra_height
){
468 s
->dsp
.emulated_edge_mc(s
->edge_emu_buffer
, src_y
- (2 << pixel_shift
) - 2*h
->mb_linesize
, h
->mb_linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
469 src_y
= s
->edge_emu_buffer
+ (2 << pixel_shift
) + 2*h
->mb_linesize
;
/* Luma interpolation. */
473 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); //FIXME try variable height perhaps?
475 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
478 if(CONFIG_GRAY
&& s
->flags
&CODEC_FLAG_GRAY
) return;
/* Chroma handled with the luma functions at the luma offset. */
481 src_cb
= pic
->f
.data
[1] + offset
;
483 s
->dsp
.emulated_edge_mc(s
->edge_emu_buffer
, src_cb
- (2 << pixel_shift
) - 2*h
->mb_linesize
, h
->mb_linesize
,
484 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
485 src_cb
= s
->edge_emu_buffer
+ (2 << pixel_shift
) + 2*h
->mb_linesize
;
487 qpix_op
[luma_xy
](dest_cb
, src_cb
, h
->mb_linesize
); //FIXME try variable height perhaps?
489 qpix_op
[luma_xy
](dest_cb
+ delta
, src_cb
+ delta
, h
->mb_linesize
);
492 src_cr
= pic
->f
.data
[2] + offset
;
494 s
->dsp
.emulated_edge_mc(s
->edge_emu_buffer
, src_cr
- (2 << pixel_shift
) - 2*h
->mb_linesize
, h
->mb_linesize
,
495 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
496 src_cr
= s
->edge_emu_buffer
+ (2 << pixel_shift
) + 2*h
->mb_linesize
;
498 qpix_op
[luma_xy
](dest_cr
, src_cr
, h
->mb_linesize
); //FIXME try variable height perhaps?
500 qpix_op
[luma_xy
](dest_cr
+ delta
, src_cr
+ delta
, h
->mb_linesize
);
506 // chroma offset when predicting from a field of opposite parity
507 my
+= 2 * ((s
->mb_y
& 1) - (pic
->f
.reference
- 1));
508 emu
|= (my
>>3) < 0 || (my
>>3) + 8 >= (pic_height
>>1);
/* Subsampled chroma: positions use mx>>3 / my>>3 and the chroma line size. */
510 src_cb
= pic
->f
.data
[1] + ((mx
>> 3) << pixel_shift
) + (my
>> 3) * h
->mb_uvlinesize
;
511 src_cr
= pic
->f
.data
[2] + ((mx
>> 3) << pixel_shift
) + (my
>> 3) * h
->mb_uvlinesize
;
514 s
->dsp
.emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
515 src_cb
= s
->edge_emu_buffer
;
517 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
520 s
->dsp
.emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
521 src_cr
= s
->edge_emu_buffer
;
523 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
/* Unweighted motion compensation of one partition.
 *
 * The destination pointers are advanced to the partition (x_offset/y_offset
 * are in units of 2 luma pixels; chroma uses either the luma line size or
 * mb_uvlinesize), the offsets are then made absolute by adding 8 units per
 * macroblock, and mc_dir_part() is called for the list-0 and list-1
 * references of block n. The operation starts as "put" (qpix_put/chroma_put);
 * chroma_op is switched to chroma_avg before the list-1 call.
 *
 * NOTE(review): the chroma444 test, the list0/list1 tests and the matching
 * switch of qpix_op to qpix_avg are missing from this listing. */
526 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
527 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
528 int x_offset
, int y_offset
,
529 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
530 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
531 int list0
, int list1
, int pixel_shift
, int chroma444
){
532 MpegEncContext
* const s
= &h
->s
;
533 qpel_mc_func
*qpix_op
= qpix_put
;
534 h264_chroma_mc_func chroma_op
= chroma_put
;
536 dest_y
+= (2*x_offset
<< pixel_shift
) + 2*y_offset
*h
->mb_linesize
;
/* Chroma at luma resolution. */
538 dest_cb
+= (2*x_offset
<< pixel_shift
) + 2*y_offset
*h
->mb_linesize
;
539 dest_cr
+= (2*x_offset
<< pixel_shift
) + 2*y_offset
*h
->mb_linesize
;
/* Subsampled chroma. */
541 dest_cb
+= ( x_offset
<< pixel_shift
) + y_offset
*h
->mb_uvlinesize
;
542 dest_cr
+= ( x_offset
<< pixel_shift
) + y_offset
*h
->mb_uvlinesize
;
/* Make the partition offsets absolute within the picture. */
544 x_offset
+= 8*s
->mb_x
;
545 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
548 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
549 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
550 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
551 qpix_op
, chroma_op
, pixel_shift
, chroma444
);
554 chroma_op
= chroma_avg
;
558 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
559 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
560 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
561 qpix_op
, chroma_op
, pixel_shift
, chroma444
);
/* Weighted motion compensation of one partition.
 *
 * Bi-predicted case: the list-0 prediction is written to the destination and
 * the list-1 prediction to temporaries inside s->obmc_scratchpad; the two are
 * then combined with luma_weight_avg / chroma_weight_avg.
 *   - h->use_weight == 2 (implicit weights): weight0 comes from
 *     h->implicit_weight[refn0][refn1][s->mb_y&1], weight1 = 64 - weight0,
 *     log2 denominator 5, offset 0.
 *   - otherwise: the weights and offsets come from h->luma_weight /
 *     h->chroma_weight with the corresponding log2 denominators.
 * Single-list case: one mc_dir_part() call followed by luma_weight_op and,
 * when h->use_weight_chroma is set, chroma_weight_op on cb and cr.
 *
 * In one (not visible) branch the chroma weight functions are replaced by the
 * luma ones and the chroma destinations use the luma line size.
 *
 * NOTE(review): the chroma444 test, the list0 && list1 test and the
 * use_weight test in front of luma_weight_op are missing from this listing. */
565 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
566 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
567 int x_offset
, int y_offset
,
568 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
569 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
570 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
571 int list0
, int list1
, int pixel_shift
, int chroma444
){
572 MpegEncContext
* const s
= &h
->s
;
574 dest_y
+= (2*x_offset
<< pixel_shift
) + 2*y_offset
*h
->mb_linesize
;
/* Chroma at luma resolution: reuse the luma weight functions. */
576 chroma_weight_avg
= luma_weight_avg
;
577 chroma_weight_op
= luma_weight_op
;
578 dest_cb
+= (2*x_offset
<< pixel_shift
) + 2*y_offset
*h
->mb_linesize
;
579 dest_cr
+= (2*x_offset
<< pixel_shift
) + 2*y_offset
*h
->mb_linesize
;
/* Subsampled chroma. */
581 dest_cb
+= ( x_offset
<< pixel_shift
) + y_offset
*h
->mb_uvlinesize
;
582 dest_cr
+= ( x_offset
<< pixel_shift
) + y_offset
*h
->mb_uvlinesize
;
/* Make the partition offsets absolute within the picture. */
584 x_offset
+= 8*s
->mb_x
;
585 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
588 /* don't optimize for luma-only case, since B-frames usually
589 * use implicit weights => chroma too. */
590 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
591 uint8_t *tmp_cr
= s
->obmc_scratchpad
+ (16 << pixel_shift
);
592 uint8_t *tmp_y
= s
->obmc_scratchpad
+ 16*h
->mb_uvlinesize
;
593 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
594 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
/* List 0 goes to the destination, list 1 to the temporaries. */
596 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
597 dest_y
, dest_cb
, dest_cr
,
598 x_offset
, y_offset
, qpix_put
, chroma_put
, pixel_shift
, chroma444
);
599 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
600 tmp_y
, tmp_cb
, tmp_cr
,
601 x_offset
, y_offset
, qpix_put
, chroma_put
, pixel_shift
, chroma444
);
/* Implicit weights. */
603 if(h
->use_weight
== 2){
604 int weight0
= h
->implicit_weight
[refn0
][refn1
][s
->mb_y
&1];
605 int weight1
= 64 - weight0
;
606 luma_weight_avg( dest_y
, tmp_y
, h
-> mb_linesize
, 5, weight0
, weight1
, 0);
607 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
608 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
/* Explicit weights; the two offsets are summed. */
610 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
611 h
->luma_weight
[refn0
][0][0] , h
->luma_weight
[refn1
][1][0],
612 h
->luma_weight
[refn0
][0][1] + h
->luma_weight
[refn1
][1][1]);
613 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
614 h
->chroma_weight
[refn0
][0][0][0] , h
->chroma_weight
[refn1
][1][0][0],
615 h
->chroma_weight
[refn0
][0][0][1] + h
->chroma_weight
[refn1
][1][0][1]);
616 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
617 h
->chroma_weight
[refn0
][0][1][0] , h
->chroma_weight
[refn1
][1][1][0],
618 h
->chroma_weight
[refn0
][0][1][1] + h
->chroma_weight
[refn1
][1][1][1]);
/* Single-list prediction. */
621 int list
= list1 ?
1 : 0;
622 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
623 Picture
*ref
= &h
->ref_list
[list
][refn
];
624 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
625 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
626 qpix_put
, chroma_put
, pixel_shift
, chroma444
);
628 luma_weight_op(dest_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
629 h
->luma_weight
[refn
][list
][0], h
->luma_weight
[refn
][list
][1]);
630 if(h
->use_weight_chroma
){
631 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
632 h
->chroma_weight
[refn
][list
][0][0], h
->chroma_weight
[refn
][list
][0][1]);
633 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
634 h
->chroma_weight
[refn
][list
][1][0], h
->chroma_weight
[refn
][list
][1][1]);
/* Motion-compensates one partition, choosing weighted or plain prediction.
 *
 * mc_part_weighted() is used when implicit weighting is active
 * (h->use_weight == 2), both lists are used, and the implicit weight for the
 * two references of block n differs from 32; it receives entries 0 and 3 of
 * weight_op / weight_avg as the luma and chroma functions. Otherwise
 * mc_part_std() is called.
 *
 * NOTE(review): the end of the condition and the else keyword are missing
 * from this listing; there is presumably a further alternative for explicit
 * weighting - confirm against the full file. */
639 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
640 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
641 int x_offset
, int y_offset
,
642 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
643 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
644 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
645 int list0
, int list1
, int pixel_shift
, int chroma444
){
646 if((h
->use_weight
==2 && list0
&& list1
647 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ][h
->s
.mb_y
&1] != 32))
649 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
650 x_offset
, y_offset
, qpix_put
, chroma_put
,
651 weight_op
[0], weight_op
[3], weight_avg
[0],
652 weight_avg
[3], list0
, list1
, pixel_shift
, chroma444
);
654 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
655 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
,
656 chroma_avg
, list0
, list1
, pixel_shift
, chroma444
);
/* Issues cache prefetches for reference pixels likely to be needed soon.
 *
 * The position is derived from the motion vector of block 0 in the given list
 * (full-pel part, mv>>2) plus the macroblock position; 64 pixels are added
 * horizontally, which matches the "4 macroblocks ahead" of the comment
 * below. The prefetched rows are staggered by s->mb_x (&3 for luma, &7 for
 * chroma). Luma is always prefetched from src[0]; chroma is prefetched either
 * plane by plane at the luma offset, or with a single call that uses
 * src[2]-src[1] as the stride so that both chroma planes are covered.
 *
 * NOTE(review): the test on refn and the chroma444 test are missing from this
 * listing. */
659 static inline void prefetch_motion(H264Context
*h
, int list
, int pixel_shift
, int chroma444
){
660 /* fetch pixels for estimated mv 4 macroblocks ahead
661 * optimized for 64byte cache lines */
662 MpegEncContext
* const s
= &h
->s
;
663 const int refn
= h
->ref_cache
[list
][scan8
[0]];
665 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0]>>2) + 16*s
->mb_x
+ 8;
666 const int my
= (h
->mv_cache
[list
][scan8
[0]][1]>>2) + 16*s
->mb_y
;
667 uint8_t **src
= h
->ref_list
[list
][refn
].f
.data
;
668 int off
= (mx
<< pixel_shift
) + (my
+ (s
->mb_x
&3)*4)*h
->mb_linesize
+ (64 << pixel_shift
);
669 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
671 s
->dsp
.prefetch(src
[1]+off
, s
->linesize
, 4);
672 s
->dsp
.prefetch(src
[2]+off
, s
->linesize
, 4);
674 off
= ((mx
>>1) << pixel_shift
) + ((my
>>1) + (s
->mb_x
&7))*s
->uvlinesize
+ (64 << pixel_shift
);
675 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
/* Performs inter prediction for the current macroblock.
 *
 * The macroblock type selects how mc_part() is called:
 *   16x16        one call, block 0
 *   16x8         blocks 0 and 8, second at y_offset 4
 *   8x16         blocks 0 and 4, second at x_offset 4
 *   8x8          per 8x8 block i, by sub_mb_type: 8x8, 8x4, 4x8 or 4x4
 * Offsets are in units of 2 luma pixels. Each partition shape passes its own
 * entries of qpix_put/qpix_avg, chroma_put/chroma_avg and weight_op/weight_avg.
 * prefetch_motion() is called for list 0 before and list 1 after the
 * prediction.
 *
 * NOTE(review): the statement guarded by the frame-threading test (presumably
 * a call to await_references()), the loop headers and the definition of n are
 * missing from this listing. */
680 static av_always_inline
void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
681 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
682 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
683 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
684 int pixel_shift
, int chroma444
){
685 MpegEncContext
* const s
= &h
->s
;
686 const int mb_xy
= h
->mb_xy
;
687 const int mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
689 assert(IS_INTER(mb_type
));
691 if(HAVE_PTHREADS
&& (s
->avctx
->active_thread_type
& FF_THREAD_FRAME
))
693 prefetch_motion(h
, 0, pixel_shift
, chroma444
);
695 if(IS_16X16(mb_type
)){
696 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
697 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
698 weight_op
, weight_avg
,
699 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1),
700 pixel_shift
, chroma444
);
701 }else if(IS_16X8(mb_type
)){
702 mc_part(h
, 0, 0, 4, 8 << pixel_shift
, dest_y
, dest_cb
, dest_cr
, 0, 0,
703 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
704 &weight_op
[1], &weight_avg
[1],
705 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1),
706 pixel_shift
, chroma444
);
707 mc_part(h
, 8, 0, 4, 8 << pixel_shift
, dest_y
, dest_cb
, dest_cr
, 0, 4,
708 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
709 &weight_op
[1], &weight_avg
[1],
710 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1),
711 pixel_shift
, chroma444
);
712 }else if(IS_8X16(mb_type
)){
713 mc_part(h
, 0, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
714 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
715 &weight_op
[2], &weight_avg
[2],
716 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1),
717 pixel_shift
, chroma444
);
718 mc_part(h
, 4, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
719 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
720 &weight_op
[2], &weight_avg
[2],
721 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1),
722 pixel_shift
, chroma444
);
726 assert(IS_8X8(mb_type
));
729 const int sub_mb_type
= h
->sub_mb_type
[i
];
/* Position of 8x8 block i inside the macroblock, in 2-pixel units. */
731 int x_offset
= (i
&1)<<2;
732 int y_offset
= (i
&2)<<1;
734 if(IS_SUB_8X8(sub_mb_type
)){
735 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
736 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
737 &weight_op
[3], &weight_avg
[3],
738 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1),
739 pixel_shift
, chroma444
);
740 }else if(IS_SUB_8X4(sub_mb_type
)){
741 mc_part(h
, n
, 0, 2, 4 << pixel_shift
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
742 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
743 &weight_op
[4], &weight_avg
[4],
744 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1),
745 pixel_shift
, chroma444
);
746 mc_part(h
, n
+2, 0, 2, 4 << pixel_shift
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
747 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
748 &weight_op
[4], &weight_avg
[4],
749 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1),
750 pixel_shift
, chroma444
);
751 }else if(IS_SUB_4X8(sub_mb_type
)){
752 mc_part(h
, n
, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
753 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
754 &weight_op
[5], &weight_avg
[5],
755 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1),
756 pixel_shift
, chroma444
);
757 mc_part(h
, n
+1, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
758 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
759 &weight_op
[5], &weight_avg
[5],
760 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1),
761 pixel_shift
, chroma444
);
764 assert(IS_SUB_4X4(sub_mb_type
));
766 int sub_x_offset
= x_offset
+ 2*(j
&1);
767 int sub_y_offset
= y_offset
+ (j
&2);
768 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
769 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
770 &weight_op
[6], &weight_avg
[6],
771 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1),
772 pixel_shift
, chroma444
);
778 prefetch_motion(h
, 1, pixel_shift
, chroma444
);
/* Releases the tables owned by the context and its thread contexts.
 *
 * The shared tables (prediction modes, cbp, mvd, direct, non-zero counts,
 * slice table, list counts, mb2b/mb2br maps) are freed with av_freep(), which
 * also resets the pointers; h->slice_table points into slice_table_base and
 * is therefore cleared by hand. For every thread context the border buffers
 * and the scratchpad are freed, the RBSP buffers are freed and their sizes
 * reset, and every context except index 0 is itself freed.
 *
 * @param free_rbsp NOTE(review): presumably controls whether the RBSP buffers
 *                  are released; the guarding test and the NULL check on hx
 *                  are missing from this listing. */
781 static void free_tables(H264Context
*h
, int free_rbsp
){
785 av_freep(&h
->intra4x4_pred_mode
);
786 av_freep(&h
->chroma_pred_mode_table
);
787 av_freep(&h
->cbp_table
);
788 av_freep(&h
->mvd_table
[0]);
789 av_freep(&h
->mvd_table
[1]);
790 av_freep(&h
->direct_table
);
791 av_freep(&h
->non_zero_count
);
792 av_freep(&h
->slice_table_base
);
793 h
->slice_table
= NULL
;
794 av_freep(&h
->list_counts
);
796 av_freep(&h
->mb2b_xy
);
797 av_freep(&h
->mb2br_xy
);
/* Per-thread buffers. */
799 for(i
= 0; i
< MAX_THREADS
; i
++) {
800 hx
= h
->thread_context
[i
];
802 av_freep(&hx
->top_borders
[1]);
803 av_freep(&hx
->top_borders
[0]);
804 av_freep(&hx
->s
.obmc_scratchpad
);
806 av_freep(&hx
->rbsp_buffer
[1]);
807 av_freep(&hx
->rbsp_buffer
[0]);
808 hx
->rbsp_buffer_size
[0] = 0;
809 hx
->rbsp_buffer_size
[1] = 0;
/* Context 0 is not freed here. */
811 if (i
) av_freep(&h
->thread_context
[i
]);
/* Builds the 8x8 dequantization tables from the PPS scaling matrices.
 *
 * Each h->dequant8_coeff[i] first points at its own h->dequant8_buffer[i]; if
 * an earlier scaling matrix j is byte-identical to matrix i, buffer j is
 * shared instead. For every QP from 0 to max_qp (51 + 6 per bit of luma depth
 * above 8) and every coefficient x, the entry is
 *   dequant8_coeff_init[idx][...] * scaling_matrix8[i][x] << shift
 * and is stored at the transposed position (x>>3)|((x&7)<<3).
 *
 * NOTE(review): the outer loops and the definitions of shift and idx are
 * missing from this listing. */
815 static void init_dequant8_coeff_table(H264Context
*h
){
817 const int max_qp
= 51 + 6*(h
->sps
.bit_depth_luma
-8);
820 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[i
];
/* Reuse the table of an identical earlier scaling matrix. */
822 if(!memcmp(h
->pps
.scaling_matrix8
[j
], h
->pps
.scaling_matrix8
[i
], 64*sizeof(uint8_t))){
823 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[j
];
830 for(q
=0; q
<max_qp
+1; q
++){
834 h
->dequant8_coeff
[i
][q
][(x
>>3)|((x
&7)<<3)] =
835 ((uint32_t)dequant8_coeff_init
[idx
][ dequant8_coeff_init_scan
[((x
>>1)&12) | (x
&3)] ] *
836 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
/* Builds the 4x4 dequantization tables from the PPS scaling matrices.
 *
 * Each h->dequant4_coeff[i] first points at its own h->dequant4_buffer[i]; if
 * an earlier scaling matrix j is byte-identical to matrix i, buffer j is
 * shared instead. For every QP from 0 to max_qp (51 + 6 per bit of luma depth
 * above 8) and every coefficient x, the entry is
 *   dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * scaling_matrix4[i][x] << shift
 * with shift = div6[q] + 2, stored at the transposed position
 * (x>>2)|((x<<2)&0xF).
 *
 * NOTE(review): the outer loops and the definition of idx are missing from
 * this listing. */
841 static void init_dequant4_coeff_table(H264Context
*h
){
843 const int max_qp
= 51 + 6*(h
->sps
.bit_depth_luma
-8);
845 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
/* Reuse the table of an identical earlier scaling matrix. */
847 if(!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
], 16*sizeof(uint8_t))){
848 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
855 for(q
=0; q
<max_qp
+1; q
++){
856 int shift
= div6
[q
] + 2;
859 h
->dequant4_coeff
[i
][q
][(x
>>2)|((x
<<2)&0xF)] =
860 ((uint32_t)dequant4_coeff_init
[idx
][(x
&1) + ((x
>>2)&1)] *
861 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
/* Initialises all dequantization tables of the context.
 *
 * The 4x4 tables are always built; the 8x8 tables only when the PPS enables
 * transform_8x8_mode. When the SPS enables transform_bypass, the QP-0 entries
 * of the 4x4 tables (and of the 8x8 tables, if in use) are overwritten with
 * 1<<6.
 *
 * NOTE(review): the loops over i and x are missing from this listing. */
866 static void init_dequant_tables(H264Context
*h
){
868 init_dequant4_coeff_table(h
);
869 if(h
->pps
.transform_8x8_mode
)
870 init_dequant8_coeff_table(h
);
871 if(h
->sps
.transform_bypass
){
874 h
->dequant4_coeff
[i
][0][x
] = 1<<6;
875 if(h
->pps
.transform_8x8_mode
)
878 h
->dequant8_coeff
[i
][0][x
] = 1<<6;
/* Allocates the per-picture tables of the decoder.
 *
 * Two sizes are used: big_mb_num = mb_stride * (mb_height+1) for tables
 * covering the whole picture, and row_mb_num = 2 * mb_stride * thread_count
 * for tables that only hold two macroblock rows per thread. All allocations
 * are zeroed (FF_ALLOCZ_OR_GOTO) and jump to the fail label on error.
 *
 * The slice table is filled with -1 and h->slice_table points
 * 2*mb_stride + 1 entries into the base allocation. mb2b_xy[] and mb2br_xy[]
 * are then filled for every macroblock: mb2b_xy maps to 4*x + 4*y*b_stride,
 * mb2br_xy to 8 * (mb_xy, or mb_xy modulo two rows when FMO is off). The
 * dequantization tables are initialised if h->dequant4_coeff[0] is unset.
 *
 * NOTE(review): the return statements and the fail label are missing from
 * this listing; presumably 0 is returned on success and the tables are freed
 * on failure - confirm against the full file. */
883 int ff_h264_alloc_tables(H264Context
*h
){
884 MpegEncContext
* const s
= &h
->s
;
885 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+1);
886 const int row_mb_num
= 2*s
->mb_stride
*s
->avctx
->thread_count
;
889 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->intra4x4_pred_mode
, row_mb_num
* 8 * sizeof(uint8_t), fail
)
891 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->non_zero_count
, big_mb_num
* 48 * sizeof(uint8_t), fail
)
892 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->slice_table_base
, (big_mb_num
+s
->mb_stride
) * sizeof(*h
->slice_table_base
), fail
)
893 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->cbp_table
, big_mb_num
* sizeof(uint16_t), fail
)
895 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->chroma_pred_mode_table
, big_mb_num
* sizeof(uint8_t), fail
)
896 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mvd_table
[0], 16*row_mb_num
* sizeof(uint8_t), fail
);
897 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mvd_table
[1], 16*row_mb_num
* sizeof(uint8_t), fail
);
898 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->direct_table
, 4*big_mb_num
* sizeof(uint8_t) , fail
);
899 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->list_counts
, big_mb_num
* sizeof(uint8_t), fail
)
/* Mark every slice table entry with -1 and offset the working pointer. */
901 memset(h
->slice_table_base
, -1, (big_mb_num
+s
->mb_stride
) * sizeof(*h
->slice_table_base
));
902 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
*2 + 1;
904 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mb2b_xy
, big_mb_num
* sizeof(uint32_t), fail
);
905 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mb2br_xy
, big_mb_num
* sizeof(uint32_t), fail
);
/* Precompute the macroblock index to block index maps. */
906 for(y
=0; y
<s
->mb_height
; y
++){
907 for(x
=0; x
<s
->mb_width
; x
++){
908 const int mb_xy
= x
+ y
*s
->mb_stride
;
909 const int b_xy
= 4*x
+ 4*y
*h
->b_stride
;
911 h
->mb2b_xy
[mb_xy
]= b_xy
;
912 h
->mb2br_xy
[mb_xy
]= 8*(FMO ? mb_xy
: (mb_xy
% (2*s
->mb_stride
)));
916 s
->obmc_scratchpad
= NULL
;
918 if(!h
->dequant4_coeff
[0])
919 init_dequant_tables(h
);
928 * Mimic alloc_tables(), but for every context thread.
/* Makes the table pointers of thread context dst refer to the tables of src.
 *
 * Whole-picture tables (non_zero_count, slice_table, cbp_table, mb2b_xy,
 * mb2br_xy, chroma_pred_mode_table, direct_table, list_counts) are shared as
 * they are. intra4x4_pred_mode and both mvd_table entries are offset by
 * i*8*2*mb_stride, so context i gets its own slice of those allocations. The
 * scratchpad pointer is cleared and the prediction functions are initialised
 * for dst.
 *
 * @param i index of the thread context being set up */
930 static void clone_tables(H264Context
*dst
, H264Context
*src
, int i
){
931 MpegEncContext
* const s
= &src
->s
;
932 dst
->intra4x4_pred_mode
= src
->intra4x4_pred_mode
+ i
*8*2*s
->mb_stride
;
933 dst
->non_zero_count
= src
->non_zero_count
;
934 dst
->slice_table
= src
->slice_table
;
935 dst
->cbp_table
= src
->cbp_table
;
936 dst
->mb2b_xy
= src
->mb2b_xy
;
937 dst
->mb2br_xy
= src
->mb2br_xy
;
938 dst
->chroma_pred_mode_table
= src
->chroma_pred_mode_table
;
939 dst
->mvd_table
[0] = src
->mvd_table
[0] + i
*8*2*s
->mb_stride
;
940 dst
->mvd_table
[1] = src
->mvd_table
[1] + i
*8*2*s
->mb_stride
;
941 dst
->direct_table
= src
->direct_table
;
942 dst
->list_counts
= src
->list_counts
;
944 dst
->s
.obmc_scratchpad
= NULL
;
945 ff_h264_pred_init(&dst
->hpc
, src
->s
.codec_id
, src
->sps
.bit_depth_luma
);
950 * Allocate buffers which are not shared amongst multiple threads.
/* Allocates the buffers that each thread context owns privately.
 *
 * Both top_borders buffers are allocated zeroed with
 * mb_width * 16*3 * 2 bytes; allocation failure jumps to the fail label,
 * which returns -1. Six fixed ref_cache entries (scan8[5]+1, scan8[7]+1 and
 * scan8[13]+1 in both lists) are set to PART_NOT_AVAILABLE once here.
 *
 * NOTE(review): the success return and the fail label itself are missing
 * from this listing. */
952 static int context_init(H264Context
*h
){
953 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->top_borders
[0], h
->s
.mb_width
* 16*3 * sizeof(uint8_t)*2, fail
)
954 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->top_borders
[1], h
->s
.mb_width
* 16*3 * sizeof(uint8_t)*2, fail
)
956 h
->ref_cache
[0][scan8
[5 ]+1] = h
->ref_cache
[0][scan8
[7 ]+1] = h
->ref_cache
[0][scan8
[13]+1] =
957 h
->ref_cache
[1][scan8
[5 ]+1] = h
->ref_cache
[1][scan8
[7 ]+1] = h
->ref_cache
[1][scan8
[13]+1] = PART_NOT_AVAILABLE
;
961 return -1; // free_tables will clean up for us
964 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
);
/* Initialisation shared by the users of H264Context.
 *
 * Copies width, height and codec id from the AVCodecContext, initialises the
 * H.264 DSP and prediction function tables for 8-bit depth, marks the
 * dequantization tables as not built for any PPS (dequant_coeff_pps = -1),
 * enables unrestricted motion vectors, initialises dsputil, and fills the PPS
 * scaling matrices with the value 16 (6*16 bytes of scaling_matrix4 and 2*64
 * bytes of scaling_matrix8). */
966 static av_cold
void common_init(H264Context
*h
){
967 MpegEncContext
* const s
= &h
->s
;
969 s
->width
= s
->avctx
->width
;
970 s
->height
= s
->avctx
->height
;
971 s
->codec_id
= s
->avctx
->codec
->id
;
973 ff_h264dsp_init(&h
->h264dsp
, 8);
974 ff_h264_pred_init(&h
->hpc
, s
->codec_id
, 8);
976 h
->dequant_coeff_pps
= -1;
977 s
->unrestricted_mv
=1;
980 dsputil_init(&s
->dsp
, s
->avctx
); // needed so that idct permutation is known early
982 memset(h
->pps
.scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
983 memset(h
->pps
.scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
986 int ff_h264_decode_extradata(H264Context
*h
)
988 AVCodecContext
*avctx
= h
->s
.avctx
;
990 if(avctx
->extradata
[0] == 1){
992 unsigned char *p
= avctx
->extradata
;
996 if(avctx
->extradata_size
< 7) {
997 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
1000 /* sps and pps in the avcC always have length coded with 2 bytes,
1001 so put a fake nal_length_size = 2 while parsing them */
1002 h
->nal_length_size
= 2;
1003 // Decode sps from avcC
1004 cnt
= *(p
+5) & 0x1f; // Number of sps
1006 for (i
= 0; i
< cnt
; i
++) {
1007 nalsize
= AV_RB16(p
) + 2;
1008 if (p
- avctx
->extradata
+ nalsize
> avctx
->extradata_size
)
1010 if(decode_nal_units(h
, p
, nalsize
) < 0) {
1011 av_log(avctx
, AV_LOG_ERROR
, "Decoding sps %d from avcC failed\n", i
);
1016 // Decode pps from avcC
1017 cnt
= *(p
++); // Number of pps
1018 for (i
= 0; i
< cnt
; i
++) {
1019 nalsize
= AV_RB16(p
) + 2;
1020 if (p
- avctx
->extradata
+ nalsize
> avctx
->extradata_size
)
1022 if (decode_nal_units(h
, p
, nalsize
) < 0) {
1023 av_log(avctx
, AV_LOG_ERROR
, "Decoding pps %d from avcC failed\n", i
);
1028 // Now store right nal length size, that will be use to parse all other nals
1029 h
->nal_length_size
= (avctx
->extradata
[4] & 0x03) + 1;
1032 if(decode_nal_units(h
, avctx
->extradata
, avctx
->extradata_size
) < 0)
1038 av_cold
int ff_h264_decode_init(AVCodecContext
*avctx
){
1039 H264Context
*h
= avctx
->priv_data
;
1040 MpegEncContext
* const s
= &h
->s
;
1042 MPV_decode_defaults(s
);
1047 s
->out_format
= FMT_H264
;
1048 s
->workaround_bugs
= avctx
->workaround_bugs
;
1051 // s->decode_mb= ff_h263_decode_mb;
1052 s
->quarter_sample
= 1;
1053 if(!avctx
->has_b_frames
)
1056 avctx
->chroma_sample_location
= AVCHROMA_LOC_LEFT
;
1058 ff_h264_decode_init_vlc();
1061 h
->sps
.bit_depth_luma
= avctx
->bits_per_raw_sample
= 8;
1063 h
->thread_context
[0] = h
;
1064 h
->outputed_poc
= h
->next_outputed_poc
= INT_MIN
;
1065 h
->prev_poc_msb
= 1<<16;
1067 ff_h264_reset_sei(h
);
1068 if(avctx
->codec_id
== CODEC_ID_H264
){
1069 if(avctx
->ticks_per_frame
== 1){
1070 s
->avctx
->time_base
.den
*=2;
1072 avctx
->ticks_per_frame
= 2;
1075 if(avctx
->extradata_size
> 0 && avctx
->extradata
&&
1076 ff_h264_decode_extradata(h
))
1079 if(h
->sps
.bitstream_restriction_flag
&& s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
1080 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
1087 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
1088 static void copy_picture_range(Picture
**to
, Picture
**from
, int count
, MpegEncContext
*new_base
, MpegEncContext
*old_base
)
1092 for (i
=0; i
<count
; i
++){
1093 assert((IN_RANGE(from
[i
], old_base
, sizeof(*old_base
)) ||
1094 IN_RANGE(from
[i
], old_base
->picture
, sizeof(Picture
) * old_base
->picture_count
) ||
1096 to
[i
] = REBASE_PICTURE(from
[i
], new_base
, old_base
);
/**
 * Synchronise an array of parameter-set buffers with another one.
 *
 * For every index: if the source slot is empty the destination slot is
 * freed; if the source slot is set the destination slot is allocated on
 * demand and the source contents are copied into it.
 *
 * If an allocation fails the destination slot is left NULL (the set then
 * looks absent in the destination) instead of copying through a NULL
 * pointer.
 *
 * @param to    destination array of buffer pointers
 * @param from  source array of buffer pointers
 * @param count number of slots in both arrays
 * @param size  size in bytes of one buffer
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (!from[i]) {
            if (to[i])
                av_freep(&to[i]);
            continue;
        }

        if (!to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue; /* out of memory: nothing to copy into */
        }

        memcpy(to[i], from[i], size);
    }
}
1112 static int decode_init_thread_copy(AVCodecContext
*avctx
){
1113 H264Context
*h
= avctx
->priv_data
;
1115 if (!avctx
->is_copy
) return 0;
1116 memset(h
->sps_buffers
, 0, sizeof(h
->sps_buffers
));
1117 memset(h
->pps_buffers
, 0, sizeof(h
->pps_buffers
));
1122 #define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
1123 static int decode_update_thread_context(AVCodecContext
*dst
, const AVCodecContext
*src
){
1124 H264Context
*h
= dst
->priv_data
, *h1
= src
->priv_data
;
1125 MpegEncContext
* const s
= &h
->s
, * const s1
= &h1
->s
;
1126 int inited
= s
->context_initialized
, err
;
1129 if(dst
== src
|| !s1
->context_initialized
) return 0;
1131 err
= ff_mpeg_update_thread_context(dst
, src
);
1134 //FIXME handle width/height changing
1136 for(i
= 0; i
< MAX_SPS_COUNT
; i
++)
1137 av_freep(h
->sps_buffers
+ i
);
1139 for(i
= 0; i
< MAX_PPS_COUNT
; i
++)
1140 av_freep(h
->pps_buffers
+ i
);
1142 memcpy(&h
->s
+ 1, &h1
->s
+ 1, sizeof(H264Context
) - sizeof(MpegEncContext
)); //copy all fields after MpegEnc
1143 memset(h
->sps_buffers
, 0, sizeof(h
->sps_buffers
));
1144 memset(h
->pps_buffers
, 0, sizeof(h
->pps_buffers
));
1145 if (ff_h264_alloc_tables(h
) < 0) {
1146 av_log(dst
, AV_LOG_ERROR
, "Could not allocate memory for h264\n");
1147 return AVERROR(ENOMEM
);
1152 h
->rbsp_buffer
[i
] = NULL
;
1153 h
->rbsp_buffer_size
[i
] = 0;
1156 h
->thread_context
[0] = h
;
1158 // frame_start may not be called for the next thread (if it's decoding a bottom field)
1159 // so this has to be allocated here
1160 h
->s
.obmc_scratchpad
= av_malloc(16*6*s
->linesize
);
1162 s
->dsp
.clear_blocks(h
->mb
);
1163 s
->dsp
.clear_blocks(h
->mb
+(24*16<<h
->pixel_shift
));
1166 //extradata/NAL handling
1167 h
->is_avc
= h1
->is_avc
;
1170 copy_parameter_set((void**)h
->sps_buffers
, (void**)h1
->sps_buffers
, MAX_SPS_COUNT
, sizeof(SPS
));
1172 copy_parameter_set((void**)h
->pps_buffers
, (void**)h1
->pps_buffers
, MAX_PPS_COUNT
, sizeof(PPS
));
1175 //Dequantization matrices
1176 //FIXME these are big - can they be only copied when PPS changes?
1177 copy_fields(h
, h1
, dequant4_buffer
, dequant4_coeff
);
1180 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[0] + (h1
->dequant4_coeff
[i
] - h1
->dequant4_buffer
[0]);
1183 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[0] + (h1
->dequant8_coeff
[i
] - h1
->dequant8_buffer
[0]);
1185 h
->dequant_coeff_pps
= h1
->dequant_coeff_pps
;
1188 copy_fields(h
, h1
, poc_lsb
, redundant_pic_count
);
1191 copy_fields(h
, h1
, ref_count
, list_count
);
1192 copy_fields(h
, h1
, ref_list
, intra_gb
);
1193 copy_fields(h
, h1
, short_ref
, cabac_init_idc
);
1195 copy_picture_range(h
->short_ref
, h1
->short_ref
, 32, s
, s1
);
1196 copy_picture_range(h
->long_ref
, h1
->long_ref
, 32, s
, s1
);
1197 copy_picture_range(h
->delayed_pic
, h1
->delayed_pic
, MAX_DELAYED_PIC_COUNT
+2, s
, s1
);
1199 h
->last_slice_type
= h1
->last_slice_type
;
1201 if(!s
->current_picture_ptr
) return 0;
1204 err
= ff_h264_execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
1205 h
->prev_poc_msb
= h
->poc_msb
;
1206 h
->prev_poc_lsb
= h
->poc_lsb
;
1208 h
->prev_frame_num_offset
= h
->frame_num_offset
;
1209 h
->prev_frame_num
= h
->frame_num
;
1210 h
->outputed_poc
= h
->next_outputed_poc
;
1215 int ff_h264_frame_start(H264Context
*h
){
1216 MpegEncContext
* const s
= &h
->s
;
1218 const int pixel_shift
= h
->pixel_shift
;
1219 int thread_count
= (s
->avctx
->active_thread_type
& FF_THREAD_SLICE
) ? s
->avctx
->thread_count
: 1;
1221 if(MPV_frame_start(s
, s
->avctx
) < 0)
1223 ff_er_frame_start(s
);
1225 * MPV_frame_start uses pict_type to derive key_frame.
1226 * This is incorrect for H.264; IDR markings must be used.
1227 * Zero here; IDR markings per slice in frame or fields are ORed in later.
1228 * See decode_nal_units().
1230 s
->current_picture_ptr
->f
.key_frame
= 0;
1231 s
->current_picture_ptr
->mmco_reset
= 0;
1233 assert(s
->linesize
&& s
->uvlinesize
);
1235 for(i
=0; i
<16; i
++){
1236 h
->block_offset
[i
]= (4*((scan8
[i
] - scan8
[0])&7) << pixel_shift
) + 4*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
1237 h
->block_offset
[48+i
]= (4*((scan8
[i
] - scan8
[0])&7) << pixel_shift
) + 8*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
1239 for(i
=0; i
<16; i
++){
1240 h
->block_offset
[16+i
]=
1241 h
->block_offset
[32+i
]= (4*((scan8
[i
] - scan8
[0])&7) << pixel_shift
) + 4*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
1242 h
->block_offset
[48+16+i
]=
1243 h
->block_offset
[48+32+i
]= (4*((scan8
[i
] - scan8
[0])&7) << pixel_shift
) + 8*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
1246 /* can't be in alloc_tables because linesize isn't known there.
1247 * FIXME: redo bipred weight to not require extra buffer? */
1248 for(i
= 0; i
< thread_count
; i
++)
1249 if(h
->thread_context
[i
] && !h
->thread_context
[i
]->s
.obmc_scratchpad
)
1250 h
->thread_context
[i
]->s
.obmc_scratchpad
= av_malloc(16*6*s
->linesize
);
1252 /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
1253 memset(h
->slice_table
, -1, (s
->mb_height
*s
->mb_stride
-1) * sizeof(*h
->slice_table
));
1255 // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.f.reference /*|| h->contains_intra*/ || 1;
1257 // We mark the current picture as non-reference after allocating it, so
1258 // that if we break out due to an error it can be released automatically
1259 // in the next MPV_frame_start().
1260 // SVQ3 as well as most other codecs have only last/next/current and thus
1261 // get released even with set reference, besides SVQ3 and others do not
1262 // mark frames as reference later "naturally".
1263 if(s
->codec_id
!= CODEC_ID_SVQ3
)
1264 s
->current_picture_ptr
->f
.reference
= 0;
1266 s
->current_picture_ptr
->field_poc
[0]=
1267 s
->current_picture_ptr
->field_poc
[1]= INT_MAX
;
1269 h
->next_output_pic
= NULL
;
1271 assert(s
->current_picture_ptr
->long_ref
==0);
1277 * Run setup operations that must be run after slice header decoding.
1278 * This includes finding the next displayed frame.
1280 * @param h h264 master context
1281 * @param setup_finished enough NALs have been read that we can call
1282 * ff_thread_finish_setup()
1284 static void decode_postinit(H264Context
*h
, int setup_finished
){
1285 MpegEncContext
* const s
= &h
->s
;
1286 Picture
*out
= s
->current_picture_ptr
;
1287 Picture
*cur
= s
->current_picture_ptr
;
1288 int i
, pics
, out_of_order
, out_idx
;
1290 s
->current_picture_ptr
->f
.qscale_type
= FF_QSCALE_TYPE_H264
;
1291 s
->current_picture_ptr
->f
.pict_type
= s
->pict_type
;
1293 if (h
->next_output_pic
) return;
1295 if (cur
->field_poc
[0]==INT_MAX
|| cur
->field_poc
[1]==INT_MAX
) {
1296 //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
1297 //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
1298 //to find this yet, so we assume the worst for now.
1299 //if (setup_finished)
1300 // ff_thread_finish_setup(s->avctx);
1304 cur
->f
.interlaced_frame
= 0;
1305 cur
->f
.repeat_pict
= 0;
1307 /* Signal interlacing information externally. */
1308 /* Prioritize picture timing SEI information over used decoding process if it exists. */
1310 if(h
->sps
.pic_struct_present_flag
){
1311 switch (h
->sei_pic_struct
)
1313 case SEI_PIC_STRUCT_FRAME
:
1315 case SEI_PIC_STRUCT_TOP_FIELD
:
1316 case SEI_PIC_STRUCT_BOTTOM_FIELD
:
1317 cur
->f
.interlaced_frame
= 1;
1319 case SEI_PIC_STRUCT_TOP_BOTTOM
:
1320 case SEI_PIC_STRUCT_BOTTOM_TOP
:
1321 if (FIELD_OR_MBAFF_PICTURE
)
1322 cur
->f
.interlaced_frame
= 1;
1324 // try to flag soft telecine progressive
1325 cur
->f
.interlaced_frame
= h
->prev_interlaced_frame
;
1327 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP
:
1328 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM
:
1329 // Signal the possibility of telecined film externally (pic_struct 5,6)
1330 // From these hints, let the applications decide if they apply deinterlacing.
1331 cur
->f
.repeat_pict
= 1;
1333 case SEI_PIC_STRUCT_FRAME_DOUBLING
:
1334 // Force progressive here, as doubling interlaced frame is a bad idea.
1335 cur
->f
.repeat_pict
= 2;
1337 case SEI_PIC_STRUCT_FRAME_TRIPLING
:
1338 cur
->f
.repeat_pict
= 4;
1342 if ((h
->sei_ct_type
& 3) && h
->sei_pic_struct
<= SEI_PIC_STRUCT_BOTTOM_TOP
)
1343 cur
->f
.interlaced_frame
= (h
->sei_ct_type
& (1 << 1)) != 0;
1345 /* Derive interlacing flag from used decoding process. */
1346 cur
->f
.interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
1348 h
->prev_interlaced_frame
= cur
->f
.interlaced_frame
;
1350 if (cur
->field_poc
[0] != cur
->field_poc
[1]){
1351 /* Derive top_field_first from field pocs. */
1352 cur
->f
.top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
1354 if (cur
->f
.interlaced_frame
|| h
->sps
.pic_struct_present_flag
) {
1355 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
1356 if(h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM
1357 || h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM_TOP
)
1358 cur
->f
.top_field_first
= 1;
1360 cur
->f
.top_field_first
= 0;
1362 /* Most likely progressive */
1363 cur
->f
.top_field_first
= 0;
1367 //FIXME do something with unavailable reference frames
1369 /* Sort B-frames into display order */
1371 if(h
->sps
.bitstream_restriction_flag
1372 && s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
1373 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
1377 if( s
->avctx
->strict_std_compliance
>= FF_COMPLIANCE_STRICT
1378 && !h
->sps
.bitstream_restriction_flag
){
1379 s
->avctx
->has_b_frames
= MAX_DELAYED_PIC_COUNT
;
1384 while(h
->delayed_pic
[pics
]) pics
++;
1386 assert(pics
<= MAX_DELAYED_PIC_COUNT
);
1388 h
->delayed_pic
[pics
++] = cur
;
1389 if (cur
->f
.reference
== 0)
1390 cur
->f
.reference
= DELAYED_PIC_REF
;
1392 out
= h
->delayed_pic
[0];
1394 for (i
= 1; h
->delayed_pic
[i
] && !h
->delayed_pic
[i
]->f
.key_frame
&& !h
->delayed_pic
[i
]->mmco_reset
; i
++)
1395 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
1396 out
= h
->delayed_pic
[i
];
1399 if (s
->avctx
->has_b_frames
== 0 && (h
->delayed_pic
[0]->f
.key_frame
|| h
->delayed_pic
[0]->mmco_reset
))
1400 h
->next_outputed_poc
= INT_MIN
;
1401 out_of_order
= out
->poc
< h
->next_outputed_poc
;
1403 if(h
->sps
.bitstream_restriction_flag
&& s
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
)
1405 else if((out_of_order
&& pics
-1 == s
->avctx
->has_b_frames
&& s
->avctx
->has_b_frames
< MAX_DELAYED_PIC_COUNT
)
1407 ((h
->next_outputed_poc
!= INT_MIN
&& out
->poc
> h
->next_outputed_poc
+ 2)
1408 || cur
->f
.pict_type
== AV_PICTURE_TYPE_B
)))
1411 s
->avctx
->has_b_frames
++;
1414 if(out_of_order
|| pics
> s
->avctx
->has_b_frames
){
1415 out
->f
.reference
&= ~DELAYED_PIC_REF
;
1416 out
->owner2
= s
; // for frame threading, the owner must be the second field's thread
1417 // or else the first thread can release the picture and reuse it unsafely
1418 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
1419 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
1421 if(!out_of_order
&& pics
> s
->avctx
->has_b_frames
){
1422 h
->next_output_pic
= out
;
1423 if (out_idx
== 0 && h
->delayed_pic
[0] && (h
->delayed_pic
[0]->f
.key_frame
|| h
->delayed_pic
[0]->mmco_reset
)) {
1424 h
->next_outputed_poc
= INT_MIN
;
1426 h
->next_outputed_poc
= out
->poc
;
1428 av_log(s
->avctx
, AV_LOG_DEBUG
, "no picture\n");
1432 ff_thread_finish_setup(s
->avctx
);
1435 static av_always_inline
void backup_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int chroma444
, int simple
){
1436 MpegEncContext
* const s
= &h
->s
;
1437 uint8_t *top_border
;
1439 const int pixel_shift
= h
->pixel_shift
;
1442 src_cb
-= uvlinesize
;
1443 src_cr
-= uvlinesize
;
1445 if(!simple
&& FRAME_MBAFF
){
1448 top_border
= h
->top_borders
[0][s
->mb_x
];
1449 AV_COPY128(top_border
, src_y
+ 15*linesize
);
1451 AV_COPY128(top_border
+16, src_y
+15*linesize
+16);
1452 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
1455 AV_COPY128(top_border
+32, src_cb
+ 15*uvlinesize
);
1456 AV_COPY128(top_border
+48, src_cb
+ 15*uvlinesize
+16);
1457 AV_COPY128(top_border
+64, src_cr
+ 15*uvlinesize
);
1458 AV_COPY128(top_border
+80, src_cr
+ 15*uvlinesize
+16);
1460 AV_COPY128(top_border
+16, src_cb
+ 15*uvlinesize
);
1461 AV_COPY128(top_border
+32, src_cr
+ 15*uvlinesize
);
1465 AV_COPY128(top_border
+32, src_cb
+7*uvlinesize
);
1466 AV_COPY128(top_border
+48, src_cr
+7*uvlinesize
);
1468 AV_COPY64(top_border
+16, src_cb
+7*uvlinesize
);
1469 AV_COPY64(top_border
+24, src_cr
+7*uvlinesize
);
1480 top_border
= h
->top_borders
[top_idx
][s
->mb_x
];
1481 // There are two lines saved, the line above the the top macroblock of a pair,
1482 // and the line above the bottom macroblock
1483 AV_COPY128(top_border
, src_y
+ 16*linesize
);
1485 AV_COPY128(top_border
+16, src_y
+16*linesize
+16);
1487 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
1490 AV_COPY128(top_border
+32, src_cb
+ 16*linesize
);
1491 AV_COPY128(top_border
+48, src_cb
+ 16*linesize
+16);
1492 AV_COPY128(top_border
+64, src_cr
+ 16*linesize
);
1493 AV_COPY128(top_border
+80, src_cr
+ 16*linesize
+16);
1495 AV_COPY128(top_border
+16, src_cb
+ 16*linesize
);
1496 AV_COPY128(top_border
+32, src_cr
+ 16*linesize
);
1500 AV_COPY128(top_border
+32, src_cb
+8*uvlinesize
);
1501 AV_COPY128(top_border
+48, src_cr
+8*uvlinesize
);
1503 AV_COPY64(top_border
+16, src_cb
+8*uvlinesize
);
1504 AV_COPY64(top_border
+24, src_cr
+8*uvlinesize
);
1510 static av_always_inline
void xchg_mb_border(H264Context
*h
, uint8_t *src_y
,
1511 uint8_t *src_cb
, uint8_t *src_cr
,
1512 int linesize
, int uvlinesize
,
1513 int xchg
, int chroma444
,
1514 int simple
, int pixel_shift
){
1515 MpegEncContext
* const s
= &h
->s
;
1516 int deblock_topleft
;
1519 uint8_t *top_border_m1
;
1520 uint8_t *top_border
;
1522 if(!simple
&& FRAME_MBAFF
){
1527 top_idx
= MB_MBAFF ?
0 : 1;
1531 if(h
->deblocking_filter
== 2) {
1532 deblock_topleft
= h
->slice_table
[h
->mb_xy
- 1 - s
->mb_stride
] == h
->slice_num
;
1533 deblock_top
= h
->top_type
;
1535 deblock_topleft
= (s
->mb_x
> 0);
1536 deblock_top
= (s
->mb_y
> !!MB_FIELD
);
1539 src_y
-= linesize
+ 1 + pixel_shift
;
1540 src_cb
-= uvlinesize
+ 1 + pixel_shift
;
1541 src_cr
-= uvlinesize
+ 1 + pixel_shift
;
1543 top_border_m1
= h
->top_borders
[top_idx
][s
->mb_x
-1];
1544 top_border
= h
->top_borders
[top_idx
][s
->mb_x
];
1546 #define XCHG(a,b,xchg)\
1549 AV_SWAP64(b+0,a+0);\
1550 AV_SWAP64(b+8,a+8);\
1555 if (xchg) AV_SWAP64(b,a);\
1556 else AV_COPY64(b,a);
1559 if(deblock_topleft
){
1560 XCHG(top_border_m1
+ (8 << pixel_shift
), src_y
- (7 << pixel_shift
), 1);
1562 XCHG(top_border
+ (0 << pixel_shift
), src_y
+ (1 << pixel_shift
), xchg
);
1563 XCHG(top_border
+ (8 << pixel_shift
), src_y
+ (9 << pixel_shift
), 1);
1564 if(s
->mb_x
+1 < s
->mb_width
){
1565 XCHG(h
->top_borders
[top_idx
][s
->mb_x
+1], src_y
+ (17 << pixel_shift
), 1);
1568 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
1570 if(deblock_topleft
){
1571 XCHG(top_border_m1
+ (24 << pixel_shift
), src_cb
- (7 << pixel_shift
), 1);
1572 XCHG(top_border_m1
+ (40 << pixel_shift
), src_cr
- (7 << pixel_shift
), 1);
1574 XCHG(top_border
+ (16 << pixel_shift
), src_cb
+ (1 << pixel_shift
), xchg
);
1575 XCHG(top_border
+ (24 << pixel_shift
), src_cb
+ (9 << pixel_shift
), 1);
1576 XCHG(top_border
+ (32 << pixel_shift
), src_cr
+ (1 << pixel_shift
), xchg
);
1577 XCHG(top_border
+ (40 << pixel_shift
), src_cr
+ (9 << pixel_shift
), 1);
1578 if(s
->mb_x
+1 < s
->mb_width
){
1579 XCHG(h
->top_borders
[top_idx
][s
->mb_x
+1] + (16 << pixel_shift
), src_cb
+ (17 << pixel_shift
), 1);
1580 XCHG(h
->top_borders
[top_idx
][s
->mb_x
+1] + (32 << pixel_shift
), src_cr
+ (17 << pixel_shift
), 1);
1584 if(deblock_topleft
){
1585 XCHG(top_border_m1
+ (16 << pixel_shift
), src_cb
- (7 << pixel_shift
), 1);
1586 XCHG(top_border_m1
+ (24 << pixel_shift
), src_cr
- (7 << pixel_shift
), 1);
1588 XCHG(top_border
+ (16 << pixel_shift
), src_cb
+1+pixel_shift
, 1);
1589 XCHG(top_border
+ (24 << pixel_shift
), src_cr
+1+pixel_shift
, 1);
1595 static av_always_inline
int dctcoef_get(DCTELEM
*mb
, int high_bit_depth
, int index
) {
1596 if (high_bit_depth
) {
1597 return AV_RN32A(((int32_t*)mb
) + index
);
1599 return AV_RN16A(mb
+ index
);
1602 static av_always_inline
void dctcoef_set(DCTELEM
*mb
, int high_bit_depth
, int index
, int value
) {
1603 if (high_bit_depth
) {
1604 AV_WN32A(((int32_t*)mb
) + index
, value
);
1606 AV_WN16A(mb
+ index
, value
);
1609 static av_always_inline
void hl_decode_mb_predict_luma(H264Context
*h
, int mb_type
, int is_h264
, int simple
, int transform_bypass
,
1610 int pixel_shift
, int *block_offset
, int linesize
, uint8_t *dest_y
, int p
)
1612 MpegEncContext
* const s
= &h
->s
;
1613 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
1614 void (*idct_dc_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
1616 int qscale
= p
== 0 ? s
->qscale
: h
->chroma_qp
[p
-1];
1617 block_offset
+= 16*p
;
1618 if(IS_INTRA4x4(mb_type
)){
1619 if(simple
|| !s
->encoding
){
1620 if(IS_8x8DCT(mb_type
)){
1621 if(transform_bypass
){
1623 idct_add
= s
->dsp
.add_pixels8
;
1625 idct_dc_add
= h
->h264dsp
.h264_idct8_dc_add
;
1626 idct_add
= h
->h264dsp
.h264_idct8_add
;
1628 for(i
=0; i
<16; i
+=4){
1629 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
1630 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
1631 if(transform_bypass
&& h
->sps
.profile_idc
==244 && dir
<=1){
1632 h
->hpc
.pred8x8l_add
[dir
](ptr
, h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1634 const int nnz
= h
->non_zero_count_cache
[ scan8
[i
+p
*16] ];
1635 h
->hpc
.pred8x8l
[ dir
](ptr
, (h
->topleft_samples_available
<<i
)&0x8000,
1636 (h
->topright_samples_available
<<i
)&0x4000, linesize
);
1638 if(nnz
== 1 && dctcoef_get(h
->mb
, pixel_shift
, i
*16+p
*256))
1639 idct_dc_add(ptr
, h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1641 idct_add (ptr
, h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1646 if(transform_bypass
){
1648 idct_add
= s
->dsp
.add_pixels4
;
1650 idct_dc_add
= h
->h264dsp
.h264_idct_dc_add
;
1651 idct_add
= h
->h264dsp
.h264_idct_add
;
1653 for(i
=0; i
<16; i
++){
1654 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
1655 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
1657 if(transform_bypass
&& h
->sps
.profile_idc
==244 && dir
<=1){
1658 h
->hpc
.pred4x4_add
[dir
](ptr
, h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1663 if(dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
){
1664 const int topright_avail
= (h
->topright_samples_available
<<i
)&0x8000;
1665 assert(s
->mb_y
|| linesize
<= block_offset
[i
]);
1666 if(!topright_avail
){
1668 tr_high
= ((uint16_t*)ptr
)[3 - linesize
/2]*0x0001000100010001ULL
;
1669 topright
= (uint8_t*) &tr_high
;
1671 tr
= ptr
[3 - linesize
]*0x01010101u
;
1672 topright
= (uint8_t*) &tr
;
1675 topright
= ptr
+ (4 << pixel_shift
) - linesize
;
1679 h
->hpc
.pred4x4
[ dir
](ptr
, topright
, linesize
);
1680 nnz
= h
->non_zero_count_cache
[ scan8
[i
+p
*16] ];
1683 if(nnz
== 1 && dctcoef_get(h
->mb
, pixel_shift
, i
*16+p
*256))
1684 idct_dc_add(ptr
, h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1686 idct_add (ptr
, h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1688 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
*16+p
*256, linesize
, qscale
, 0);
1695 h
->hpc
.pred16x16
[ h
->intra16x16_pred_mode
](dest_y
, linesize
);
1697 if(h
->non_zero_count_cache
[ scan8
[LUMA_DC_BLOCK_INDEX
+p
] ]){
1698 if(!transform_bypass
)
1699 h
->h264dsp
.h264_luma_dc_dequant_idct(h
->mb
+(p
*256 << pixel_shift
), h
->mb_luma_dc
[p
], h
->dequant4_coeff
[p
][qscale
][0]);
1701 static const uint8_t dc_mapping
[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
1702 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
1703 for(i
= 0; i
< 16; i
++)
1704 dctcoef_set(h
->mb
+p
*256, pixel_shift
, dc_mapping
[i
], dctcoef_get(h
->mb_luma_dc
[p
], pixel_shift
, i
));
1708 ff_svq3_luma_dc_dequant_idct_c(h
->mb
+p
*256, h
->mb_luma_dc
[p
], qscale
);
1712 static av_always_inline
void hl_decode_mb_idct_luma(H264Context
*h
, int mb_type
, int is_h264
, int simple
, int transform_bypass
,
1713 int pixel_shift
, int *block_offset
, int linesize
, uint8_t *dest_y
, int p
)
1715 MpegEncContext
* const s
= &h
->s
;
1716 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
1718 block_offset
+= 16*p
;
1719 if(!IS_INTRA4x4(mb_type
)){
1721 if(IS_INTRA16x16(mb_type
)){
1722 if(transform_bypass
){
1723 if(h
->sps
.profile_idc
==244 && (h
->intra16x16_pred_mode
==VERT_PRED8x8
|| h
->intra16x16_pred_mode
==HOR_PRED8x8
)){
1724 h
->hpc
.pred16x16_add
[h
->intra16x16_pred_mode
](dest_y
, block_offset
, h
->mb
+ (p
*256 << pixel_shift
), linesize
);
1726 for(i
=0; i
<16; i
++){
1727 if(h
->non_zero_count_cache
[ scan8
[i
+p
*16] ] || dctcoef_get(h
->mb
, pixel_shift
, i
*16+p
*256))
1728 s
->dsp
.add_pixels4(dest_y
+ block_offset
[i
], h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1732 h
->h264dsp
.h264_idct_add16intra(dest_y
, block_offset
, h
->mb
+ (p
*256 << pixel_shift
), linesize
, h
->non_zero_count_cache
+p
*5*8);
1734 }else if(h
->cbp
&15){
1735 if(transform_bypass
){
1736 const int di
= IS_8x8DCT(mb_type
) ?
4 : 1;
1737 idct_add
= IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
: s
->dsp
.add_pixels4
;
1738 for(i
=0; i
<16; i
+=di
){
1739 if(h
->non_zero_count_cache
[ scan8
[i
+p
*16] ]){
1740 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ (i
*16+p
*256 << pixel_shift
), linesize
);
1744 if(IS_8x8DCT(mb_type
)){
1745 h
->h264dsp
.h264_idct8_add4(dest_y
, block_offset
, h
->mb
+ (p
*256 << pixel_shift
), linesize
, h
->non_zero_count_cache
+p
*5*8);
1747 h
->h264dsp
.h264_idct_add16(dest_y
, block_offset
, h
->mb
+ (p
*256 << pixel_shift
), linesize
, h
->non_zero_count_cache
+p
*5*8);
1752 for(i
=0; i
<16; i
++){
1753 if(h
->non_zero_count_cache
[ scan8
[i
+p
*16] ] || h
->mb
[i
*16+p
*256]){ //FIXME benchmark weird rule, & below
1754 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
1755 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
*16 + p
*256, linesize
, s
->qscale
, IS_INTRA(mb_type
) ?
1 : 0);
1762 static av_always_inline
void hl_decode_mb_internal(H264Context
*h
, int simple
, int pixel_shift
){
1763 MpegEncContext
* const s
= &h
->s
;
1764 const int mb_x
= s
->mb_x
;
1765 const int mb_y
= s
->mb_y
;
1766 const int mb_xy
= h
->mb_xy
;
1767 const int mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
1768 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
1769 int linesize
, uvlinesize
/*dct_offset*/;
1771 int *block_offset
= &h
->block_offset
[0];
1772 const int transform_bypass
= !simple
&& (s
->qscale
== 0 && h
->sps
.transform_bypass
);
1773 /* is_h264 should always be true if SVQ3 is disabled. */
1774 const int is_h264
= !CONFIG_SVQ3_DECODER
|| simple
|| s
->codec_id
== CODEC_ID_H264
;
1775 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
1777 dest_y
= s
->current_picture
.f
.data
[0] + ((mb_x
<< pixel_shift
) + mb_y
* s
->linesize
) * 16;
1778 dest_cb
= s
->current_picture
.f
.data
[1] + ((mb_x
<< pixel_shift
) + mb_y
* s
->uvlinesize
) * 8;
1779 dest_cr
= s
->current_picture
.f
.data
[2] + ((mb_x
<< pixel_shift
) + mb_y
* s
->uvlinesize
) * 8;
1781 s
->dsp
.prefetch(dest_y
+ (s
->mb_x
&3)*4*s
->linesize
+ (64 << pixel_shift
), s
->linesize
, 4);
1782 s
->dsp
.prefetch(dest_cb
+ (s
->mb_x
&7)*s
->uvlinesize
+ (64 << pixel_shift
), dest_cr
- dest_cb
, 2);
1784 h
->list_counts
[mb_xy
]= h
->list_count
;
1786 if (!simple
&& MB_FIELD
) {
1787 linesize
= h
->mb_linesize
= s
->linesize
* 2;
1788 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
* 2;
1789 block_offset
= &h
->block_offset
[48];
1790 if(mb_y
&1){ //FIXME move out of this function?
1791 dest_y
-= s
->linesize
*15;
1792 dest_cb
-= s
->uvlinesize
*7;
1793 dest_cr
-= s
->uvlinesize
*7;
1797 for(list
=0; list
<h
->list_count
; list
++){
1798 if(!USES_LIST(mb_type
, list
))
1800 if(IS_16X16(mb_type
)){
1801 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
1802 fill_rectangle(ref
, 4, 4, 8, (16+*ref
)^(s
->mb_y
&1), 1);
1804 for(i
=0; i
<16; i
+=4){
1805 int ref
= h
->ref_cache
[list
][scan8
[i
]];
1807 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2, 8, (16+ref
)^(s
->mb_y
&1), 1);
1813 linesize
= h
->mb_linesize
= s
->linesize
;
1814 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
;
1815 // dct_offset = s->linesize * 16;
1818 if (!simple
&& IS_INTRA_PCM(mb_type
)) {
1820 const int bit_depth
= h
->sps
.bit_depth_luma
;
1823 init_get_bits(&gb
, (uint8_t*)h
->mb
, 384*bit_depth
);
1825 for (i
= 0; i
< 16; i
++) {
1826 uint16_t *tmp_y
= (uint16_t*)(dest_y
+ i
*linesize
);
1827 for (j
= 0; j
< 16; j
++)
1828 tmp_y
[j
] = get_bits(&gb
, bit_depth
);
1830 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
1831 if (!h
->sps
.chroma_format_idc
) {
1832 for (i
= 0; i
< 8; i
++) {
1833 uint16_t *tmp_cb
= (uint16_t*)(dest_cb
+ i
*uvlinesize
);
1834 for (j
= 0; j
< 8; j
++) {
1835 tmp_cb
[j
] = 1 << (bit_depth
- 1);
1838 for (i
= 0; i
< 8; i
++) {
1839 uint16_t *tmp_cr
= (uint16_t*)(dest_cr
+ i
*uvlinesize
);
1840 for (j
= 0; j
< 8; j
++) {
1841 tmp_cr
[j
] = 1 << (bit_depth
- 1);
1845 for (i
= 0; i
< 8; i
++) {
1846 uint16_t *tmp_cb
= (uint16_t*)(dest_cb
+ i
*uvlinesize
);
1847 for (j
= 0; j
< 8; j
++)
1848 tmp_cb
[j
] = get_bits(&gb
, bit_depth
);
1850 for (i
= 0; i
< 8; i
++) {
1851 uint16_t *tmp_cr
= (uint16_t*)(dest_cr
+ i
*uvlinesize
);
1852 for (j
= 0; j
< 8; j
++)
1853 tmp_cr
[j
] = get_bits(&gb
, bit_depth
);
1858 for (i
=0; i
<16; i
++) {
1859 memcpy(dest_y
+ i
* linesize
, h
->mb
+ i
*8, 16);
1861 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
1862 if (!h
->sps
.chroma_format_idc
) {
1863 for (i
= 0; i
< 8; i
++) {
1864 memset(dest_cb
+ i
*uvlinesize
, 128, 8);
1865 memset(dest_cr
+ i
*uvlinesize
, 128, 8);
1868 for (i
= 0; i
< 8; i
++) {
1869 memcpy(dest_cb
+ i
*uvlinesize
, h
->mb
+ 128 + i
*4, 8);
1870 memcpy(dest_cr
+ i
*uvlinesize
, h
->mb
+ 160 + i
*4, 8);
1876 if(IS_INTRA(mb_type
)){
1877 if(h
->deblocking_filter
)
1878 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 1, 0, simple
, pixel_shift
);
1880 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
1881 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
1882 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
1885 hl_decode_mb_predict_luma(h
, mb_type
, is_h264
, simple
, transform_bypass
, pixel_shift
, block_offset
, linesize
, dest_y
, 0);
1887 if(h
->deblocking_filter
)
1888 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 0, 0, simple
, pixel_shift
);
1890 hl_motion(h
, dest_y
, dest_cb
, dest_cr
,
1891 s
->me
.qpel_put
, s
->dsp
.put_h264_chroma_pixels_tab
,
1892 s
->me
.qpel_avg
, s
->dsp
.avg_h264_chroma_pixels_tab
,
1893 h
->h264dsp
.weight_h264_pixels_tab
,
1894 h
->h264dsp
.biweight_h264_pixels_tab
, pixel_shift
, 0);
1897 hl_decode_mb_idct_luma(h
, mb_type
, is_h264
, simple
, transform_bypass
, pixel_shift
, block_offset
, linesize
, dest_y
, 0);
1899 if((simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)) && (h
->cbp
&0x30)){
1900 uint8_t *dest
[2] = {dest_cb
, dest_cr
};
1901 if(transform_bypass
){
1902 if(IS_INTRA(mb_type
) && h
->sps
.profile_idc
==244 && (h
->chroma_pred_mode
==VERT_PRED8x8
|| h
->chroma_pred_mode
==HOR_PRED8x8
)){
1903 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[0], block_offset
+ 16, h
->mb
+ (16*16*1 << pixel_shift
), uvlinesize
);
1904 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[1], block_offset
+ 32, h
->mb
+ (16*16*2 << pixel_shift
), uvlinesize
);
1906 idct_add
= s
->dsp
.add_pixels4
;
1908 for(i
=j
*16; i
<j
*16+4; i
++){
1909 if(h
->non_zero_count_cache
[ scan8
[i
] ] || dctcoef_get(h
->mb
, pixel_shift
, i
*16))
1910 idct_add (dest
[j
-1] + block_offset
[i
], h
->mb
+ (i
*16 << pixel_shift
), uvlinesize
);
1916 if(h
->non_zero_count_cache
[ scan8
[CHROMA_DC_BLOCK_INDEX
+0] ])
1917 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ (16*16*1 << pixel_shift
), h
->dequant4_coeff
[IS_INTRA(mb_type
) ?
1:4][h
->chroma_qp
[0]][0]);
1918 if(h
->non_zero_count_cache
[ scan8
[CHROMA_DC_BLOCK_INDEX
+1] ])
1919 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ (16*16*2 << pixel_shift
), h
->dequant4_coeff
[IS_INTRA(mb_type
) ?
2:5][h
->chroma_qp
[1]][0]);
1920 h
->h264dsp
.h264_idct_add8(dest
, block_offset
,
1922 h
->non_zero_count_cache
);
1924 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ 16*16*1, h
->dequant4_coeff
[IS_INTRA(mb_type
) ?
1:4][h
->chroma_qp
[0]][0]);
1925 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ 16*16*2, h
->dequant4_coeff
[IS_INTRA(mb_type
) ?
2:5][h
->chroma_qp
[1]][0]);
1927 for(i
=j
*16; i
<j
*16+4; i
++){
1928 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
1929 uint8_t * const ptr
= dest
[j
-1] + block_offset
[i
];
1930 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, ff_h264_chroma_qp
[0][s
->qscale
+ 12] - 12, 2);
1938 if(h
->cbp
|| IS_INTRA(mb_type
))
1940 s
->dsp
.clear_blocks(h
->mb
);
1941 s
->dsp
.clear_blocks(h
->mb
+(24*16<<pixel_shift
));
/*
 * Reconstruct one macroblock when all planes are handled like luma
 * (every plane uses s->linesize and the luma predict/idct helpers).
 *
 * simple      - non-zero selects the fast variant: transform bypass, the
 *               MB_FIELD handling and the intra PCM path are all guarded
 *               by !simple and therefore disabled.
 * pixel_shift - shift applied to horizontal byte offsets and to the
 *               coefficient buffer offset.
 */
1945 static av_always_inline
void hl_decode_mb_444_internal(H264Context
*h
, int simple
, int pixel_shift
){
1946 MpegEncContext
* const s
= &h
->s
;
1947 const int mb_x
= s
->mb_x
;
1948 const int mb_y
= s
->mb_y
;
1949 const int mb_xy
= h
->mb_xy
;
1950 const int mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
1954 int *block_offset
= &h
->block_offset
[0];
/* Transform bypass is only considered at qscale 0 and never in simple mode. */
1955 const int transform_bypass
= !simple
&& (s
->qscale
== 0 && h
->sps
.transform_bypass
);
/* Only one plane is processed when gray-only decoding is compiled in and requested. */
1956 const int plane_count
= (simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)) ?
3 : 1;
/* Per-plane destination of this macroblock, followed by a prefetch hint. */
1958 for (p
= 0; p
< plane_count
; p
++)
1960 dest
[p
] = s
->current_picture
.f
.data
[p
] + ((mb_x
<< pixel_shift
) + mb_y
* s
->linesize
) * 16;
1961 s
->dsp
.prefetch(dest
[p
] + (s
->mb_x
&3)*4*s
->linesize
+ (64 << pixel_shift
), s
->linesize
, 4);
1964 h
->list_counts
[mb_xy
]= h
->list_count
;
/* Field macroblock: double the line stride and switch to the block
 * offsets starting at index 48. */
1966 if (!simple
&& MB_FIELD
) {
1967 linesize
= h
->mb_linesize
= h
->mb_uvlinesize
= s
->linesize
* 2;
1968 block_offset
= &h
->block_offset
[48];
1969 if(mb_y
&1) //FIXME move out of this function?
1970 for (p
= 0; p
< 3; p
++)
1971 dest
[p
] -= s
->linesize
*15;
/* Rewrite the cached reference indices of every list this macroblock
 * uses to (16 + ref) ^ (mb_y & 1). */
1974 for(list
=0; list
<h
->list_count
; list
++){
1975 if(!USES_LIST(mb_type
, list
))
1977 if(IS_16X16(mb_type
)){
1978 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
1979 fill_rectangle(ref
, 4, 4, 8, (16+*ref
)^(s
->mb_y
&1), 1);
1981 for(i
=0; i
<16; i
+=4){
1982 int ref
= h
->ref_cache
[list
][scan8
[i
]];
1984 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2, 8, (16+ref
)^(s
->mb_y
&1), 1);
/* Frame macroblock: plain line stride. */
1990 linesize
= h
->mb_linesize
= h
->mb_uvlinesize
= s
->linesize
;
/* Intra PCM: the raw samples stored in h->mb are written directly into
 * the picture. */
1993 if (!simple
&& IS_INTRA_PCM(mb_type
)) {
/* Variant reading bit_depth bits per sample into 16-bit rows.
 * NOTE(review): presumably the high bit depth (pixel_shift) case - confirm. */
1995 const int bit_depth
= h
->sps
.bit_depth_luma
;
1997 init_get_bits(&gb
, (uint8_t*)h
->mb
, 768*bit_depth
);
1999 for (p
= 0; p
< plane_count
; p
++) {
2000 for (i
= 0; i
< 16; i
++) {
2001 uint16_t *tmp
= (uint16_t*)(dest
[p
] + i
*linesize
);
2002 for (j
= 0; j
< 16; j
++)
2003 tmp
[j
] = get_bits(&gb
, bit_depth
);
/* Variant copying 16 bytes per row straight from h->mb. */
2007 for (p
= 0; p
< plane_count
; p
++) {
2008 for (i
= 0; i
< 16; i
++) {
2009 memcpy(dest
[p
] + i
*linesize
, h
->mb
+ p
*128 + i
*8, 16);
/* Intra macroblock: predict every plane; when the deblocking filter is
 * enabled the macroblock border is exchanged before and after. */
2014 if(IS_INTRA(mb_type
)){
2015 if(h
->deblocking_filter
)
2016 xchg_mb_border(h
, dest
[0], dest
[1], dest
[2], linesize
, linesize
, 1, 1, simple
, pixel_shift
);
2018 for (p
= 0; p
< plane_count
; p
++)
2019 hl_decode_mb_predict_luma(h
, mb_type
, 1, simple
, transform_bypass
, pixel_shift
, block_offset
, linesize
, dest
[p
], p
);
2021 if(h
->deblocking_filter
)
2022 xchg_mb_border(h
, dest
[0], dest
[1], dest
[2], linesize
, linesize
, 0, 1, simple
, pixel_shift
);
/* Motion compensation into all three planes. */
2024 hl_motion(h
, dest
[0], dest
[1], dest
[2],
2025 s
->me
.qpel_put
, s
->dsp
.put_h264_chroma_pixels_tab
,
2026 s
->me
.qpel_avg
, s
->dsp
.avg_h264_chroma_pixels_tab
,
2027 h
->h264dsp
.weight_h264_pixels_tab
,
2028 h
->h264dsp
.biweight_h264_pixels_tab
, pixel_shift
, 1);
/* Add the residual to every plane using the luma IDCT helper. */
2031 for (p
= 0; p
< plane_count
; p
++)
2032 hl_decode_mb_idct_luma(h
, mb_type
, 1, simple
, transform_bypass
, pixel_shift
, block_offset
, linesize
, dest
[p
], p
);
/* Clear the coefficient buffer when it may have been written. */
2034 if(h
->cbp
|| IS_INTRA(mb_type
))
2036 s
->dsp
.clear_blocks(h
->mb
);
2037 s
->dsp
.clear_blocks(h
->mb
+(24*16<<pixel_shift
));
2042 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2044 #define hl_decode_mb_simple(sh, bits) \
2045 static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
2046 hl_decode_mb_internal(h, 1, sh); \
2048 hl_decode_mb_simple(0, 8);
2049 hl_decode_mb_simple(1, 16);
2052 * Process a macroblock; this handles edge cases, such as interlacing.
2054 static void av_noinline
hl_decode_mb_complex(H264Context
*h
){
2055 hl_decode_mb_internal(h
, 0, h
->pixel_shift
);
2058 static void av_noinline
hl_decode_mb_444_complex(H264Context
*h
){
2059 hl_decode_mb_444_internal(h
, 0, h
->pixel_shift
);
2062 static void av_noinline
hl_decode_mb_444_simple(H264Context
*h
){
2063 hl_decode_mb_444_internal(h
, 1, 0);
2066 void ff_h264_hl_decode_mb(H264Context
*h
){
2067 MpegEncContext
* const s
= &h
->s
;
2068 const int mb_xy
= h
->mb_xy
;
2069 const int mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
2070 int is_complex
= CONFIG_SMALL
|| h
->is_complex
|| IS_INTRA_PCM(mb_type
) || s
->qscale
== 0;
2073 if(is_complex
|| h
->pixel_shift
)
2074 hl_decode_mb_444_complex(h
);
2076 hl_decode_mb_444_simple(h
);
2077 } else if (is_complex
) {
2078 hl_decode_mb_complex(h
);
2079 } else if (h
->pixel_shift
) {
2080 hl_decode_mb_simple_16(h
);
2082 hl_decode_mb_simple_8(h
);
2085 static int pred_weight_table(H264Context
*h
){
2086 MpegEncContext
* const s
= &h
->s
;
2088 int luma_def
, chroma_def
;
2091 h
->use_weight_chroma
= 0;
2092 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
2093 if(h
->sps
.chroma_format_idc
)
2094 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
2095 luma_def
= 1<<h
->luma_log2_weight_denom
;
2096 chroma_def
= 1<<h
->chroma_log2_weight_denom
;
2098 for(list
=0; list
<2; list
++){
2099 h
->luma_weight_flag
[list
] = 0;
2100 h
->chroma_weight_flag
[list
] = 0;
2101 for(i
=0; i
<h
->ref_count
[list
]; i
++){
2102 int luma_weight_flag
, chroma_weight_flag
;
2104 luma_weight_flag
= get_bits1(&s
->gb
);
2105 if(luma_weight_flag
){
2106 h
->luma_weight
[i
][list
][0]= get_se_golomb(&s
->gb
);
2107 h
->luma_weight
[i
][list
][1]= get_se_golomb(&s
->gb
);
2108 if( h
->luma_weight
[i
][list
][0] != luma_def
2109 || h
->luma_weight
[i
][list
][1] != 0) {
2111 h
->luma_weight_flag
[list
]= 1;
2114 h
->luma_weight
[i
][list
][0]= luma_def
;
2115 h
->luma_weight
[i
][list
][1]= 0;
2118 if(h
->sps
.chroma_format_idc
){
2119 chroma_weight_flag
= get_bits1(&s
->gb
);
2120 if(chroma_weight_flag
){
2123 h
->chroma_weight
[i
][list
][j
][0]= get_se_golomb(&s
->gb
);
2124 h
->chroma_weight
[i
][list
][j
][1]= get_se_golomb(&s
->gb
);
2125 if( h
->chroma_weight
[i
][list
][j
][0] != chroma_def
2126 || h
->chroma_weight
[i
][list
][j
][1] != 0) {
2127 h
->use_weight_chroma
= 1;
2128 h
->chroma_weight_flag
[list
]= 1;
2134 h
->chroma_weight
[i
][list
][j
][0]= chroma_def
;
2135 h
->chroma_weight
[i
][list
][j
][1]= 0;
2140 if(h
->slice_type_nos
!= AV_PICTURE_TYPE_B
) break;
2142 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
2147 * Initialize implicit_weight table.
2148 * @param field 0/1 initialize the weight for interlaced MBAFF
2149 * -1 initializes the rest
/*
 * Fill h->implicit_weight[ref0][ref1][...] with weights derived from the
 * picture order counts of the current picture and of each list 0 / list 1
 * reference pair. `field` selects which table slot is written.
 */
2151 static void implicit_weight_table(H264Context
*h
, int field
){
2152 MpegEncContext
* const s
= &h
->s
;
2153 int ref0
, ref1
, i
, cur_poc
, ref_start
, ref_count0
, ref_count1
;
/* Explicit per-list weight flags are cleared for both lists. */
2155 for (i
= 0; i
< 2; i
++) {
2156 h
->luma_weight_flag
[i
] = 0;
2157 h
->chroma_weight_flag
[i
] = 0;
/* POC of the current picture: the frame POC for frame pictures, else the
 * POC of the field being decoded. */
2161 if (s
->picture_structure
== PICT_FRAME
) {
2162 cur_poc
= s
->current_picture_ptr
->poc
;
2164 cur_poc
= s
->current_picture_ptr
->field_poc
[s
->picture_structure
- 1];
/* Single reference per list (no MBAFF) with the current picture exactly
 * halfway between them in POC. */
2166 if( h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1 && !FRAME_MBAFF
2167 && h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2*cur_poc
){
2169 h
->use_weight_chroma
= 0;
2173 ref_count0
= h
->ref_count
[0];
2174 ref_count1
= h
->ref_count
[1];
/* Per-field setup: POC of the selected field and reference ranges of
 * 16 + 2 * ref_count entries. */
2176 cur_poc
= s
->current_picture_ptr
->field_poc
[field
];
2178 ref_count0
= 16+2*h
->ref_count
[0];
2179 ref_count1
= 16+2*h
->ref_count
[1];
/* Implicit weighting uses a fixed log2 denominator of 5 for luma and chroma. */
2183 h
->use_weight_chroma
= 2;
2184 h
->luma_log2_weight_denom
= 5;
2185 h
->chroma_log2_weight_denom
= 5;
2187 for(ref0
=ref_start
; ref0
< ref_count0
; ref0
++){
2188 int poc0
= h
->ref_list
[0][ref0
].poc
;
2189 for(ref1
=ref_start
; ref1
< ref_count1
; ref1
++){
/* The POC distance based weight is only computed when neither reference
 * is a long-term one. */
2191 if (!h
->ref_list
[0][ref0
].long_ref
&& !h
->ref_list
[1][ref1
].long_ref
) {
2192 int poc1
= h
->ref_list
[1][ref1
].poc
;
2193 int td
= av_clip(poc1
- poc0
, -128, 127);
/* NOTE(review): the division by td below needs td != 0 - confirm the guard. */
2195 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
2196 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
2197 int dist_scale_factor
= (tb
*tx
+ 32) >> 8;
/* The computed weight is used only when the scale factor lies in [-64, 128]. */
2198 if(dist_scale_factor
>= -64 && dist_scale_factor
<= 128)
2199 w
= 64 - dist_scale_factor
;
/* Store w in both slots, or only in the slot selected by `field`. */
2203 h
->implicit_weight
[ref0
][ref1
][0]=
2204 h
->implicit_weight
[ref0
][ref1
][1]= w
;
2206 h
->implicit_weight
[ref0
][ref1
][field
]=w
;
2213 * instantaneous decoder refresh.
/*
 * Instantaneous decoder refresh: drop every reference picture and reset
 * the frame number history carried over from the previous picture.
 */
2215 static void idr(H264Context
*h
){
2216 ff_h264_remove_all_refs(h
);
2217 h
->prev_frame_num
= 0;
2218 h
->prev_frame_num_offset
= 0;
2223 /* forget old pics after a seek */
/*
 * Flush callback: release the pictures still waiting for output, reset the
 * output POC tracking, clear the SEI state and flush the underlying
 * MpegEncContext.
 */
2224 static void flush_dpb(AVCodecContext
*avctx
){
2225 H264Context
*h
= avctx
->priv_data
;
/* Un-reference and forget every delayed picture. */
2227 for(i
=0; i
<MAX_DELAYED_PIC_COUNT
; i
++) {
2228 if(h
->delayed_pic
[i
])
2229 h
->delayed_pic
[i
]->f
.reference
= 0;
2230 h
->delayed_pic
[i
]= NULL
;
/* INT_MIN marks that no picture has been output yet. */
2232 h
->outputed_poc
=h
->next_outputed_poc
= INT_MIN
;
2233 h
->prev_interlaced_frame
= 1;
/* The picture currently being decoded, if any, is dropped as a reference too. */
2235 if(h
->s
.current_picture_ptr
)
2236 h
->s
.current_picture_ptr
->f
.reference
= 0;
2237 h
->s
.first_field
= 0;
2238 ff_h264_reset_sei(h
);
2239 ff_mpeg_flush(avctx
);
/*
 * Compute the picture order count (POC) of the current picture according
 * to h->sps.poc_type and store it in the current picture: field_poc[0]
 * unless decoding a bottom field, field_poc[1] unless decoding a top
 * field, and poc as the minimum of the two.
 */
2242 static int init_poc(H264Context
*h
){
2243 MpegEncContext
* const s
= &h
->s
;
2244 const int max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
2246 Picture
*cur
= s
->current_picture_ptr
;
/* frame_num wrapped around since the previous picture: advance the offset. */
2248 h
->frame_num_offset
= h
->prev_frame_num_offset
;
2249 if(h
->frame_num
< h
->prev_frame_num
)
2250 h
->frame_num_offset
+= max_frame_num
;
/* POC type 0: the LSBs are transmitted; the MSBs are derived by detecting
 * a wrap of the LSBs in either direction. */
2252 if(h
->sps
.poc_type
==0){
2253 const int max_poc_lsb
= 1<<h
->sps
.log2_max_poc_lsb
;
2255 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/2)
2256 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
2257 else if(h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/2)
2258 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
2260 h
->poc_msb
= h
->prev_poc_msb
;
2261 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
2263 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
/* For frame pictures the bottom field POC gets its transmitted delta. */
2264 if(s
->picture_structure
== PICT_FRAME
)
2265 field_poc
[1] += h
->delta_poc_bottom
;
/* POC type 1: an expected POC is built from the SPS offset cycle and then
 * corrected by the transmitted deltas. */
2266 }else if(h
->sps
.poc_type
==1){
2267 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
2270 if(h
->sps
.poc_cycle_length
!= 0)
2271 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
2275 if(h
->nal_ref_idc
==0 && abs_frame_num
> 0)
/* Sum of all offsets of one POC cycle. */
2278 expected_delta_per_poc_cycle
= 0;
2279 for(i
=0; i
< h
->sps
.poc_cycle_length
; i
++)
2280 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[ i
]; //FIXME integrate during sps parse
/* Whole cycles elapsed plus the offsets of the partially elapsed cycle. */
2282 if(abs_frame_num
> 0){
2283 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
2284 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
2286 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
2287 for(i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
2288 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[ i
];
/* Non-reference pictures get an additional SPS offset. */
2292 if(h
->nal_ref_idc
== 0)
2293 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
2295 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
2296 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
2298 if(s
->picture_structure
== PICT_FRAME
)
2299 field_poc
[1] += h
->delta_poc
[1];
/* Remaining POC type: start from twice the unwrapped frame number. */
2301 int poc
= 2*(h
->frame_num_offset
+ h
->frame_num
);
/* Publish only the field POC(s) that belong to the picture being decoded. */
2310 if(s
->picture_structure
!= PICT_BOTTOM_FIELD
)
2311 s
->current_picture_ptr
->field_poc
[0]= field_poc
[0];
2312 if(s
->picture_structure
!= PICT_TOP_FIELD
)
2313 s
->current_picture_ptr
->field_poc
[1]= field_poc
[1];
2314 cur
->poc
= FFMIN(cur
->field_poc
[0], cur
->field_poc
[1]);
2321 * initialize scan tables
2323 static void init_scan_tables(H264Context
*h
){
2325 for(i
=0; i
<16; i
++){
2326 #define T(x) (x>>2) | ((x<<2) & 0xF)
2327 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
2328 h
-> field_scan
[i
] = T( field_scan
[i
]);
2331 for(i
=0; i
<64; i
++){
2332 #define T(x) (x>>3) | ((x&7)<<3)
2333 h
->zigzag_scan8x8
[i
] = T(ff_zigzag_direct
[i
]);
2334 h
->zigzag_scan8x8_cavlc
[i
] = T(zigzag_scan8x8_cavlc
[i
]);
2335 h
->field_scan8x8
[i
] = T(field_scan8x8
[i
]);
2336 h
->field_scan8x8_cavlc
[i
] = T(field_scan8x8_cavlc
[i
]);
2339 if(h
->sps
.transform_bypass
){ //FIXME same ugly
2340 h
->zigzag_scan_q0
= zigzag_scan
;
2341 h
->zigzag_scan8x8_q0
= ff_zigzag_direct
;
2342 h
->zigzag_scan8x8_cavlc_q0
= zigzag_scan8x8_cavlc
;
2343 h
->field_scan_q0
= field_scan
;
2344 h
->field_scan8x8_q0
= field_scan8x8
;
2345 h
->field_scan8x8_cavlc_q0
= field_scan8x8_cavlc
;
2347 h
->zigzag_scan_q0
= h
->zigzag_scan
;
2348 h
->zigzag_scan8x8_q0
= h
->zigzag_scan8x8
;
2349 h
->zigzag_scan8x8_cavlc_q0
= h
->zigzag_scan8x8_cavlc
;
2350 h
->field_scan_q0
= h
->field_scan
;
2351 h
->field_scan8x8_q0
= h
->field_scan8x8
;
2352 h
->field_scan8x8_cavlc_q0
= h
->field_scan8x8_cavlc
;
/*
 * Finish the field or frame that has just been decoded: report decoding
 * progress to waiting frame threads, run reference picture marking, roll
 * the POC / frame_num state over into the prev_* fields and notify the
 * VDPAU / hwaccel back ends.
 *
 * in_setup - non-zero suppresses the progress report and forces the
 *            marking / state update block to run even with frame
 *            threading active.
 */
2356 static int field_end(H264Context
*h
, int in_setup
){
2357 MpegEncContext
* const s
= &h
->s
;
2358 AVCodecContext
* const avctx
= s
->avctx
;
/* Report the last row of the picture (halved for field pictures) as done;
 * the final argument says whether this is the bottom field. */
2362 if (!in_setup
&& !s
->dropable
)
2363 ff_thread_report_progress((AVFrame
*)s
->current_picture_ptr
, (16*s
->mb_height
>> FIELD_PICTURE
) - 1,
2364 s
->picture_structure
==PICT_BOTTOM_FIELD
);
2366 if (CONFIG_H264_VDPAU_DECODER
&& s
->avctx
->codec
->capabilities
&CODEC_CAP_HWACCEL_VDPAU
)
2367 ff_vdpau_h264_set_reference_frames(s
);
/* Skipped when frame threading is active, unless called during setup. */
2369 if(in_setup
|| !(avctx
->active_thread_type
&FF_THREAD_FRAME
)){
/* err keeps the result of the reference picture marking. */
2371 err
= ff_h264_execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
2372 h
->prev_poc_msb
= h
->poc_msb
;
2373 h
->prev_poc_lsb
= h
->poc_lsb
;
2375 h
->prev_frame_num_offset
= h
->frame_num_offset
;
2376 h
->prev_frame_num
= h
->frame_num
;
2377 h
->outputed_poc
= h
->next_outputed_poc
;
/* A failing hardware accelerator is only logged here. */
2380 if (avctx
->hwaccel
) {
2381 if (avctx
->hwaccel
->end_frame(avctx
) < 0)
2382 av_log(avctx
, AV_LOG_ERROR
, "hardware accelerator failed to decode picture\n");
2385 if (CONFIG_H264_VDPAU_DECODER
&& s
->avctx
->codec
->capabilities
&CODEC_CAP_HWACCEL_VDPAU
)
2386 ff_vdpau_h264_picture_complete(s
);
2389 * FIXME: Error handling code does not seem to support interlaced
2390 * when slices span multiple rows
2391 * The ff_er_add_slice calls don't work right for bottom
2392 * fields; they cause massive erroneous error concealing
2393 * Error marking covers both fields (top and bottom).
2394 * This causes a mismatched s->error_count
2395 * and a bad error table. Further, the error count goes to
2396 * INT_MAX when called for bottom field, because mb_y is
2397 * past end by one (callers fault) and resync_mb_y != 0
2398 * causes problems for the first MB line, too.
2411 * Replicate H264 "master" context to thread contexts.
2413 static void clone_slice(H264Context
*dst
, H264Context
*src
)
2415 memcpy(dst
->block_offset
, src
->block_offset
, sizeof(dst
->block_offset
));
2416 dst
->s
.current_picture_ptr
= src
->s
.current_picture_ptr
;
2417 dst
->s
.current_picture
= src
->s
.current_picture
;
2418 dst
->s
.linesize
= src
->s
.linesize
;
2419 dst
->s
.uvlinesize
= src
->s
.uvlinesize
;
2420 dst
->s
.first_field
= src
->s
.first_field
;
2422 dst
->prev_poc_msb
= src
->prev_poc_msb
;
2423 dst
->prev_poc_lsb
= src
->prev_poc_lsb
;
2424 dst
->prev_frame_num_offset
= src
->prev_frame_num_offset
;
2425 dst
->prev_frame_num
= src
->prev_frame_num
;
2426 dst
->short_ref_count
= src
->short_ref_count
;
2428 memcpy(dst
->short_ref
, src
->short_ref
, sizeof(dst
->short_ref
));
2429 memcpy(dst
->long_ref
, src
->long_ref
, sizeof(dst
->long_ref
));
2430 memcpy(dst
->default_ref_list
, src
->default_ref_list
, sizeof(dst
->default_ref_list
));
2431 memcpy(dst
->ref_list
, src
->ref_list
, sizeof(dst
->ref_list
));
2433 memcpy(dst
->dequant4_coeff
, src
->dequant4_coeff
, sizeof(src
->dequant4_coeff
));
2434 memcpy(dst
->dequant8_coeff
, src
->dequant8_coeff
, sizeof(src
->dequant8_coeff
));
2438 * computes profile from profile_idc and constraint_set?_flags
2442 * @return profile as defined by FF_PROFILE_H264_*
2444 int ff_h264_get_profile(SPS
*sps
)
2446 int profile
= sps
->profile_idc
;
2448 switch(sps
->profile_idc
) {
2449 case FF_PROFILE_H264_BASELINE
:
2450 // constraint_set1_flag set to 1
2451 profile
|= (sps
->constraint_set_flags
& 1<<1) ? FF_PROFILE_H264_CONSTRAINED
: 0;
2453 case FF_PROFILE_H264_HIGH_10
:
2454 case FF_PROFILE_H264_HIGH_422
:
2455 case FF_PROFILE_H264_HIGH_444_PREDICTIVE
:
2456 // constraint_set3_flag set to 1
2457 profile
|= (sps
->constraint_set_flags
& 1<<3) ? FF_PROFILE_H264_INTRA
: 0;
2465 * decodes a slice header.
2466 * This will also call MPV_common_init() and frame_start() as needed.
2468 * @param h h264context
2469 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
2471 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
2473 static int decode_slice_header(H264Context
*h
, H264Context
*h0
){
2474 MpegEncContext
* const s
= &h
->s
;
2475 MpegEncContext
* const s0
= &h0
->s
;
2476 unsigned int first_mb_in_slice
;
2477 unsigned int pps_id
;
2478 int num_ref_idx_active_override_flag
;
2479 unsigned int slice_type
, tmp
, i
, j
;
2480 int default_ref_list_done
= 0;
2481 int last_pic_structure
;
2483 s
->dropable
= h
->nal_ref_idc
== 0;
2485 /* FIXME: 2tap qpel isn't implemented for high bit depth. */
2486 if((s
->avctx
->flags2
& CODEC_FLAG2_FAST
) && !h
->nal_ref_idc
&& !h
->pixel_shift
){
2487 s
->me
.qpel_put
= s
->dsp
.put_2tap_qpel_pixels_tab
;
2488 s
->me
.qpel_avg
= s
->dsp
.avg_2tap_qpel_pixels_tab
;
2490 s
->me
.qpel_put
= s
->dsp
.put_h264_qpel_pixels_tab
;
2491 s
->me
.qpel_avg
= s
->dsp
.avg_h264_qpel_pixels_tab
;
2494 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
2496 if(first_mb_in_slice
== 0){ //FIXME better field boundary detection
2497 if(h0
->current_slice
&& FIELD_PICTURE
){
2501 h0
->current_slice
= 0;
2502 if (!s0
->first_field
)
2503 s
->current_picture_ptr
= NULL
;
2506 slice_type
= get_ue_golomb_31(&s
->gb
);
2508 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "slice type too large (%d) at %d %d\n", h
->slice_type
, s
->mb_x
, s
->mb_y
);
2513 h
->slice_type_fixed
=1;
2515 h
->slice_type_fixed
=0;
2517 slice_type
= golomb_to_pict_type
[ slice_type
];
2518 if (slice_type
== AV_PICTURE_TYPE_I
2519 || (h0
->current_slice
!= 0 && slice_type
== h0
->last_slice_type
) ) {
2520 default_ref_list_done
= 1;
2522 h
->slice_type
= slice_type
;
2523 h
->slice_type_nos
= slice_type
& 3;
2525 s
->pict_type
= h
->slice_type
; // to make a few old functions happy, it's wrong though
2527 pps_id
= get_ue_golomb(&s
->gb
);
2528 if(pps_id
>=MAX_PPS_COUNT
){
2529 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
2532 if(!h0
->pps_buffers
[pps_id
]) {
2533 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing PPS %u referenced\n", pps_id
);
2536 h
->pps
= *h0
->pps_buffers
[pps_id
];
2538 if(!h0
->sps_buffers
[h
->pps
.sps_id
]) {
2539 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing SPS %u referenced\n", h
->pps
.sps_id
);
2542 h
->sps
= *h0
->sps_buffers
[h
->pps
.sps_id
];
2544 s
->avctx
->profile
= ff_h264_get_profile(&h
->sps
);
2545 s
->avctx
->level
= h
->sps
.level_idc
;
2546 s
->avctx
->refs
= h
->sps
.ref_frame_count
;
2548 if(h
== h0
&& h
->dequant_coeff_pps
!= pps_id
){
2549 h
->dequant_coeff_pps
= pps_id
;
2550 init_dequant_tables(h
);
2553 s
->mb_width
= h
->sps
.mb_width
;
2554 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
2556 h
->b_stride
= s
->mb_width
*4;
2558 s
->width
= 16*s
->mb_width
- (2>>CHROMA444
)*FFMIN(h
->sps
.crop_right
, (8<<CHROMA444
)-1);
2559 if(h
->sps
.frame_mbs_only_flag
)
2560 s
->height
= 16*s
->mb_height
- (2>>CHROMA444
)*FFMIN(h
->sps
.crop_bottom
, (8<<CHROMA444
)-1);
2562 s
->height
= 16*s
->mb_height
- (4>>CHROMA444
)*FFMIN(h
->sps
.crop_bottom
, (8<<CHROMA444
)-1);
2564 if (s
->context_initialized
2565 && ( s
->width
!= s
->avctx
->width
|| s
->height
!= s
->avctx
->height
2566 || av_cmp_q(h
->sps
.sar
, s
->avctx
->sample_aspect_ratio
))) {
2568 av_log_missing_feature(s
->avctx
, "Width/height changing with threads is", 0);
2569 return -1; // width / height changed during parallelized decoding
2572 flush_dpb(s
->avctx
);
2575 if (!s
->context_initialized
) {
2577 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Cannot (re-)initialize context during parallel decoding.\n");
2581 avcodec_set_dimensions(s
->avctx
, s
->width
, s
->height
);
2582 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
2583 av_assert0(s
->avctx
->sample_aspect_ratio
.den
);
2585 if(h
->sps
.video_signal_type_present_flag
){
2586 s
->avctx
->color_range
= h
->sps
.full_range ? AVCOL_RANGE_JPEG
: AVCOL_RANGE_MPEG
;
2587 if(h
->sps
.colour_description_present_flag
){
2588 s
->avctx
->color_primaries
= h
->sps
.color_primaries
;
2589 s
->avctx
->color_trc
= h
->sps
.color_trc
;
2590 s
->avctx
->colorspace
= h
->sps
.colorspace
;
2594 if(h
->sps
.timing_info_present_flag
){
2595 int64_t den
= h
->sps
.time_scale
;
2596 if(h
->x264_build
< 44U)
2598 av_reduce(&s
->avctx
->time_base
.num
, &s
->avctx
->time_base
.den
,
2599 h
->sps
.num_units_in_tick
, den
, 1<<30);
2602 switch (h
->sps
.bit_depth_luma
) {
2604 s
->avctx
->pix_fmt
= CHROMA444 ? PIX_FMT_YUV444P9
: PIX_FMT_YUV420P9
;
2607 s
->avctx
->pix_fmt
= CHROMA444 ? PIX_FMT_YUV444P10
: PIX_FMT_YUV420P10
;
2611 s
->avctx
->pix_fmt
= s
->avctx
->color_range
== AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P
: PIX_FMT_YUV444P
;
2613 s
->avctx
->pix_fmt
= s
->avctx
->get_format(s
->avctx
,
2614 s
->avctx
->codec
->pix_fmts ?
2615 s
->avctx
->codec
->pix_fmts
:
2616 s
->avctx
->color_range
== AVCOL_RANGE_JPEG ?
2617 hwaccel_pixfmt_list_h264_jpeg_420
:
2618 ff_hwaccel_pixfmt_list_420
);
2622 s
->avctx
->hwaccel
= ff_find_hwaccel(s
->avctx
->codec
->id
, s
->avctx
->pix_fmt
);
2624 if (MPV_common_init(s
) < 0) {
2625 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MPV_common_init() failed.\n");
2629 h
->prev_interlaced_frame
= 1;
2631 init_scan_tables(h
);
2632 if (ff_h264_alloc_tables(h
) < 0) {
2633 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Could not allocate memory for h264\n");
2634 return AVERROR(ENOMEM
);
2637 if (!HAVE_THREADS
|| !(s
->avctx
->active_thread_type
&FF_THREAD_SLICE
)) {
2638 if (context_init(h
) < 0) {
2639 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "context_init() failed.\n");
2643 for(i
= 1; i
< s
->avctx
->thread_count
; i
++) {
2645 c
= h
->thread_context
[i
] = av_malloc(sizeof(H264Context
));
2646 memcpy(c
, h
->s
.thread_context
[i
], sizeof(MpegEncContext
));
2647 memset(&c
->s
+ 1, 0, sizeof(H264Context
) - sizeof(MpegEncContext
));
2648 c
->h264dsp
= h
->h264dsp
;
2651 c
->pixel_shift
= h
->pixel_shift
;
2652 init_scan_tables(c
);
2653 clone_tables(c
, h
, i
);
2656 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
2657 if (context_init(h
->thread_context
[i
]) < 0) {
2658 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "context_init() failed.\n");
2664 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
2667 h
->mb_aff_frame
= 0;
2668 last_pic_structure
= s0
->picture_structure
;
2669 if(h
->sps
.frame_mbs_only_flag
){
2670 s
->picture_structure
= PICT_FRAME
;
2672 if(get_bits1(&s
->gb
)) { //field_pic_flag
2673 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); //bottom_field_flag
2675 s
->picture_structure
= PICT_FRAME
;
2676 h
->mb_aff_frame
= h
->sps
.mb_aff
;
2679 h
->mb_field_decoding_flag
= s
->picture_structure
!= PICT_FRAME
;
2681 if(h0
->current_slice
== 0){
2682 // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away
2683 if(h
->frame_num
!= h
->prev_frame_num
) {
2684 int unwrap_prev_frame_num
= h
->prev_frame_num
, max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
2686 if (unwrap_prev_frame_num
> h
->frame_num
) unwrap_prev_frame_num
-= max_frame_num
;
2688 if ((h
->frame_num
- unwrap_prev_frame_num
) > h
->sps
.ref_frame_count
) {
2689 unwrap_prev_frame_num
= (h
->frame_num
- h
->sps
.ref_frame_count
) - 1;
2690 if (unwrap_prev_frame_num
< 0)
2691 unwrap_prev_frame_num
+= max_frame_num
;
2693 h
->prev_frame_num
= unwrap_prev_frame_num
;
2697 while(h
->frame_num
!= h
->prev_frame_num
&&
2698 h
->frame_num
!= (h
->prev_frame_num
+1)%(1<<h
->sps
.log2_max_frame_num
)){
2699 Picture
*prev
= h
->short_ref_count ? h
->short_ref
[0] : NULL
;
2700 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "Frame num gap %d %d\n", h
->frame_num
, h
->prev_frame_num
);
2701 if (ff_h264_frame_start(h
) < 0)
2703 h
->prev_frame_num
++;
2704 h
->prev_frame_num
%= 1<<h
->sps
.log2_max_frame_num
;
2705 s
->current_picture_ptr
->frame_num
= h
->prev_frame_num
;
2706 ff_thread_report_progress((AVFrame
*)s
->current_picture_ptr
, INT_MAX
, 0);
2707 ff_thread_report_progress((AVFrame
*)s
->current_picture_ptr
, INT_MAX
, 1);
2708 ff_generate_sliding_window_mmcos(h
);
2709 if (ff_h264_execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
) < 0 &&
2710 s
->avctx
->error_recognition
>= FF_ER_EXPLODE
)
2711 return AVERROR_INVALIDDATA
;