Make the h264 loop filter bit depth aware.
[libav.git] / libavcodec / h264.c
0da71265 1/*
ff3d4310 2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
0da71265
MN
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
2912e87a 5 * This file is part of Libav.
b78e7197 6 *
2912e87a 7 * Libav is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
2912e87a 12 * Libav is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
2912e87a 18 * License along with Libav; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
ba87f080 23 * @file
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
737eb597 28#include "libavutil/imgutils.h"
40e5d31b 29#include "internal.h"
0da71265
MN
30#include "dsputil.h"
31#include "avcodec.h"
32#include "mpegvideo.h"
26b4fe82 33#include "h264.h"
0da71265 34#include "h264data.h"
188d3c51 35#include "h264_mvpred.h"
0da71265 36#include "golomb.h"
199436b9 37#include "mathops.h"
626464fb 38#include "rectangle.h"
369122dd 39#include "vdpau_internal.h"
cfa5a81e 40#include "libavutil/avassert.h"
0da71265 41
e5017ab8
LA
42#include "cabac.h"
43
2848ce84 44//#undef NDEBUG
0da71265
MN
45#include <assert.h>
46
d9ec210b 47static const uint8_t rem6[52]={
acd8d10f
PI
480, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
49};
50
d9ec210b 51static const uint8_t div6[52]={
acd8d10f
PI
520, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
53};
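/* Illustrative sketch, not part of the decoder: rem6[] and div6[] simply
 * cache qp%6 and qp/6 for the 52 legal QP values, so the dequant tables
 * below can be indexed without a division.  The helper is hypothetical. */
#if 0
static void demo_qp_split(void)
{
    int qp    = 28;          /* any value in 0..51 */
    int shift = div6[qp];    /* == qp / 6 == 4     */
    int idx   = rem6[qp];    /* == qp % 6 == 4     */
    /* the dequant scale for this qp is base_coeff[idx][...] << shift */
}
#endif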
54
0435fb16
BC
55static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
56 PIX_FMT_DXVA2_VLD,
57 PIX_FMT_VAAPI_VLD,
58 PIX_FMT_YUVJ420P,
59 PIX_FMT_NONE
60};
61
903d58f6 62void ff_h264_write_back_intra_pred_mode(H264Context *h){
5b0fb524 63 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
0da71265 64
662a5b23
MN
65 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
66 mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
67 mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
68 mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
0da71265
MN
69}
70
71/**
72 * Check if the top & left blocks are available if needed and change the dc mode so it only uses the available blocks.
73 */
2bedc0e8
MN
74int ff_h264_check_intra4x4_pred_mode(H264Context *h){
75 MpegEncContext * const s = &h->s;
76 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
77 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
78 int i;
79
80 if(!(h->top_samples_available&0x8000)){
81 for(i=0; i<4; i++){
82 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
83 if(status<0){
84 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
85 return -1;
86 } else if(status){
87 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
88 }
89 }
90 }
91
92 if((h->left_samples_available&0x8888)!=0x8888){
93 static const int mask[4]={0x8000,0x2000,0x80,0x20};
94 for(i=0; i<4; i++){
95 if(!(h->left_samples_available&mask[i])){
96 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
97 if(status<0){
98 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
99 return -1;
100 } else if(status){
101 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
102 }
103 }
104 }
105 }
106
107 return 0;
108} //FIXME cleanup like ff_h264_check_intra_pred_mode
109
110/**
111 * Check if the top & left blocks are available if needed and change the dc mode so it only uses the available blocks.
112 */
903d58f6 113int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
0da71265
MN
114 MpegEncContext * const s = &h->s;
115 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
116 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 117
43ff0714 118 if(mode > 6U) {
5175b937 119 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 120 return -1;
5175b937 121 }
115329f1 122
0da71265
MN
123 if(!(h->top_samples_available&0x8000)){
124 mode= top[ mode ];
125 if(mode<0){
9b879566 126 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
127 return -1;
128 }
129 }
115329f1 130
d1d10e91 131 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 132 mode= left[ mode ];
d1d10e91
MN
133 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
134 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
135 }
0da71265 136 if(mode<0){
9b879566 137 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 138 return -1;
115329f1 139 }
0da71265
MN
140 }
141
142 return mode;
143}
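/* Illustrative note, not from the original source: with the tables above a
 * chroma DC prediction whose top neighbour is missing degrades to
 * LEFT_DC_PRED8x8 and one whose left neighbour is missing degrades to
 * TOP_DC_PRED8x8, while directional modes that need the missing edge
 * (the -1 entries) make the function return -1. */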
144
1790a5e9 145const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
146 int i, si, di;
147 uint8_t *dst;
24456882 148 int bufidx;
0da71265 149
bb270c08 150// src[0]&0x80; //forbidden bit
0da71265
MN
151 h->nal_ref_idc= src[0]>>5;
152 h->nal_unit_type= src[0]&0x1F;
153
154 src++; length--;
e08715d3 155
b250f9c6
AJ
156#if HAVE_FAST_UNALIGNED
157# if HAVE_FAST_64BIT
e08715d3
MN
158# define RS 7
159 for(i=0; i+1<length; i+=9){
19769ece 160 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
161# else
162# define RS 3
163 for(i=0; i+1<length; i+=5){
19769ece 164 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
165# endif
166 continue;
167 if(i>0 && !src[i]) i--;
168 while(src[i]) i++;
169#else
170# define RS 0
0da71265
MN
171 for(i=0; i+1<length; i+=2){
172 if(src[i]) continue;
173 if(i>0 && src[i-1]==0) i--;
e08715d3 174#endif
0da71265
MN
175 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
176 if(src[i+2]!=3){
177 /* startcode, so we must be past the end */
178 length=i;
179 }
180 break;
181 }
abb27cfb 182 i-= RS;
0da71265
MN
183 }
184
185 if(i>=length-1){ //no escaped 0
186 *dst_length= length;
187 *consumed= length+1; //+1 for the header
115329f1 188 return src;
0da71265
MN
189 }
190
24456882 191 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 192 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 193 dst= h->rbsp_buffer[bufidx];
0da71265 194
ac658be5
FOL
195 if (dst == NULL){
196 return NULL;
197 }
198
3b66c4c5 199//printf("decoding esc\n");
593af7cd
MN
200 memcpy(dst, src, i);
201 si=di=i;
202 while(si+2<length){
0da71265 203 //remove escapes (very rare 1:2^22)
593af7cd
MN
204 if(src[si+2]>3){
205 dst[di++]= src[si++];
206 dst[di++]= src[si++];
207 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
208 if(src[si+2]==3){ //escape
209 dst[di++]= 0;
210 dst[di++]= 0;
211 si+=3;
c8470cc1 212 continue;
0da71265 213 }else //next start code
593af7cd 214 goto nsc;
0da71265
MN
215 }
216
217 dst[di++]= src[si++];
218 }
593af7cd
MN
219 while(si<length)
220 dst[di++]= src[si++];
221nsc:
0da71265 222
d4369630
AS
223 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
224
0da71265
MN
225 *dst_length= di;
226 *consumed= si + 1;//+1 for the header
90b5b51e 227//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
228 return dst;
229}
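/* Illustrative sketch, not part of the decoder: the loop above strips the
 * H.264 emulation-prevention bytes, i.e. the 0x03 inserted after every
 * 00 00 pair inside a NAL unit.  A minimal, unoptimized version of the
 * same transform (hypothetical helper) would be: */
#if 0
static int demo_unescape_nal(const uint8_t *src, int length, uint8_t *dst)
{
    int si = 0, di = 0;
    while (si < length) {
        if (si + 2 < length && src[si] == 0 && src[si+1] == 0 && src[si+2] == 3) {
            dst[di++] = 0;   /* keep the two zero bytes            */
            dst[di++] = 0;
            si += 3;         /* drop the emulation-prevention byte */
        } else {
            dst[di++] = src[si++];
        }
    }
    return di;               /* RBSP length */
}
#endif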
230
85297319
DEP
231/**
232 * Identify the exact end of the bitstream
233 * @return the length of the trailing, or 0 if damaged
234 */
235static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
236 int v= *src;
237 int r;
238
a9c9a240 239 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
240
241 for(r=1; r<9; r++){
242 if(v&1) return r;
243 v>>=1;
244 }
245 return 0;
246}
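/* Worked example, not from the original source: rbsp_trailing_bits is a
 * single 1 bit followed by zeros up to the byte boundary, so the loop above
 * returns the position of the lowest set bit of the last byte:
 *   0x80 -> 8 trailing bits (stop bit in the MSB),
 *   0x01 -> 1 trailing bit, 0x00 -> 0 (damaged). */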
247
66c6b5e2
RB
248#if 0
249/**
250 * DCT transforms the 16 dc values.
251 * @param qp quantization parameter ??? FIXME
252 */
253static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
254// const int qmul= dequant_coeff[qp][0];
255 int i;
256 int temp[16]; //FIXME check if this is a good idea
257 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
258 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
259
260 for(i=0; i<4; i++){
261 const int offset= y_offset[i];
262 const int z0= block[offset+stride*0] + block[offset+stride*4];
263 const int z1= block[offset+stride*0] - block[offset+stride*4];
264 const int z2= block[offset+stride*1] - block[offset+stride*5];
265 const int z3= block[offset+stride*1] + block[offset+stride*5];
266
267 temp[4*i+0]= z0+z3;
268 temp[4*i+1]= z1+z2;
269 temp[4*i+2]= z1-z2;
270 temp[4*i+3]= z0-z3;
271 }
272
273 for(i=0; i<4; i++){
274 const int offset= x_offset[i];
275 const int z0= temp[4*0+i] + temp[4*2+i];
276 const int z1= temp[4*0+i] - temp[4*2+i];
277 const int z2= temp[4*1+i] - temp[4*3+i];
278 const int z3= temp[4*1+i] + temp[4*3+i];
279
280 block[stride*0 +offset]= (z0 + z3)>>1;
281 block[stride*2 +offset]= (z1 + z2)>>1;
282 block[stride*8 +offset]= (z1 - z2)>>1;
283 block[stride*10+offset]= (z0 - z3)>>1;
284 }
285}
286#endif
287
288#undef xStride
289#undef stride
290
66c6b5e2
RB
291#if 0
292static void chroma_dc_dct_c(DCTELEM *block){
293 const int stride= 16*2;
294 const int xStride= 16;
295 int a,b,c,d,e;
296
297 a= block[stride*0 + xStride*0];
298 b= block[stride*0 + xStride*1];
299 c= block[stride*1 + xStride*0];
300 d= block[stride*1 + xStride*1];
301
302 e= a-b;
303 a= a+b;
304 b= c-d;
305 c= c+d;
306
307 block[stride*0 + xStride*0]= (a+c);
308 block[stride*0 + xStride*1]= (e+b);
309 block[stride*1 + xStride*0]= (a-c);
310 block[stride*1 + xStride*1]= (e-b);
311}
312#endif
313
0da71265
MN
314static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
315 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
316 int src_x_offset, int src_y_offset,
317 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
318 MpegEncContext * const s = &h->s;
319 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 320 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 321 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
322 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
323 uint8_t * src_cb, * src_cr;
324 int extra_width= h->emu_edge_width;
325 int extra_height= h->emu_edge_height;
0da71265
MN
326 int emu=0;
327 const int full_mx= mx>>2;
328 const int full_my= my>>2;
fbd312fd 329 const int pic_width = 16*s->mb_width;
0d43dd8c 330 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 331
0da71265
MN
332 if(mx&7) extra_width -= 3;
333 if(my&7) extra_height -= 3;
115329f1
DB
334
335 if( full_mx < 0-extra_width
336 || full_my < 0-extra_height
337 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 338 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2e279598 339 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
5d18eaad 340 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
341 emu=1;
342 }
115329f1 343
5d18eaad 344 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 345 if(!square){
5d18eaad 346 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 347 }
115329f1 348
49fb20cb 349 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 350
0d43dd8c 351 if(MB_FIELD){
5d18eaad 352 // chroma offset when predicting from a field of opposite parity
2143b118 353 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
354 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
355 }
356 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
357 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
358
0da71265 359 if(emu){
2e279598 360 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
361 src_cb= s->edge_emu_buffer;
362 }
5d18eaad 363 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
364
365 if(emu){
2e279598 366 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
367 src_cr= s->edge_emu_buffer;
368 }
5d18eaad 369 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
370}
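/* Illustrative sketch, not part of the decoder: mc_dir_part() splits each
 * motion vector into an integer and a fractional part -- quarter-pel for
 * luma, eighth-pel for chroma.  The hypothetical helper below repeats the
 * same arithmetic in isolation: */
#if 0
static void demo_mv_split(int mx, int my)
{
    int luma_full_x   = mx >> 2, luma_frac_x   = mx & 3; /* 1/4-pel luma   */
    int luma_full_y   = my >> 2, luma_frac_y   = my & 3;
    int chroma_full_x = mx >> 3, chroma_frac_x = mx & 7; /* 1/8-pel chroma */
    int chroma_full_y = my >> 3, chroma_frac_y = my & 7;
    /* the fractional parts select the qpel / chroma interpolation filters */
}
#endif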
371
9f2d1b4f 372static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
373 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
374 int x_offset, int y_offset,
375 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
376 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
377 int list0, int list1){
378 MpegEncContext * const s = &h->s;
379 qpel_mc_func *qpix_op= qpix_put;
380 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 381
5d18eaad
LM
382 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
383 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
384 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 385 x_offset += 8*s->mb_x;
0d43dd8c 386 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 387
0da71265 388 if(list0){
1924f3ce 389 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
390 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
391 dest_y, dest_cb, dest_cr, x_offset, y_offset,
392 qpix_op, chroma_op);
393
394 qpix_op= qpix_avg;
395 chroma_op= chroma_avg;
396 }
397
398 if(list1){
1924f3ce 399 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
400 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
401 dest_y, dest_cb, dest_cr, x_offset, y_offset,
402 qpix_op, chroma_op);
403 }
404}
405
9f2d1b4f
LM
406static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
407 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
408 int x_offset, int y_offset,
409 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
410 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
411 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
412 int list0, int list1){
413 MpegEncContext * const s = &h->s;
414
5d18eaad
LM
415 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
416 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
417 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 418 x_offset += 8*s->mb_x;
0d43dd8c 419 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 420
9f2d1b4f
LM
421 if(list0 && list1){
422 /* don't optimize for luma-only case, since B-frames usually
423 * use implicit weights => chroma too. */
424 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
425 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
426 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
427 int refn0 = h->ref_cache[0][ scan8[n] ];
428 int refn1 = h->ref_cache[1][ scan8[n] ];
429
430 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
431 dest_y, dest_cb, dest_cr,
432 x_offset, y_offset, qpix_put, chroma_put);
433 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
434 tmp_y, tmp_cb, tmp_cr,
435 x_offset, y_offset, qpix_put, chroma_put);
436
437 if(h->use_weight == 2){
1052b76f 438 int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
9f2d1b4f 439 int weight1 = 64 - weight0;
5d18eaad
LM
440 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
441 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
442 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 443 }else{
5d18eaad 444 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
3d9137c8
MN
445 h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
446 h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
5d18eaad 447 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8
MN
448 h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
449 h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
5d18eaad 450 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8
MN
451 h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
452 h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
9f2d1b4f
LM
453 }
454 }else{
455 int list = list1 ? 1 : 0;
456 int refn = h->ref_cache[list][ scan8[n] ];
457 Picture *ref= &h->ref_list[list][refn];
458 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
459 dest_y, dest_cb, dest_cr, x_offset, y_offset,
460 qpix_put, chroma_put);
461
5d18eaad 462 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3d9137c8 463 h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
9f2d1b4f 464 if(h->use_weight_chroma){
5d18eaad 465 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8 466 h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
5d18eaad 467 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8 468 h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
9f2d1b4f
LM
469 }
470 }
471}
472
473static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
474 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
475 int x_offset, int y_offset,
476 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
477 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 478 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
479 int list0, int list1){
480 if((h->use_weight==2 && list0 && list1
1052b76f 481 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
9f2d1b4f
LM
482 || h->use_weight==1)
483 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
484 x_offset, y_offset, qpix_put, chroma_put,
485 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
486 else
487 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
488 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
489}
490
513fbd8e
LM
491static inline void prefetch_motion(H264Context *h, int list){
492 /* fetch pixels for estimated mv 4 macroblocks ahead
493 * optimized for 64-byte cache lines */
494 MpegEncContext * const s = &h->s;
495 const int refn = h->ref_cache[list][scan8[0]];
496 if(refn >= 0){
497 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
498 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
499 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 500 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
501 s->dsp.prefetch(src[0]+off, s->linesize, 4);
502 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
503 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
504 }
505}
506
0da71265
MN
507static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
508 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
509 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
510 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 511 MpegEncContext * const s = &h->s;
64514ee8 512 const int mb_xy= h->mb_xy;
0da71265 513 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 514
0da71265 515 assert(IS_INTER(mb_type));
115329f1 516
513fbd8e
LM
517 prefetch_motion(h, 0);
518
0da71265
MN
519 if(IS_16X16(mb_type)){
520 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
521 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
7231ccf4 522 weight_op, weight_avg,
0da71265
MN
523 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
524 }else if(IS_16X8(mb_type)){
525 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
526 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 527 &weight_op[1], &weight_avg[1],
0da71265
MN
528 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
529 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
530 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 531 &weight_op[1], &weight_avg[1],
0da71265
MN
532 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
533 }else if(IS_8X16(mb_type)){
5d18eaad 534 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 535 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 536 &weight_op[2], &weight_avg[2],
0da71265 537 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 538 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 539 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 540 &weight_op[2], &weight_avg[2],
0da71265
MN
541 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
542 }else{
543 int i;
115329f1 544
0da71265
MN
545 assert(IS_8X8(mb_type));
546
547 for(i=0; i<4; i++){
548 const int sub_mb_type= h->sub_mb_type[i];
549 const int n= 4*i;
550 int x_offset= (i&1)<<2;
551 int y_offset= (i&2)<<1;
552
553 if(IS_SUB_8X8(sub_mb_type)){
554 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
555 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 556 &weight_op[3], &weight_avg[3],
0da71265
MN
557 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
558 }else if(IS_SUB_8X4(sub_mb_type)){
559 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
560 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 561 &weight_op[4], &weight_avg[4],
0da71265
MN
562 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
563 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
564 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 565 &weight_op[4], &weight_avg[4],
0da71265
MN
566 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
567 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 568 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 569 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 570 &weight_op[5], &weight_avg[5],
0da71265 571 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 572 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 573 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 574 &weight_op[5], &weight_avg[5],
0da71265
MN
575 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
576 }else{
577 int j;
578 assert(IS_SUB_4X4(sub_mb_type));
579 for(j=0; j<4; j++){
580 int sub_x_offset= x_offset + 2*(j&1);
581 int sub_y_offset= y_offset + (j&2);
582 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
583 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 584 &weight_op[6], &weight_avg[6],
0da71265
MN
585 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
586 }
587 }
588 }
589 }
513fbd8e
LM
590
591 prefetch_motion(h, 1);
0da71265
MN
592}
593
0da71265 594
91078926 595static void free_tables(H264Context *h, int free_rbsp){
7978debd 596 int i;
afebe2f7 597 H264Context *hx;
0da71265 598 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
599 av_freep(&h->chroma_pred_mode_table);
600 av_freep(&h->cbp_table);
9e528114
LA
601 av_freep(&h->mvd_table[0]);
602 av_freep(&h->mvd_table[1]);
5ad984c9 603 av_freep(&h->direct_table);
0da71265
MN
604 av_freep(&h->non_zero_count);
605 av_freep(&h->slice_table_base);
606 h->slice_table= NULL;
c988f975 607 av_freep(&h->list_counts);
e5017ab8 608
0da71265 609 av_freep(&h->mb2b_xy);
d43c1922 610 av_freep(&h->mb2br_xy);
9f2d1b4f 611
6752dd5a 612 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
613 hx = h->thread_context[i];
614 if(!hx) continue;
615 av_freep(&hx->top_borders[1]);
616 av_freep(&hx->top_borders[0]);
617 av_freep(&hx->s.obmc_scratchpad);
91078926 618 if (free_rbsp){
fcb7e535
RB
619 av_freep(&hx->rbsp_buffer[1]);
620 av_freep(&hx->rbsp_buffer[0]);
621 hx->rbsp_buffer_size[0] = 0;
622 hx->rbsp_buffer_size[1] = 0;
91078926 623 }
d2d5e067 624 if (i) av_freep(&h->thread_context[i]);
afebe2f7 625 }
0da71265
MN
626}
627
239ea04c
LM
628static void init_dequant8_coeff_table(H264Context *h){
629 int i,q,x;
630 h->dequant8_coeff[0] = h->dequant8_buffer[0];
631 h->dequant8_coeff[1] = h->dequant8_buffer[1];
632
633 for(i=0; i<2; i++ ){
634 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
635 h->dequant8_coeff[1] = h->dequant8_buffer[0];
636 break;
637 }
638
639 for(q=0; q<52; q++){
d9ec210b
DP
640 int shift = div6[q];
641 int idx = rem6[q];
239ea04c 642 for(x=0; x<64; x++)
ca32f7f2 643 h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
548a1c8a
LM
644 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
645 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
646 }
647 }
648}
649
650static void init_dequant4_coeff_table(H264Context *h){
651 int i,j,q,x;
652 for(i=0; i<6; i++ ){
653 h->dequant4_coeff[i] = h->dequant4_buffer[i];
654 for(j=0; j<i; j++){
655 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
656 h->dequant4_coeff[i] = h->dequant4_buffer[j];
657 break;
658 }
659 }
660 if(j<i)
661 continue;
662
663 for(q=0; q<52; q++){
d9ec210b
DP
664 int shift = div6[q] + 2;
665 int idx = rem6[q];
239ea04c 666 for(x=0; x<16; x++)
ca32f7f2 667 h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
ab2e3e2c 668 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
669 h->pps.scaling_matrix4[i][x]) << shift;
670 }
671 }
672}
673
674static void init_dequant_tables(H264Context *h){
675 int i,x;
676 init_dequant4_coeff_table(h);
677 if(h->pps.transform_8x8_mode)
678 init_dequant8_coeff_table(h);
679 if(h->sps.transform_bypass){
680 for(i=0; i<6; i++)
681 for(x=0; x<16; x++)
682 h->dequant4_coeff[i][0][x] = 1<<6;
683 if(h->pps.transform_8x8_mode)
684 for(i=0; i<2; i++)
685 for(x=0; x<64; x++)
686 h->dequant8_coeff[i][0][x] = 1<<6;
687 }
688}
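/* Illustrative note, not from the original source: each dequant entry built
 * above is  base_coeff[qp%6] * scaling_matrix[x] << (qp/6 [+2 for 4x4]),
 * so increasing the QP by 6 exactly doubles the dequantization scale.  In
 * the transform-bypass case every coefficient is forced to 1<<6 so that
 * dequantization effectively becomes a no-op. */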
689
690
903d58f6 691int ff_h264_alloc_tables(H264Context *h){
0da71265 692 MpegEncContext * const s = &h->s;
7bc9090a 693 const int big_mb_num= s->mb_stride * (s->mb_height+1);
145061a1 694 const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
239ea04c 695 int x,y;
0da71265 696
145061a1 697 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)
e5017ab8 698
c988f975 699 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
d31dbec3
RP
700 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
701 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265 702
d31dbec3 703 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
145061a1
MN
704 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
705 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
36b54927 706 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
c988f975 707 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
e5017ab8 708
b735aeea 709 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 710 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 711
d31dbec3 712 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
d43c1922 713 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
MN
714 for(y=0; y<s->mb_height; y++){
715 for(x=0; x<s->mb_width; x++){
7bc9090a 716 const int mb_xy= x + y*s->mb_stride;
0da71265 717 const int b_xy = 4*x + 4*y*h->b_stride;
115329f1 718
0da71265 719 h->mb2b_xy [mb_xy]= b_xy;
e1c88a21 720 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
0da71265
MN
721 }
722 }
9f2d1b4f 723
9c6221ae
GV
724 s->obmc_scratchpad = NULL;
725
56edbd81
LM
726 if(!h->dequant4_coeff[0])
727 init_dequant_tables(h);
728
0da71265
MN
729 return 0;
730fail:
91078926 731 free_tables(h, 1);
0da71265
MN
732 return -1;
733}
734
afebe2f7
735/**
736 * Mimic alloc_tables(), but for every context thread.
737 */
145061a1
MN
738static void clone_tables(H264Context *dst, H264Context *src, int i){
739 MpegEncContext * const s = &src->s;
740 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
afebe2f7
741 dst->non_zero_count = src->non_zero_count;
742 dst->slice_table = src->slice_table;
743 dst->cbp_table = src->cbp_table;
744 dst->mb2b_xy = src->mb2b_xy;
d43c1922 745 dst->mb2br_xy = src->mb2br_xy;
afebe2f7 746 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
145061a1
MN
747 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
748 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
afebe2f7 749 dst->direct_table = src->direct_table;
fb823b77 750 dst->list_counts = src->list_counts;
afebe2f7 751
afebe2f7
752 dst->s.obmc_scratchpad = NULL;
753 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
754}
755
756/**
757 * Initialize the context.
758 * Allocate buffers which are not shared amongst multiple threads.
759 */
760static int context_init(H264Context *h){
d31dbec3
RP
761 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
762 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
afebe2f7 763
145061a1
MN
764 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
765 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
766
afebe2f7
767 return 0;
768fail:
769 return -1; // free_tables will clean up for us
770}
771
9855b2e3
MN
772static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
773
98a6fff9 774static av_cold void common_init(H264Context *h){
0da71265 775 MpegEncContext * const s = &h->s;
0da71265
MN
776
777 s->width = s->avctx->width;
778 s->height = s->avctx->height;
779 s->codec_id= s->avctx->codec->id;
115329f1 780
4693b031 781 ff_h264dsp_init(&h->h264dsp);
c92a30bb 782 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 783
239ea04c 784 h->dequant_coeff_pps= -1;
9a41c2c7 785 s->unrestricted_mv=1;
0da71265 786 s->decode=1; //FIXME
56edbd81 787
a5805aa9
MN
788 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
789
56edbd81
LM
790 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
791 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
792}
793
05e95319
HC
794int ff_h264_decode_extradata(H264Context *h)
795{
796 AVCodecContext *avctx = h->s.avctx;
9855b2e3 797
05e95319 798 if(*(char *)avctx->extradata == 1){
9855b2e3
MN
799 int i, cnt, nalsize;
800 unsigned char *p = avctx->extradata;
801
802 h->is_avc = 1;
803
804 if(avctx->extradata_size < 7) {
805 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
806 return -1;
807 }
808 /* sps and pps in the avcC always have length coded with 2 bytes,
809 so put a fake nal_length_size = 2 while parsing them */
810 h->nal_length_size = 2;
811 // Decode sps from avcC
812 cnt = *(p+5) & 0x1f; // Number of sps
813 p += 6;
814 for (i = 0; i < cnt; i++) {
815 nalsize = AV_RB16(p) + 2;
816 if(decode_nal_units(h, p, nalsize) < 0) {
817 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
818 return -1;
819 }
820 p += nalsize;
821 }
822 // Decode pps from avcC
823 cnt = *(p++); // Number of pps
824 for (i = 0; i < cnt; i++) {
825 nalsize = AV_RB16(p) + 2;
826 if(decode_nal_units(h, p, nalsize) != nalsize) {
827 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
828 return -1;
829 }
830 p += nalsize;
831 }
832 // Now store the right nal length size, which will be used to parse all other nals
833 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
834 } else {
835 h->is_avc = 0;
05e95319 836 if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
9855b2e3
MN
837 return -1;
838 }
05e95319
HC
839 return 0;
840}
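/* Illustrative note, not from the original source: the avcC extradata
 * parsed above is laid out roughly as
 *   [0]    configurationVersion (== 1)
 *   [1..3] profile / profile compatibility / level
 *   [4]    6 reserved bits + (lengthSizeMinusOne & 3)
 *   [5]    3 reserved bits + SPS count, then each SPS as
 *          <16-bit length><NAL>, followed by a PPS count and the PPS NALs,
 * which is why the code reads *(p+5)&0x1f, fetches the NAL sizes with
 * AV_RB16() and derives nal_length_size from byte 4. */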
841
842av_cold int ff_h264_decode_init(AVCodecContext *avctx){
843 H264Context *h= avctx->priv_data;
844 MpegEncContext * const s = &h->s;
845
846 MPV_decode_defaults(s);
847
848 s->avctx = avctx;
849 common_init(h);
850
851 s->out_format = FMT_H264;
852 s->workaround_bugs= avctx->workaround_bugs;
853
854 // set defaults
855// s->decode_mb= ff_h263_decode_mb;
856 s->quarter_sample = 1;
857 if(!avctx->has_b_frames)
858 s->low_delay= 1;
859
860 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
861
862 ff_h264_decode_init_vlc();
863
864 h->thread_context[0] = h;
865 h->outputed_poc = INT_MIN;
866 h->prev_poc_msb= 1<<16;
867 h->x264_build = -1;
868 ff_h264_reset_sei(h);
869 if(avctx->codec_id == CODEC_ID_H264){
870 if(avctx->ticks_per_frame == 1){
871 s->avctx->time_base.den *=2;
872 }
873 avctx->ticks_per_frame = 2;
874 }
875
876 if(avctx->extradata_size > 0 && avctx->extradata &&
877 ff_h264_decode_extradata(h))
878 return -1;
879
db8cb47d
MN
880 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
881 s->avctx->has_b_frames = h->sps.num_reorder_frames;
882 s->low_delay = 0;
883 }
9855b2e3 884
0da71265
MN
885 return 0;
886}
887
903d58f6 888int ff_h264_frame_start(H264Context *h){
0da71265
MN
889 MpegEncContext * const s = &h->s;
890 int i;
891
af8aa846
MN
892 if(MPV_frame_start(s, s->avctx) < 0)
893 return -1;
0da71265 894 ff_er_frame_start(s);
3a22d7fa
JD
895 /*
896 * MPV_frame_start uses pict_type to derive key_frame.
897 * This is incorrect for H.264; IDR markings must be used.
1412060e 898 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
899 * See decode_nal_units().
900 */
901 s->current_picture_ptr->key_frame= 0;
c173a088 902 s->current_picture_ptr->mmco_reset= 0;
0da71265
MN
903
904 assert(s->linesize && s->uvlinesize);
905
906 for(i=0; i<16; i++){
907 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 908 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
909 }
910 for(i=0; i<4; i++){
911 h->block_offset[16+i]=
912 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
913 h->block_offset[24+16+i]=
914 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
915 }
916
934b0821
LM
917 /* can't be in alloc_tables because linesize isn't known there.
918 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7 919 for(i = 0; i < s->avctx->thread_count; i++)
0f016023 920 if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
afebe2f7 921 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad 922
2ce1c2e0 923 /* some macroblocks can be accessed before they're available in case of lost slices, MBAFF or threading */
5820b90d 924 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 925
0da71265 926// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 927
1412060e 928 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
929 // that if we break out due to an error it can be released automatically
930 // in the next MPV_frame_start().
931 // SVQ3 as well as most other codecs have only last/next/current and thus
932 // get released even with set reference, besides SVQ3 and others do not
933 // mark frames as reference later "naturally".
934 if(s->codec_id != CODEC_ID_SVQ3)
935 s->current_picture_ptr->reference= 0;
357282c6
MN
936
937 s->current_picture_ptr->field_poc[0]=
938 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 939 assert(s->current_picture_ptr->long_ref==0);
357282c6 940
af8aa846 941 return 0;
0da71265
MN
942}
943
93cc10fa 944static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e 945 MpegEncContext * const s = &h->s;
0b69d625 946 uint8_t *top_border;
5f7f9719 947 int top_idx = 1;
115329f1 948
53c05b1e
MN
949 src_y -= linesize;
950 src_cb -= uvlinesize;
951 src_cr -= uvlinesize;
952
5f7f9719
MN
953 if(!simple && FRAME_MBAFF){
954 if(s->mb_y&1){
5f7f9719 955 if(!MB_MBAFF){
0b69d625
AS
956 top_border = h->top_borders[0][s->mb_x];
957 AV_COPY128(top_border, src_y + 15*linesize);
49fb20cb 958 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
959 AV_COPY64(top_border+16, src_cb+7*uvlinesize);
960 AV_COPY64(top_border+24, src_cr+7*uvlinesize);
5f7f9719
MN
961 }
962 }
c988f975
MN
963 }else if(MB_MBAFF){
964 top_idx = 0;
965 }else
966 return;
5f7f9719
MN
967 }
968
0b69d625 969 top_border = h->top_borders[top_idx][s->mb_x];
3b66c4c5 970 // There are two lines saved, the line above the top macroblock of a pair,
6867a90b 971 // and the line above the bottom macroblock
0b69d625 972 AV_COPY128(top_border, src_y + 16*linesize);
53c05b1e 973
49fb20cb 974 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
975 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
976 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
53c05b1e
MN
977 }
978}
979
93cc10fa 980static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e 981 MpegEncContext * const s = &h->s;
b69378e2
982 int deblock_left;
983 int deblock_top;
5f7f9719 984 int top_idx = 1;
1e4f1c56
AS
985 uint8_t *top_border_m1;
986 uint8_t *top_border;
5f7f9719
MN
987
988 if(!simple && FRAME_MBAFF){
989 if(s->mb_y&1){
c988f975
MN
990 if(!MB_MBAFF)
991 return;
5f7f9719 992 }else{
5f7f9719
MN
993 top_idx = MB_MBAFF ? 0 : 1;
994 }
5f7f9719 995 }
b69378e2
996
997 if(h->deblocking_filter == 2) {
024bf79f
MN
998 deblock_left = h->left_type[0];
999 deblock_top = h->top_type;
b69378e2
1000 } else {
1001 deblock_left = (s->mb_x > 0);
6c805007 1002 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 1003 }
53c05b1e
MN
1004
1005 src_y -= linesize + 1;
1006 src_cb -= uvlinesize + 1;
1007 src_cr -= uvlinesize + 1;
1008
1e4f1c56
AS
1009 top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1010 top_border = h->top_borders[top_idx][s->mb_x];
1011
0b69d625
AS
1012#define XCHG(a,b,xchg)\
1013if (xchg) AV_SWAP64(b,a);\
1014else AV_COPY64(b,a);
d89dc06a 1015
d89dc06a 1016 if(deblock_top){
c988f975 1017 if(deblock_left){
0b69d625 1018 XCHG(top_border_m1+8, src_y -7, 1);
c988f975 1019 }
0b69d625
AS
1020 XCHG(top_border+0, src_y +1, xchg);
1021 XCHG(top_border+8, src_y +9, 1);
cad4368a 1022 if(s->mb_x+1 < s->mb_width){
0b69d625 1023 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
43efd19a 1024 }
53c05b1e 1025 }
53c05b1e 1026
49fb20cb 1027 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 1028 if(deblock_top){
c988f975 1029 if(deblock_left){
0b69d625
AS
1030 XCHG(top_border_m1+16, src_cb -7, 1);
1031 XCHG(top_border_m1+24, src_cr -7, 1);
c988f975 1032 }
0b69d625
AS
1033 XCHG(top_border+16, src_cb+1, 1);
1034 XCHG(top_border+24, src_cr+1, 1);
53c05b1e 1035 }
53c05b1e
MN
1036 }
1037}
1038
5a6a6cc7 1039static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
1040 MpegEncContext * const s = &h->s;
1041 const int mb_x= s->mb_x;
1042 const int mb_y= s->mb_y;
64514ee8 1043 const int mb_xy= h->mb_xy;
0da71265
MN
1044 const int mb_type= s->current_picture.mb_type[mb_xy];
1045 uint8_t *dest_y, *dest_cb, *dest_cr;
1046 int linesize, uvlinesize /*dct_offset*/;
1047 int i;
6867a90b 1048 int *block_offset = &h->block_offset[0];
41e4055b 1049 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 1050 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 1051 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 1052 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 1053 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 1054
6120a343
MN
1055 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1056 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1057 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 1058
a957c27b
LM
1059 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1060 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1061
c988f975
MN
1062 h->list_counts[mb_xy]= h->list_count;
1063
bd91fee3 1064 if (!simple && MB_FIELD) {
5d18eaad
LM
1065 linesize = h->mb_linesize = s->linesize * 2;
1066 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 1067 block_offset = &h->block_offset[24];
1412060e 1068 if(mb_y&1){ //FIXME move out of this function?
0da71265 1069 dest_y -= s->linesize*15;
6867a90b
LLL
1070 dest_cb-= s->uvlinesize*7;
1071 dest_cr-= s->uvlinesize*7;
0da71265 1072 }
5d18eaad
LM
1073 if(FRAME_MBAFF) {
1074 int list;
3425501d 1075 for(list=0; list<h->list_count; list++){
5d18eaad
LM
1076 if(!USES_LIST(mb_type, list))
1077 continue;
1078 if(IS_16X16(mb_type)){
1079 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 1080 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
1081 }else{
1082 for(i=0; i<16; i+=4){
5d18eaad
LM
1083 int ref = h->ref_cache[list][scan8[i]];
1084 if(ref >= 0)
1710856c 1085 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
1086 }
1087 }
1088 }
1089 }
0da71265 1090 } else {
5d18eaad
LM
1091 linesize = h->mb_linesize = s->linesize;
1092 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
1093// dct_offset = s->linesize * 16;
1094 }
115329f1 1095
bd91fee3 1096 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
1097 for (i=0; i<16; i++) {
1098 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 1099 }
c1708e8d
MN
1100 for (i=0; i<8; i++) {
1101 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1102 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 1103 }
e7e09b49
LLL
1104 } else {
1105 if(IS_INTRA(mb_type)){
5f7f9719 1106 if(h->deblocking_filter)
93cc10fa 1107 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 1108
49fb20cb 1109 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
1110 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1111 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 1112 }
0da71265 1113
e7e09b49 1114 if(IS_INTRA4x4(mb_type)){
bd91fee3 1115 if(simple || !s->encoding){
43efd19a 1116 if(IS_8x8DCT(mb_type)){
1eb96035
MN
1117 if(transform_bypass){
1118 idct_dc_add =
1119 idct_add = s->dsp.add_pixels8;
dae006d7 1120 }else{
4693b031
MR
1121 idct_dc_add = h->h264dsp.h264_idct8_dc_add;
1122 idct_add = h->h264dsp.h264_idct8_add;
1eb96035 1123 }
43efd19a
LM
1124 for(i=0; i<16; i+=4){
1125 uint8_t * const ptr= dest_y + block_offset[i];
1126 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
1127 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1128 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1129 }else{
ac0623b2
MN
1130 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1131 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1132 (h->topright_samples_available<<i)&0x4000, linesize);
1133 if(nnz){
1134 if(nnz == 1 && h->mb[i*16])
1135 idct_dc_add(ptr, h->mb + i*16, linesize);
1136 else
1137 idct_add (ptr, h->mb + i*16, linesize);
1138 }
41e4055b 1139 }
43efd19a 1140 }
1eb96035
MN
1141 }else{
1142 if(transform_bypass){
1143 idct_dc_add =
1144 idct_add = s->dsp.add_pixels4;
1145 }else{
4693b031
MR
1146 idct_dc_add = h->h264dsp.h264_idct_dc_add;
1147 idct_add = h->h264dsp.h264_idct_add;
1eb96035 1148 }
aebb5d6d
MN
1149 for(i=0; i<16; i++){
1150 uint8_t * const ptr= dest_y + block_offset[i];
1151 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 1152
aebb5d6d
MN
1153 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1154 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1155 }else{
1156 uint8_t *topright;
1157 int nnz, tr;
1158 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1159 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1160 assert(mb_y || linesize <= block_offset[i]);
1161 if(!topright_avail){
1162 tr= ptr[3 - linesize]*0x01010101;
1163 topright= (uint8_t*) &tr;
1164 }else
1165 topright= ptr + 4 - linesize;
ac0623b2 1166 }else
aebb5d6d
MN
1167 topright= NULL;
1168
1169 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1170 nnz = h->non_zero_count_cache[ scan8[i] ];
1171 if(nnz){
1172 if(is_h264){
1173 if(nnz == 1 && h->mb[i*16])
1174 idct_dc_add(ptr, h->mb + i*16, linesize);
1175 else
1176 idct_add (ptr, h->mb + i*16, linesize);
1177 }else
881b5b80 1178 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
aebb5d6d 1179 }
ac0623b2 1180 }
41e4055b 1181 }
8b82a956 1182 }
0da71265 1183 }
e7e09b49 1184 }else{
c92a30bb 1185 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2e186601
JGG
1186 if(is_h264){
1187 if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
0d1d01cf
JGG
1188 if(!transform_bypass)
1189 h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
1190 else{
1191 static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
1192 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
1193 for(i = 0; i < 16; i++)
1194 h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
1195 }
2e186601
JGG
1196 }
1197 }else
290fabc6 1198 ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
0da71265 1199 }
5f7f9719 1200 if(h->deblocking_filter)
93cc10fa 1201 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 1202 }else if(is_h264){
e7e09b49 1203 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
1204 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1205 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
4693b031 1206 h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
0da71265 1207 }
e7e09b49
LLL
1208
1209
1210 if(!IS_INTRA4x4(mb_type)){
bd91fee3 1211 if(is_h264){
ef9d1d15 1212 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
1213 if(transform_bypass){
1214 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
1215 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1216 }else{
1217 for(i=0; i<16; i++){
1218 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 1219 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1220 }
2fd1f0e0
MN
1221 }
1222 }else{
4693b031 1223 h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 1224 }
49c084a7 1225 }else if(h->cbp&15){
2fd1f0e0 1226 if(transform_bypass){
0a8ca22f 1227 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 1228 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 1229 for(i=0; i<16; i+=di){
62bc966f 1230 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 1231 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1232 }
ef9d1d15 1233 }
2fd1f0e0
MN
1234 }else{
1235 if(IS_8x8DCT(mb_type)){
4693b031 1236 h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2fd1f0e0 1237 }else{
4693b031 1238 h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2fd1f0e0
MN
1239 }
1240 }
4704097a 1241 }
e7e09b49
LLL
1242 }else{
1243 for(i=0; i<16; i++){
1244 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 1245 uint8_t * const ptr= dest_y + block_offset[i];
881b5b80 1246 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
e7e09b49 1247 }
4704097a 1248 }
0da71265
MN
1249 }
1250 }
0da71265 1251
49fb20cb 1252 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
1253 uint8_t *dest[2] = {dest_cb, dest_cr};
1254 if(transform_bypass){
96465b90
MN
1255 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1256 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1257 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1258 }else{
c25ac15a 1259 idct_add = s->dsp.add_pixels4;
96465b90
MN
1260 for(i=16; i<16+8; i++){
1261 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1262 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1263 }
1264 }
ef9d1d15 1265 }else{
aebb5d6d 1266 if(is_h264){
2e186601 1267 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
e39e3aba 1268 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2e186601 1269 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
e39e3aba 1270 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1d16a1cf
RB
1271 h->h264dsp.h264_idct_add8(dest, block_offset,
1272 h->mb, uvlinesize,
1273 h->non_zero_count_cache);
aebb5d6d 1274 }else{
e39e3aba
OA
1275 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1276 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d
MN
1277 for(i=16; i<16+8; i++){
1278 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
1279 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
881b5b80 1280 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
aebb5d6d 1281 }
e7e09b49 1282 }
4704097a 1283 }
0da71265
MN
1284 }
1285 }
1286 }
c212fb0c
MN
1287 if(h->cbp || IS_INTRA(mb_type))
1288 s->dsp.clear_blocks(h->mb);
0da71265
MN
1289}
1290
0da71265 1291/**
bd91fee3
AS
1292 * Process a macroblock; this variant avoids checks for expensive uncommon cases.
1293 */
1294static void hl_decode_mb_simple(H264Context *h){
1295 hl_decode_mb_internal(h, 1);
1296}
1297
1298/**
1299 * Process a macroblock; this handles edge cases, such as interlacing.
1300 */
1301static void av_noinline hl_decode_mb_complex(H264Context *h){
1302 hl_decode_mb_internal(h, 0);
1303}
1304
903d58f6 1305void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3 1306 MpegEncContext * const s = &h->s;
64514ee8 1307 const int mb_xy= h->mb_xy;
bd91fee3 1308 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 1309 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 1310
bd91fee3
AS
1311 if (is_complex)
1312 hl_decode_mb_complex(h);
1313 else hl_decode_mb_simple(h);
1314}
1315
0da71265
MN
1316static int pred_weight_table(H264Context *h){
1317 MpegEncContext * const s = &h->s;
1318 int list, i;
9f2d1b4f 1319 int luma_def, chroma_def;
115329f1 1320
9f2d1b4f
LM
1321 h->use_weight= 0;
1322 h->use_weight_chroma= 0;
0da71265 1323 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
09fffe9b
JGG
1324 if(CHROMA)
1325 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
1326 luma_def = 1<<h->luma_log2_weight_denom;
1327 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
1328
1329 for(list=0; list<2; list++){
cb99c652
GB
1330 h->luma_weight_flag[list] = 0;
1331 h->chroma_weight_flag[list] = 0;
0da71265
MN
1332 for(i=0; i<h->ref_count[list]; i++){
1333 int luma_weight_flag, chroma_weight_flag;
115329f1 1334
0da71265
MN
1335 luma_weight_flag= get_bits1(&s->gb);
1336 if(luma_weight_flag){
3d9137c8
MN
1337 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
1338 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
1339 if( h->luma_weight[i][list][0] != luma_def
1340 || h->luma_weight[i][list][1] != 0) {
9f2d1b4f 1341 h->use_weight= 1;
cb99c652
GB
1342 h->luma_weight_flag[list]= 1;
1343 }
9f2d1b4f 1344 }else{
3d9137c8
MN
1345 h->luma_weight[i][list][0]= luma_def;
1346 h->luma_weight[i][list][1]= 0;
0da71265
MN
1347 }
1348
0af6967e 1349 if(CHROMA){
fef744d4
MN
1350 chroma_weight_flag= get_bits1(&s->gb);
1351 if(chroma_weight_flag){
1352 int j;
1353 for(j=0; j<2; j++){
3d9137c8
MN
1354 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
1355 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
1356 if( h->chroma_weight[i][list][j][0] != chroma_def
1357 || h->chroma_weight[i][list][j][1] != 0) {
fef744d4 1358 h->use_weight_chroma= 1;
cb99c652
GB
1359 h->chroma_weight_flag[list]= 1;
1360 }
fef744d4
MN
1361 }
1362 }else{
1363 int j;
1364 for(j=0; j<2; j++){
3d9137c8
MN
1365 h->chroma_weight[i][list][j][0]= chroma_def;
1366 h->chroma_weight[i][list][j][1]= 0;
fef744d4 1367 }
0da71265
MN
1368 }
1369 }
1370 }
975a1447 1371 if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
0da71265 1372 }
9f2d1b4f 1373 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
1374 return 0;
1375}
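/* Illustrative note, not from the original source: the explicit weights
 * parsed above are applied later roughly as
 *   pred = ((sample * weight + (1 << (log2_denom - 1))) >> log2_denom) + offset
 * (with clipping), so the defaults weight = 1<<log2_denom, offset = 0
 * reproduce the unweighted prediction exactly. */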
1376
1052b76f
MN
1377/**
1378 * Initialize implicit_weight table.
6da88bd3 1379 * @param field 0/1 initialize the weight for interlaced MBAFF
1052b76f
MN
1380 * -1 initializes the rest
1381 */
1382static void implicit_weight_table(H264Context *h, int field){
9f2d1b4f 1383 MpegEncContext * const s = &h->s;
1052b76f 1384 int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
9f2d1b4f 1385
ce09f927
GB
1386 for (i = 0; i < 2; i++) {
1387 h->luma_weight_flag[i] = 0;
1388 h->chroma_weight_flag[i] = 0;
1389 }
1390
1052b76f
MN
1391 if(field < 0){
1392 cur_poc = s->current_picture_ptr->poc;
1393 if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
9f2d1b4f
LM
1394 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1395 h->use_weight= 0;
1396 h->use_weight_chroma= 0;
1397 return;
1398 }
1052b76f
MN
1399 ref_start= 0;
1400 ref_count0= h->ref_count[0];
1401 ref_count1= h->ref_count[1];
1402 }else{
1403 cur_poc = s->current_picture_ptr->field_poc[field];
1404 ref_start= 16;
1405 ref_count0= 16+2*h->ref_count[0];
1406 ref_count1= 16+2*h->ref_count[1];
1407 }
9f2d1b4f
LM
1408
1409 h->use_weight= 2;
1410 h->use_weight_chroma= 2;
1411 h->luma_log2_weight_denom= 5;
1412 h->chroma_log2_weight_denom= 5;
1413
1052b76f 1414 for(ref0=ref_start; ref0 < ref_count0; ref0++){
9f2d1b4f 1415 int poc0 = h->ref_list[0][ref0].poc;
1052b76f 1416 for(ref1=ref_start; ref1 < ref_count1; ref1++){
738386a5 1417 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 1418 int td = av_clip(poc1 - poc0, -128, 127);
1052b76f 1419 int w= 32;
9f2d1b4f 1420 if(td){
f66e4f5f 1421 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 1422 int tx = (16384 + (FFABS(td) >> 1)) / td;
72f86ec0
MN
1423 int dist_scale_factor = (tb*tx + 32) >> 8;
1424 if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
1052b76f
MN
1425 w = 64 - dist_scale_factor;
1426 }
1427 if(field<0){
1428 h->implicit_weight[ref0][ref1][0]=
1429 h->implicit_weight[ref0][ref1][1]= w;
1430 }else{
1431 h->implicit_weight[ref0][ref1][field]=w;
72f86ec0 1432 }
9f2d1b4f
LM
1433 }
1434 }
1435}
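/* Worked example, not from the original source: for a B picture half way
 * between its two references (cur_poc = 4, poc0 = 0, poc1 = 8):
 *   td = 8, tb = 4, tx = (16384 + 4) / 8 = 2048,
 *   dist_scale_factor = (4*2048 + 32) >> 8 = 32, w = 64 - 32 = 32,
 * i.e. both references get equal weight, matching the unweighted average. */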
1436
8fd57a66 1437/**
5175b937 1438 * instantaneous decoder refresh.
0da71265
MN
1439 */
1440static void idr(H264Context *h){
ea6f00c4 1441 ff_h264_remove_all_refs(h);
a149c1a5 1442 h->prev_frame_num= 0;
80f8e035
MN
1443 h->prev_frame_num_offset= 0;
1444 h->prev_poc_msb=
1445 h->prev_poc_lsb= 0;
0da71265
MN
1446}
1447
7c33ad19
LM
1448/* forget old pics after a seek */
1449static void flush_dpb(AVCodecContext *avctx){
1450 H264Context *h= avctx->priv_data;
1451 int i;
64b9d48f 1452 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
1453 if(h->delayed_pic[i])
1454 h->delayed_pic[i]->reference= 0;
7c33ad19 1455 h->delayed_pic[i]= NULL;
285b570f 1456 }
df8a7dff 1457 h->outputed_poc= INT_MIN;
b19d493f 1458 h->prev_interlaced_frame = 1;
7c33ad19 1459 idr(h);
ca159196
MR
1460 if(h->s.current_picture_ptr)
1461 h->s.current_picture_ptr->reference= 0;
12d96de3 1462 h->s.first_field= 0;
9c095463 1463 ff_h264_reset_sei(h);
e240f898 1464 ff_mpeg_flush(avctx);
7c33ad19
LM
1465}
1466
0da71265
MN
1467static int init_poc(H264Context *h){
1468 MpegEncContext * const s = &h->s;
1469 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1470 int field_poc[2];
357282c6 1471 Picture *cur = s->current_picture_ptr;
0da71265 1472
b78a6baa 1473 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 1474 if(h->frame_num < h->prev_frame_num)
b78a6baa 1475 h->frame_num_offset += max_frame_num;
0da71265
MN
1476
1477 if(h->sps.poc_type==0){
1478 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1479
1480 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1481 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1482 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1483 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1484 else
1485 h->poc_msb = h->prev_poc_msb;
1486//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 1487 field_poc[0] =
0da71265 1488 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 1489 if(s->picture_structure == PICT_FRAME)
0da71265
MN
1490 field_poc[1] += h->delta_poc_bottom;
1491 }else if(h->sps.poc_type==1){
1492 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1493 int i;
1494
1495 if(h->sps.poc_cycle_length != 0)
1496 abs_frame_num = h->frame_num_offset + h->frame_num;
1497 else
1498 abs_frame_num = 0;
1499
1500 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1501 abs_frame_num--;
115329f1 1502
0da71265
MN
1503 expected_delta_per_poc_cycle = 0;
1504 for(i=0; i < h->sps.poc_cycle_length; i++)
1505 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1506
1507 if(abs_frame_num > 0){
1508 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1509 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1510
1511 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1512 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1513 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1514 } else
1515 expectedpoc = 0;
1516
115329f1 1517 if(h->nal_ref_idc == 0)
0da71265 1518 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 1519
0da71265
MN
1520 field_poc[0] = expectedpoc + h->delta_poc[0];
1521 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1522
1523 if(s->picture_structure == PICT_FRAME)
1524 field_poc[1] += h->delta_poc[1];
1525 }else{
b78a6baa 1526 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 1527
b78a6baa
MN
1528 if(!h->nal_ref_idc)
1529 poc--;
5710b371 1530
0da71265
MN
1531 field_poc[0]= poc;
1532 field_poc[1]= poc;
1533 }
115329f1 1534
357282c6 1535 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 1536 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 1537 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 1538 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 1539 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
1540
1541 return 0;
1542}
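/* Editorial worked example, not from the original source, for poc_type == 0:
 * with log2_max_poc_lsb == 8 (max_poc_lsb == 256), prev_poc_msb == 0 and
 * prev_poc_lsb == 250, a slice carrying poc_lsb == 2 satisfies
 * "poc_lsb < prev_poc_lsb && prev_poc_lsb - poc_lsb >= max_poc_lsb/2"
 * (248 >= 128), so poc_msb becomes 0 + 256 and the picture POC is
 * 256 + 2 = 258: the lsb counter wrapped around and the msb keeps the POCs
 * comparable across the wrap. */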
1543
b41c1db3
1544
1545/**
1546 * initialize scan tables
1547 */
1548static void init_scan_tables(H264Context *h){
b41c1db3 1549 int i;
ca32f7f2 1550 for(i=0; i<16; i++){
b41c1db3 1551#define T(x) (x>>2) | ((x<<2) & 0xF)
ca32f7f2
JGG
1552 h->zigzag_scan[i] = T(zigzag_scan[i]);
1553 h-> field_scan[i] = T( field_scan[i]);
b41c1db3 1554#undef T
b41c1db3 1555 }
ca32f7f2 1556 for(i=0; i<64; i++){
b41c1db3 1557#define T(x) (x>>3) | ((x&7)<<3)
ca32f7f2
JGG
1558 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
1559 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1560 h->field_scan8x8[i] = T(field_scan8x8[i]);
1561 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
b41c1db3 1562#undef T
b41c1db3
1563 }
1564 if(h->sps.transform_bypass){ //FIXME same ugly
1565 h->zigzag_scan_q0 = zigzag_scan;
45beb850 1566 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
1567 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1568 h->field_scan_q0 = field_scan;
1569 h->field_scan8x8_q0 = field_scan8x8;
1570 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1571 }else{
1572 h->zigzag_scan_q0 = h->zigzag_scan;
1573 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1574 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1575 h->field_scan_q0 = h->field_scan;
1576 h->field_scan8x8_q0 = h->field_scan8x8;
1577 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
1578 }
1579}
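/* Editorial note, not from the original source: the T() macros swap the row
 * and column fields packed into each scan entry (2 bits each for the 4x4
 * tables, 3 bits each for the 8x8 ones), i.e. they transpose the scan orders
 * given in the spec, presumably to match the transposed coefficient layout
 * used elsewhere in this decoder. Example for the 4x4 case: T(6), with
 * 6 == 2 + 1*4 (column 2, row 1), gives (6>>2) | ((6<<2) & 0xF) == 1 | 8 == 9
 * == 1 + 2*4 (column 1, row 2). */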
afebe2f7 1580
256299d3
MN
1581static void field_end(H264Context *h){
1582 MpegEncContext * const s = &h->s;
1583 AVCodecContext * const avctx= s->avctx;
1584 s->mb_y= 0;
1585
1586 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1587 s->current_picture_ptr->pict_type= s->pict_type;
1588
1589 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1590 ff_vdpau_h264_set_reference_frames(s);
1591
1592 if(!s->dropable) {
ea6f00c4 1593 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
1594 h->prev_poc_msb= h->poc_msb;
1595 h->prev_poc_lsb= h->poc_lsb;
1596 }
1597 h->prev_frame_num_offset= h->frame_num_offset;
1598 h->prev_frame_num= h->frame_num;
1599
1600 if (avctx->hwaccel) {
1601 if (avctx->hwaccel->end_frame(avctx) < 0)
1602 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1603 }
1604
1605 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1606 ff_vdpau_h264_picture_complete(s);
1607
1608 /*
1609 * FIXME: Error handling code does not seem to support interlaced
1610 * when slices span multiple rows
1611 * The ff_er_add_slice calls don't work right for bottom
1612 * fields; they cause massive erroneous error concealment.
1613 * Error marking covers both fields (top and bottom).
1614 * This causes a mismatched s->error_count
1615 * and a bad error table. Further, the error count goes to
1616 * INT_MAX when called for bottom field, because mb_y is
1617 * past end by one (caller's fault) and resync_mb_y != 0
1618 * causes problems for the first MB line, too.
1619 */
1620 if (!FIELD_PICTURE)
1621 ff_er_frame_end(s);
1622
1623 MPV_frame_end(s);
d225a1e2
MN
1624
1625 h->current_slice=0;
256299d3
MN
1626}
1627
afebe2f7 1628/**
49bd8e4b 1629 * Replicate H264 "master" context to thread contexts.
afebe2f7
1630 */
1631static void clone_slice(H264Context *dst, H264Context *src)
1632{
1633 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1634 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1635 dst->s.current_picture = src->s.current_picture;
1636 dst->s.linesize = src->s.linesize;
1637 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 1638 dst->s.first_field = src->s.first_field;
afebe2f7
1639
1640 dst->prev_poc_msb = src->prev_poc_msb;
1641 dst->prev_poc_lsb = src->prev_poc_lsb;
1642 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1643 dst->prev_frame_num = src->prev_frame_num;
1644 dst->short_ref_count = src->short_ref_count;
1645
1646 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1647 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1648 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1649 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
1650
1651 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1652 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
1653}
1654
0da71265 1655/**
fe9a3fbe
JG
1656 * computes profile from profile_idc and constraint_set?_flags
1657 *
1658 * @param sps SPS
1659 *
1660 * @return profile as defined by FF_PROFILE_H264_*
1661 */
1662int ff_h264_get_profile(SPS *sps)
1663{
1664 int profile = sps->profile_idc;
1665
1666 switch(sps->profile_idc) {
1667 case FF_PROFILE_H264_BASELINE:
1668 // constraint_set1_flag set to 1
1669 profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
1670 break;
1671 case FF_PROFILE_H264_HIGH_10:
1672 case FF_PROFILE_H264_HIGH_422:
1673 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
1674 // constraint_set3_flag set to 1
1675 profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
1676 break;
1677 }
1678
1679 return profile;
1680}
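/* Editorial example, not from the original source: an SPS with
 * profile_idc == 66 (Baseline) and constraint_set1_flag set is reported as
 * 66 | FF_PROFILE_H264_CONSTRAINED, i.e. Constrained Baseline; an SPS with
 * profile_idc == 110 (High 10) and constraint_set3_flag set becomes
 * 110 | FF_PROFILE_H264_INTRA, i.e. the High 10 Intra profile. */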
1681
1682/**
0da71265 1683 * decodes a slice header.
9c852bcf 1684 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
1685 *
1686 * @param h h264context
1687 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1688 *
d9526386 1689 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 1690 */
afebe2f7 1691static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 1692 MpegEncContext * const s = &h->s;
12d96de3 1693 MpegEncContext * const s0 = &h0->s;
88e7a4d1 1694 unsigned int first_mb_in_slice;
ac658be5 1695 unsigned int pps_id;
0da71265 1696 int num_ref_idx_active_override_flag;
41f5c62f 1697 unsigned int slice_type, tmp, i, j;
0bf79634 1698 int default_ref_list_done = 0;
12d96de3 1699 int last_pic_structure;
0da71265 1700
2f944356 1701 s->dropable= h->nal_ref_idc == 0;
0da71265 1702
cf653d08
JD
1703 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1704 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1705 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1706 }else{
1707 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1708 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1709 }
1710
0da71265
MN
1711 first_mb_in_slice= get_ue_golomb(&s->gb);
1712
d225a1e2
MN
1713 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1714 if(h0->current_slice && FIELD_PICTURE){
1715 field_end(h);
1716 }
1717
afebe2f7 1718 h0->current_slice = 0;
12d96de3 1719 if (!s0->first_field)
f6e3c460 1720 s->current_picture_ptr= NULL;
66a4b2c1
MN
1721 }
1722
9963b332 1723 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 1724 if(slice_type > 9){
9b879566 1725 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 1726 return -1;
0da71265 1727 }
0bf79634
LLL
1728 if(slice_type > 4){
1729 slice_type -= 5;
0da71265
MN
1730 h->slice_type_fixed=1;
1731 }else
1732 h->slice_type_fixed=0;
115329f1 1733
ee2a957f 1734 slice_type= golomb_to_pict_type[ slice_type ];
975a1447 1735 if (slice_type == AV_PICTURE_TYPE_I
afebe2f7 1736 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
1737 default_ref_list_done = 1;
1738 }
1739 h->slice_type= slice_type;
e3e6f18f 1740 h->slice_type_nos= slice_type & 3;
0bf79634 1741
1412060e 1742 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
115329f1 1743
0da71265 1744 pps_id= get_ue_golomb(&s->gb);
ac658be5 1745 if(pps_id>=MAX_PPS_COUNT){
9b879566 1746 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
1747 return -1;
1748 }
afebe2f7 1749 if(!h0->pps_buffers[pps_id]) {
a0f80050 1750 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
1751 return -1;
1752 }
afebe2f7 1753 h->pps= *h0->pps_buffers[pps_id];
8b92b792 1754
afebe2f7 1755 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 1756 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
1757 return -1;
1758 }
afebe2f7 1759 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 1760
fe9a3fbe 1761 s->avctx->profile = ff_h264_get_profile(&h->sps);
fa37cf0d 1762 s->avctx->level = h->sps.level_idc;
6752a3cc 1763 s->avctx->refs = h->sps.ref_frame_count;
b08e38e8 1764
50c21814 1765 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 1766 h->dequant_coeff_pps = pps_id;
239ea04c
LM
1767 init_dequant_tables(h);
1768 }
115329f1 1769
0da71265 1770 s->mb_width= h->sps.mb_width;
6867a90b 1771 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 1772
bf4665ee 1773 h->b_stride= s->mb_width*4;
0da71265 1774
faf3dfb9 1775 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 1776 if(h->sps.frame_mbs_only_flag)
faf3dfb9 1777 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 1778 else
00d1e96b 1779 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 7);
115329f1
DB
1780
1781 if (s->context_initialized
5388f0b4
JK
1782 && ( s->width != s->avctx->width || s->height != s->avctx->height
1783 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
afebe2f7
1784 if(h != h0)
1785 return -1; // width / height changed during parallelized decoding
91078926 1786 free_tables(h, 0);
ff7f75e1 1787 flush_dpb(s->avctx);
0da71265
MN
1788 MPV_common_end(s);
1789 }
1790 if (!s->context_initialized) {
afebe2f7
1791 if(h != h0)
1792 return -1; // we can't (re-)initialize the context during parallel decoding
f3bdc3da
RD
1793
1794 avcodec_set_dimensions(s->avctx, s->width, s->height);
1795 s->avctx->sample_aspect_ratio= h->sps.sar;
cfa5a81e 1796 av_assert0(s->avctx->sample_aspect_ratio.den);
f3bdc3da 1797
c4dffe7e
DC
1798 if(h->sps.video_signal_type_present_flag){
1799 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1800 if(h->sps.colour_description_present_flag){
1801 s->avctx->color_primaries = h->sps.color_primaries;
1802 s->avctx->color_trc = h->sps.color_trc;
1803 s->avctx->colorspace = h->sps.colorspace;
1804 }
1805 }
1806
f3bdc3da 1807 if(h->sps.timing_info_present_flag){
3102d180 1808 int64_t den= h->sps.time_scale;
055a6aa7 1809 if(h->x264_build < 44U)
3102d180 1810 den *= 2;
f3bdc3da 1811 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180 1812 h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da 1813 }
0435fb16 1814 s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
9ad7dfc1
BC
1815 s->avctx->codec->pix_fmts ?
1816 s->avctx->codec->pix_fmts :
0435fb16
BC
1817 s->avctx->color_range == AVCOL_RANGE_JPEG ?
1818 hwaccel_pixfmt_list_h264_jpeg_420 :
1819 ff_hwaccel_pixfmt_list_420);
f3bdc3da
RD
1820 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1821
0da71265
MN
1822 if (MPV_common_init(s) < 0)
1823 return -1;
12d96de3 1824 s->first_field = 0;
b19d493f 1825 h->prev_interlaced_frame = 1;
115329f1 1826
b41c1db3 1827 init_scan_tables(h);
903d58f6 1828 ff_h264_alloc_tables(h);
0da71265 1829
afebe2f7
1830 for(i = 1; i < s->avctx->thread_count; i++) {
1831 H264Context *c;
1832 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 1833 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7 1834 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
7a5c850b 1835 c->h264dsp = h->h264dsp;
afebe2f7
1836 c->sps = h->sps;
1837 c->pps = h->pps;
1838 init_scan_tables(c);
145061a1 1839 clone_tables(c, h, i);
afebe2f7
1840 }
1841
1842 for(i = 0; i < s->avctx->thread_count; i++)
1843 if(context_init(h->thread_context[i]) < 0)
1844 return -1;
0da71265
MN
1845 }
1846
0da71265
MN
1847 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1848
5d18eaad 1849 h->mb_mbaff = 0;
6ba71fc4 1850 h->mb_aff_frame = 0;
12d96de3 1851 last_pic_structure = s0->picture_structure;
0da71265
MN
1852 if(h->sps.frame_mbs_only_flag){
1853 s->picture_structure= PICT_FRAME;
1854 }else{
6ba71fc4 1855 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 1856 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 1857 } else {
0da71265 1858 s->picture_structure= PICT_FRAME;
6ba71fc4 1859 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 1860 }
0da71265 1861 }
44e9dcf1 1862 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
1863
1864 if(h0->current_slice == 0){
26b86e47
MN
1865 while(h->frame_num != h->prev_frame_num &&
1866 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
4dece8c7 1867 Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
bb943bb8 1868 av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 1869 if (ff_h264_frame_start(h) < 0)
66e6038c 1870 return -1;
26b86e47
MN
1871 h->prev_frame_num++;
1872 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1873 s->current_picture_ptr->frame_num= h->prev_frame_num;
3d542120
MN
1874 ff_generate_sliding_window_mmcos(h);
1875 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
e2983d6e
JGG
1876 /* Error concealment: if a ref is missing, copy the previous ref in its place.
1877 * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
1878 * about there being no actual duplicates.
1879 * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're
1880 * concealing a lost frame, this probably isn't noticeable by comparison, but it should
1881 * be fixed. */
4dece8c7
JGG
1882 if (h->short_ref_count) {
1883 if (prev) {
1884 av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
1885 (const uint8_t**)prev->data, prev->linesize,
5dd7f994 1886 s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
4dece8c7
JGG
1887 h->short_ref[0]->poc = prev->poc+2;
1888 }
1889 h->short_ref[0]->frame_num = h->prev_frame_num;
1890 }
26b86e47
MN
1891 }
1892
12d96de3
JD
1893 /* See if we have a decoded first field looking for a pair... */
1894 if (s0->first_field) {
1895 assert(s0->current_picture_ptr);
1896 assert(s0->current_picture_ptr->data[0]);
1897 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1898
1899 /* figure out if we have a complementary field pair */
1900 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1901 /*
1902 * Previous field is unmatched. Don't display it, but let it
1903 * remain for reference if marked as such.
1904 */
1905 s0->current_picture_ptr = NULL;
1906 s0->first_field = FIELD_PICTURE;
1907
1908 } else {
1909 if (h->nal_ref_idc &&
1910 s0->current_picture_ptr->reference &&
1911 s0->current_picture_ptr->frame_num != h->frame_num) {
1912 /*
1913 * This and previous field were reference, but had
1914 * different frame_nums. Consider this field first in
1915 * pair. Throw away previous field except for reference
1916 * purposes.
1917 */
1918 s0->first_field = 1;
1919 s0->current_picture_ptr = NULL;
1920
1921 } else {
1922 /* Second field in complementary pair */
1923 s0->first_field = 0;
1924 }
1925 }
1926
1927 } else {
1928 /* Frame or first field in a potentially complementary pair */
1929 assert(!s0->current_picture_ptr);
1930 s0->first_field = FIELD_PICTURE;
1931 }
1932
903d58f6 1933 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 1934 s0->first_field = 0;
2ddcf84b 1935 return -1;
12d96de3 1936 }
2ddcf84b
JD
1937 }
1938 if(h != h0)
1939 clone_slice(h, h0);
1940
1941 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1942
88e7a4d1 1943 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 1944 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
1945 first_mb_in_slice >= s->mb_num){
1946 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
1947 return -1;
1948 }
88e7a4d1 1949 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
1950 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1951 if (s->picture_structure == PICT_BOTTOM_FIELD)
1952 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 1953 assert(s->mb_y < s->mb_height);
115329f1 1954
0da71265
MN
1955 if(s->picture_structure==PICT_FRAME){
1956 h->curr_pic_num= h->frame_num;
1957 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1958 }else{
f57e2af6 1959 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
1960 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1961 }
115329f1 1962
0da71265 1963 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 1964 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 1965 }
115329f1 1966
0da71265
MN
1967 if(h->sps.poc_type==0){
1968 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 1969
0da71265
MN
1970 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1971 h->delta_poc_bottom= get_se_golomb(&s->gb);
1972 }
1973 }
115329f1 1974
0da71265
MN
1975 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
1976 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 1977
0da71265
MN
1978 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
1979 h->delta_poc[1]= get_se_golomb(&s->gb);
1980 }
115329f1 1981
0da71265 1982 init_poc(h);
115329f1 1983
0da71265
MN
1984 if(h->pps.redundant_pic_cnt_present){
1985 h->redundant_pic_count= get_ue_golomb(&s->gb);
1986 }
1987
1412060e 1988 //set defaults, might be overridden a few lines later
0da71265
MN
1989 h->ref_count[0]= h->pps.ref_count[0];
1990 h->ref_count[1]= h->pps.ref_count[1];
1991
975a1447
SS
1992 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
1993 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
0da71265
MN
1994 h->direct_spatial_mv_pred= get_bits1(&s->gb);
1995 }
1996 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 1997
0da71265
MN
1998 if(num_ref_idx_active_override_flag){
1999 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
975a1447 2000 if(h->slice_type_nos==AV_PICTURE_TYPE_B)
0da71265
MN
2001 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
2002
187696fa 2003 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 2004 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 2005 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
2006 return -1;
2007 }
2008 }
975a1447 2009 if(h->slice_type_nos == AV_PICTURE_TYPE_B)
187696fa
MN
2010 h->list_count= 2;
2011 else
2012 h->list_count= 1;
2013 }else
2014 h->list_count= 0;
0da71265 2015
0bf79634 2016 if(!default_ref_list_done){
ea6f00c4 2017 ff_h264_fill_default_ref_list(h);
0da71265
MN
2018 }
2019
975a1447 2020 if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 2021 return -1;
0da71265 2022
975a1447 2023 if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
07dff5c7 2024 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 2025 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7 2026 }
975a1447 2027 if(h->slice_type_nos==AV_PICTURE_TYPE_B){
07dff5c7 2028 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 2029 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
2030 }
2031
975a1447
SS
2032 if( (h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P )
2033 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
0da71265 2034 pred_weight_table(h);
975a1447 2035 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
1052b76f 2036 implicit_weight_table(h, -1);
1a29c6a0 2037 }else {
9f2d1b4f 2038 h->use_weight = 0;
cb99c652
GB
2039 for (i = 0; i < 2; i++) {
2040 h->luma_weight_flag[i] = 0;
2041 h->chroma_weight_flag[i] = 0;
2042 }
2043 }
115329f1 2044
2ddcf84b 2045 if(h->nal_ref_idc)
ea6f00c4 2046 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 2047
1052b76f 2048 if(FRAME_MBAFF){
ea6f00c4 2049 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 2050
975a1447 2051 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
1052b76f
MN
2052 implicit_weight_table(h, 0);
2053 implicit_weight_table(h, 1);
2054 }
2055 }
2056
975a1447 2057 if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
943f69a6
MN
2058 ff_h264_direct_dist_scale_factor(h);
2059 ff_h264_direct_ref_list_init(h);
8f56e219 2060
975a1447 2061 if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
9963b332 2062 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2063 if(tmp > 2){
2064 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
2065 return -1;
2066 }
2067 h->cabac_init_idc= tmp;
2068 }
e5017ab8
LA
2069
2070 h->last_qscale_diff = 0;
88e7a4d1
MN
2071 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2072 if(tmp>51){
2073 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2074 return -1;
2075 }
88e7a4d1 2076 s->qscale= tmp;
4691a77d
2077 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2078 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2079 //FIXME qscale / qp ... stuff
975a1447 2080 if(h->slice_type == AV_PICTURE_TYPE_SP){
1df1df0b 2081 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2082 }
975a1447 2083 if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
1df1df0b 2084 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2085 }
2086
53c05b1e 2087 h->deblocking_filter = 1;
0c32e19d
MN
2088 h->slice_alpha_c0_offset = 52;
2089 h->slice_beta_offset = 52;
0da71265 2090 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2091 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2092 if(tmp > 2){
2093 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2094 return -1;
2095 }
2096 h->deblocking_filter= tmp;
115329f1 2097 if(h->deblocking_filter < 2)
53c05b1e
MN
2098 h->deblocking_filter^= 1; // 1<->0
2099
2100 if( h->deblocking_filter ) {
0c32e19d
MN
2101 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2102 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2103 if( h->slice_alpha_c0_offset > 104U
2104 || h->slice_beta_offset > 104U){
2105 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2106 return -1;
2107 }
0da71265 2108 }
980a82b7 2109 }
afebe2f7 2110
61858a76 2111 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
975a1447
SS
2112 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
2113 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == AV_PICTURE_TYPE_B)
61858a76
RD
2114 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2115 h->deblocking_filter= 0;
2116
afebe2f7 2117 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2118 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2119 /* Cheat slightly for speed:
5d81d641 2120 Do not bother to deblock across slices. */
ec970c21
2121 h->deblocking_filter = 2;
2122 } else {
7ae94d52
2123 h0->max_contexts = 1;
2124 if(!h0->single_decode_warning) {
2125 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2126 h0->single_decode_warning = 1;
2127 }
2128 if(h != h0)
2129 return 1; // deblocking switched inside frame
ec970c21 2130 }
afebe2f7 2131 }
0c32e19d 2132 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7 2133
0da71265
MN
2134#if 0 //FMO
2135 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2136 slice_group_change_cycle= get_bits(&s->gb, ?);
2137#endif
2138
afebe2f7
2139 h0->last_slice_type = slice_type;
2140 h->slice_num = ++h0->current_slice;
b735aeea
MN
2141 if(h->slice_num >= MAX_SLICES){
2142 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2143 }
5175b937 2144
c32867b5 2145 for(j=0; j<2; j++){
6d7e6b26 2146 int id_list[16];
b735aeea 2147 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
MN
2148 for(i=0; i<16; i++){
2149 id_list[i]= 60;
2150 if(h->ref_list[j][i].data[0]){
2151 int k;
2152 uint8_t *base= h->ref_list[j][i].base[0];
2153 for(k=0; k<h->short_ref_count; k++)
2154 if(h->short_ref[k]->base[0] == base){
2155 id_list[i]= k;
2156 break;
2157 }
2158 for(k=0; k<h->long_ref_count; k++)
2159 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2160 id_list[i]= h->short_ref_count + k;
2161 break;
2162 }
2163 }
2164 }
2165
c32867b5
MN
2166 ref2frm[0]=
2167 ref2frm[1]= -1;
d50cdd82 2168 for(i=0; i<16; i++)
6d7e6b26 2169 ref2frm[i+2]= 4*id_list[i]
c32867b5 2170 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2171 ref2frm[18+0]=
2172 ref2frm[18+1]= -1;
2173 for(i=16; i<48; i++)
6d7e6b26 2174 ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82 2175 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2176 }
2177
5d18eaad 2178 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2179 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2180
0da71265 2181 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2182 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2183 h->slice_num,
2184 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2185 first_mb_in_slice,
6209669d 2186 av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2187 pps_id, h->frame_num,
2188 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2189 h->ref_count[0], h->ref_count[1],
2190 s->qscale,
0c32e19d 2191 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f 2192 h->use_weight,
4806b922 2193 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
975a1447 2194 h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2195 );
2196 }
2197
2198 return 0;
2199}
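/* Editorial note, not from the original source: slice_alpha_c0_offset and
 * slice_beta_offset are kept in a biased form, 52 + 2*offset_div2 as parsed
 * from the bitstream, so they can be added to QP-derived table indices
 * without going negative. That is why the FF_DEBUG_PICT_INFO log above prints
 * offset/2 - 26 to recover the original div2 values: e.g. a bitstream
 * slice_alpha_c0_offset_div2 of -2 is stored as 52 + 2*(-2) == 48 and logged
 * as 48/2 - 26 == -2. qp_thresh is likewise computed from the biased values. */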
2200
0dc343d4 2201int ff_h264_get_slice_type(const H264Context *h)
75dd6938
LA
2202{
2203 switch (h->slice_type) {
975a1447
SS
2204 case AV_PICTURE_TYPE_P: return 0;
2205 case AV_PICTURE_TYPE_B: return 1;
2206 case AV_PICTURE_TYPE_I: return 2;
2207 case AV_PICTURE_TYPE_SP: return 3;
2208 case AV_PICTURE_TYPE_SI: return 4;
75dd6938
LA
2209 default: return -1;
2210 }
2211}
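/* Editorial note, not from the original source: the values returned here are
 * the raw slice_type codes of the H.264 syntax (0 P, 1 B, 2 I, 3 SP, 4 SI)
 * rather than the AV_PICTURE_TYPE_* constants used internally; this is the
 * form expected by hardware acceleration backends (e.g. the VAAPI slice
 * parameter buffers). */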
2212
d02bb3ec
DB
2213/**
2214 *
2215 * @return non-zero if the loop filter can be skipped
2216 */
2217static int fill_filter_caches(H264Context *h, int mb_type){
2218 MpegEncContext * const s = &h->s;
2219 const int mb_xy= h->mb_xy;
2220 int top_xy, left_xy[2];
2221 int top_type, left_type[2];
2222
2223 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
2224
2225 //FIXME deblocking could skip the intra and nnz parts.
2226
2227 /* Wow, what a mess, why didn't they simplify the interlacing & intra
2228 * stuff, I can't imagine that these complex rules are worth it. */
2229
2230 left_xy[1] = left_xy[0] = mb_xy-1;
2231 if(FRAME_MBAFF){
2232 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
2233 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
2234 if(s->mb_y&1){
2235 if (left_mb_field_flag != curr_mb_field_flag) {
2236 left_xy[0] -= s->mb_stride;
2237 }
2238 }else{
2239 if(curr_mb_field_flag){
2240 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
2241 }
2242 if (left_mb_field_flag != curr_mb_field_flag) {
2243 left_xy[1] += s->mb_stride;
2244 }
2245 }
2246 }
2247
2248 h->top_mb_xy = top_xy;
2249 h->left_mb_xy[0] = left_xy[0];
2250 h->left_mb_xy[1] = left_xy[1];
2251 {
2252 //for sufficiently low qp, filtering wouldn't do anything
2253 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
2254 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
2255 int qp = s->current_picture.qscale_table[mb_xy];
2256 if(qp <= qp_thresh
2257 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
2258 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
2259 if(!FRAME_MBAFF)
2260 return 1;
2261 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
2262 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
2263 return 1;
2264 }
2265 }
2266
2267 top_type = s->current_picture.mb_type[top_xy] ;
2268 left_type[0] = s->current_picture.mb_type[left_xy[0]];
2269 left_type[1] = s->current_picture.mb_type[left_xy[1]];
2270 if(h->deblocking_filter == 2){
2271 if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
2272 if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
2273 }else{
2274 if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
2275 if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
2276 }
2277 h->top_type = top_type ;
2278 h->left_type[0]= left_type[0];
2279 h->left_type[1]= left_type[1];
2280
2281 if(IS_INTRA(mb_type))
2282 return 0;
2283
2284 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
2285 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
2286 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
2287 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
2288 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
2289
2290 h->cbp= h->cbp_table[mb_xy];
2291
2292 {
2293 int list;
2294 for(list=0; list<h->list_count; list++){
2295 int8_t *ref;
2296 int y, b_stride;
2297 int16_t (*mv_dst)[2];
2298 int16_t (*mv_src)[2];
2299
2300 if(!USES_LIST(mb_type, list)){
2301 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
2302 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2303 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2304 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2305 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2306 continue;
2307 }
2308
2309 ref = &s->current_picture.ref_index[list][4*mb_xy];
2310 {
2311 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2312 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2313 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2314 ref += 2;
2315 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2316 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2317 }
2318
2319 b_stride = h->b_stride;
2320 mv_dst = &h->mv_cache[list][scan8[0]];
2321 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
2322 for(y=0; y<4; y++){
2323 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
2324 }
2325
2326 }
2327 }
2328
2329
2330/*
23310 . T T. T T T T
23321 L . .L . . . .
23332 L . .L . . . .
23343 . T TL . . . .
23354 L . .L . . . .
23365 L . .. . . . .
2337*/
2338//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
2339 if(top_type){
2340 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
2341 }
2342
2343 if(left_type[0]){
2344 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
2345 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
2346 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
2347 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
2348 }
2349
2350 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
2351 if(!CABAC && h->pps.transform_8x8_mode){
2352 if(IS_8x8DCT(top_type)){
2353 h->non_zero_count_cache[4+8*0]=
2354 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
2355 h->non_zero_count_cache[6+8*0]=
2356 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
2357 }
2358 if(IS_8x8DCT(left_type[0])){
2359 h->non_zero_count_cache[3+8*1]=
2360 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
2361 }
2362 if(IS_8x8DCT(left_type[1])){
2363 h->non_zero_count_cache[3+8*3]=
2364 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
2365 }
2366
2367 if(IS_8x8DCT(mb_type)){
2368 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
2369 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
2370
2371 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
2372 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
2373
2374 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
2375 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
2376
2377 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
2378 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
2379 }
2380 }
2381
2382 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
2383 int list;
2384 for(list=0; list<h->list_count; list++){
2385 if(USES_LIST(top_type, list)){
2386 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
2387 const int b8_xy= 4*top_xy + 2;
2388 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2389 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
2390 h->ref_cache[list][scan8[0] + 0 - 1*8]=
2391 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
2392 h->ref_cache[list][scan8[0] + 2 - 1*8]=
2393 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
2394 }else{
2395 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
2396 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2397 }
2398
2399 if(!IS_INTERLACED(mb_type^left_type[0])){
2400 if(USES_LIST(left_type[0], list)){
2401 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
2402 const int b8_xy= 4*left_xy[0] + 1;
2403 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2404 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
2405 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
2406 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
2407 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
2408 h->ref_cache[list][scan8[0] - 1 + 0 ]=
2409 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
2410 h->ref_cache[list][scan8[0] - 1 +16 ]=
2411 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
2412 }else{
2413 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
2414 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
2415 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
2416 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
2417 h->ref_cache[list][scan8[0] - 1 + 0 ]=
2418 h->ref_cache[list][scan8[0] - 1 + 8 ]=
2419 h->ref_cache[list][scan8[0] - 1 + 16 ]=
2420 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
2421 }
2422 }
2423 }
2424 }
2425
2426 return 0;
2427}
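/* Editorial note, not from the original source: the early "return 1" above is
 * the low-QP shortcut. With default slice offsets qp_thresh works out to
 * 15 + 52 - 52 - 0 == 15, and for QP values of 15 or less (the neighbour
 * checks use the averaged (qp + neighbour_qp + 1) >> 1) the alpha and beta
 * deblocking thresholds are all zero, so the loop filter could not change a
 * single sample and the whole macroblock is skipped. */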
2428
c988f975
MN
2429static void loop_filter(H264Context *h){
2430 MpegEncContext * const s = &h->s;
2431 uint8_t *dest_y, *dest_cb, *dest_cr;
2432 int linesize, uvlinesize, mb_x, mb_y;
2433 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2434 const int old_slice_type= h->slice_type;
2435
2436 if(h->deblocking_filter) {
2437 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2438 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
78998bf2 2439 int mb_xy, mb_type;
c988f975
MN
2440 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2441 h->slice_num= h->slice_table[mb_xy];
2442 mb_type= s->current_picture.mb_type[mb_xy];
2443 h->list_count= h->list_counts[mb_xy];
c988f975
MN
2444
2445 if(FRAME_MBAFF)
2446 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
2447
c988f975
MN
2448 s->mb_x= mb_x;
2449 s->mb_y= mb_y;
2450 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2451 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2452 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2453 //FIXME simplify above
2454
2455 if (MB_FIELD) {
2456 linesize = h->mb_linesize = s->linesize * 2;
2457 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2458 if(mb_y&1){ //FIXME move out of this function?
2459 dest_y -= s->linesize*15;
2460 dest_cb-= s->uvlinesize*7;
2461 dest_cr-= s->uvlinesize*7;
2462 }
2463 } else {
2464 linesize = h->mb_linesize = s->linesize;
2465 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2466 }
77d40dce 2467 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
aaa995d7 2468 if(fill_filter_caches(h, mb_type))
44a5e7b6 2469 continue;
c988f975
MN
2470 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2471 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2472
77d40dce 2473 if (FRAME_MBAFF) {
c988f975
MN
2474 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2475 } else {
2476 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2477 }
2478 }
2479 }
2480 }
2481 h->slice_type= old_slice_type;
2482 s->mb_x= 0;
2483 s->mb_y= end_mb_y - FRAME_MBAFF;
f4b8b825
MN
2484 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2485 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
c988f975
MN
2486}
2487
69a28f3e
MN
2488static void predict_field_decoding_flag(H264Context *h){
2489 MpegEncContext * const s = &h->s;
2490 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2491 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
2492 ? s->current_picture.mb_type[mb_xy-1]
2493 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
2494 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
2495 : 0;
2496 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
2497}
2498
3a84713a
RS
2499static int decode_slice(struct AVCodecContext *avctx, void *arg){
2500 H264Context *h = *(void**)arg;
0da71265
MN
2501 MpegEncContext * const s = &h->s;
2502 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2503
2504 s->mb_skip_run= -1;
0da71265 2505
89db0bae 2506 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b 2507 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae 2508
e5017ab8 2509 if( h->pps.cabac ) {
e5017ab8
LA
2510 /* realign */
2511 align_get_bits( &s->gb );
2512
2513 /* init cabac */
d61c4e73 2514 ff_init_cabac_states( &h->cabac);
e5017ab8
LA
2515 ff_init_cabac_decoder( &h->cabac,
2516 s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15 2517 (get_bits_left(&s->gb) + 7)/8);
cc51b282
MN
2518
2519 ff_h264_init_cabac_states(h);
95c26348 2520
e5017ab8 2521 for(;;){
851ded89 2522//START_TIMER
cc51b282 2523 int ret = ff_h264_decode_mb_cabac(h);
6867a90b 2524 int eos;
851ded89 2525//STOP_TIMER("decode_mb_cabac")
0da71265 2526
903d58f6 2527 if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265 2528
5d18eaad 2529 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
LA
2530 s->mb_y++;
2531
cc51b282 2532 ret = ff_h264_decode_mb_cabac(h);
e5017ab8 2533
903d58f6 2534 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2535 s->mb_y--;
2536 }
6867a90b 2537 eos = get_cabac_terminate( &h->cabac );
e5017ab8 2538
3566042a
MN
2539 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
2540 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2541 return 0;
2542 }
5659b509 2543 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af 2544 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
LA
2545 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2546 return -1;
2547 }
2548
2549 if( ++s->mb_x >= s->mb_width ) {
2550 s->mb_x = 0;
c988f975 2551 loop_filter(h);
e5017ab8 2552 ff_draw_horiz_band(s, 16*s->mb_y, 16);
5175b937 2553 ++s->mb_y;
f3e53d9f 2554 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2555 ++s->mb_y;
69cc3183
MN
2556 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2557 predict_field_decoding_flag(h);
6867a90b 2558 }
0da71265 2559 }
0da71265 2560
e5017ab8 2561 if( eos || s->mb_y >= s->mb_height ) {
a9c9a240 2562 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8 2563 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0da71265 2564 return 0;
e5017ab8 2565 }
e5017ab8
LA
2566 }
2567
2568 } else {
2569 for(;;){
e1e94902 2570 int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2571
903d58f6 2572 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8 2573
5d18eaad 2574 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8 2575 s->mb_y++;
e1e94902 2576 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2577
903d58f6 2578 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2579 s->mb_y--;
2580 }
2581
2582 if(ret<0){
2583 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2584 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2585
2586 return -1;
2587 }
e5017ab8
LA
2588
2589 if(++s->mb_x >= s->mb_width){
2590 s->mb_x=0;
c988f975 2591 loop_filter(h);
e5017ab8 2592 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6867a90b 2593 ++s->mb_y;
f3e53d9f 2594 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2595 ++s->mb_y;
69cc3183
MN
2596 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2597 predict_field_decoding_flag(h);
6867a90b
LLL
2598 }
2599 if(s->mb_y >= s->mb_height){
a9c9a240 2600 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2601
2602 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2603 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2604
2605 return 0;
2606 }else{
2607 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2608
2609 return -1;
2610 }
2611 }
2612 }
2613
2614 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240 2615 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2616 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2617 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2618
2619 return 0;
2620 }else{
2621 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2622
2623 return -1;
2624 }
2625 }
0da71265
MN
2626 }
2627 }
e5017ab8 2628
0da71265
MN
2629#if 0
2630 for(;s->mb_y < s->mb_height; s->mb_y++){
2631 for(;s->mb_x < s->mb_width; s->mb_x++){
2632 int ret= decode_mb(h);
115329f1 2633
903d58f6 2634 ff_h264_hl_decode_mb(h);
0da71265
MN
2635
2636 if(ret<0){
267f7edc 2637 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2638 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2639
2640 return -1;
2641 }
115329f1 2642
0da71265
MN
2643 if(++s->mb_x >= s->mb_width){
2644 s->mb_x=0;
2645 if(++s->mb_y >= s->mb_height){
2646 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2647 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2648
2649 return 0;
2650 }else{
2651 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2652
2653 return -1;
2654 }
2655 }
2656 }
115329f1 2657
0da71265
MN
2658 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2659 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2660 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2661
2662 return 0;
2663 }else{
2664 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2665
2666 return -1;
2667 }
2668 }
2669 }
2670 s->mb_x=0;
2671 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2672 }
2673#endif
2674 return -1; //not reached
2675}
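/* Editorial note, not from the original source: in an MBAFF frame the loops
 * above decode macroblock pairs -- after the top MB, s->mb_y is temporarily
 * incremented to decode the bottom MB and then restored, and at the end of a
 * row mb_y advances by two (FIELD_OR_MBAFF_PICTURE). The CABAC path detects
 * the end of the slice via the end_of_slice flag returned by
 * get_cabac_terminate(), while the CAVLC path ends when the bit reader is
 * exhausted with no pending skip run; any mismatch is reported through the
 * error resilience calls. */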
2676
afebe2f7
2677/**
2678 * Call decode_slice() for each context.
2679 *
2680 * @param h h264 master context
2681 * @param context_count number of contexts to execute
2682 */
2683static void execute_decode_slices(H264Context *h, int context_count){
2684 MpegEncContext * const s = &h->s;
2685 AVCodecContext * const avctx= s->avctx;
2686 H264Context *hx;
2687 int i;
2688
40e5d31b
GB
2689 if (s->avctx->hwaccel)
2690 return;
0d3d172f 2691 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2692 return;
afebe2f7 2693 if(context_count == 1) {
74e8b78b 2694 decode_slice(avctx, &h);
afebe2f7
2695 } else {
2696 for(i = 1; i < context_count; i++) {
2697 hx = h->thread_context[i];
047599a4 2698 hx->s.error_recognition = avctx->error_recognition;
afebe2f7
2699 hx->s.error_count = 0;
2700 }
2701
2702 avctx->execute(avctx, (void *)decode_slice,
01418506 2703 h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
2704
2705 /* pull back stuff from slices to master context */
2706 hx = h->thread_context[context_count - 1];
2707 s->mb_x = hx->s.mb_x;
2708 s->mb_y = hx->s.mb_y;
12d96de3
JD
2709 s->dropable = hx->s.dropable;
2710 s->picture_structure = hx->s.picture_structure;
afebe2f7
2711 for(i = 1; i < context_count; i++)
2712 h->s.error_count += h->thread_context[i]->s.error_count;
2713 }
2714}
2715
2716
30317501 2717static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
MN
2718 MpegEncContext * const s = &h->s;
2719 AVCodecContext * const avctx= s->avctx;
2720 int buf_index=0;
afebe2f7
2721 H264Context *hx; ///< thread context
2722 int context_count = 0;
74b14aac 2723 int next_avc= h->is_avc ? 0 : buf_size;
afebe2f7
2724
2725 h->max_contexts = avctx->thread_count;
66a4b2c1 2726 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7 2727 h->current_slice = 0;
12d96de3 2728 if (!s->first_field)
f6e3c460 2729 s->current_picture_ptr= NULL;
9c095463 2730 ff_h264_reset_sei(h);
66a4b2c1
MN
2731 }
2732
0da71265
MN
2733 for(;;){
2734 int consumed;
2735 int dst_length;
2736 int bit_length;
30317501 2737 const uint8_t *ptr;
4770b1b4 2738 int i, nalsize = 0;
afebe2f7 2739 int err;
115329f1 2740
74b14aac 2741 if(buf_index >= next_avc) {
1c48415b
2742 if(buf_index >= buf_size) break;
2743 nalsize = 0;
2744 for(i = 0; i < h->nal_length_size; i++)
2745 nalsize = (nalsize << 8) | buf[buf_index++];
9d252137
BC
2746 if(nalsize <= 0 || nalsize > buf_size - buf_index){
2747 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2748 break;
1c48415b 2749 }
74b14aac 2750 next_avc= buf_index + nalsize;
1c48415b
2751 } else {
2752 // start code prefix search
52255d17 2753 for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
2754 // This should always succeed in the first iteration.
2755 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2756 break;
8b031359 2757 }
115329f1 2758
1c48415b 2759 if(buf_index+3 >= buf_size) break;
115329f1 2760
1c48415b 2761 buf_index+=3;
52255d17 2762 if(buf_index >= next_avc) continue;
1c48415b 2763 }
115329f1 2764
afebe2f7
2765 hx = h->thread_context[context_count];
2766
74b14aac 2767 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429 2768 if (ptr==NULL || dst_length < 0){
ac658be5
FOL
2769 return -1;
2770 }
3566042a
MN
2771 i= buf_index + consumed;
2772 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
2773 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
2774 s->workaround_bugs |= FF_BUG_TRUNCATED;
2775
2776 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
6ac9696e 2777 while(ptr[dst_length - 1] == 0 && dst_length > 0)
c4da83fb 2778 dst_length--;
3566042a 2779 }
1790a5e9 2780 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
MN
2781
2782 if(s->avctx->debug&FF_DEBUG_STARTCODE){
afebe2f7 2783 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
0da71265 2784 }
115329f1 2785
74b14aac 2786 if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d 2787 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1 2788 }
4770b1b4 2789
0da71265
MN
2790 buf_index += consumed;
2791
8ed2ae09 2792 //FIXME do not discard SEI id
e7021c0e 2793 if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
0da71265 2794 continue;
115329f1 2795
afebe2f7
2796 again:
2797 err = 0;
2798 switch(hx->nal_unit_type){
0da71265 2799 case NAL_IDR_SLICE:
afebe2f7
2800 if (h->nal_unit_type != NAL_IDR_SLICE) {
2801 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2802 return -1;
2803 }
3b66c4c5 2804 idr(h); //FIXME ensure we don't lose frames if there is reordering
0da71265 2805 case NAL_SLICE:
afebe2f7
2806 init_get_bits(&hx->s.gb, ptr, bit_length);
2807 hx->intra_gb_ptr=
2808 hx->inter_gb_ptr= &hx->s.gb;
2809 hx->s.data_partitioning = 0;
2810
2811 if((err = decode_slice_header(hx, h)))
2812 break;
2813
3bccd93a
SW
2814 if (h->current_slice == 1) {
2815 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
6026a096 2816 return -1;
3bccd93a
SW
2817 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2818 ff_vdpau_h264_picture_start(s);
6026a096
GB
2819 }
2820
37a558fe
IS
2821 s->current_picture_ptr->key_frame |=
2822 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2823 (h->sei_recovery_frame_cnt >= 0);
8ed2ae09 2824 if(hx->redundant_pic_count==0
afebe2f7 2825 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
975a1447
SS
2826 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
2827 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
369122dd 2828 && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
MN
2829 if(avctx->hwaccel) {
2830 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2831 return -1;
2832 }else
0d3d172f 2833 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd 2834 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
CEH
2835 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2836 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd 2837 }else
f2c214a1 2838 context_count++;
369122dd 2839 }
0da71265
MN
2840 break;
2841 case NAL_DPA:
afebe2f7
2842 init_get_bits(&hx->s.gb, ptr, bit_length);
2843 hx->intra_gb_ptr=
2844 hx->inter_gb_ptr= NULL;
0410ee8f
AS
2845
2846 if ((err = decode_slice_header(hx, h)) < 0)
2847 break;
2848
afebe2f7 2849 hx->s.data_partitioning = 1;
115329f1 2850
0da71265
MN
2851 break;
2852 case NAL_DPB:
afebe2f7
2853 init_get_bits(&hx->intra_gb, ptr, bit_length);
2854 hx->intra_gb_ptr= &hx->intra_gb;
0da71265
MN
2855 break;
2856 case NAL_DPC:
afebe2f7
2857 init_get_bits(&hx->inter_gb, ptr, bit_length);
2858 hx->inter_gb_ptr= &hx->inter_gb;
8b92b792 2859
afebe2f7 2860 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03 2861 && s->context_initialized
afebe2f7 2862 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
975a1447
SS
2863 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
2864 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
e0111b32 2865 && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7 2866 context_count++;
0da71265
MN
2867 break;
2868 case NAL_SEI:
cdd10689 2869 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2870 ff_h264_decode_sei(h);
0da71265
MN
2871 break;
2872 case NAL_SPS:
2873 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2874 ff_h264_decode_seq_parameter_set(h);
115329f1 2875
0da71265
MN
2876 if(s->flags& CODEC_FLAG_LOW_DELAY)
2877 s->low_delay=1;
115329f1 2878
a18030bb
LM
2879 if(avctx->has_b_frames < 2)
2880 avctx->has_b_frames= !s->low_delay;
0da71265
MN
2881 break;
2882 case NAL_PPS:
2883 init_get_bits(&s->gb, ptr, bit_length);
115329f1 2884
1790a5e9 2885 ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
MN
2886
2887 break;
ab470fa7
LM
2888 case NAL_AUD:
2889 case NAL_END_SEQUENCE:
2890 case NAL_END_STREAM:
2891 case NAL_FILLER_DATA:
2892 case NAL_SPS_EXT:
2893 case NAL_AUXILIARY_SLICE:
0da71265 2894 break;
bb270c08 2895 default:
4ad04da2 2896 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1 2897 }
115329f1 2898
afebe2f7
2899 if(context_count == h->max_contexts) {
2900 execute_decode_slices(h, context_count);
2901 context_count = 0;
2902 }
2903
2904 if (err < 0)
2905 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2906 else if(err == 1) {
2907 /* Slice could not be decoded in parallel mode, copy down
2908 * NAL unit stuff to context 0 and restart. Note that
1412060e 2909 * rbsp_buffer is not transferred, but since we no longer
afebe2f7
2910 * run in parallel mode this should not be an issue. */
2911 h->nal_unit_type = hx->nal_unit_type;
2912 h->nal_ref_idc = hx->nal_ref_idc;
2913 hx = h;
2914 goto again;
2915 }
2916 }
2917 if(context_count)
2918 execute_decode_slices(h, context_count);
0da71265
MN
2919 return buf_index;
2920}
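
/* Illustrative sketch only (not used by the decoder): decode_nal_units() above
 * collects decodable slices into per-slice contexts and flushes a batch with
 * execute_decode_slices() whenever context_count reaches h->max_contexts,
 * then flushes the leftover batch after the loop.  The same batching pattern
 * in isolation; batch_slices_sketch() and the decode_batch() callback are
 * hypothetical names, not part of the real API. */
static void batch_slices_sketch(int nb_slices, int max_contexts,
                                void (*decode_batch)(int count))
{
    int i, context_count = 0;

    for (i = 0; i < nb_slices; i++) {
        context_count++;                 /* one more slice is ready to decode */
        if (context_count == max_contexts) {
            decode_batch(context_count); /* batch is full: decode it in parallel */
            context_count = 0;
        }
    }
    if (context_count)                   /* flush the incomplete final batch */
        decode_batch(context_count);
}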
2921
2922/**
3b66c4c5 2923 * Returns the number of bytes consumed for building the current frame.
0da71265
MN
2924 */
2925static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
755bfeab 2926 if(pos==0) pos=1; // avoid infinite loops (I doubt this is needed, but ...)
0da71265
MN
2927 if(pos+10>buf_size) pos=buf_size; // clamp positions within 10 bytes of the end to the full buffer size
2928
2929 return pos;
0da71265
MN
2930}
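
/* Quick illustrative check of the clamping above (sketch only, never called by
 * the decoder; get_consumed_bytes_sketch() is a hypothetical name).  With a
 * 100-byte buffer: position 0 is bumped to 1, a position within 10 bytes of
 * the end is clamped to the full buffer size, and anything else passes
 * through unchanged.  The MpegEncContext argument is unused, so NULL is fine. */
static void get_consumed_bytes_sketch(void)
{
    assert(get_consumed_bytes(NULL,  0, 100) ==   1);
    assert(get_consumed_bytes(NULL, 95, 100) == 100);
    assert(get_consumed_bytes(NULL, 50, 100) ==  50);
}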
2931
115329f1 2932static int decode_frame(AVCodecContext *avctx,
0da71265 2933 void *data, int *data_size,
7a00bbad 2934 AVPacket *avpkt)
0da71265 2935{
7a00bbad
TB
2936 const uint8_t *buf = avpkt->data;
2937 int buf_size = avpkt->size;
0da71265
MN
2938 H264Context *h = avctx->priv_data;
2939 MpegEncContext *s = &h->s;
115329f1 2940 AVFrame *pict = data;
0da71265 2941 int buf_index;
115329f1 2942
0da71265 2943 s->flags= avctx->flags;
303e50e6 2944 s->flags2= avctx->flags2;
0da71265 2945
1412060e 2946 /* end of stream, output what is still in the buffers */
9d252137 2947 out:
0da71265 2948 if (buf_size == 0) {
97bbb885
MN
2949 Picture *out;
2950 int i, out_idx;
2951
2952//FIXME factorize this with the output code below
2953 out = h->delayed_pic[0];
2954 out_idx = 0;
c173a088 2955 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
MN
2956 if(h->delayed_pic[i]->poc < out->poc){
2957 out = h->delayed_pic[i];
2958 out_idx = i;
2959 }
2960
2961 for(i=out_idx; h->delayed_pic[i]; i++)
2962 h->delayed_pic[i] = h->delayed_pic[i+1];
2963
2964 if(out){
2965 *data_size = sizeof(AVFrame);
2966 *pict= *(AVFrame*)out;
2967 }
2968
0da71265
MN
2969 return 0;
2970 }
115329f1 2971
0da71265 2972 buf_index=decode_nal_units(h, buf, buf_size);
115329f1 2973 if(buf_index < 0)
0da71265
MN
2974 return -1;
2975
9d252137
BC
2976 if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
2977 buf_size = 0;
2978 goto out;
2979 }
2980
56c70e1d 2981 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
e7021c0e 2982 if (avctx->skip_frame >= AVDISCARD_NONREF)
8ed2ae09 2983 return 0;
56c70e1d
MN
2984 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
2985 return -1;
2986 }
2987
66a4b2c1
MN
2988 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
2989 Picture *out = s->current_picture_ptr;
2990 Picture *cur = s->current_picture_ptr;
44be1d64 2991 int i, pics, out_of_order, out_idx;
115329f1 2992
256299d3 2993 field_end(h);
66a4b2c1 2994
357282c6 2995 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
12d96de3
JD
2996 /* Wait for second field. */
2997 *data_size = 0;
2998
2999 } else {
b19d493f 3000 cur->interlaced_frame = 0;
b09a7c05
3001 cur->repeat_pict = 0;
3002
3003 /* Signal interlacing information externally. */
3004 /* If picture timing SEI is present, prefer it over what the decoding process implies; the resulting repeat_pict mapping is sketched after this function. */
70e01da3 3005
b09a7c05
3006 if(h->sps.pic_struct_present_flag){
3007 switch (h->sei_pic_struct)
3008 {
b19d493f
HY
3009 case SEI_PIC_STRUCT_FRAME:
3010 break;
3011 case SEI_PIC_STRUCT_TOP_FIELD:
3012 case SEI_PIC_STRUCT_BOTTOM_FIELD:
3013 cur->interlaced_frame = 1;
3014 break;
3015 case SEI_PIC_STRUCT_TOP_BOTTOM:
3016 case SEI_PIC_STRUCT_BOTTOM_TOP:
3017 if (FIELD_OR_MBAFF_PICTURE)
3018 cur->interlaced_frame = 1;
3019 else
3020 // try to flag soft telecine progressive
3021 cur->interlaced_frame = h->prev_interlaced_frame;
3022 break;
b09a7c05
3023 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
3024 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
3025 // Signal the possibility of telecined film externally (pic_struct 5,6)
3026 // From these hints, applications can decide whether to apply deinterlacing.
3027 cur->repeat_pict = 1;
b09a7c05
3028 break;
3029 case SEI_PIC_STRUCT_FRAME_DOUBLING:
3030 // Force progressive here, as doubling an interlaced frame is a bad idea.
b09a7c05
3031 cur->repeat_pict = 2;
3032 break;
3033 case SEI_PIC_STRUCT_FRAME_TRIPLING:
b09a7c05
3034 cur->repeat_pict = 4;
3035 break;
3036 }
b19d493f
HY
3037
3038 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
3039 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
b09a7c05
3040 }else{
3041 /* Derive interlacing flag from used decoding process. */
3042 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
3043 }
b19d493f 3044 h->prev_interlaced_frame = cur->interlaced_frame;
b09a7c05
3045
3046 if (cur->field_poc[0] != cur->field_poc[1]){
3047 /* Derive top_field_first from field pocs. */
3048 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
3049 }else{
3050 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
3051 /* Use picture timing SEI information. Even if it is from a past frame, it is better than nothing. */
3052 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
3053 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
3054 cur->top_field_first = 1;
3055 else
3056 cur->top_field_first = 0;
3057 }else{
3058 /* Most likely progressive */
3059 cur->top_field_first = 0;
3060 }
3061 }
84a8596d 3062
f6e3c460 3063 //FIXME do something with unavailable reference frames
8b92b792 3064
f6e3c460 3065 /* Sort B-frames into display order (the lowest-POC selection is sketched after this function). */
2f944356 3066
f6e3c460
3067 if(h->sps.bitstream_restriction_flag
3068 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
3069 s->avctx->has_b_frames = h->sps.num_reorder_frames;
3070 s->low_delay = 0;
3071 }
9170e345 3072
fb19e144
MN
3073 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
3074 && !h->sps.bitstream_restriction_flag){
3075 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
3076 s->low_delay= 0;
3077 }
3078
f6e3c460
3079 pics = 0;
3080 while(h->delayed_pic[pics]) pics++;
9170e345 3081
64b9d48f 3082 assert(pics <= MAX_DELAYED_PIC_COUNT);
4e4d983e 3083
f6e3c460
3084 h->delayed_pic[pics++] = cur;
3085 if(cur->reference == 0)
3086 cur->reference = DELAYED_PIC_REF;
2f944356 3087
f6e3c460
3088 out = h->delayed_pic[0];
3089 out_idx = 0;
c173a088 3090 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
f6e3c460
3091 if(h->delayed_pic[i]->poc < out->poc){
3092 out = h->delayed_pic[i];
3093 out_idx = i;
3094 }
44be1d64
MN
3095 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
3096 h->outputed_poc= INT_MIN;
3097 out_of_order = out->poc < h->outputed_poc;
1b547aba 3098
f6e3c460
3099 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
3100 { }
2a811db2 3101 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
f6e3c460 3102 || (s->low_delay &&
44be1d64 3103 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
975a1447 3104 || cur->pict_type == AV_PICTURE_TYPE_B)))
f6e3c460
3105 {
3106 s->low_delay = 0;
3107 s->avctx->has_b_frames++;
f6e3c460 3108 }
f6e3c460
3109
3110 if(out_of_order || pics > s->avctx->has_b_frames){
3eaa6d0e 3111 out->reference &= ~DELAYED_PIC_REF;
f6e3c460
3112 for(i=out_idx; h->delayed_pic[i]; i++)
3113 h->delayed_pic[i] = h->delayed_pic[i+1];
3114 }
3eaa6d0e 3115 if(!out_of_order && pics > s->avctx->has_b_frames){
f6e3c460 3116 *data_size = sizeof(AVFrame);
df8a7dff 3117
44be1d64
MN
3118 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
3119 h->outputed_poc = INT_MIN;
3120 } else
67e362ca 3121 h->outputed_poc = out->poc;
f6e3c460 3122 *pict= *(AVFrame*)out;
3eaa6d0e 3123 }else{
f6e3c460 3124 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3eaa6d0e 3125 }
12d96de3 3126 }
a4dae92b
LM
3127 }
3128
3165e258 3129 assert(pict->data[0] || !*data_size);
4e4d983e 3130 ff_print_debug_info(s, pict);
0da71265 3131//printf("out %d\n", (int)pict->data[0]);
0da71265 3132
0da71265
MN
3133 return get_consumed_bytes(s, buf_index, buf_size);
3134}
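
/* Illustrative sketch only: the SEI pic_struct switch in decode_frame() above
 * boils down to this mapping from pic_struct to the externally signalled
 * repeat_pict hint (number of extra fields to display).  Standalone helper
 * with a hypothetical name; the SEI_PIC_STRUCT_* values are the ones already
 * used above. */
static int repeat_pict_from_pic_struct_sketch(int sei_pic_struct)
{
    switch (sei_pic_struct) {
    case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
    case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
        return 1;                        /* one extra field: possible telecine */
    case SEI_PIC_STRUCT_FRAME_DOUBLING:
        return 2;                        /* one extra frame */
    case SEI_PIC_STRUCT_FRAME_TRIPLING:
        return 4;                        /* two extra frames */
    default:
        return 0;                        /* no repetition signalled */
    }
}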
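
/* Illustrative sketch only: the delayed-picture reordering above always
 * outputs the buffered picture with the lowest picture order count (POC); the
 * real loops additionally stop at the first key frame or MMCO reset.  The
 * minimum-POC selection reduced to plain integers (hypothetical helper name,
 * no Picture structs, no reference bookkeeping): */
static int select_lowest_poc_sketch(const int *poc, int nb_pics, int *out_idx)
{
    int i, best = 0;

    for (i = 1; i < nb_pics; i++)
        if (poc[i] < poc[best])
            best = i;                    /* lowest POC is displayed first */
    *out_idx = best;
    return poc[best];
}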
3135#if 0
3136static inline void fill_mb_avail(H264Context *h){
3137 MpegEncContext * const s = &h->s;
7bc9090a 3138 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265
MN
3139
3140 if(s->mb_y){
7bc9090a
MN
3141 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
3142 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
3143 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
0da71265
MN
3144 }else{
3145 h->mb_avail[0]=
3146 h->mb_avail[1]=
3147 h->mb_avail[2]= 0;
3148 }
3149 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
3150 h->mb_avail[4]= 1; //FIXME move out
3151 h->mb_avail[5]= 0; //FIXME move out
3152}
3153#endif
3154
07e4e3ea 3155#ifdef TEST
6bf398a0 3156#undef printf
d04d5bcd 3157#undef random
0da71265
MN
3158#define COUNT 8000
3159#define SIZE (COUNT*40)
f8a80fd6 3160int main(void){
0da71265
MN
3161 int i;
3162 uint8_t temp[SIZE];
3163 PutBitContext pb;
3164 GetBitContext gb;
3165// int int_temp[10000];
3166 DSPContext dsp;
3167 AVCodecContext avctx;
115329f1 3168