Move static function fill_filter_caches() from h264.h to h264.c.
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264.h"
#include "h264data.h"
#include "h264_mvpred.h"
#include "h264_parser.h"
#include "golomb.h"
#include "mathops.h"
#include "rectangle.h"
#include "vdpau_internal.h"

#include "cabac.h"

//#undef NDEBUG
#include <assert.h>

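/* Lookup tables mapping a qp value to qp%6 and qp/6: the dequant tables
 * built below index their base scale by qp%6 and shift the result left by
 * qp/6 (e.g. qp=23 -> rem6[23]=5, div6[23]=3). */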
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

void ff_h264_write_back_intra_pred_mode(H264Context *h){
    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];

    AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
    mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
    mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
    mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
}

/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    if(!(h->top_samples_available&0x8000)){
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
} //FIXME cleanup like ff_h264_check_intra_pred_mode

/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}

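/* Unescape one NAL unit: the loop below strips the emulation prevention byte
 * (0x03) that is inserted after two consecutive zero bytes, so the bitstream
 * sequence 00 00 03 xx becomes 00 00 xx in the RBSP handed to the parser.
 * If no escape sequence occurs, src is returned directly; *consumed reports
 * how many input bytes (including the NAL header byte) were used. */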
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;
#if 0
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
#endif

#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}

int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
    int v= *src;
    int r;

    tprintf(h->s.avctx, "rbsp trailing %X\n", v);

    for(r=1; r<9; r++){
        if(v&1) return r;
        v>>=1;
    }
    return 0;
}

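/* The function below applies the inverse 4x4 Hadamard transform to the 16
 * luma DC coefficients of an Intra16x16 macroblock and dequantizes them;
 * qmul is the dequant scale already selected for the current qp. */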
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * @param qp quantization parameter
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
#define stride 16
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

//memset(block, 64, 2*256);
//return;
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
    }
}

#if 0
/**
 * DCT transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif

#undef xStride
#undef stride

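/* 2x2 Hadamard transform plus dequantization of the chroma DC coefficients;
 * same idea as the luma DC transform above, but on a 2x2 block. */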
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
}

#if 0
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif

static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}

static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);

        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);
    }
}

static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}

static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1){
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
    else
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
}

static inline void prefetch_motion(H264Context *h, int list){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}

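/* hl_motion() below performs inter prediction for one macroblock: it walks
 * the partition layout (16x16, 16x8, 8x16, 8x8 and sub-partitions) and calls
 * mc_part() for each partition, which in turn selects plain or weighted
 * prediction depending on the slice's weighting mode. */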
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    prefetch_motion(h, 1);
}


static void free_tables(H264Context *h){
    int i;
    H264Context *hx;
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for(i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        av_freep(&hx->rbsp_buffer[1]);
        av_freep(&hx->rbsp_buffer[0]);
        hx->rbsp_buffer_size[0] = 0;
        hx->rbsp_buffer_size[1] = 0;
        if (i) av_freep(&h->thread_context[i]);
    }
}

static void init_dequant8_coeff_table(H264Context *h){
    int i,q,x;
    const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
    h->dequant8_coeff[0] = h->dequant8_buffer[0];
    h->dequant8_coeff[1] = h->dequant8_buffer[1];

    for(i=0; i<2; i++ ){
        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
            h->dequant8_coeff[1] = h->dequant8_buffer[0];
            break;
        }

        for(q=0; q<52; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}

static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<52; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}

static void init_dequant_tables(H264Context *h){
    int i,x;
    init_dequant4_coeff_table(h);
    if(h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);
    if(h->sps.transform_bypass){
        for(i=0; i<6; i++)
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][0][x] = 1<<6;
        if(h->pps.transform_8x8_mode)
            for(i=0; i<2; i++)
                for(x=0; x<64; x++)
                    h->dequant8_coeff[i][0][x] = 1<<6;
    }
}


int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 32 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h);
    return -1;
}

/**
 * Mimic alloc_tables(), but for every context thread.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i){
    MpegEncContext * const s = &src->s;
    dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
    dst->non_zero_count = src->non_zero_count;
    dst->slice_table = src->slice_table;
    dst->cbp_table = src->cbp_table;
    dst->mb2b_xy = src->mb2b_xy;
    dst->mb2br_xy = src->mb2br_xy;
    dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
    dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
    dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
    dst->direct_table = src->direct_table;
    dst->list_counts = src->list_counts;

    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
}

/**
 * Init context
 * Allocate buffers which are not shared amongst multiple threads.
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)

    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}

static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);

static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    ff_h264dsp_init(&h->h264dsp);
    ff_h264_pred_init(&h->hpc, s->codec_id);

    h->dequant_coeff_pps= -1;
    s->unrestricted_mv=1;
    s->decode=1; //FIXME

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}

av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
        s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    h->thread_context[0] = h;
    h->outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){
        int i, cnt, nalsize;
        unsigned char *p = avctx->extradata;

        h->is_avc = 1;

        if(avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            return -1;
        }
        /* sps and pps in the avcC always have length coded with 2 bytes,
           so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p+5) & 0x1f; // Number of sps
        p += 6;
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Decode pps from avcC
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize) != nalsize) {
                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Now store the right nal length size, which will be used to parse all other nals
        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
    } else {
        h->is_avc = 0;
        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
            return -1;
    }
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}

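/* Per-frame setup layered on top of MPV_frame_start(): precomputes the
 * per-block destination offsets (block_offset) for the frame and field
 * layouts and makes sure each thread context has the scratchpad buffer used
 * by bidirectional weighted prediction. */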
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<4; i++){
        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+16+i]=
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->avctx->thread_count; i++)
        if(!h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;
    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}

static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;

    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF){
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                    AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        AV_COPY64(top_border+16, src_cb+8*uvlinesize);
        AV_COPY64(top_border+24, src_cr+8*uvlinesize);
    }
}

static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
    MpegEncContext * const s = &h->s;
    int deblock_left;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        deblock_left = h->left_type[0];
        deblock_top  = h->top_type;
    } else {
        deblock_left = (s->mb_x > 0);
        deblock_top  = (s->mb_y > !!MB_FIELD);
    }

    src_y  -=   linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

#define XCHG(a,b,xchg)\
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_left){
            XCHG(top_border_m1+8, src_y -7, 1);
        }
        XCHG(top_border+0, src_y +1, xchg);
        XCHG(top_border+8, src_y +9, 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
        }
    }

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(deblock_top){
            if(deblock_left){
                XCHG(top_border_m1+16, src_cb -7, 1);
                XCHG(top_border_m1+24, src_cr -7, 1);
            }
            XCHG(top_border+16, src_cb+1, 1);
            XCHG(top_border+24, src_cr+1, 1);
        }
    }
}

static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[24];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        for (i=0; i<16; i++) {
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
        }
        for (i=0; i<8; i++) {
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
        }
    } else {
        if(IS_INTRA(mb_type)){
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            if(IS_INTRA4x4(mb_type)){
                if(simple || !s->encoding){
                    if(IS_8x8DCT(mb_type)){
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels8;
                        }else{
                            idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                            idct_add    = h->h264dsp.h264_idct8_add;
                        }
                        for(i=0; i<16; i+=4){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
                                if(nnz){
                                    if(nnz == 1 && h->mb[i*16])
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
                                    else
                                        idct_add   (ptr, h->mb + i*16, linesize);
                                }
                            }
                        }
                    }else{
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels4;
                        }else{
                            idct_dc_add = h->h264dsp.h264_idct_dc_add;
                            idct_add    = h->h264dsp.h264_idct_add;
                        }
                        for(i=0; i<16; i++){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                uint8_t *topright;
                                int nnz, tr;
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                                    assert(mb_y || linesize <= block_offset[i]);
                                    if(!topright_avail){
                                        tr= ptr[3 - linesize]*0x01010101;
                                        topright= (uint8_t*) &tr;
                                    }else
                                        topright= ptr + 4 - linesize;
                                }else
                                    topright= NULL;

                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                                nnz = h->non_zero_count_cache[ scan8[i] ];
                                if(nnz){
                                    if(is_h264){
                                        if(nnz == 1 && h->mb[i*16])
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
                                        else
                                            idct_add   (ptr, h->mb + i*16, linesize);
                                    }else
                                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                                }
                            }
                        }
                    }
                }
            }else{
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(is_h264){
                    if(!transform_bypass)
                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
                }else
                    ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            }
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        }else if(is_h264){
            hl_motion(h, dest_y, dest_cb, dest_cr,
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
        }


        if(!IS_INTRA4x4(mb_type)){
            if(is_h264){
                if(IS_INTRA16x16(mb_type)){
                    if(transform_bypass){
                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
                        }else{
                            for(i=0; i<16; i++){
                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                        h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                    }
                }else if(h->cbp&15){
                    if(transform_bypass){
                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                        for(i=0; i<16; i+=di){
                            if(h->non_zero_count_cache[ scan8[i] ]){
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                        if(IS_8x8DCT(mb_type)){
                            h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }else{
                            h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }
                    }
                }
            }else{
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                        uint8_t * const ptr= dest_y + block_offset[i];
                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                    }
                }
            }
        }

        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }
            }else{
                chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                if(is_h264){
                    idct_add = h->h264dsp.h264_idct_add;
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                        else if(h->mb[i*16])
                            idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }else{
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
                            ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
                        }
                    }
                }
            }
        }
    }
    if(h->cbp || IS_INTRA(mb_type))
        s->dsp.clear_blocks(h->mb);
}

/**
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 */
static void hl_decode_mb_simple(H264Context *h){
    hl_decode_mb_internal(h, 1);
}

/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0);
}

void ff_h264_hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;

    if (is_complex)
        hl_decode_mb_complex(h);
    else hl_decode_mb_simple(h);
}

static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    int luma_def, chroma_def;

    h->use_weight= 0;
    h->use_weight_chroma= 0;
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    luma_def = 1<<h->luma_log2_weight_denom;
    chroma_def = 1<<h->chroma_log2_weight_denom;

    for(list=0; list<2; list++){
        h->luma_weight_flag[list]   = 0;
        h->chroma_weight_flag[list] = 0;
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
                h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
                if(   h->luma_weight[i][list][0] != luma_def
                   || h->luma_weight[i][list][1] != 0) {
                    h->use_weight= 1;
                    h->luma_weight_flag[list]= 1;
                }
            }else{
                h->luma_weight[i][list][0]= luma_def;
                h->luma_weight[i][list][1]= 0;
            }

            if(CHROMA){
                chroma_weight_flag= get_bits1(&s->gb);
                if(chroma_weight_flag){
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
                        h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
                        if(   h->chroma_weight[i][list][j][0] != chroma_def
                           || h->chroma_weight[i][list][j][1] != 0) {
                            h->use_weight_chroma= 1;
                            h->chroma_weight_flag[list]= 1;
                        }
                    }
                }else{
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= chroma_def;
                        h->chroma_weight[i][list][j][1]= 0;
                    }
                }
            }
        }
        if(h->slice_type_nos != FF_B_TYPE) break;
    }
    h->use_weight= h->use_weight || h->use_weight_chroma;
    return 0;
}

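/* Implicit weighted prediction: the weight pair (w0, 64-w0) computed below is
 * derived from the POC distances between the current picture and the two
 * references; equidistant references give 32/32, i.e. a plain average. */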
1052b76f
MN
1411/**
1412 * Initialize implicit_weight table.
1413 * @param field, 0/1 initialize the weight for interlaced MBAFF
1414 * -1 initializes the rest
1415 */
1416static void implicit_weight_table(H264Context *h, int field){
9f2d1b4f 1417 MpegEncContext * const s = &h->s;
1052b76f 1418 int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
9f2d1b4f 1419
ce09f927
GB
1420 for (i = 0; i < 2; i++) {
1421 h->luma_weight_flag[i] = 0;
1422 h->chroma_weight_flag[i] = 0;
1423 }
1424
1052b76f
MN
1425 if(field < 0){
1426 cur_poc = s->current_picture_ptr->poc;
1427 if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
9f2d1b4f
LM
1428 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1429 h->use_weight= 0;
1430 h->use_weight_chroma= 0;
1431 return;
1432 }
1052b76f
MN
1433 ref_start= 0;
1434 ref_count0= h->ref_count[0];
1435 ref_count1= h->ref_count[1];
1436 }else{
1437 cur_poc = s->current_picture_ptr->field_poc[field];
1438 ref_start= 16;
1439 ref_count0= 16+2*h->ref_count[0];
1440 ref_count1= 16+2*h->ref_count[1];
1441 }
9f2d1b4f
LM
1442
1443 h->use_weight= 2;
1444 h->use_weight_chroma= 2;
1445 h->luma_log2_weight_denom= 5;
1446 h->chroma_log2_weight_denom= 5;
1447
1052b76f 1448 for(ref0=ref_start; ref0 < ref_count0; ref0++){
9f2d1b4f 1449 int poc0 = h->ref_list[0][ref0].poc;
1052b76f 1450 for(ref1=ref_start; ref1 < ref_count1; ref1++){
738386a5 1451 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 1452 int td = av_clip(poc1 - poc0, -128, 127);
1052b76f 1453 int w= 32;
9f2d1b4f 1454 if(td){
f66e4f5f 1455 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 1456 int tx = (16384 + (FFABS(td) >> 1)) / td;
72f86ec0
MN
1457 int dist_scale_factor = (tb*tx + 32) >> 8;
1458 if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
1052b76f
MN
1459 w = 64 - dist_scale_factor;
1460 }
1461 if(field<0){
1462 h->implicit_weight[ref0][ref1][0]=
1463 h->implicit_weight[ref0][ref1][1]= w;
1464 }else{
1465 h->implicit_weight[ref0][ref1][field]=w;
72f86ec0 1466 }
9f2d1b4f
LM
1467 }
1468 }
1469}
1470
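/*
 * Worked example for implicit_weight_table() above (informative): with
 * poc0 = 0, poc1 = 4 and cur_poc = 2, i.e. the current picture halfway
 * between its two references,
 *     td = 4, tb = 2, tx = (16384 + 2) / 4 = 4096,
 *     dist_scale_factor = (2*4096 + 32) >> 8 = 32, w = 64 - 32 = 32,
 * so both references end up weighted 32/64 -- plain averaging, as one
 * would expect for a temporally centered B picture.
 */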
8fd57a66 1471/**
5175b937 1472 * instantaneous decoder refresh.
0da71265
MN
1473 */
1474static void idr(H264Context *h){
ea6f00c4 1475 ff_h264_remove_all_refs(h);
a149c1a5 1476 h->prev_frame_num= 0;
80f8e035
MN
1477 h->prev_frame_num_offset= 0;
1478 h->prev_poc_msb=
1479 h->prev_poc_lsb= 0;
0da71265
MN
1480}
1481
7c33ad19
LM
1482/* forget old pics after a seek */
1483static void flush_dpb(AVCodecContext *avctx){
1484 H264Context *h= avctx->priv_data;
1485 int i;
64b9d48f 1486 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
1487 if(h->delayed_pic[i])
1488 h->delayed_pic[i]->reference= 0;
7c33ad19 1489 h->delayed_pic[i]= NULL;
285b570f 1490 }
df8a7dff 1491 h->outputed_poc= INT_MIN;
b19d493f 1492 h->prev_interlaced_frame = 1;
7c33ad19 1493 idr(h);
ca159196
MR
1494 if(h->s.current_picture_ptr)
1495 h->s.current_picture_ptr->reference= 0;
12d96de3 1496 h->s.first_field= 0;
9c095463 1497 ff_h264_reset_sei(h);
e240f898 1498 ff_mpeg_flush(avctx);
7c33ad19
LM
1499}
1500
0da71265
MN
1501static int init_poc(H264Context *h){
1502 MpegEncContext * const s = &h->s;
1503 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1504 int field_poc[2];
357282c6 1505 Picture *cur = s->current_picture_ptr;
0da71265 1506
b78a6baa 1507 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 1508 if(h->frame_num < h->prev_frame_num)
b78a6baa 1509 h->frame_num_offset += max_frame_num;
0da71265
MN
1510
1511 if(h->sps.poc_type==0){
1512 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1513
1514 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1515 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1516 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1517 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1518 else
1519 h->poc_msb = h->prev_poc_msb;
1520//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 1521 field_poc[0] =
0da71265 1522 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 1523 if(s->picture_structure == PICT_FRAME)
0da71265
MN
1524 field_poc[1] += h->delta_poc_bottom;
1525 }else if(h->sps.poc_type==1){
1526 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1527 int i;
1528
1529 if(h->sps.poc_cycle_length != 0)
1530 abs_frame_num = h->frame_num_offset + h->frame_num;
1531 else
1532 abs_frame_num = 0;
1533
1534 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1535 abs_frame_num--;
115329f1 1536
0da71265
MN
1537 expected_delta_per_poc_cycle = 0;
1538 for(i=0; i < h->sps.poc_cycle_length; i++)
1539 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1540
1541 if(abs_frame_num > 0){
1542 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1543 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1544
1545 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1546 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1547 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1548 } else
1549 expectedpoc = 0;
1550
115329f1 1551 if(h->nal_ref_idc == 0)
0da71265 1552 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 1553
0da71265
MN
1554 field_poc[0] = expectedpoc + h->delta_poc[0];
1555 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1556
1557 if(s->picture_structure == PICT_FRAME)
1558 field_poc[1] += h->delta_poc[1];
1559 }else{
b78a6baa 1560 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 1561
b78a6baa
MN
1562 if(!h->nal_ref_idc)
1563 poc--;
5710b371 1564
0da71265
MN
1565 field_poc[0]= poc;
1566 field_poc[1]= poc;
1567 }
115329f1 1568
357282c6 1569 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 1570 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 1571 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 1572 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 1573 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
1574
1575 return 0;
1576}
1577
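/*
 * Worked example for the poc_type == 0 branch of init_poc() above
 * (informative): with log2_max_poc_lsb == 4 (max_poc_lsb == 16),
 * prev_poc_msb == 0 and prev_poc_lsb == 14, a slice carrying poc_lsb == 2
 * satisfies "poc_lsb < prev_poc_lsb && prev_poc_lsb - poc_lsb >= max_poc_lsb/2"
 * (14 - 2 = 12 >= 8), so poc_msb becomes 16 and the picture order count
 * continues at 16 + 2 = 18 instead of wrapping back to 2.
 */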
b41c1db3
1578
1579/**
1580 * initialize scan tables
1581 */
1582static void init_scan_tables(H264Context *h){
b41c1db3 1583 int i;
4693b031 1584 if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
b41c1db3
1585 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
1586 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
1587 }else{
1588 for(i=0; i<16; i++){
1589#define T(x) (x>>2) | ((x<<2) & 0xF)
1590 h->zigzag_scan[i] = T(zigzag_scan[i]);
1591 h-> field_scan[i] = T( field_scan[i]);
1592#undef T
1593 }
1594 }
4693b031 1595 if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){
45beb850 1596 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
b41c1db3
1597 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
1598 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
1599 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
1600 }else{
1601 for(i=0; i<64; i++){
1602#define T(x) (x>>3) | ((x&7)<<3)
45beb850 1603 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
b41c1db3
1604 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1605 h->field_scan8x8[i] = T(field_scan8x8[i]);
1606 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
1607#undef T
1608 }
1609 }
1610 if(h->sps.transform_bypass){ //FIXME same ugly
1611 h->zigzag_scan_q0 = zigzag_scan;
45beb850 1612 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
1613 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1614 h->field_scan_q0 = field_scan;
1615 h->field_scan8x8_q0 = field_scan8x8;
1616 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1617 }else{
1618 h->zigzag_scan_q0 = h->zigzag_scan;
1619 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1620 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1621 h->field_scan_q0 = h->field_scan;
1622 h->field_scan8x8_q0 = h->field_scan8x8;
1623 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
1624 }
1625}
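/*
 * Note on the T(x) macros in init_scan_tables() above (informative): for a
 * raster index x = 4*row + col within a 4x4 block, (x>>2) | ((x<<2)&0xF)
 * equals 4*col + row, i.e. the scan order is transposed; the 8x8 variant
 * does the same for x = 8*row + col. The transposed tables are only used
 * when an IDCT other than the plain C one is selected, which suggests those
 * implementations operate on transposed coefficient blocks.
 */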
afebe2f7 1626
256299d3
MN
1627static void field_end(H264Context *h){
1628 MpegEncContext * const s = &h->s;
1629 AVCodecContext * const avctx= s->avctx;
1630 s->mb_y= 0;
1631
1632 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1633 s->current_picture_ptr->pict_type= s->pict_type;
1634
1635 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1636 ff_vdpau_h264_set_reference_frames(s);
1637
1638 if(!s->dropable) {
ea6f00c4 1639 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
1640 h->prev_poc_msb= h->poc_msb;
1641 h->prev_poc_lsb= h->poc_lsb;
1642 }
1643 h->prev_frame_num_offset= h->frame_num_offset;
1644 h->prev_frame_num= h->frame_num;
1645
1646 if (avctx->hwaccel) {
1647 if (avctx->hwaccel->end_frame(avctx) < 0)
1648 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1649 }
1650
1651 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1652 ff_vdpau_h264_picture_complete(s);
1653
1654 /*
 1655 * FIXME: The error handling code does not seem to support interlaced
 1656 * content when slices span multiple rows.
 1657 * The ff_er_add_slice calls don't work right for bottom
 1658 * fields; they cause massive erroneous error concealment.
 1659 * Error marking covers both fields (top and bottom).
 1660 * This causes a mismatched s->error_count
 1661 * and a bad error table. Further, the error count goes to
 1662 * INT_MAX when called for the bottom field, because mb_y is
 1663 * past the end by one (the caller's fault) and resync_mb_y != 0
1664 * causes problems for the first MB line, too.
1665 */
1666 if (!FIELD_PICTURE)
1667 ff_er_frame_end(s);
1668
1669 MPV_frame_end(s);
d225a1e2
MN
1670
1671 h->current_slice=0;
256299d3
MN
1672}
1673
afebe2f7
1674/**
1675 * Replicates H264 "master" context to thread contexts.
1676 */
1677static void clone_slice(H264Context *dst, H264Context *src)
1678{
1679 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1680 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1681 dst->s.current_picture = src->s.current_picture;
1682 dst->s.linesize = src->s.linesize;
1683 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 1684 dst->s.first_field = src->s.first_field;
afebe2f7
1685
1686 dst->prev_poc_msb = src->prev_poc_msb;
1687 dst->prev_poc_lsb = src->prev_poc_lsb;
1688 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1689 dst->prev_frame_num = src->prev_frame_num;
1690 dst->short_ref_count = src->short_ref_count;
1691
1692 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1693 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1694 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1695 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
1696
1697 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1698 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
1699}
1700
0da71265
MN
1701/**
1702 * decodes a slice header.
9c852bcf 1703 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
1704 *
1705 * @param h h264context
1706 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1707 *
d9526386 1708 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 1709 */
afebe2f7 1710static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 1711 MpegEncContext * const s = &h->s;
12d96de3 1712 MpegEncContext * const s0 = &h0->s;
88e7a4d1 1713 unsigned int first_mb_in_slice;
ac658be5 1714 unsigned int pps_id;
0da71265 1715 int num_ref_idx_active_override_flag;
41f5c62f 1716 unsigned int slice_type, tmp, i, j;
0bf79634 1717 int default_ref_list_done = 0;
12d96de3 1718 int last_pic_structure;
0da71265 1719
2f944356 1720 s->dropable= h->nal_ref_idc == 0;
0da71265 1721
cf653d08
JD
1722 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1723 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1724 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1725 }else{
1726 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1727 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1728 }
1729
0da71265
MN
1730 first_mb_in_slice= get_ue_golomb(&s->gb);
1731
d225a1e2
MN
1732 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1733 if(h0->current_slice && FIELD_PICTURE){
1734 field_end(h);
1735 }
1736
afebe2f7 1737 h0->current_slice = 0;
12d96de3 1738 if (!s0->first_field)
f6e3c460 1739 s->current_picture_ptr= NULL;
66a4b2c1
MN
1740 }
1741
9963b332 1742 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 1743 if(slice_type > 9){
9b879566 1744 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 1745 return -1;
0da71265 1746 }
0bf79634
LLL
1747 if(slice_type > 4){
1748 slice_type -= 5;
0da71265
MN
1749 h->slice_type_fixed=1;
1750 }else
1751 h->slice_type_fixed=0;
115329f1 1752
ee2a957f 1753 slice_type= golomb_to_pict_type[ slice_type ];
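    /* Informative: bitstream slice_type values 0..4 denote P, B, I, SP and
     * SI slices; 5..9 are the same types with the additional guarantee that
     * every slice of the picture uses this type, which is what
     * slice_type_fixed records above. golomb_to_pict_type maps the folded
     * 0..4 index onto the FF_*_TYPE constants. */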
9701840b 1754 if (slice_type == FF_I_TYPE
afebe2f7 1755 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
1756 default_ref_list_done = 1;
1757 }
1758 h->slice_type= slice_type;
e3e6f18f 1759 h->slice_type_nos= slice_type & 3;
0bf79634 1760
1412060e 1761 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
115329f1 1762
0da71265 1763 pps_id= get_ue_golomb(&s->gb);
ac658be5 1764 if(pps_id>=MAX_PPS_COUNT){
9b879566 1765 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
1766 return -1;
1767 }
afebe2f7 1768 if(!h0->pps_buffers[pps_id]) {
a0f80050 1769 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
1770 return -1;
1771 }
afebe2f7 1772 h->pps= *h0->pps_buffers[pps_id];
8b92b792 1773
afebe2f7 1774 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 1775 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
1776 return -1;
1777 }
afebe2f7 1778 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 1779
50c21814 1780 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 1781 h->dequant_coeff_pps = pps_id;
239ea04c
LM
1782 init_dequant_tables(h);
1783 }
115329f1 1784
0da71265 1785 s->mb_width= h->sps.mb_width;
6867a90b 1786 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 1787
bf4665ee 1788 h->b_stride= s->mb_width*4;
0da71265 1789
faf3dfb9 1790 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 1791 if(h->sps.frame_mbs_only_flag)
faf3dfb9 1792 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 1793 else
faf3dfb9 1794 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
115329f1
DB
1795
1796 if (s->context_initialized
5388f0b4
JK
1797 && ( s->width != s->avctx->width || s->height != s->avctx->height
1798 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
afebe2f7
1799 if(h != h0)
1800 return -1; // width / height changed during parallelized decoding
0da71265 1801 free_tables(h);
ff7f75e1 1802 flush_dpb(s->avctx);
0da71265
MN
1803 MPV_common_end(s);
1804 }
1805 if (!s->context_initialized) {
afebe2f7
1806 if(h != h0)
 1807 return -1; // we can't (re-)initialize the context during parallel decoding
f3bdc3da
RD
1808
1809 avcodec_set_dimensions(s->avctx, s->width, s->height);
1810 s->avctx->sample_aspect_ratio= h->sps.sar;
1811 if(!s->avctx->sample_aspect_ratio.den)
1812 s->avctx->sample_aspect_ratio.den = 1;
1813
c4dffe7e
DC
1814 if(h->sps.video_signal_type_present_flag){
1815 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1816 if(h->sps.colour_description_present_flag){
1817 s->avctx->color_primaries = h->sps.color_primaries;
1818 s->avctx->color_trc = h->sps.color_trc;
1819 s->avctx->colorspace = h->sps.colorspace;
1820 }
1821 }
1822
f3bdc3da 1823 if(h->sps.timing_info_present_flag){
3102d180 1824 int64_t den= h->sps.time_scale;
055a6aa7 1825 if(h->x264_build < 44U)
3102d180 1826 den *= 2;
f3bdc3da 1827 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180 1828 h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da
RD
1829 }
1830 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
1831 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1832
0da71265
MN
1833 if (MPV_common_init(s) < 0)
1834 return -1;
12d96de3 1835 s->first_field = 0;
b19d493f 1836 h->prev_interlaced_frame = 1;
115329f1 1837
b41c1db3 1838 init_scan_tables(h);
903d58f6 1839 ff_h264_alloc_tables(h);
0da71265 1840
afebe2f7
1841 for(i = 1; i < s->avctx->thread_count; i++) {
1842 H264Context *c;
1843 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 1844 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7 1845 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
7a5c850b 1846 c->h264dsp = h->h264dsp;
afebe2f7
1847 c->sps = h->sps;
1848 c->pps = h->pps;
1849 init_scan_tables(c);
145061a1 1850 clone_tables(c, h, i);
afebe2f7
1851 }
1852
1853 for(i = 0; i < s->avctx->thread_count; i++)
1854 if(context_init(h->thread_context[i]) < 0)
1855 return -1;
0da71265
MN
1856 }
1857
0da71265
MN
1858 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1859
5d18eaad 1860 h->mb_mbaff = 0;
6ba71fc4 1861 h->mb_aff_frame = 0;
12d96de3 1862 last_pic_structure = s0->picture_structure;
0da71265
MN
1863 if(h->sps.frame_mbs_only_flag){
1864 s->picture_structure= PICT_FRAME;
1865 }else{
6ba71fc4 1866 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 1867 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 1868 } else {
0da71265 1869 s->picture_structure= PICT_FRAME;
6ba71fc4 1870 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 1871 }
0da71265 1872 }
44e9dcf1 1873 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
1874
1875 if(h0->current_slice == 0){
26b86e47
MN
1876 while(h->frame_num != h->prev_frame_num &&
1877 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
1878 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 1879 if (ff_h264_frame_start(h) < 0)
66e6038c 1880 return -1;
26b86e47
MN
1881 h->prev_frame_num++;
1882 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1883 s->current_picture_ptr->frame_num= h->prev_frame_num;
ea6f00c4 1884 ff_h264_execute_ref_pic_marking(h, NULL, 0);
26b86e47
MN
1885 }
1886
12d96de3
JD
1887 /* See if we have a decoded first field looking for a pair... */
1888 if (s0->first_field) {
1889 assert(s0->current_picture_ptr);
1890 assert(s0->current_picture_ptr->data[0]);
1891 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1892
1893 /* figure out if we have a complementary field pair */
1894 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1895 /*
1896 * Previous field is unmatched. Don't display it, but let it
1897 * remain for reference if marked as such.
1898 */
1899 s0->current_picture_ptr = NULL;
1900 s0->first_field = FIELD_PICTURE;
1901
1902 } else {
1903 if (h->nal_ref_idc &&
1904 s0->current_picture_ptr->reference &&
1905 s0->current_picture_ptr->frame_num != h->frame_num) {
1906 /*
1907 * This and previous field were reference, but had
1908 * different frame_nums. Consider this field first in
1909 * pair. Throw away previous field except for reference
1910 * purposes.
1911 */
1912 s0->first_field = 1;
1913 s0->current_picture_ptr = NULL;
1914
1915 } else {
1916 /* Second field in complementary pair */
1917 s0->first_field = 0;
1918 }
1919 }
1920
1921 } else {
1922 /* Frame or first field in a potentially complementary pair */
1923 assert(!s0->current_picture_ptr);
1924 s0->first_field = FIELD_PICTURE;
1925 }
1926
903d58f6 1927 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 1928 s0->first_field = 0;
2ddcf84b 1929 return -1;
12d96de3 1930 }
2ddcf84b
JD
1931 }
1932 if(h != h0)
1933 clone_slice(h, h0);
1934
1935 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1936
88e7a4d1 1937 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 1938 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
1939 first_mb_in_slice >= s->mb_num){
1940 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
1941 return -1;
1942 }
88e7a4d1 1943 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
1944 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1945 if (s->picture_structure == PICT_BOTTOM_FIELD)
1946 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 1947 assert(s->mb_y < s->mb_height);
115329f1 1948
0da71265
MN
1949 if(s->picture_structure==PICT_FRAME){
1950 h->curr_pic_num= h->frame_num;
1951 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1952 }else{
f57e2af6 1953 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
1954 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1955 }
115329f1 1956
0da71265 1957 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 1958 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 1959 }
115329f1 1960
0da71265
MN
1961 if(h->sps.poc_type==0){
1962 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 1963
0da71265
MN
1964 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1965 h->delta_poc_bottom= get_se_golomb(&s->gb);
1966 }
1967 }
115329f1 1968
0da71265
MN
1969 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
1970 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 1971
0da71265
MN
1972 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
1973 h->delta_poc[1]= get_se_golomb(&s->gb);
1974 }
115329f1 1975
0da71265 1976 init_poc(h);
115329f1 1977
0da71265
MN
1978 if(h->pps.redundant_pic_cnt_present){
1979 h->redundant_pic_count= get_ue_golomb(&s->gb);
1980 }
1981
1412060e 1982 //set defaults, might be overridden a few lines later
0da71265
MN
1983 h->ref_count[0]= h->pps.ref_count[0];
1984 h->ref_count[1]= h->pps.ref_count[1];
1985
e3e6f18f 1986 if(h->slice_type_nos != FF_I_TYPE){
9f5c1037 1987 if(h->slice_type_nos == FF_B_TYPE){
0da71265
MN
1988 h->direct_spatial_mv_pred= get_bits1(&s->gb);
1989 }
1990 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 1991
0da71265
MN
1992 if(num_ref_idx_active_override_flag){
1993 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
9f5c1037 1994 if(h->slice_type_nos==FF_B_TYPE)
0da71265
MN
1995 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
1996
187696fa 1997 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 1998 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 1999 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
2000 return -1;
2001 }
2002 }
9f5c1037 2003 if(h->slice_type_nos == FF_B_TYPE)
187696fa
MN
2004 h->list_count= 2;
2005 else
2006 h->list_count= 1;
2007 }else
2008 h->list_count= 0;
0da71265 2009
0bf79634 2010 if(!default_ref_list_done){
ea6f00c4 2011 ff_h264_fill_default_ref_list(h);
0da71265
MN
2012 }
2013
ea6f00c4 2014 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 2015 return -1;
0da71265 2016
07dff5c7
MN
2017 if(h->slice_type_nos!=FF_I_TYPE){
2018 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 2019 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
MN
2020 }
2021 if(h->slice_type_nos==FF_B_TYPE){
2022 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 2023 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
2024 }
2025
932f396f 2026 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
9f5c1037 2027 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
0da71265 2028 pred_weight_table(h);
1a29c6a0 2029 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
1052b76f 2030 implicit_weight_table(h, -1);
1a29c6a0 2031 }else {
9f2d1b4f 2032 h->use_weight = 0;
cb99c652
GB
2033 for (i = 0; i < 2; i++) {
2034 h->luma_weight_flag[i] = 0;
2035 h->chroma_weight_flag[i] = 0;
2036 }
2037 }
115329f1 2038
2ddcf84b 2039 if(h->nal_ref_idc)
ea6f00c4 2040 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 2041
1052b76f 2042 if(FRAME_MBAFF){
ea6f00c4 2043 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 2044
1052b76f
MN
2045 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
2046 implicit_weight_table(h, 0);
2047 implicit_weight_table(h, 1);
2048 }
2049 }
2050
8f56e219 2051 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
943f69a6
MN
2052 ff_h264_direct_dist_scale_factor(h);
2053 ff_h264_direct_ref_list_init(h);
8f56e219 2054
e3e6f18f 2055 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
9963b332 2056 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2057 if(tmp > 2){
2058 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
2059 return -1;
2060 }
2061 h->cabac_init_idc= tmp;
2062 }
e5017ab8
LA
2063
2064 h->last_qscale_diff = 0;
88e7a4d1
MN
2065 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2066 if(tmp>51){
2067 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2068 return -1;
2069 }
88e7a4d1 2070 s->qscale= tmp;
4691a77d
2071 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2072 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2073 //FIXME qscale / qp ... stuff
9701840b 2074 if(h->slice_type == FF_SP_TYPE){
1df1df0b 2075 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2076 }
9701840b 2077 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
1df1df0b 2078 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2079 }
2080
53c05b1e 2081 h->deblocking_filter = 1;
0c32e19d
MN
2082 h->slice_alpha_c0_offset = 52;
2083 h->slice_beta_offset = 52;
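    /* The alpha/beta offsets are kept with a +52 bias here (two per unit of
     * the *_div2 syntax elements parsed below), presumably so they stay
     * non-negative for table indexing; the debug output at the end of this
     * function prints value/2 - 26 to recover the bitstream value. */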
0da71265 2084 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2085 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2086 if(tmp > 2){
2087 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2088 return -1;
2089 }
2090 h->deblocking_filter= tmp;
115329f1 2091 if(h->deblocking_filter < 2)
53c05b1e
MN
2092 h->deblocking_filter^= 1; // 1<->0
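        /* Informative: in the bitstream, disable_deblocking_filter_idc is
         * 0 = filter enabled, 1 = disabled, 2 = enabled but not across
         * slice boundaries. Swapping 0 and 1 above yields the internal
         * convention: 0 = off, 1 = on, 2 = on within the slice only. */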
2093
2094 if( h->deblocking_filter ) {
0c32e19d
MN
2095 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2096 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2097 if( h->slice_alpha_c0_offset > 104U
2098 || h->slice_beta_offset > 104U){
2099 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2100 return -1;
2101 }
0da71265 2102 }
980a82b7 2103 }
afebe2f7 2104
61858a76 2105 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4b30289e 2106 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
9f5c1037 2107 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
61858a76
RD
2108 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2109 h->deblocking_filter= 0;
2110
afebe2f7 2111 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2112 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2113 /* Cheat slightly for speed:
5d81d641 2114 Do not bother to deblock across slices. */
ec970c21
2115 h->deblocking_filter = 2;
2116 } else {
7ae94d52
2117 h0->max_contexts = 1;
2118 if(!h0->single_decode_warning) {
2119 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2120 h0->single_decode_warning = 1;
2121 }
2122 if(h != h0)
2123 return 1; // deblocking switched inside frame
ec970c21 2124 }
afebe2f7 2125 }
0c32e19d 2126 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7 2127
0da71265
MN
2128#if 0 //FMO
2129 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2130 slice_group_change_cycle= get_bits(&s->gb, ?);
2131#endif
2132
afebe2f7
2133 h0->last_slice_type = slice_type;
2134 h->slice_num = ++h0->current_slice;
b735aeea
MN
2135 if(h->slice_num >= MAX_SLICES){
2136 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2137 }
5175b937 2138
c32867b5 2139 for(j=0; j<2; j++){
6d7e6b26 2140 int id_list[16];
b735aeea 2141 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
MN
2142 for(i=0; i<16; i++){
2143 id_list[i]= 60;
2144 if(h->ref_list[j][i].data[0]){
2145 int k;
2146 uint8_t *base= h->ref_list[j][i].base[0];
2147 for(k=0; k<h->short_ref_count; k++)
2148 if(h->short_ref[k]->base[0] == base){
2149 id_list[i]= k;
2150 break;
2151 }
2152 for(k=0; k<h->long_ref_count; k++)
2153 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2154 id_list[i]= h->short_ref_count + k;
2155 break;
2156 }
2157 }
2158 }
2159
c32867b5
MN
2160 ref2frm[0]=
2161 ref2frm[1]= -1;
d50cdd82 2162 for(i=0; i<16; i++)
6d7e6b26 2163 ref2frm[i+2]= 4*id_list[i]
c32867b5 2164 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2165 ref2frm[18+0]=
2166 ref2frm[18+1]= -1;
2167 for(i=16; i<48; i++)
6d7e6b26 2168 ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82 2169 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2170 }
2171
5d18eaad 2172 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2173 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2174
802e9146
MN
2175 s->avctx->refs= h->sps.ref_frame_count;
2176
0da71265 2177 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2178 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2179 h->slice_num,
2180 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2181 first_mb_in_slice,
49573a87 2182 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2183 pps_id, h->frame_num,
2184 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2185 h->ref_count[0], h->ref_count[1],
2186 s->qscale,
0c32e19d 2187 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f 2188 h->use_weight,
4806b922
MN
2189 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2190 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2191 );
2192 }
2193
2194 return 0;
2195}
2196
0dc343d4 2197int ff_h264_get_slice_type(const H264Context *h)
75dd6938
LA
2198{
2199 switch (h->slice_type) {
2200 case FF_P_TYPE: return 0;
2201 case FF_B_TYPE: return 1;
2202 case FF_I_TYPE: return 2;
2203 case FF_SP_TYPE: return 3;
2204 case FF_SI_TYPE: return 4;
2205 default: return -1;
2206 }
2207}
2208
d02bb3ec
DB
2209/**
 2210 * Fill the per-macroblock caches needed by the loop filter.
 2211 * @return non-zero if the loop filter can be skipped for this macroblock
2212 */
2213static int fill_filter_caches(H264Context *h, int mb_type){
2214 MpegEncContext * const s = &h->s;
2215 const int mb_xy= h->mb_xy;
2216 int top_xy, left_xy[2];
2217 int top_type, left_type[2];
2218
2219 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
2220
2221 //FIXME deblocking could skip the intra and nnz parts.
2222
2223 /* Wow, what a mess, why didn't they simplify the interlacing & intra
2224 * stuff, I can't imagine that these complex rules are worth it. */
2225
2226 left_xy[1] = left_xy[0] = mb_xy-1;
2227 if(FRAME_MBAFF){
2228 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
2229 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
2230 if(s->mb_y&1){
2231 if (left_mb_field_flag != curr_mb_field_flag) {
2232 left_xy[0] -= s->mb_stride;
2233 }
2234 }else{
2235 if(curr_mb_field_flag){
2236 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
2237 }
2238 if (left_mb_field_flag != curr_mb_field_flag) {
2239 left_xy[1] += s->mb_stride;
2240 }
2241 }
2242 }
2243
2244 h->top_mb_xy = top_xy;
2245 h->left_mb_xy[0] = left_xy[0];
2246 h->left_mb_xy[1] = left_xy[1];
2247 {
2248 //for sufficiently low qp, filtering wouldn't do anything
2249 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
2250 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
2251 int qp = s->current_picture.qscale_table[mb_xy];
2252 if(qp <= qp_thresh
2253 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
2254 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
2255 if(!FRAME_MBAFF)
2256 return 1;
2257 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
2258 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
2259 return 1;
2260 }
2261 }
2262
2263 top_type = s->current_picture.mb_type[top_xy] ;
2264 left_type[0] = s->current_picture.mb_type[left_xy[0]];
2265 left_type[1] = s->current_picture.mb_type[left_xy[1]];
2266 if(h->deblocking_filter == 2){
2267 if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
2268 if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
2269 }else{
2270 if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
2271 if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
2272 }
2273 h->top_type = top_type ;
2274 h->left_type[0]= left_type[0];
2275 h->left_type[1]= left_type[1];
2276
2277 if(IS_INTRA(mb_type))
2278 return 0;
2279
2280 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
2281 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
2282 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
2283 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
2284 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
2285
2286 h->cbp= h->cbp_table[mb_xy];
2287
2288 {
2289 int list;
2290 for(list=0; list<h->list_count; list++){
2291 int8_t *ref;
2292 int y, b_stride;
2293 int16_t (*mv_dst)[2];
2294 int16_t (*mv_src)[2];
2295
2296 if(!USES_LIST(mb_type, list)){
2297 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
2298 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2299 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2300 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2301 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2302 continue;
2303 }
2304
2305 ref = &s->current_picture.ref_index[list][4*mb_xy];
2306 {
2307 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2308 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2309 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2310 ref += 2;
2311 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2312 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2313 }
2314
2315 b_stride = h->b_stride;
2316 mv_dst = &h->mv_cache[list][scan8[0]];
2317 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
2318 for(y=0; y<4; y++){
2319 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
2320 }
2321
2322 }
2323 }
2324
2325
2326/*
2327 0 . T T. T T T T
2328 1 L . .L . . . .
2329 2 L . .L . . . .
2330 3 . T TL . . . .
2331 4 L . .L . . . .
2332 5 L . .. . . . .
2333*/
2334//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
2335 if(top_type){
2336 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
2337 }
2338
2339 if(left_type[0]){
2340 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
2341 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
2342 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
2343 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
2344 }
2345
2346 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
2347 if(!CABAC && h->pps.transform_8x8_mode){
2348 if(IS_8x8DCT(top_type)){
2349 h->non_zero_count_cache[4+8*0]=
2350 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
2351 h->non_zero_count_cache[6+8*0]=
2352 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
2353 }
2354 if(IS_8x8DCT(left_type[0])){
2355 h->non_zero_count_cache[3+8*1]=
2356 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
2357 }
2358 if(IS_8x8DCT(left_type[1])){
2359 h->non_zero_count_cache[3+8*3]=
2360 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
2361 }
2362
2363 if(IS_8x8DCT(mb_type)){
2364 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
2365 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
2366
2367 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
2368 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
2369
2370 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
2371 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
2372
2373 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
2374 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
2375 }
2376 }
2377
2378 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
2379 int list;
2380 for(list=0; list<h->list_count; list++){
2381 if(USES_LIST(top_type, list)){
2382 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
2383 const int b8_xy= 4*top_xy + 2;
2384 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2385 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
2386 h->ref_cache[list][scan8[0] + 0 - 1*8]=
2387 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
2388 h->ref_cache[list][scan8[0] + 2 - 1*8]=
2389 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
2390 }else{
2391 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
2392 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2393 }
2394
2395 if(!IS_INTERLACED(mb_type^left_type[0])){
2396 if(USES_LIST(left_type[0], list)){
2397 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
2398 const int b8_xy= 4*left_xy[0] + 1;
2399 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2400 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
2401 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
2402 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
2403 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
2404 h->ref_cache[list][scan8[0] - 1 + 0 ]=
2405 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
2406 h->ref_cache[list][scan8[0] - 1 +16 ]=
2407 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
2408 }else{
2409 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
2410 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
2411 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
2412 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
2413 h->ref_cache[list][scan8[0] - 1 + 0 ]=
2414 h->ref_cache[list][scan8[0] - 1 + 8 ]=
2415 h->ref_cache[list][scan8[0] - 1 + 16 ]=
2416 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
2417 }
2418 }
2419 }
2420 }
2421
2422 return 0;
2423}
2424
c988f975
MN
2425static void loop_filter(H264Context *h){
2426 MpegEncContext * const s = &h->s;
2427 uint8_t *dest_y, *dest_cb, *dest_cr;
2428 int linesize, uvlinesize, mb_x, mb_y;
2429 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2430 const int old_slice_type= h->slice_type;
2431
2432 if(h->deblocking_filter) {
2433 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2434 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
78998bf2 2435 int mb_xy, mb_type;
c988f975
MN
2436 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2437 h->slice_num= h->slice_table[mb_xy];
2438 mb_type= s->current_picture.mb_type[mb_xy];
2439 h->list_count= h->list_counts[mb_xy];
c988f975
MN
2440
2441 if(FRAME_MBAFF)
2442 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
2443
c988f975
MN
2444 s->mb_x= mb_x;
2445 s->mb_y= mb_y;
2446 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2447 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2448 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2449 //FIXME simplify above
2450
2451 if (MB_FIELD) {
2452 linesize = h->mb_linesize = s->linesize * 2;
2453 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2454 if(mb_y&1){ //FIXME move out of this function?
2455 dest_y -= s->linesize*15;
2456 dest_cb-= s->uvlinesize*7;
2457 dest_cr-= s->uvlinesize*7;
2458 }
2459 } else {
2460 linesize = h->mb_linesize = s->linesize;
2461 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2462 }
77d40dce 2463 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
aaa995d7 2464 if(fill_filter_caches(h, mb_type))
44a5e7b6 2465 continue;
c988f975
MN
2466 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2467 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2468
77d40dce 2469 if (FRAME_MBAFF) {
c988f975
MN
2470 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2471 } else {
2472 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2473 }
2474 }
2475 }
2476 }
2477 h->slice_type= old_slice_type;
2478 s->mb_x= 0;
2479 s->mb_y= end_mb_y - FRAME_MBAFF;
f4b8b825
MN
2480 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2481 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
c988f975
MN
2482}
2483
69a28f3e
MN
2484static void predict_field_decoding_flag(H264Context *h){
2485 MpegEncContext * const s = &h->s;
2486 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2487 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
2488 ? s->current_picture.mb_type[mb_xy-1]
2489 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
2490 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
2491 : 0;
2492 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
2493}
2494
3a84713a
RS
2495static int decode_slice(struct AVCodecContext *avctx, void *arg){
2496 H264Context *h = *(void**)arg;
0da71265
MN
2497 MpegEncContext * const s = &h->s;
2498 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2499
2500 s->mb_skip_run= -1;
0da71265 2501
89db0bae 2502 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b 2503 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae 2504
e5017ab8 2505 if( h->pps.cabac ) {
e5017ab8
LA
2506 /* realign */
2507 align_get_bits( &s->gb );
2508
2509 /* init cabac */
d61c4e73 2510 ff_init_cabac_states( &h->cabac);
e5017ab8
LA
2511 ff_init_cabac_decoder( &h->cabac,
2512 s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15 2513 (get_bits_left(&s->gb) + 7)/8);
cc51b282
MN
2514
2515 ff_h264_init_cabac_states(h);
95c26348 2516
e5017ab8 2517 for(;;){
851ded89 2518//START_TIMER
cc51b282 2519 int ret = ff_h264_decode_mb_cabac(h);
6867a90b 2520 int eos;
851ded89 2521//STOP_TIMER("decode_mb_cabac")
0da71265 2522
903d58f6 2523 if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265 2524
5d18eaad 2525 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
LA
2526 s->mb_y++;
2527
cc51b282 2528 ret = ff_h264_decode_mb_cabac(h);
e5017ab8 2529
903d58f6 2530 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2531 s->mb_y--;
2532 }
6867a90b 2533 eos = get_cabac_terminate( &h->cabac );
e5017ab8 2534
3566042a
MN
2535 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
2536 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2537 return 0;
2538 }
5659b509 2539 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af 2540 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
LA
2541 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2542 return -1;
2543 }
2544
2545 if( ++s->mb_x >= s->mb_width ) {
2546 s->mb_x = 0;
c988f975 2547 loop_filter(h);
e5017ab8 2548 ff_draw_horiz_band(s, 16*s->mb_y, 16);
5175b937 2549 ++s->mb_y;
f3e53d9f 2550 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2551 ++s->mb_y;
69cc3183
MN
2552 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2553 predict_field_decoding_flag(h);
6867a90b 2554 }
0da71265 2555 }
0da71265 2556
e5017ab8 2557 if( eos || s->mb_y >= s->mb_height ) {
a9c9a240 2558 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8 2559 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0da71265 2560 return 0;
e5017ab8 2561 }
e5017ab8
LA
2562 }
2563
2564 } else {
2565 for(;;){
e1e94902 2566 int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2567
903d58f6 2568 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8 2569
5d18eaad 2570 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8 2571 s->mb_y++;
e1e94902 2572 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2573
903d58f6 2574 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2575 s->mb_y--;
2576 }
2577
2578 if(ret<0){
2579 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2580 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2581
2582 return -1;
2583 }
e5017ab8
LA
2584
2585 if(++s->mb_x >= s->mb_width){
2586 s->mb_x=0;
c988f975 2587 loop_filter(h);
e5017ab8 2588 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6867a90b 2589 ++s->mb_y;
f3e53d9f 2590 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2591 ++s->mb_y;
69cc3183
MN
2592 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2593 predict_field_decoding_flag(h);
6867a90b
LLL
2594 }
2595 if(s->mb_y >= s->mb_height){
a9c9a240 2596 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2597
2598 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2599 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2600
2601 return 0;
2602 }else{
2603 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2604
2605 return -1;
2606 }
2607 }
2608 }
2609
2610 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240 2611 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2612 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2613 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2614
2615 return 0;
2616 }else{
2617 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2618
2619 return -1;
2620 }
2621 }
0da71265
MN
2622 }
2623 }
e5017ab8 2624
0da71265
MN
2625#if 0
2626 for(;s->mb_y < s->mb_height; s->mb_y++){
2627 for(;s->mb_x < s->mb_width; s->mb_x++){
2628 int ret= decode_mb(h);
115329f1 2629
903d58f6 2630 ff_h264_hl_decode_mb(h);
0da71265
MN
2631
2632 if(ret<0){
267f7edc 2633 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2634 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2635
2636 return -1;
2637 }
115329f1 2638
0da71265
MN
2639 if(++s->mb_x >= s->mb_width){
2640 s->mb_x=0;
2641 if(++s->mb_y >= s->mb_height){
2642 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2643 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2644
2645 return 0;
2646 }else{
2647 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2648
2649 return -1;
2650 }
2651 }
2652 }
115329f1 2653
0da71265
MN
2654 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2655 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2656 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2657
2658 return 0;
2659 }else{
2660 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2661
2662 return -1;
2663 }
2664 }
2665 }
2666 s->mb_x=0;
2667 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2668 }
2669#endif
2670 return -1; //not reached
2671}
2672
afebe2f7
2673/**
2674 * Call decode_slice() for each context.
2675 *
2676 * @param h h264 master context
2677 * @param context_count number of contexts to execute
2678 */
2679static void execute_decode_slices(H264Context *h, int context_count){
2680 MpegEncContext * const s = &h->s;
2681 AVCodecContext * const avctx= s->avctx;
2682 H264Context *hx;
2683 int i;
2684
40e5d31b
GB
2685 if (s->avctx->hwaccel)
2686 return;
0d3d172f 2687 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2688 return;
afebe2f7 2689 if(context_count == 1) {
74e8b78b 2690 decode_slice(avctx, &h);
afebe2f7
2691 } else {
2692 for(i = 1; i < context_count; i++) {
2693 hx = h->thread_context[i];
047599a4 2694 hx->s.error_recognition = avctx->error_recognition;
afebe2f7
2695 hx->s.error_count = 0;
2696 }
2697
2698 avctx->execute(avctx, (void *)decode_slice,
01418506 2699 h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
2700
2701 /* pull back stuff from slices to master context */
2702 hx = h->thread_context[context_count - 1];
2703 s->mb_x = hx->s.mb_x;
2704 s->mb_y = hx->s.mb_y;
12d96de3
JD
2705 s->dropable = hx->s.dropable;
2706 s->picture_structure = hx->s.picture_structure;
afebe2f7
2707 for(i = 1; i < context_count; i++)
2708 h->s.error_count += h->thread_context[i]->s.error_count;
2709 }
2710}
2711
2712
30317501 2713static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
MN
2714 MpegEncContext * const s = &h->s;
2715 AVCodecContext * const avctx= s->avctx;
2716 int buf_index=0;
afebe2f7
2717 H264Context *hx; ///< thread context
2718 int context_count = 0;
74b14aac 2719 int next_avc= h->is_avc ? 0 : buf_size;
afebe2f7
2720
2721 h->max_contexts = avctx->thread_count;
377ec888 2722#if 0
eb60dddc 2723 int i;
96b6ace2
MN
2724 for(i=0; i<50; i++){
2725 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
0da71265
MN
2726 }
2727#endif
66a4b2c1 2728 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7 2729 h->current_slice = 0;
12d96de3 2730 if (!s->first_field)
f6e3c460 2731 s->current_picture_ptr= NULL;
9c095463 2732 ff_h264_reset_sei(h);
66a4b2c1
MN
2733 }
2734
0da71265
MN
2735 for(;;){
2736 int consumed;
2737 int dst_length;
2738 int bit_length;
30317501 2739 const uint8_t *ptr;
4770b1b4 2740 int i, nalsize = 0;
afebe2f7 2741 int err;
115329f1 2742
74b14aac 2743 if(buf_index >= next_avc) {
1c48415b
2744 if(buf_index >= buf_size) break;
2745 nalsize = 0;
2746 for(i = 0; i < h->nal_length_size; i++)
2747 nalsize = (nalsize << 8) | buf[buf_index++];
8d8409ca 2748 if(nalsize <= 1 || nalsize > buf_size - buf_index){
1c48415b
2749 if(nalsize == 1){
2750 buf_index++;
2751 continue;
2752 }else{
2753 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2754 break;
2755 }
2756 }
74b14aac 2757 next_avc= buf_index + nalsize;
1c48415b
2758 } else {
2759 // start code prefix search
52255d17 2760 for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
2761 // This should always succeed in the first iteration.
2762 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2763 break;
8b031359 2764 }
115329f1 2765
1c48415b 2766 if(buf_index+3 >= buf_size) break;
115329f1 2767
1c48415b 2768 buf_index+=3;
52255d17 2769 if(buf_index >= next_avc) continue;
1c48415b 2770 }
115329f1 2771
afebe2f7
2772 hx = h->thread_context[context_count];
2773
74b14aac 2774 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429 2775 if (ptr==NULL || dst_length < 0){
ac658be5
FOL
2776 return -1;
2777 }
3566042a
MN
2778 i= buf_index + consumed;
2779 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
2780 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
2781 s->workaround_bugs |= FF_BUG_TRUNCATED;
2782
2783 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
6ac9696e 2784 while(ptr[dst_length - 1] == 0 && dst_length > 0)
c4da83fb 2785 dst_length--;
3566042a 2786 }
1790a5e9 2787 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
MN
2788
2789 if(s->avctx->debug&FF_DEBUG_STARTCODE){
afebe2f7 2790 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
0da71265 2791 }
115329f1 2792
74b14aac 2793 if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d 2794 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1 2795 }
4770b1b4 2796
0da71265
MN
2797 buf_index += consumed;
2798
755bfeab 2799 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
8c3eba7c 2800 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
0da71265 2801 continue;
115329f1 2802
afebe2f7
2803 again:
2804 err = 0;
2805 switch(hx->nal_unit_type){
0da71265 2806 case NAL_IDR_SLICE:
afebe2f7
2807 if (h->nal_unit_type != NAL_IDR_SLICE) {
2808 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2809 return -1;
2810 }
3b66c4c5 2811 idr(h); //FIXME ensure we don't lose frames if there is reordering
0da71265 2812 case NAL_SLICE:
afebe2f7
2813 init_get_bits(&hx->s.gb, ptr, bit_length);
2814 hx->intra_gb_ptr=
2815 hx->inter_gb_ptr= &hx->s.gb;
2816 hx->s.data_partitioning = 0;
2817
2818 if((err = decode_slice_header(hx, h)))
2819 break;
2820
dd0cd3d2
RC
2821 avctx->profile = hx->sps.profile_idc;
2822 avctx->level = hx->sps.level_idc;
2823
3bccd93a
SW
2824 if (h->current_slice == 1) {
2825 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
6026a096 2826 return -1;
3bccd93a
SW
2827 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2828 ff_vdpau_h264_picture_start(s);
6026a096
GB
2829 }
2830
37a558fe
IS
2831 s->current_picture_ptr->key_frame |=
2832 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2833 (h->sei_recovery_frame_cnt >= 0);
afebe2f7
2834 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
2835 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2836 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2837 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
369122dd 2838 && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
MN
2839 if(avctx->hwaccel) {
2840 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2841 return -1;
2842 }else
0d3d172f 2843 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd 2844 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
CEH
2845 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2846 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd 2847 }else
f2c214a1 2848 context_count++;
369122dd 2849 }
0da71265
MN
2850 break;
2851 case NAL_DPA:
afebe2f7
2852 init_get_bits(&hx->s.gb, ptr, bit_length);
2853 hx->intra_gb_ptr=
2854 hx->inter_gb_ptr= NULL;
0410ee8f
AS
2855
2856 if ((err = decode_slice_header(hx, h)) < 0)
2857 break;
2858
dd0cd3d2
RC
2859 avctx->profile = hx->sps.profile_idc;
2860 avctx->level = hx->sps.level_idc;
2861
afebe2f7 2862 hx->s.data_partitioning = 1;
115329f1 2863
0da71265
MN
2864 break;
2865 case NAL_DPB:
afebe2f7
2866 init_get_bits(&hx->intra_gb, ptr, bit_length);
2867 hx->intra_gb_ptr= &hx->intra_gb;
0da71265
MN
2868 break;
2869 case NAL_DPC:
afebe2f7
2870 init_get_bits(&hx->inter_gb, ptr, bit_length);
2871 hx->inter_gb_ptr= &hx->inter_gb;
8b92b792 2872
afebe2f7 2873 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03 2874 && s->context_initialized
e0111b32 2875 && s->hurry_up < 5
afebe2f7 2876 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2877 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2878 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
e0111b32 2879 && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7 2880 context_count++;
0da71265
MN
2881 break;
2882 case NAL_SEI:
cdd10689 2883 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2884 ff_h264_decode_sei(h);
0da71265
MN
2885 break;
2886 case NAL_SPS:
2887 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2888 ff_h264_decode_seq_parameter_set(h);
115329f1 2889
0da71265
MN
2890 if(s->flags& CODEC_FLAG_LOW_DELAY)
2891 s->low_delay=1;
115329f1 2892
a18030bb
LM
2893 if(avctx->has_b_frames < 2)
2894 avctx->has_b_frames= !s->low_delay;
0da71265
MN
2895 break;
2896 case NAL_PPS:
2897 init_get_bits(&s->gb, ptr, bit_length);
115329f1 2898
1790a5e9 2899 ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
MN
2900
2901 break;
ab470fa7
LM
2902 case NAL_AUD:
2903 case NAL_END_SEQUENCE:
2904 case NAL_END_STREAM:
2905 case NAL_FILLER_DATA:
2906 case NAL_SPS_EXT:
2907 case NAL_AUXILIARY_SLICE:
0da71265 2908 break;
bb270c08 2909 default:
4ad04da2 2910 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1 2911 }
115329f1 2912
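        /* Slices are only queued above; once max_contexts of them have been
         * collected they are decoded in one go (in parallel when slice
         * threading is enabled). */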
afebe2f7
2913 if(context_count == h->max_contexts) {
2914 execute_decode_slices(h, context_count);
2915 context_count = 0;
2916 }
2917
2918 if (err < 0)
2919 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2920 else if(err == 1) {
2921 /* Slice could not be decoded in parallel mode, copy down
2922 * NAL unit stuff to context 0 and restart. Note that
1412060e 2923 * rbsp_buffer is not transferred, but since we no longer
afebe2f7
2924 * run in parallel mode this should not be an issue. */
2925 h->nal_unit_type = hx->nal_unit_type;
2926 h->nal_ref_idc = hx->nal_ref_idc;
2927 hx = h;
2928 goto again;
2929 }
2930 }
2931 if(context_count)
2932 execute_decode_slices(h, context_count);
0da71265
MN
2933 return buf_index;
2934}
2935
2936/**
3b66c4c5 2937 * returns the number of bytes consumed for building the current frame
0da71265
MN
2938 */
2939static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
755bfeab 2940 if(pos==0) pos=1; //avoid infinite loops (I doubt this is needed, but just in case)
0da71265
MN
 2941 if(pos+10>buf_size) pos=buf_size; // within 10 bytes of the end: treat the whole buffer as consumed
2942
2943 return pos;
0da71265
MN
2944}
2945
115329f1 2946static int decode_frame(AVCodecContext *avctx,
0da71265 2947 void *data, int *data_size,
7a00bbad 2948 AVPacket *avpkt)
0da71265 2949{
7a00bbad
TB
2950 const uint8_t *buf = avpkt->data;
2951 int buf_size = avpkt->size;
0da71265
MN
2952 H264Context *h = avctx->priv_data;
2953 MpegEncContext *s = &h->s;
115329f1 2954 AVFrame *pict = data;
0da71265 2955 int buf_index;
115329f1 2956
0da71265 2957 s->flags= avctx->flags;
303e50e6 2958 s->flags2= avctx->flags2;
0da71265 2959
1412060e 2960 /* end of stream, output what is still in the buffers */
0da71265 2961 if (buf_size == 0) {
97bbb885
MN
2962 Picture *out;
2963 int i, out_idx;
2964
2965//FIXME factorize this with the output code below
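        /* Drain the reorder buffer at end of stream: output the delayed picture
         * with the smallest POC, not looking past the first keyframe or MMCO reset. */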
2966 out = h->delayed_pic[0];
2967 out_idx = 0;
c173a088 2968 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
MN
2969 if(h->delayed_pic[i]->poc < out->poc){
2970 out = h->delayed_pic[i];
2971 out_idx = i;
2972 }
2973
2974 for(i=out_idx; h->delayed_pic[i]; i++)
2975 h->delayed_pic[i] = h->delayed_pic[i+1];
2976
2977 if(out){
2978 *data_size = sizeof(AVFrame);
2979 *pict= *(AVFrame*)out;
2980 }
2981
0da71265
MN
2982 return 0;
2983 }
115329f1 2984
0da71265 2985 buf_index=decode_nal_units(h, buf, buf_size);
115329f1 2986 if(buf_index < 0)
0da71265
MN
2987 return -1;
2988
56c70e1d 2989 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
1c746a49 2990 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
56c70e1d
MN
2991 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
2992 return -1;
2993 }
2994
66a4b2c1
MN
2995 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
2996 Picture *out = s->current_picture_ptr;
2997 Picture *cur = s->current_picture_ptr;
44be1d64 2998 int i, pics, out_of_order, out_idx;
115329f1 2999
256299d3 3000 field_end(h);
66a4b2c1 3001
357282c6 3002 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
12d96de3
JD
3003 /* Wait for second field. */
3004 *data_size = 0;
3005
3006 } else {
b19d493f 3007 cur->interlaced_frame = 0;
b09a7c05
3008 cur->repeat_pict = 0;
3009
3010 /* Signal interlacing information externally. */
 3011 /* Prefer picture timing SEI information, if present, over the decoding process used. */
70e01da3 3012
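            /* pic_struct selects both interlaced_frame and repeat_pict below;
             * repeat_pict is expressed in extra fields (half-frame units). */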
b09a7c05
3013 if(h->sps.pic_struct_present_flag){
3014 switch (h->sei_pic_struct)
3015 {
b19d493f
HY
3016 case SEI_PIC_STRUCT_FRAME:
3017 break;
3018 case SEI_PIC_STRUCT_TOP_FIELD:
3019 case SEI_PIC_STRUCT_BOTTOM_FIELD:
3020 cur->interlaced_frame = 1;
3021 break;
3022 case SEI_PIC_STRUCT_TOP_BOTTOM:
3023 case SEI_PIC_STRUCT_BOTTOM_TOP:
3024 if (FIELD_OR_MBAFF_PICTURE)
3025 cur->interlaced_frame = 1;
3026 else
 3027 // try to flag soft telecine as progressive
3028 cur->interlaced_frame = h->prev_interlaced_frame;
3029 break;
b09a7c05
3030 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
3031 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
3032 // Signal the possibility of telecined film externally (pic_struct 5,6)
 3033 // Based on these hints, applications can decide whether to apply deinterlacing.
3034 cur->repeat_pict = 1;
b09a7c05
3035 break;
3036 case SEI_PIC_STRUCT_FRAME_DOUBLING:
 3037 // Force progressive here, as doubling an interlaced frame is a bad idea.
b09a7c05
3038 cur->repeat_pict = 2;
3039 break;
3040 case SEI_PIC_STRUCT_FRAME_TRIPLING:
b09a7c05
3041 cur->repeat_pict = 4;
3042 break;
3043 }
b19d493f
HY
3044
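            /* SEI clock timestamps, when present, take precedence: ct_type 1
             * indicates interlaced source material, ct_type 0 progressive. */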
3045 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
3046 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
b09a7c05
3047 }else{
 3048 /* Derive the interlacing flag from the decoding process used. */
3049 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
3050 }
b19d493f 3051 h->prev_interlaced_frame = cur->interlaced_frame;
b09a7c05
3052
3053 if (cur->field_poc[0] != cur->field_poc[1]){
 3054 /* Derive top_field_first from the field POCs. */
3055 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
3056 }else{
3057 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
 3058 /* Use picture timing SEI information. Even if it refers to a past frame, it is better than nothing. */
3059 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
3060 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
3061 cur->top_field_first = 1;
3062 else
3063 cur->top_field_first = 0;
3064 }else{
3065 /* Most likely progressive */
3066 cur->top_field_first = 0;
3067 }
3068 }
84a8596d 3069
f6e3c460 3070 //FIXME do something with unavailable reference frames
8b92b792 3071
f6e3c460 3072 /* Sort B-frames into display order */
2f944356 3073
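            /* Decoded frames enter delayed_pic[] and leave in POC order; the
             * reorder depth (has_b_frames) follows num_reorder_frames when the
             * SPS provides bitstream restrictions, and falls back to the worst
             * case under strict standard compliance. */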
f6e3c460
3074 if(h->sps.bitstream_restriction_flag
3075 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
3076 s->avctx->has_b_frames = h->sps.num_reorder_frames;
3077 s->low_delay = 0;
3078 }
9170e345 3079
fb19e144
MN
3080 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
3081 && !h->sps.bitstream_restriction_flag){
3082 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
3083 s->low_delay= 0;
3084 }
3085
f6e3c460
3086 pics = 0;
3087 while(h->delayed_pic[pics]) pics++;
9170e345 3088
64b9d48f 3089 assert(pics <= MAX_DELAYED_PIC_COUNT);
4e4d983e 3090
f6e3c460
3091 h->delayed_pic[pics++] = cur;
3092 if(cur->reference == 0)
3093 cur->reference = DELAYED_PIC_REF;
2f944356 3094
f6e3c460
3095 out = h->delayed_pic[0];
3096 out_idx = 0;
c173a088 3097 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
f6e3c460
3098 if(h->delayed_pic[i]->poc < out->poc){
3099 out = h->delayed_pic[i];
3100 out_idx = i;
3101 }
44be1d64
MN
3102 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
3103 h->outputed_poc= INT_MIN;
3104 out_of_order = out->poc < h->outputed_poc;
1b547aba 3105
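            /* A picture coming out with a POC below the last one output while the
             * reorder buffer is already full means the advertised delay was too
             * small: raise has_b_frames and leave low-delay mode. */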
f6e3c460
3106 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
3107 { }
2a811db2 3108 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
f6e3c460 3109 || (s->low_delay &&
44be1d64 3110 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
9701840b 3111 || cur->pict_type == FF_B_TYPE)))
f6e3c460
3112 {
3113 s->low_delay = 0;
3114 s->avctx->has_b_frames++;
f6e3c460 3115 }
f6e3c460
3116
3117 if(out_of_order || pics > s->avctx->has_b_frames){
3eaa6d0e 3118 out->reference &= ~DELAYED_PIC_REF;
f6e3c460
3119 for(i=out_idx; h->delayed_pic[i]; i++)
3120 h->delayed_pic[i] = h->delayed_pic[i+1];
3121 }
3eaa6d0e 3122 if(!out_of_order && pics > s->avctx->has_b_frames){
f6e3c460 3123 *data_size = sizeof(AVFrame);
df8a7dff 3124
44be1d64
MN
3125 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
3126 h->outputed_poc = INT_MIN;
3127 } else
67e362ca 3128 h->outputed_poc = out->poc;
f6e3c460 3129 *pict= *(AVFrame*)out;
3eaa6d0e 3130 }else{
f6e3c460 3131 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3eaa6d0e 3132 }
12d96de3 3133 }
a4dae92b
LM
3134 }
3135
3165e258 3136 assert(pict->data[0] || !*data_size);
4e4d983e 3137 ff_print_debug_info(s, pict);
0da71265 3138//printf("out %d\n", (int)pict->data[0]);
0da71265 3139
0da71265
MN
3140 return get_consumed_bytes(s, buf_index, buf_size);
3141}
3142#if 0
3143static inline void fill_mb_avail(H264Context *h){
3144 MpegEncContext * const s = &h->s;
7bc9090a 3145 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265
MN
3146
3147 if(s->mb_y){
7bc9090a
MN
3148 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
3149 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
3150 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
0da71265
MN
3151 }else{
3152 h->mb_avail[0]=
3153 h->mb_avail[1]=
3154 h->mb_avail[2]= 0;
3155 }
3156 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
3157 h->mb_avail[4]= 1; //FIXME move out
3158 h->mb_avail[5]= 0; //FIXME move out
3159}
3160#endif
3161
07e4e3ea 3162#ifdef TEST
6bf398a0 3163#undef printf
d04d5bcd 3164#undef random
0da71265
MN