Fix large timebases.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
188d3c51 34#include "h264_mvpred.h"
26b4fe82 35#include "h264_parser.h"
0da71265 36#include "golomb.h"
199436b9 37#include "mathops.h"
626464fb 38#include "rectangle.h"
369122dd 39#include "vdpau_internal.h"
0da71265 40
e5017ab8
LA
41#include "cabac.h"
42
2848ce84 43//#undef NDEBUG
0da71265
MN
44#include <assert.h>
45
/** Lookup table: rem6[q] == q % 6 for q in 0..51. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
49
/** Lookup table: div6[q] == q / 6 for q in 0..51. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
53
903d58f6 54void ff_h264_write_back_intra_pred_mode(H264Context *h){
64514ee8 55 const int mb_xy= h->mb_xy;
0da71265
MN
56
57 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
58 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
59 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
60 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
61 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
62 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
63 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
64}
65
66/**
67 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
68 */
2bedc0e8
MN
69int ff_h264_check_intra4x4_pred_mode(H264Context *h){
70 MpegEncContext * const s = &h->s;
71 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
72 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
73 int i;
74
75 if(!(h->top_samples_available&0x8000)){
76 for(i=0; i<4; i++){
77 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
78 if(status<0){
79 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
80 return -1;
81 } else if(status){
82 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
83 }
84 }
85 }
86
87 if((h->left_samples_available&0x8888)!=0x8888){
88 static const int mask[4]={0x8000,0x2000,0x80,0x20};
89 for(i=0; i<4; i++){
90 if(!(h->left_samples_available&mask[i])){
91 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
92 if(status<0){
93 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
94 return -1;
95 } else if(status){
96 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
97 }
98 }
99 }
100 }
101
102 return 0;
103} //FIXME cleanup like ff_h264_check_intra_pred_mode
104
105/**
106 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
107 */
903d58f6 108int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
0da71265
MN
109 MpegEncContext * const s = &h->s;
110 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
111 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 112
43ff0714 113 if(mode > 6U) {
5175b937 114 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 115 return -1;
5175b937 116 }
115329f1 117
0da71265
MN
118 if(!(h->top_samples_available&0x8000)){
119 mode= top[ mode ];
120 if(mode<0){
9b879566 121 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
122 return -1;
123 }
124 }
115329f1 125
d1d10e91 126 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 127 mode= left[ mode ];
d1d10e91
MN
128 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
129 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
130 }
0da71265 131 if(mode<0){
9b879566 132 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 133 return -1;
115329f1 134 }
0da71265
MN
135 }
136
137 return mode;
138}
139
1790a5e9 140const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
141 int i, si, di;
142 uint8_t *dst;
24456882 143 int bufidx;
0da71265 144
bb270c08 145// src[0]&0x80; //forbidden bit
0da71265
MN
146 h->nal_ref_idc= src[0]>>5;
147 h->nal_unit_type= src[0]&0x1F;
148
149 src++; length--;
115329f1 150#if 0
0da71265
MN
151 for(i=0; i<length; i++)
152 printf("%2X ", src[i]);
153#endif
e08715d3 154
b250f9c6
AJ
155#if HAVE_FAST_UNALIGNED
156# if HAVE_FAST_64BIT
e08715d3
MN
157# define RS 7
158 for(i=0; i+1<length; i+=9){
3878be31 159 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
160# else
161# define RS 3
162 for(i=0; i+1<length; i+=5){
3878be31 163 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
164# endif
165 continue;
166 if(i>0 && !src[i]) i--;
167 while(src[i]) i++;
168#else
169# define RS 0
0da71265
MN
170 for(i=0; i+1<length; i+=2){
171 if(src[i]) continue;
172 if(i>0 && src[i-1]==0) i--;
e08715d3 173#endif
0da71265
MN
174 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
175 if(src[i+2]!=3){
176 /* startcode, so we must be past the end */
177 length=i;
178 }
179 break;
180 }
abb27cfb 181 i-= RS;
0da71265
MN
182 }
183
184 if(i>=length-1){ //no escaped 0
185 *dst_length= length;
186 *consumed= length+1; //+1 for the header
115329f1 187 return src;
0da71265
MN
188 }
189
24456882 190 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 191 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 192 dst= h->rbsp_buffer[bufidx];
0da71265 193
ac658be5
FOL
194 if (dst == NULL){
195 return NULL;
196 }
197
3b66c4c5 198//printf("decoding esc\n");
593af7cd
MN
199 memcpy(dst, src, i);
200 si=di=i;
201 while(si+2<length){
0da71265 202 //remove escapes (very rare 1:2^22)
593af7cd
MN
203 if(src[si+2]>3){
204 dst[di++]= src[si++];
205 dst[di++]= src[si++];
206 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
207 if(src[si+2]==3){ //escape
208 dst[di++]= 0;
209 dst[di++]= 0;
210 si+=3;
c8470cc1 211 continue;
0da71265 212 }else //next start code
593af7cd 213 goto nsc;
0da71265
MN
214 }
215
216 dst[di++]= src[si++];
217 }
593af7cd
MN
218 while(si<length)
219 dst[di++]= src[si++];
220nsc:
0da71265 221
d4369630
AS
222 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
223
0da71265
MN
224 *dst_length= di;
225 *consumed= si + 1;//+1 for the header
90b5b51e 226//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
227 return dst;
228}
229
1790a5e9 230int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
231 int v= *src;
232 int r;
233
a9c9a240 234 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
235
236 for(r=1; r<9; r++){
237 if(v&1) return r;
238 v>>=1;
239 }
240 return 0;
241}
242
243/**
1412060e 244 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
245 * @param qp quantization parameter
246 */
239ea04c 247static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
248#define stride 16
249 int i;
250 int temp[16]; //FIXME check if this is a good idea
251 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
252 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
253
254//memset(block, 64, 2*256);
255//return;
256 for(i=0; i<4; i++){
257 const int offset= y_offset[i];
258 const int z0= block[offset+stride*0] + block[offset+stride*4];
259 const int z1= block[offset+stride*0] - block[offset+stride*4];
260 const int z2= block[offset+stride*1] - block[offset+stride*5];
261 const int z3= block[offset+stride*1] + block[offset+stride*5];
262
263 temp[4*i+0]= z0+z3;
264 temp[4*i+1]= z1+z2;
265 temp[4*i+2]= z1-z2;
266 temp[4*i+3]= z0-z3;
267 }
268
269 for(i=0; i<4; i++){
270 const int offset= x_offset[i];
271 const int z0= temp[4*0+i] + temp[4*2+i];
272 const int z1= temp[4*0+i] - temp[4*2+i];
273 const int z2= temp[4*1+i] - temp[4*3+i];
274 const int z3= temp[4*1+i] + temp[4*3+i];
275
1412060e 276 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
277 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
278 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
279 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
280 }
281}
282
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; relies on the stride macro still being defined here.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif

#undef xStride
#undef stride
325
239ea04c 326static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
327 const int stride= 16*2;
328 const int xStride= 16;
329 int a,b,c,d,e;
330
331 a= block[stride*0 + xStride*0];
332 b= block[stride*0 + xStride*1];
333 c= block[stride*1 + xStride*0];
334 d= block[stride*1 + xStride*1];
335
336 e= a-b;
337 a= a+b;
338 b= c-d;
339 c= c+d;
340
239ea04c
LM
341 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
342 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
343 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
344 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
345}
346
#if 0
/**
 * Applies a 2x2 butterfly transform to the four chroma DC values in block,
 * without scaling. Disabled code.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
0da71265 369
0da71265
MN
370static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
371 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
372 int src_x_offset, int src_y_offset,
373 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
374 MpegEncContext * const s = &h->s;
375 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 376 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 377 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
378 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
379 uint8_t * src_cb, * src_cr;
380 int extra_width= h->emu_edge_width;
381 int extra_height= h->emu_edge_height;
0da71265
MN
382 int emu=0;
383 const int full_mx= mx>>2;
384 const int full_my= my>>2;
fbd312fd 385 const int pic_width = 16*s->mb_width;
0d43dd8c 386 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 387
0da71265
MN
388 if(mx&7) extra_width -= 3;
389 if(my&7) extra_height -= 3;
115329f1
DB
390
391 if( full_mx < 0-extra_width
392 || full_my < 0-extra_height
393 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 394 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
395 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
396 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
397 emu=1;
398 }
115329f1 399
5d18eaad 400 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 401 if(!square){
5d18eaad 402 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 403 }
115329f1 404
49fb20cb 405 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 406
0d43dd8c 407 if(MB_FIELD){
5d18eaad 408 // chroma offset when predicting from a field of opposite parity
2143b118 409 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
410 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
411 }
412 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
413 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
414
0da71265 415 if(emu){
5d18eaad 416 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
417 src_cb= s->edge_emu_buffer;
418 }
5d18eaad 419 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
420
421 if(emu){
5d18eaad 422 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
423 src_cr= s->edge_emu_buffer;
424 }
5d18eaad 425 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
426}
427
9f2d1b4f 428static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
429 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
430 int x_offset, int y_offset,
431 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
432 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
433 int list0, int list1){
434 MpegEncContext * const s = &h->s;
435 qpel_mc_func *qpix_op= qpix_put;
436 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 437
5d18eaad
LM
438 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
439 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
440 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 441 x_offset += 8*s->mb_x;
0d43dd8c 442 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 443
0da71265 444 if(list0){
1924f3ce 445 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
446 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
447 dest_y, dest_cb, dest_cr, x_offset, y_offset,
448 qpix_op, chroma_op);
449
450 qpix_op= qpix_avg;
451 chroma_op= chroma_avg;
452 }
453
454 if(list1){
1924f3ce 455 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
456 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
457 dest_y, dest_cb, dest_cr, x_offset, y_offset,
458 qpix_op, chroma_op);
459 }
460}
461
9f2d1b4f
LM
462static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
463 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
464 int x_offset, int y_offset,
465 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
466 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
467 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
468 int list0, int list1){
469 MpegEncContext * const s = &h->s;
470
5d18eaad
LM
471 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
472 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
473 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 474 x_offset += 8*s->mb_x;
0d43dd8c 475 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 476
9f2d1b4f
LM
477 if(list0 && list1){
478 /* don't optimize for luma-only case, since B-frames usually
479 * use implicit weights => chroma too. */
480 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
481 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
482 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
483 int refn0 = h->ref_cache[0][ scan8[n] ];
484 int refn1 = h->ref_cache[1][ scan8[n] ];
485
486 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
487 dest_y, dest_cb, dest_cr,
488 x_offset, y_offset, qpix_put, chroma_put);
489 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
490 tmp_y, tmp_cb, tmp_cr,
491 x_offset, y_offset, qpix_put, chroma_put);
492
493 if(h->use_weight == 2){
494 int weight0 = h->implicit_weight[refn0][refn1];
495 int weight1 = 64 - weight0;
5d18eaad
LM
496 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
497 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
498 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 499 }else{
5d18eaad 500 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 501 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 502 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 503 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 504 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 505 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 506 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 507 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 508 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
509 }
510 }else{
511 int list = list1 ? 1 : 0;
512 int refn = h->ref_cache[list][ scan8[n] ];
513 Picture *ref= &h->ref_list[list][refn];
514 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
515 dest_y, dest_cb, dest_cr, x_offset, y_offset,
516 qpix_put, chroma_put);
517
5d18eaad 518 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
519 h->luma_weight[list][refn], h->luma_offset[list][refn]);
520 if(h->use_weight_chroma){
5d18eaad 521 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 522 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 523 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
524 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
525 }
526 }
527}
528
529static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
530 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
531 int x_offset, int y_offset,
532 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
533 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 534 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
535 int list0, int list1){
536 if((h->use_weight==2 && list0 && list1
537 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
538 || h->use_weight==1)
539 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
540 x_offset, y_offset, qpix_put, chroma_put,
541 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
542 else
543 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
544 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
545}
546
513fbd8e
LM
547static inline void prefetch_motion(H264Context *h, int list){
548 /* fetch pixels for estimated mv 4 macroblocks ahead
549 * optimized for 64byte cache lines */
550 MpegEncContext * const s = &h->s;
551 const int refn = h->ref_cache[list][scan8[0]];
552 if(refn >= 0){
553 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
554 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
555 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 556 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
557 s->dsp.prefetch(src[0]+off, s->linesize, 4);
558 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
559 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
560 }
561}
562
0da71265
MN
563static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
564 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
565 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
566 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 567 MpegEncContext * const s = &h->s;
64514ee8 568 const int mb_xy= h->mb_xy;
0da71265 569 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 570
0da71265 571 assert(IS_INTER(mb_type));
115329f1 572
513fbd8e
LM
573 prefetch_motion(h, 0);
574
0da71265
MN
575 if(IS_16X16(mb_type)){
576 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
577 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
7231ccf4 578 weight_op, weight_avg,
0da71265
MN
579 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
580 }else if(IS_16X8(mb_type)){
581 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
582 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 583 &weight_op[1], &weight_avg[1],
0da71265
MN
584 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
585 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
586 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 587 &weight_op[1], &weight_avg[1],
0da71265
MN
588 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
589 }else if(IS_8X16(mb_type)){
5d18eaad 590 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 591 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 592 &weight_op[2], &weight_avg[2],
0da71265 593 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 594 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 595 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 596 &weight_op[2], &weight_avg[2],
0da71265
MN
597 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
598 }else{
599 int i;
115329f1 600
0da71265
MN
601 assert(IS_8X8(mb_type));
602
603 for(i=0; i<4; i++){
604 const int sub_mb_type= h->sub_mb_type[i];
605 const int n= 4*i;
606 int x_offset= (i&1)<<2;
607 int y_offset= (i&2)<<1;
608
609 if(IS_SUB_8X8(sub_mb_type)){
610 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
611 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 612 &weight_op[3], &weight_avg[3],
0da71265
MN
613 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
614 }else if(IS_SUB_8X4(sub_mb_type)){
615 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
616 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 617 &weight_op[4], &weight_avg[4],
0da71265
MN
618 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
619 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
620 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 621 &weight_op[4], &weight_avg[4],
0da71265
MN
622 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
623 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 624 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 625 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 626 &weight_op[5], &weight_avg[5],
0da71265 627 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 628 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 629 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 630 &weight_op[5], &weight_avg[5],
0da71265
MN
631 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
632 }else{
633 int j;
634 assert(IS_SUB_4X4(sub_mb_type));
635 for(j=0; j<4; j++){
636 int sub_x_offset= x_offset + 2*(j&1);
637 int sub_y_offset= y_offset + (j&2);
638 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
639 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 640 &weight_op[6], &weight_avg[6],
0da71265
MN
641 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
642 }
643 }
644 }
645 }
513fbd8e
LM
646
647 prefetch_motion(h, 1);
0da71265
MN
648}
649
0da71265 650
0da71265 651static void free_tables(H264Context *h){
7978debd 652 int i;
afebe2f7 653 H264Context *hx;
0da71265 654 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
655 av_freep(&h->chroma_pred_mode_table);
656 av_freep(&h->cbp_table);
9e528114
LA
657 av_freep(&h->mvd_table[0]);
658 av_freep(&h->mvd_table[1]);
5ad984c9 659 av_freep(&h->direct_table);
0da71265
MN
660 av_freep(&h->non_zero_count);
661 av_freep(&h->slice_table_base);
662 h->slice_table= NULL;
c988f975 663 av_freep(&h->list_counts);
e5017ab8 664
0da71265
MN
665 av_freep(&h->mb2b_xy);
666 av_freep(&h->mb2b8_xy);
9f2d1b4f 667
6752dd5a 668 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
669 hx = h->thread_context[i];
670 if(!hx) continue;
671 av_freep(&hx->top_borders[1]);
672 av_freep(&hx->top_borders[0]);
673 av_freep(&hx->s.obmc_scratchpad);
d2d5e067
AS
674 av_freep(&hx->rbsp_buffer[1]);
675 av_freep(&hx->rbsp_buffer[0]);
eda4ea4e
MS
676 hx->rbsp_buffer_size[0] = 0;
677 hx->rbsp_buffer_size[1] = 0;
d2d5e067 678 if (i) av_freep(&h->thread_context[i]);
afebe2f7 679 }
0da71265
MN
680}
681
239ea04c
LM
682static void init_dequant8_coeff_table(H264Context *h){
683 int i,q,x;
548a1c8a 684 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
685 h->dequant8_coeff[0] = h->dequant8_buffer[0];
686 h->dequant8_coeff[1] = h->dequant8_buffer[1];
687
688 for(i=0; i<2; i++ ){
689 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
690 h->dequant8_coeff[1] = h->dequant8_buffer[0];
691 break;
692 }
693
694 for(q=0; q<52; q++){
d9ec210b
DP
695 int shift = div6[q];
696 int idx = rem6[q];
239ea04c 697 for(x=0; x<64; x++)
548a1c8a
LM
698 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
699 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
700 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
701 }
702 }
703}
704
705static void init_dequant4_coeff_table(H264Context *h){
706 int i,j,q,x;
ab2e3e2c 707 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
708 for(i=0; i<6; i++ ){
709 h->dequant4_coeff[i] = h->dequant4_buffer[i];
710 for(j=0; j<i; j++){
711 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
712 h->dequant4_coeff[i] = h->dequant4_buffer[j];
713 break;
714 }
715 }
716 if(j<i)
717 continue;
718
719 for(q=0; q<52; q++){
d9ec210b
DP
720 int shift = div6[q] + 2;
721 int idx = rem6[q];
239ea04c 722 for(x=0; x<16; x++)
ab2e3e2c
LM
723 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
724 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
725 h->pps.scaling_matrix4[i][x]) << shift;
726 }
727 }
728}
729
730static void init_dequant_tables(H264Context *h){
731 int i,x;
732 init_dequant4_coeff_table(h);
733 if(h->pps.transform_8x8_mode)
734 init_dequant8_coeff_table(h);
735 if(h->sps.transform_bypass){
736 for(i=0; i<6; i++)
737 for(x=0; x<16; x++)
738 h->dequant4_coeff[i][0][x] = 1<<6;
739 if(h->pps.transform_8x8_mode)
740 for(i=0; i<2; i++)
741 for(x=0; x<64; x++)
742 h->dequant8_coeff[i][0][x] = 1<<6;
743 }
744}
745
746
903d58f6 747int ff_h264_alloc_tables(H264Context *h){
0da71265 748 MpegEncContext * const s = &h->s;
7bc9090a 749 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 750 int x,y;
0da71265 751
d31dbec3 752 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
e5017ab8 753
c988f975 754 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
d31dbec3
RP
755 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
756 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265 757
d31dbec3
RP
758 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
759 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
760 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
761 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
c988f975 762 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
e5017ab8 763
b735aeea 764 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 765 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 766
d31dbec3
RP
767 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
768 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
MN
769 for(y=0; y<s->mb_height; y++){
770 for(x=0; x<s->mb_width; x++){
7bc9090a 771 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
772 const int b_xy = 4*x + 4*y*h->b_stride;
773 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 774
0da71265
MN
775 h->mb2b_xy [mb_xy]= b_xy;
776 h->mb2b8_xy[mb_xy]= b8_xy;
777 }
778 }
9f2d1b4f 779
9c6221ae
GV
780 s->obmc_scratchpad = NULL;
781
56edbd81
LM
782 if(!h->dequant4_coeff[0])
783 init_dequant_tables(h);
784
0da71265
MN
785 return 0;
786fail:
787 free_tables(h);
788 return -1;
789}
790
afebe2f7
791/**
792 * Mimic alloc_tables(), but for every context thread.
793 */
794static void clone_tables(H264Context *dst, H264Context *src){
795 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
796 dst->non_zero_count = src->non_zero_count;
797 dst->slice_table = src->slice_table;
798 dst->cbp_table = src->cbp_table;
799 dst->mb2b_xy = src->mb2b_xy;
800 dst->mb2b8_xy = src->mb2b8_xy;
801 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
802 dst->mvd_table[0] = src->mvd_table[0];
803 dst->mvd_table[1] = src->mvd_table[1];
804 dst->direct_table = src->direct_table;
fb823b77 805 dst->list_counts = src->list_counts;
afebe2f7 806
afebe2f7
807 dst->s.obmc_scratchpad = NULL;
808 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
809}
810
811/**
812 * Init context
813 * Allocate buffers which are not shared amongst multiple threads.
814 */
815static int context_init(H264Context *h){
d31dbec3
RP
816 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
817 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
afebe2f7 818
afebe2f7
819 return 0;
820fail:
821 return -1; // free_tables will clean up for us
822}
823
98a6fff9 824static av_cold void common_init(H264Context *h){
0da71265 825 MpegEncContext * const s = &h->s;
0da71265
MN
826
827 s->width = s->avctx->width;
828 s->height = s->avctx->height;
829 s->codec_id= s->avctx->codec->id;
115329f1 830
c92a30bb 831 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 832
239ea04c 833 h->dequant_coeff_pps= -1;
9a41c2c7 834 s->unrestricted_mv=1;
0da71265 835 s->decode=1; //FIXME
56edbd81 836
a5805aa9
MN
837 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
838
56edbd81
LM
839 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
840 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
841}
842
903d58f6 843av_cold int ff_h264_decode_init(AVCodecContext *avctx){
0da71265
MN
844 H264Context *h= avctx->priv_data;
845 MpegEncContext * const s = &h->s;
846
3edcacde 847 MPV_decode_defaults(s);
115329f1 848
0da71265
MN
849 s->avctx = avctx;
850 common_init(h);
851
852 s->out_format = FMT_H264;
853 s->workaround_bugs= avctx->workaround_bugs;
854
855 // set defaults
0da71265 856// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 857 s->quarter_sample = 1;
47cd974a 858 if(!avctx->has_b_frames)
0da71265 859 s->low_delay= 1;
7a9dba3c 860
580a7465 861 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
0da71265 862
e1e94902 863 ff_h264_decode_init_vlc();
115329f1 864
26165f99
MR
865 if(avctx->extradata_size > 0 && avctx->extradata &&
866 *(char *)avctx->extradata == 1){
4770b1b4
RT
867 h->is_avc = 1;
868 h->got_avcC = 0;
26165f99
MR
869 } else {
870 h->is_avc = 0;
4770b1b4
RT
871 }
872
afebe2f7 873 h->thread_context[0] = h;
18c7be65 874 h->outputed_poc = INT_MIN;
e4b8f1fa 875 h->prev_poc_msb= 1<<16;
9c095463 876 ff_h264_reset_sei(h);
efd8c1f6
MN
877 if(avctx->codec_id == CODEC_ID_H264){
878 if(avctx->ticks_per_frame == 1){
879 s->avctx->time_base.den *=2;
880 }
19df37a8 881 avctx->ticks_per_frame = 2;
efd8c1f6 882 }
0da71265
MN
883 return 0;
884}
885
903d58f6 886int ff_h264_frame_start(H264Context *h){
0da71265
MN
887 MpegEncContext * const s = &h->s;
888 int i;
889
af8aa846
MN
890 if(MPV_frame_start(s, s->avctx) < 0)
891 return -1;
0da71265 892 ff_er_frame_start(s);
3a22d7fa
JD
893 /*
894 * MPV_frame_start uses pict_type to derive key_frame.
895 * This is incorrect for H.264; IDR markings must be used.
1412060e 896 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
897 * See decode_nal_units().
898 */
899 s->current_picture_ptr->key_frame= 0;
c173a088 900 s->current_picture_ptr->mmco_reset= 0;
0da71265
MN
901
902 assert(s->linesize && s->uvlinesize);
903
904 for(i=0; i<16; i++){
905 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 906 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
907 }
908 for(i=0; i<4; i++){
909 h->block_offset[16+i]=
910 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
911 h->block_offset[24+16+i]=
912 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
913 }
914
934b0821
LM
915 /* can't be in alloc_tables because linesize isn't known there.
916 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
917 for(i = 0; i < s->avctx->thread_count; i++)
918 if(!h->thread_context[i]->s.obmc_scratchpad)
919 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
920
921 /* some macroblocks will be accessed before they're available */
afebe2f7 922 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 923 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 924
0da71265 925// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 926
1412060e 927 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
928 // that if we break out due to an error it can be released automatically
929 // in the next MPV_frame_start().
930 // SVQ3 as well as most other codecs have only last/next/current and thus
931 // get released even with set reference, besides SVQ3 and others do not
932 // mark frames as reference later "naturally".
933 if(s->codec_id != CODEC_ID_SVQ3)
934 s->current_picture_ptr->reference= 0;
357282c6
MN
935
936 s->current_picture_ptr->field_poc[0]=
937 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 938 assert(s->current_picture_ptr->long_ref==0);
357282c6 939
af8aa846 940 return 0;
0da71265
MN
941}
942
93cc10fa 943static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e 944 MpegEncContext * const s = &h->s;
0b69d625 945 uint8_t *top_border;
5f7f9719 946 int top_idx = 1;
115329f1 947
53c05b1e
MN
948 src_y -= linesize;
949 src_cb -= uvlinesize;
950 src_cr -= uvlinesize;
951
5f7f9719
MN
952 if(!simple && FRAME_MBAFF){
953 if(s->mb_y&1){
5f7f9719 954 if(!MB_MBAFF){
0b69d625
AS
955 top_border = h->top_borders[0][s->mb_x];
956 AV_COPY128(top_border, src_y + 15*linesize);
49fb20cb 957 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
958 AV_COPY64(top_border+16, src_cb+7*uvlinesize);
959 AV_COPY64(top_border+24, src_cr+7*uvlinesize);
5f7f9719
MN
960 }
961 }
c988f975
MN
962 }else if(MB_MBAFF){
963 top_idx = 0;
964 }else
965 return;
5f7f9719
MN
966 }
967
0b69d625 968 top_border = h->top_borders[top_idx][s->mb_x];
3b66c4c5 969 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 970 // and the line above the bottom macroblock
0b69d625 971 AV_COPY128(top_border, src_y + 16*linesize);
53c05b1e 972
49fb20cb 973 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
974 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
975 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
53c05b1e
MN
976 }
977}
978
93cc10fa 979static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
980 MpegEncContext * const s = &h->s;
981 int temp8, i;
982 uint64_t temp64;
b69378e2
983 int deblock_left;
984 int deblock_top;
985 int mb_xy;
5f7f9719 986 int top_idx = 1;
1e4f1c56
AS
987 uint8_t *top_border_m1;
988 uint8_t *top_border;
5f7f9719
MN
989
990 if(!simple && FRAME_MBAFF){
991 if(s->mb_y&1){
c988f975
MN
992 if(!MB_MBAFF)
993 return;
5f7f9719 994 }else{
5f7f9719
MN
995 top_idx = MB_MBAFF ? 0 : 1;
996 }
5f7f9719 997 }
b69378e2
998
999 if(h->deblocking_filter == 2) {
64514ee8 1000 mb_xy = h->mb_xy;
b69378e2
1001 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
1002 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
1003 } else {
1004 deblock_left = (s->mb_x > 0);
6c805007 1005 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 1006 }
53c05b1e
MN
1007
1008 src_y -= linesize + 1;
1009 src_cb -= uvlinesize + 1;
1010 src_cr -= uvlinesize + 1;
1011
1e4f1c56
AS
1012 top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1013 top_border = h->top_borders[top_idx][s->mb_x];
1014
0b69d625
AS
1015#define XCHG(a,b,xchg)\
1016if (xchg) AV_SWAP64(b,a);\
1017else AV_COPY64(b,a);
d89dc06a 1018
d89dc06a 1019 if(deblock_top){
c988f975 1020 if(deblock_left){
0b69d625 1021 XCHG(top_border_m1+8, src_y -7, 1);
c988f975 1022 }
0b69d625
AS
1023 XCHG(top_border+0, src_y +1, xchg);
1024 XCHG(top_border+8, src_y +9, 1);
cad4368a 1025 if(s->mb_x+1 < s->mb_width){
0b69d625 1026 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
43efd19a 1027 }
53c05b1e 1028 }
53c05b1e 1029
49fb20cb 1030 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 1031 if(deblock_top){
c988f975 1032 if(deblock_left){
0b69d625
AS
1033 XCHG(top_border_m1+16, src_cb -7, 1);
1034 XCHG(top_border_m1+24, src_cr -7, 1);
c988f975 1035 }
0b69d625
AS
1036 XCHG(top_border+16, src_cb+1, 1);
1037 XCHG(top_border+24, src_cr+1, 1);
53c05b1e 1038 }
53c05b1e
MN
1039 }
1040}
1041
/**
 * Reconstruct the current macroblock into the current picture: intra
 * prediction or motion compensation first, then the residual is added.
 *
 * The macroblock position is taken from s->mb_x / s->mb_y / h->mb_xy and its
 * type from s->current_picture.mb_type[].
 *
 * @param h      decoder context
 * @param simple when nonzero, every path guarded by !simple (field
 *               macroblocks, intra PCM, transform bypass) is skipped, chroma
 *               is always processed and the codec is treated as H.264
 */
5a6a6cc7 1042static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
1043 MpegEncContext * const s = &h->s;
1044 const int mb_x= s->mb_x;
1045 const int mb_y= s->mb_y;
64514ee8 1046 const int mb_xy= h->mb_xy;
0da71265
MN
1047 const int mb_type= s->current_picture.mb_type[mb_xy];
1048 uint8_t *dest_y, *dest_cb, *dest_cr;
1049 int linesize, uvlinesize /*dct_offset*/;
1050 int i;
6867a90b 1051 int *block_offset = &h->block_offset[0];
41e4055b 1052 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 1053 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 1054 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 1055 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 1056 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 1057
/* Destination pointers: macroblocks are 16x16 luma and 8x8 chroma samples. */
6120a343
MN
1058 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1059 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1060 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 1061
a957c27b
LM
1062 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1063 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1064
c988f975
MN
1065 h->list_counts[mb_xy]= h->list_count;
1066
/* Field macroblock: double the line sizes, switch to the second half of
 * block_offset[] and, for odd mb_y, move the destination up by 15 luma /
 * 7 chroma lines. */
bd91fee3 1067 if (!simple && MB_FIELD) {
5d18eaad
LM
1068 linesize = h->mb_linesize = s->linesize * 2;
1069 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 1070 block_offset = &h->block_offset[24];
1412060e 1071 if(mb_y&1){ //FIXME move out of this function?
0da71265 1072 dest_y -= s->linesize*15;
6867a90b
LLL
1073 dest_cb-= s->uvlinesize*7;
1074 dest_cr-= s->uvlinesize*7;
0da71265 1075 }
/* In MBAFF frames the cached reference indices of every used list are
 * rewritten as (16+ref)^(s->mb_y&1). */
5d18eaad
LM
1076 if(FRAME_MBAFF) {
1077 int list;
3425501d 1078 for(list=0; list<h->list_count; list++){
5d18eaad
LM
1079 if(!USES_LIST(mb_type, list))
1080 continue;
1081 if(IS_16X16(mb_type)){
1082 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 1083 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
1084 }else{
1085 for(i=0; i<16; i+=4){
5d18eaad
LM
1086 int ref = h->ref_cache[list][scan8[i]];
1087 if(ref >= 0)
1710856c 1088 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
1089 }
1090 }
1091 }
1092 }
0da71265 1093 } else {
5d18eaad
LM
1094 linesize = h->mb_linesize = s->linesize;
1095 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
1096// dct_offset = s->linesize * 16;
1097 }
115329f1 1098
/* Intra PCM: the samples are copied straight out of h->mb, 16 bytes per luma
 * row and 8 bytes per chroma row. */
bd91fee3 1099 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
1100 for (i=0; i<16; i++) {
1101 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 1102 }
c1708e8d
MN
1103 for (i=0; i<8; i++) {
1104 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1105 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 1106 }
e7e09b49
LLL
1107 } else {
/* Intra macroblock: when deblocking is enabled the top border is exchanged
 * via xchg_mb_border() before prediction and handled again (xchg=0)
 * afterwards. */
1108 if(IS_INTRA(mb_type)){
5f7f9719 1109 if(h->deblocking_filter)
93cc10fa 1110 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 1111
49fb20cb 1112 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
1113 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1114 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 1115 }
0da71265 1116
/* Intra 4x4 / 8x8: each block is predicted and its residual added
 * immediately after. */
e7e09b49 1117 if(IS_INTRA4x4(mb_type)){
bd91fee3 1118 if(simple || !s->encoding){
43efd19a 1119 if(IS_8x8DCT(mb_type)){
1eb96035
MN
1120 if(transform_bypass){
1121 idct_dc_add =
1122 idct_add = s->dsp.add_pixels8;
dae006d7 1123 }else{
1eb96035
MN
1124 idct_dc_add = s->dsp.h264_idct8_dc_add;
1125 idct_add = s->dsp.h264_idct8_add;
1126 }
43efd19a
LM
1127 for(i=0; i<16; i+=4){
1128 uint8_t * const ptr= dest_y + block_offset[i];
1129 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
1130 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1131 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1132 }else{
ac0623b2
MN
1133 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1134 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1135 (h->topright_samples_available<<i)&0x4000, linesize);
1136 if(nnz){
1137 if(nnz == 1 && h->mb[i*16])
1138 idct_dc_add(ptr, h->mb + i*16, linesize);
1139 else
1140 idct_add (ptr, h->mb + i*16, linesize);
1141 }
41e4055b 1142 }
43efd19a 1143 }
1eb96035
MN
1144 }else{
1145 if(transform_bypass){
1146 idct_dc_add =
1147 idct_add = s->dsp.add_pixels4;
1148 }else{
1149 idct_dc_add = s->dsp.h264_idct_dc_add;
1150 idct_add = s->dsp.h264_idct_add;
1151 }
aebb5d6d
MN
1152 for(i=0; i<16; i++){
1153 uint8_t * const ptr= dest_y + block_offset[i];
1154 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 1155
aebb5d6d
MN
1156 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1157 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1158 }else{
1159 uint8_t *topright;
1160 int nnz, tr;
/* These two modes read the top-right samples; when they are unavailable the
 * sample at ptr[3 - linesize] is replicated into tr and used instead. */
1161 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1162 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1163 assert(mb_y || linesize <= block_offset[i]);
1164 if(!topright_avail){
1165 tr= ptr[3 - linesize]*0x01010101;
1166 topright= (uint8_t*) &tr;
1167 }else
1168 topright= ptr + 4 - linesize;
ac0623b2 1169 }else
aebb5d6d
MN
1170 topright= NULL;
1171
1172 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1173 nnz = h->non_zero_count_cache[ scan8[i] ];
1174 if(nnz){
1175 if(is_h264){
1176 if(nnz == 1 && h->mb[i*16])
1177 idct_dc_add(ptr, h->mb + i*16, linesize);
1178 else
1179 idct_add (ptr, h->mb + i*16, linesize);
1180 }else
881b5b80 1181 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
aebb5d6d 1182 }
ac0623b2 1183 }
41e4055b 1184 }
8b82a956 1185 }
0da71265 1186 }
e7e09b49 1187 }else{
/* Intra 16x16: predict the whole luma block, then dequantize/transform the
 * luma DC coefficients (skipped for H.264 when transform bypass is active). */
c92a30bb 1188 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 1189 if(is_h264){
36940eca 1190 if(!transform_bypass)
93f0c0a4 1191 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 1192 }else
881b5b80 1193 ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 1194 }
5f7f9719 1195 if(h->deblocking_filter)
93cc10fa 1196 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* Non-intra macroblock: motion compensation via hl_motion(); only taken when
 * is_h264 is set. */
bd91fee3 1197 }else if(is_h264){
e7e09b49 1198 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
1199 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1200 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 1201 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 1202 }
e7e09b49
LLL
1203
1204
/* Luma residual for every macroblock type except intra 4x4, whose residual
 * was already added block by block above. */
1205 if(!IS_INTRA4x4(mb_type)){
bd91fee3 1206 if(is_h264){
ef9d1d15 1207 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
1208 if(transform_bypass){
1209 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
1210 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1211 }else{
1212 for(i=0; i<16; i++){
1213 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 1214 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1215 }
2fd1f0e0
MN
1216 }
1217 }else{
1218 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 1219 }
/* Other macroblock types: luma residual only when one of the low four cbp
 * bits is set. */
49c084a7 1220 }else if(h->cbp&15){
2fd1f0e0 1221 if(transform_bypass){
0a8ca22f 1222 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 1223 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 1224 for(i=0; i<16; i+=di){
62bc966f 1225 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 1226 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1227 }
ef9d1d15 1228 }
2fd1f0e0
MN
1229 }else{
1230 if(IS_8x8DCT(mb_type)){
1231 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1232 }else{
1233 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1234 }
1235 }
4704097a 1236 }
e7e09b49
LLL
1237 }else{
1238 for(i=0; i<16; i++){
1239 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 1240 uint8_t * const ptr= dest_y + block_offset[i];
881b5b80 1241 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
e7e09b49 1242 }
4704097a 1243 }
0da71265
MN
1244 }
1245 }
0da71265 1246
/* Chroma residual: needs a cbp bit in 0x30 and is skipped for gray-only
 * decoding. Blocks 16..19 go to dest[0] (Cb), blocks 20..23 to dest[1] (Cr). */
49fb20cb 1247 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
1248 uint8_t *dest[2] = {dest_cb, dest_cr};
1249 if(transform_bypass){
96465b90
MN
1250 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1251 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1252 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1253 }else{
c25ac15a 1254 idct_add = s->dsp.add_pixels4;
96465b90
MN
1255 for(i=16; i<16+8; i++){
1256 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1257 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1258 }
1259 }
ef9d1d15 1260 }else{
4691a77d
1261 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1262 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 1263 if(is_h264){
c25ac15a
MN
1264 idct_add = s->dsp.h264_idct_add;
1265 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
1266 for(i=16; i<16+8; i++){
1267 if(h->non_zero_count_cache[ scan8[i] ])
1268 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1269 else if(h->mb[i*16])
1270 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1271 }
aebb5d6d
MN
1272 }else{
1273 for(i=16; i<16+8; i++){
1274 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
1275 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
881b5b80 1276 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
aebb5d6d 1277 }
e7e09b49 1278 }
4704097a 1279 }
0da71265
MN
1280 }
1281 }
1282 }
/* Clear the coefficient buffer whenever it may have been written. */
c212fb0c
MN
1283 if(h->cbp || IS_INTRA(mb_type))
1284 s->dsp.clear_blocks(h->mb);
1285
/* NOTE: the "&& 0" makes this condition constant false, so the per-macroblock
 * deblocking below never executes from here. */
c988f975 1286 if(h->deblocking_filter && 0) {
5f7f9719 1287 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
f432b43b 1288 fill_filter_caches(h, mb_type); //FIXME don't fill stuff which isn't used by filter_mb
5f7f9719
MN
1289 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
1290 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 1291 if (!simple && FRAME_MBAFF) {
082cf971 1292 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 1293 } else {
082cf971 1294 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 1295 }
53c05b1e 1296 }
0da71265
MN
1297}
1298
0da71265 1299/**
bd91fee3
AS
1300 * Process a macroblock; this case avoids checks for expensive uncommon cases.
1301 */
1302static void hl_decode_mb_simple(H264Context *h){
1303 hl_decode_mb_internal(h, 1);
1304}
1305
1306/**
1307 * Process a macroblock; this handles edge cases, such as interlacing.
1308 */
1309static void av_noinline hl_decode_mb_complex(H264Context *h){
1310 hl_decode_mb_internal(h, 0);
1311}
1312
903d58f6 1313void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3 1314 MpegEncContext * const s = &h->s;
64514ee8 1315 const int mb_xy= h->mb_xy;
bd91fee3 1316 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 1317 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 1318
bd91fee3
AS
1319 if (is_complex)
1320 hl_decode_mb_complex(h);
1321 else hl_decode_mb_simple(h);
1322}
1323
0da71265
MN
1324static int pred_weight_table(H264Context *h){
1325 MpegEncContext * const s = &h->s;
1326 int list, i;
9f2d1b4f 1327 int luma_def, chroma_def;
115329f1 1328
9f2d1b4f
LM
1329 h->use_weight= 0;
1330 h->use_weight_chroma= 0;
0da71265
MN
1331 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
1332 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
1333 luma_def = 1<<h->luma_log2_weight_denom;
1334 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
1335
1336 for(list=0; list<2; list++){
cb99c652
GB
1337 h->luma_weight_flag[list] = 0;
1338 h->chroma_weight_flag[list] = 0;
0da71265
MN
1339 for(i=0; i<h->ref_count[list]; i++){
1340 int luma_weight_flag, chroma_weight_flag;
115329f1 1341
0da71265
MN
1342 luma_weight_flag= get_bits1(&s->gb);
1343 if(luma_weight_flag){
1344 h->luma_weight[list][i]= get_se_golomb(&s->gb);
1345 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f 1346 if( h->luma_weight[list][i] != luma_def
cb99c652 1347 || h->luma_offset[list][i] != 0) {
9f2d1b4f 1348 h->use_weight= 1;
cb99c652
GB
1349 h->luma_weight_flag[list]= 1;
1350 }
9f2d1b4f
LM
1351 }else{
1352 h->luma_weight[list][i]= luma_def;
1353 h->luma_offset[list][i]= 0;
0da71265
MN
1354 }
1355
0af6967e 1356 if(CHROMA){
fef744d4
MN
1357 chroma_weight_flag= get_bits1(&s->gb);
1358 if(chroma_weight_flag){
1359 int j;
1360 for(j=0; j<2; j++){
1361 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
1362 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
1363 if( h->chroma_weight[list][i][j] != chroma_def
cb99c652 1364 || h->chroma_offset[list][i][j] != 0) {
fef744d4 1365 h->use_weight_chroma= 1;
cb99c652
GB
1366 h->chroma_weight_flag[list]= 1;
1367 }
fef744d4
MN
1368 }
1369 }else{
1370 int j;
1371 for(j=0; j<2; j++){
1372 h->chroma_weight[list][i][j]= chroma_def;
1373 h->chroma_offset[list][i][j]= 0;
1374 }
0da71265
MN
1375 }
1376 }
1377 }
9f5c1037 1378 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 1379 }
9f2d1b4f 1380 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
1381 return 0;
1382}
1383
9f2d1b4f
LM
1384static void implicit_weight_table(H264Context *h){
1385 MpegEncContext * const s = &h->s;
cb99c652 1386 int ref0, ref1, i;
9f2d1b4f
LM
1387 int cur_poc = s->current_picture_ptr->poc;
1388
ce09f927
GB
1389 for (i = 0; i < 2; i++) {
1390 h->luma_weight_flag[i] = 0;
1391 h->chroma_weight_flag[i] = 0;
1392 }
1393
9f2d1b4f
LM
1394 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
1395 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1396 h->use_weight= 0;
1397 h->use_weight_chroma= 0;
1398 return;
1399 }
1400
1401 h->use_weight= 2;
1402 h->use_weight_chroma= 2;
1403 h->luma_log2_weight_denom= 5;
1404 h->chroma_log2_weight_denom= 5;
1405
9f2d1b4f
LM
1406 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
1407 int poc0 = h->ref_list[0][ref0].poc;
1408 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 1409 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 1410 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 1411 if(td){
f66e4f5f 1412 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 1413 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 1414 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
1415 if(dist_scale_factor < -64 || dist_scale_factor > 128)
1416 h->implicit_weight[ref0][ref1] = 32;
1417 else
1418 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
1419 }else
1420 h->implicit_weight[ref0][ref1] = 32;
1421 }
1422 }
1423}
1424
8fd57a66 1425/**
5175b937 1426 * instantaneous decoder refresh.
0da71265
MN
1427 */
1428static void idr(H264Context *h){
ea6f00c4 1429 ff_h264_remove_all_refs(h);
a149c1a5 1430 h->prev_frame_num= 0;
80f8e035
MN
1431 h->prev_frame_num_offset= 0;
1432 h->prev_poc_msb=
1433 h->prev_poc_lsb= 0;
0da71265
MN
1434}
1435
7c33ad19
LM
1436/* forget old pics after a seek */
1437static void flush_dpb(AVCodecContext *avctx){
1438 H264Context *h= avctx->priv_data;
1439 int i;
64b9d48f 1440 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
1441 if(h->delayed_pic[i])
1442 h->delayed_pic[i]->reference= 0;
7c33ad19 1443 h->delayed_pic[i]= NULL;
285b570f 1444 }
df8a7dff 1445 h->outputed_poc= INT_MIN;
b19d493f 1446 h->prev_interlaced_frame = 1;
7c33ad19 1447 idr(h);
ca159196
MR
1448 if(h->s.current_picture_ptr)
1449 h->s.current_picture_ptr->reference= 0;
12d96de3 1450 h->s.first_field= 0;
9c095463 1451 ff_h264_reset_sei(h);
e240f898 1452 ff_mpeg_flush(avctx);
7c33ad19
LM
1453}
1454
0da71265
MN
1455static int init_poc(H264Context *h){
1456 MpegEncContext * const s = &h->s;
1457 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1458 int field_poc[2];
357282c6 1459 Picture *cur = s->current_picture_ptr;
0da71265 1460
b78a6baa 1461 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 1462 if(h->frame_num < h->prev_frame_num)
b78a6baa 1463 h->frame_num_offset += max_frame_num;
0da71265
MN
1464
1465 if(h->sps.poc_type==0){
1466 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1467
1468 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1469 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1470 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1471 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1472 else
1473 h->poc_msb = h->prev_poc_msb;
1474//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 1475 field_poc[0] =
0da71265 1476 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 1477 if(s->picture_structure == PICT_FRAME)
0da71265
MN
1478 field_poc[1] += h->delta_poc_bottom;
1479 }else if(h->sps.poc_type==1){
1480 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1481 int i;
1482
1483 if(h->sps.poc_cycle_length != 0)
1484 abs_frame_num = h->frame_num_offset + h->frame_num;
1485 else
1486 abs_frame_num = 0;
1487
1488 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1489 abs_frame_num--;
115329f1 1490
0da71265
MN
1491 expected_delta_per_poc_cycle = 0;
1492 for(i=0; i < h->sps.poc_cycle_length; i++)
1493 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1494
1495 if(abs_frame_num > 0){
1496 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1497 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1498
1499 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1500 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1501 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1502 } else
1503 expectedpoc = 0;
1504
115329f1 1505 if(h->nal_ref_idc == 0)
0da71265 1506 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 1507
0da71265
MN
1508 field_poc[0] = expectedpoc + h->delta_poc[0];
1509 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1510
1511 if(s->picture_structure == PICT_FRAME)
1512 field_poc[1] += h->delta_poc[1];
1513 }else{
b78a6baa 1514 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 1515
b78a6baa
MN
1516 if(!h->nal_ref_idc)
1517 poc--;
5710b371 1518
0da71265
MN
1519 field_poc[0]= poc;
1520 field_poc[1]= poc;
1521 }
115329f1 1522
357282c6 1523 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 1524 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 1525 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 1526 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 1527 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
1528
1529 return 0;
1530}
1531
b41c1db3
1532
1533/**
1534 * initialize scan tables
1535 */
1536static void init_scan_tables(H264Context *h){
1537 MpegEncContext * const s = &h->s;
1538 int i;
1539 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
1540 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
1541 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
1542 }else{
1543 for(i=0; i<16; i++){
1544#define T(x) (x>>2) | ((x<<2) & 0xF)
1545 h->zigzag_scan[i] = T(zigzag_scan[i]);
1546 h-> field_scan[i] = T( field_scan[i]);
1547#undef T
1548 }
1549 }
1550 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
45beb850 1551 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
b41c1db3
1552 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
1553 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
1554 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
1555 }else{
1556 for(i=0; i<64; i++){
1557#define T(x) (x>>3) | ((x&7)<<3)
45beb850 1558 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
b41c1db3
1559 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1560 h->field_scan8x8[i] = T(field_scan8x8[i]);
1561 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
1562#undef T
1563 }
1564 }
1565 if(h->sps.transform_bypass){ //FIXME same ugly
1566 h->zigzag_scan_q0 = zigzag_scan;
45beb850 1567 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
1568 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1569 h->field_scan_q0 = field_scan;
1570 h->field_scan8x8_q0 = field_scan8x8;
1571 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1572 }else{
1573 h->zigzag_scan_q0 = h->zigzag_scan;
1574 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1575 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1576 h->field_scan_q0 = h->field_scan;
1577 h->field_scan8x8_q0 = h->field_scan8x8;
1578 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
1579 }
1580}
afebe2f7 1581
256299d3
MN
1582static void field_end(H264Context *h){
1583 MpegEncContext * const s = &h->s;
1584 AVCodecContext * const avctx= s->avctx;
1585 s->mb_y= 0;
1586
1587 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1588 s->current_picture_ptr->pict_type= s->pict_type;
1589
1590 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1591 ff_vdpau_h264_set_reference_frames(s);
1592
1593 if(!s->dropable) {
ea6f00c4 1594 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
1595 h->prev_poc_msb= h->poc_msb;
1596 h->prev_poc_lsb= h->poc_lsb;
1597 }
1598 h->prev_frame_num_offset= h->frame_num_offset;
1599 h->prev_frame_num= h->frame_num;
1600
1601 if (avctx->hwaccel) {
1602 if (avctx->hwaccel->end_frame(avctx) < 0)
1603 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1604 }
1605
1606 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1607 ff_vdpau_h264_picture_complete(s);
1608
1609 /*
1610 * FIXME: Error handling code does not seem to support interlaced
1611 * when slices span multiple rows
1612 * The ff_er_add_slice calls don't work right for bottom
1613 * fields; they cause massive erroneous error concealing
1614 * Error marking covers both fields (top and bottom).
1615 * This causes a mismatched s->error_count
1616 * and a bad error table. Further, the error count goes to
1617 * INT_MAX when called for bottom field, because mb_y is
1618 * past end by one (callers fault) and resync_mb_y != 0
1619 * causes problems for the first MB line, too.
1620 */
1621 if (!FIELD_PICTURE)
1622 ff_er_frame_end(s);
1623
1624 MPV_frame_end(s);
d225a1e2
MN
1625
1626 h->current_slice=0;
256299d3
MN
1627}
1628
afebe2f7
1629/**
1630 * Replicates H264 "master" context to thread contexts.
1631 */
1632static void clone_slice(H264Context *dst, H264Context *src)
1633{
1634 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1635 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1636 dst->s.current_picture = src->s.current_picture;
1637 dst->s.linesize = src->s.linesize;
1638 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 1639 dst->s.first_field = src->s.first_field;
afebe2f7
1640
1641 dst->prev_poc_msb = src->prev_poc_msb;
1642 dst->prev_poc_lsb = src->prev_poc_lsb;
1643 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1644 dst->prev_frame_num = src->prev_frame_num;
1645 dst->short_ref_count = src->short_ref_count;
1646
1647 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1648 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1649 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1650 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
1651
1652 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1653 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
1654}
1655
0da71265
MN
1656/**
1657 * decodes a slice header.
9c852bcf 1658 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
1659 *
1660 * @param h h264context
1661 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1662 *
d9526386 1663 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 1664 */
afebe2f7 1665static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 1666 MpegEncContext * const s = &h->s;
12d96de3 1667 MpegEncContext * const s0 = &h0->s;
88e7a4d1 1668 unsigned int first_mb_in_slice;
ac658be5 1669 unsigned int pps_id;
0da71265 1670 int num_ref_idx_active_override_flag;
41f5c62f 1671 unsigned int slice_type, tmp, i, j;
0bf79634 1672 int default_ref_list_done = 0;
12d96de3 1673 int last_pic_structure;
0da71265 1674
2f944356 1675 s->dropable= h->nal_ref_idc == 0;
0da71265 1676
cf653d08
JD
1677 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1678 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1679 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1680 }else{
1681 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1682 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1683 }
1684
0da71265
MN
1685 first_mb_in_slice= get_ue_golomb(&s->gb);
1686
d225a1e2
MN
1687 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1688 if(h0->current_slice && FIELD_PICTURE){
1689 field_end(h);
1690 }
1691
afebe2f7 1692 h0->current_slice = 0;
12d96de3 1693 if (!s0->first_field)
f6e3c460 1694 s->current_picture_ptr= NULL;
66a4b2c1
MN
1695 }
1696
9963b332 1697 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 1698 if(slice_type > 9){
9b879566 1699 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 1700 return -1;
0da71265 1701 }
0bf79634
LLL
1702 if(slice_type > 4){
1703 slice_type -= 5;
0da71265
MN
1704 h->slice_type_fixed=1;
1705 }else
1706 h->slice_type_fixed=0;
115329f1 1707
ee2a957f 1708 slice_type= golomb_to_pict_type[ slice_type ];
9701840b 1709 if (slice_type == FF_I_TYPE
afebe2f7 1710 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
1711 default_ref_list_done = 1;
1712 }
1713 h->slice_type= slice_type;
e3e6f18f 1714 h->slice_type_nos= slice_type & 3;
0bf79634 1715
1412060e 1716 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
9701840b 1717 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
33e00731
JD
1718 av_log(h->s.avctx, AV_LOG_ERROR,
1719 "B picture before any references, skipping\n");
1720 return -1;
1721 }
115329f1 1722
0da71265 1723 pps_id= get_ue_golomb(&s->gb);
ac658be5 1724 if(pps_id>=MAX_PPS_COUNT){
9b879566 1725 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
1726 return -1;
1727 }
afebe2f7 1728 if(!h0->pps_buffers[pps_id]) {
a0f80050 1729 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
1730 return -1;
1731 }
afebe2f7 1732 h->pps= *h0->pps_buffers[pps_id];
8b92b792 1733
afebe2f7 1734 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 1735 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
1736 return -1;
1737 }
afebe2f7 1738 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 1739
50c21814 1740 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 1741 h->dequant_coeff_pps = pps_id;
239ea04c
LM
1742 init_dequant_tables(h);
1743 }
115329f1 1744
0da71265 1745 s->mb_width= h->sps.mb_width;
6867a90b 1746 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 1747
bf4665ee
DM
1748 h->b_stride= s->mb_width*4;
1749 h->b8_stride= s->mb_width*2;
0da71265 1750
faf3dfb9 1751 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 1752 if(h->sps.frame_mbs_only_flag)
faf3dfb9 1753 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 1754 else
faf3dfb9 1755 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
115329f1
DB
1756
1757 if (s->context_initialized
5ff85f1d 1758 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
afebe2f7
1759 if(h != h0)
1760 return -1; // width / height changed during parallelized decoding
0da71265 1761 free_tables(h);
ff7f75e1 1762 flush_dpb(s->avctx);
0da71265
MN
1763 MPV_common_end(s);
1764 }
1765 if (!s->context_initialized) {
afebe2f7
1766 if(h != h0)
1767 return -1; // we cant (re-)initialize context during parallel decoding
f3bdc3da
RD
1768
1769 avcodec_set_dimensions(s->avctx, s->width, s->height);
1770 s->avctx->sample_aspect_ratio= h->sps.sar;
1771 if(!s->avctx->sample_aspect_ratio.den)
1772 s->avctx->sample_aspect_ratio.den = 1;
1773
c4dffe7e
DC
1774 if(h->sps.video_signal_type_present_flag){
1775 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1776 if(h->sps.colour_description_present_flag){
1777 s->avctx->color_primaries = h->sps.color_primaries;
1778 s->avctx->color_trc = h->sps.color_trc;
1779 s->avctx->colorspace = h->sps.colorspace;
1780 }
1781 }
1782
f3bdc3da 1783 if(h->sps.timing_info_present_flag){
3102d180 1784 int64_t den= h->sps.time_scale;
f3bdc3da 1785 if(h->x264_build > 0 && h->x264_build < 44)
3102d180 1786 den *= 2;
f3bdc3da 1787 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180 1788 h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da
RD
1789 }
1790 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
1791 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1792
0da71265
MN
1793 if (MPV_common_init(s) < 0)
1794 return -1;
12d96de3 1795 s->first_field = 0;
b19d493f 1796 h->prev_interlaced_frame = 1;
115329f1 1797
b41c1db3 1798 init_scan_tables(h);
903d58f6 1799 ff_h264_alloc_tables(h);
0da71265 1800
afebe2f7
1801 for(i = 1; i < s->avctx->thread_count; i++) {
1802 H264Context *c;
1803 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 1804 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7
1805 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
1806 c->sps = h->sps;
1807 c->pps = h->pps;
1808 init_scan_tables(c);
1809 clone_tables(c, h);
1810 }
1811
1812 for(i = 0; i < s->avctx->thread_count; i++)
1813 if(context_init(h->thread_context[i]) < 0)
1814 return -1;
0da71265
MN
1815 }
1816
0da71265
MN
1817 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1818
5d18eaad 1819 h->mb_mbaff = 0;
6ba71fc4 1820 h->mb_aff_frame = 0;
12d96de3 1821 last_pic_structure = s0->picture_structure;
0da71265
MN
1822 if(h->sps.frame_mbs_only_flag){
1823 s->picture_structure= PICT_FRAME;
1824 }else{
6ba71fc4 1825 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 1826 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 1827 } else {
0da71265 1828 s->picture_structure= PICT_FRAME;
6ba71fc4 1829 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 1830 }
0da71265 1831 }
44e9dcf1 1832 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
1833
1834 if(h0->current_slice == 0){
26b86e47
MN
1835 while(h->frame_num != h->prev_frame_num &&
1836 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
1837 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 1838 if (ff_h264_frame_start(h) < 0)
66e6038c 1839 return -1;
26b86e47
MN
1840 h->prev_frame_num++;
1841 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1842 s->current_picture_ptr->frame_num= h->prev_frame_num;
ea6f00c4 1843 ff_h264_execute_ref_pic_marking(h, NULL, 0);
26b86e47
MN
1844 }
1845
12d96de3
JD
1846 /* See if we have a decoded first field looking for a pair... */
1847 if (s0->first_field) {
1848 assert(s0->current_picture_ptr);
1849 assert(s0->current_picture_ptr->data[0]);
1850 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1851
1852 /* figure out if we have a complementary field pair */
1853 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1854 /*
1855 * Previous field is unmatched. Don't display it, but let it
1856 * remain for reference if marked as such.
1857 */
1858 s0->current_picture_ptr = NULL;
1859 s0->first_field = FIELD_PICTURE;
1860
1861 } else {
1862 if (h->nal_ref_idc &&
1863 s0->current_picture_ptr->reference &&
1864 s0->current_picture_ptr->frame_num != h->frame_num) {
1865 /*
1866 * This and previous field were reference, but had
1867 * different frame_nums. Consider this field first in
1868 * pair. Throw away previous field except for reference
1869 * purposes.
1870 */
1871 s0->first_field = 1;
1872 s0->current_picture_ptr = NULL;
1873
1874 } else {
1875 /* Second field in complementary pair */
1876 s0->first_field = 0;
1877 }
1878 }
1879
1880 } else {
1881 /* Frame or first field in a potentially complementary pair */
1882 assert(!s0->current_picture_ptr);
1883 s0->first_field = FIELD_PICTURE;
1884 }
1885
903d58f6 1886 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 1887 s0->first_field = 0;
2ddcf84b 1888 return -1;
12d96de3 1889 }
2ddcf84b
JD
1890 }
1891 if(h != h0)
1892 clone_slice(h, h0);
1893
1894 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1895
88e7a4d1 1896 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 1897 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
1898 first_mb_in_slice >= s->mb_num){
1899 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
1900 return -1;
1901 }
88e7a4d1 1902 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
1903 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1904 if (s->picture_structure == PICT_BOTTOM_FIELD)
1905 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 1906 assert(s->mb_y < s->mb_height);
115329f1 1907
0da71265
MN
1908 if(s->picture_structure==PICT_FRAME){
1909 h->curr_pic_num= h->frame_num;
1910 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1911 }else{
f57e2af6 1912 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
1913 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1914 }
115329f1 1915
0da71265 1916 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 1917 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 1918 }
115329f1 1919
0da71265
MN
1920 if(h->sps.poc_type==0){
1921 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 1922
0da71265
MN
1923 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1924 h->delta_poc_bottom= get_se_golomb(&s->gb);
1925 }
1926 }
115329f1 1927
0da71265
MN
1928 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
1929 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 1930
0da71265
MN
1931 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
1932 h->delta_poc[1]= get_se_golomb(&s->gb);
1933 }
115329f1 1934
0da71265 1935 init_poc(h);
115329f1 1936
0da71265
MN
1937 if(h->pps.redundant_pic_cnt_present){
1938 h->redundant_pic_count= get_ue_golomb(&s->gb);
1939 }
1940
1412060e 1941 //set defaults, might be overridden a few lines later
0da71265
MN
1942 h->ref_count[0]= h->pps.ref_count[0];
1943 h->ref_count[1]= h->pps.ref_count[1];
1944
e3e6f18f 1945 if(h->slice_type_nos != FF_I_TYPE){
9f5c1037 1946 if(h->slice_type_nos == FF_B_TYPE){
0da71265
MN
1947 h->direct_spatial_mv_pred= get_bits1(&s->gb);
1948 }
1949 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 1950
0da71265
MN
1951 if(num_ref_idx_active_override_flag){
1952 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
9f5c1037 1953 if(h->slice_type_nos==FF_B_TYPE)
0da71265
MN
1954 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
1955
187696fa 1956 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 1957 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 1958 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
1959 return -1;
1960 }
1961 }
9f5c1037 1962 if(h->slice_type_nos == FF_B_TYPE)
187696fa
MN
1963 h->list_count= 2;
1964 else
1965 h->list_count= 1;
1966 }else
1967 h->list_count= 0;
0da71265 1968
0bf79634 1969 if(!default_ref_list_done){
ea6f00c4 1970 ff_h264_fill_default_ref_list(h);
0da71265
MN
1971 }
1972
ea6f00c4 1973 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 1974 return -1;
0da71265 1975
07dff5c7
MN
1976 if(h->slice_type_nos!=FF_I_TYPE){
1977 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 1978 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
MN
1979 }
1980 if(h->slice_type_nos==FF_B_TYPE){
1981 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 1982 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
1983 }
1984
932f396f 1985 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
9f5c1037 1986 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
0da71265 1987 pred_weight_table(h);
9f5c1037 1988 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
9f2d1b4f 1989 implicit_weight_table(h);
cb99c652 1990 else {
9f2d1b4f 1991 h->use_weight = 0;
cb99c652
GB
1992 for (i = 0; i < 2; i++) {
1993 h->luma_weight_flag[i] = 0;
1994 h->chroma_weight_flag[i] = 0;
1995 }
1996 }
115329f1 1997
2ddcf84b 1998 if(h->nal_ref_idc)
ea6f00c4 1999 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 2000
5d18eaad 2001 if(FRAME_MBAFF)
ea6f00c4 2002 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 2003
8f56e219 2004 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
943f69a6
MN
2005 ff_h264_direct_dist_scale_factor(h);
2006 ff_h264_direct_ref_list_init(h);
8f56e219 2007
e3e6f18f 2008 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
9963b332 2009 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2010 if(tmp > 2){
2011 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
2012 return -1;
2013 }
2014 h->cabac_init_idc= tmp;
2015 }
e5017ab8
LA
2016
2017 h->last_qscale_diff = 0;
88e7a4d1
MN
2018 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2019 if(tmp>51){
2020 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2021 return -1;
2022 }
88e7a4d1 2023 s->qscale= tmp;
4691a77d
2024 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2025 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2026 //FIXME qscale / qp ... stuff
9701840b 2027 if(h->slice_type == FF_SP_TYPE){
1df1df0b 2028 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2029 }
9701840b 2030 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
1df1df0b 2031 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2032 }
2033
53c05b1e 2034 h->deblocking_filter = 1;
0c32e19d
MN
2035 h->slice_alpha_c0_offset = 52;
2036 h->slice_beta_offset = 52;
0da71265 2037 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2038 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2039 if(tmp > 2){
2040 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2041 return -1;
2042 }
2043 h->deblocking_filter= tmp;
115329f1 2044 if(h->deblocking_filter < 2)
53c05b1e
MN
2045 h->deblocking_filter^= 1; // 1<->0
2046
2047 if( h->deblocking_filter ) {
0c32e19d
MN
2048 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2049 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2050 if( h->slice_alpha_c0_offset > 104U
2051 || h->slice_beta_offset > 104U){
2052 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2053 return -1;
2054 }
0da71265 2055 }
980a82b7 2056 }
afebe2f7 2057
61858a76 2058 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4b30289e 2059 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
9f5c1037 2060 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
61858a76
RD
2061 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2062 h->deblocking_filter= 0;
2063
afebe2f7 2064 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2065 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2066 /* Cheat slightly for speed:
5d81d641 2067 Do not bother to deblock across slices. */
ec970c21
2068 h->deblocking_filter = 2;
2069 } else {
7ae94d52
2070 h0->max_contexts = 1;
2071 if(!h0->single_decode_warning) {
2072 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2073 h0->single_decode_warning = 1;
2074 }
2075 if(h != h0)
2076 return 1; // deblocking switched inside frame
ec970c21 2077 }
afebe2f7 2078 }
0c32e19d 2079 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7 2080
0da71265
MN
2081#if 0 //FMO
2082 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2083 slice_group_change_cycle= get_bits(&s->gb, ?);
2084#endif
2085
afebe2f7
2086 h0->last_slice_type = slice_type;
2087 h->slice_num = ++h0->current_slice;
b735aeea
MN
2088 if(h->slice_num >= MAX_SLICES){
2089 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2090 }
5175b937 2091
c32867b5 2092 for(j=0; j<2; j++){
6d7e6b26 2093 int id_list[16];
b735aeea 2094 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
MN
2095 for(i=0; i<16; i++){
2096 id_list[i]= 60;
2097 if(h->ref_list[j][i].data[0]){
2098 int k;
2099 uint8_t *base= h->ref_list[j][i].base[0];
2100 for(k=0; k<h->short_ref_count; k++)
2101 if(h->short_ref[k]->base[0] == base){
2102 id_list[i]= k;
2103 break;
2104 }
2105 for(k=0; k<h->long_ref_count; k++)
2106 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2107 id_list[i]= h->short_ref_count + k;
2108 break;
2109 }
2110 }
2111 }
2112
c32867b5
MN
2113 ref2frm[0]=
2114 ref2frm[1]= -1;
d50cdd82 2115 for(i=0; i<16; i++)
6d7e6b26 2116 ref2frm[i+2]= 4*id_list[i]
c32867b5 2117 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2118 ref2frm[18+0]=
2119 ref2frm[18+1]= -1;
2120 for(i=16; i<48; i++)
6d7e6b26 2121 ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82 2122 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2123 }
2124
5d18eaad 2125 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2126 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2127
802e9146
MN
2128 s->avctx->refs= h->sps.ref_frame_count;
2129
0da71265 2130 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2131 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2132 h->slice_num,
2133 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2134 first_mb_in_slice,
49573a87 2135 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2136 pps_id, h->frame_num,
2137 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2138 h->ref_count[0], h->ref_count[1],
2139 s->qscale,
0c32e19d 2140 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f 2141 h->use_weight,
4806b922
MN
2142 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2143 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2144 );
2145 }
2146
2147 return 0;
2148}
2149
0dc343d4 2150int ff_h264_get_slice_type(const H264Context *h)
75dd6938
LA
2151{
2152 switch (h->slice_type) {
2153 case FF_P_TYPE: return 0;
2154 case FF_B_TYPE: return 1;
2155 case FF_I_TYPE: return 2;
2156 case FF_SP_TYPE: return 3;
2157 case FF_SI_TYPE: return 4;
2158 default: return -1;
2159 }
2160}
2161
c988f975
MN
2162static void loop_filter(H264Context *h){
2163 MpegEncContext * const s = &h->s;
2164 uint8_t *dest_y, *dest_cb, *dest_cr;
2165 int linesize, uvlinesize, mb_x, mb_y;
2166 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2167 const int old_slice_type= h->slice_type;
2168
2169 if(h->deblocking_filter) {
2170 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2171 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
77d40dce 2172 int list, mb_xy, mb_type;
c988f975
MN
2173 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2174 h->slice_num= h->slice_table[mb_xy];
2175 mb_type= s->current_picture.mb_type[mb_xy];
2176 h->list_count= h->list_counts[mb_xy];
c988f975
MN
2177
2178 if(FRAME_MBAFF)
2179 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
2180
c988f975
MN
2181 s->mb_x= mb_x;
2182 s->mb_y= mb_y;
2183 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2184 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2185 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2186 //FIXME simplify above
2187
2188 if (MB_FIELD) {
2189 linesize = h->mb_linesize = s->linesize * 2;
2190 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2191 if(mb_y&1){ //FIXME move out of this function?
2192 dest_y -= s->linesize*15;
2193 dest_cb-= s->uvlinesize*7;
2194 dest_cr-= s->uvlinesize*7;
2195 }
2196 } else {
2197 linesize = h->mb_linesize = s->linesize;
2198 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2199 }
77d40dce 2200 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
aaa995d7 2201 if(fill_filter_caches(h, mb_type))
44a5e7b6 2202 continue;
c988f975
MN
2203 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2204 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2205
77d40dce 2206 if (FRAME_MBAFF) {
c988f975
MN
2207 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2208 } else {
2209 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2210 }
2211 }
2212 }
2213 }
2214 h->slice_type= old_slice_type;
2215 s->mb_x= 0;
2216 s->mb_y= end_mb_y - FRAME_MBAFF;
2217}
2218
3a84713a
RS
2219static int decode_slice(struct AVCodecContext *avctx, void *arg){
2220 H264Context *h = *(void**)arg;
0da71265
MN
2221 MpegEncContext * const s = &h->s;
2222 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2223
2224 s->mb_skip_run= -1;
0da71265 2225
89db0bae 2226 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b 2227 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae 2228
e5017ab8 2229 if( h->pps.cabac ) {
e5017ab8
LA
2230 /* realign */
2231 align_get_bits( &s->gb );
2232
2233 /* init cabac */
d61c4e73 2234 ff_init_cabac_states( &h->cabac);
e5017ab8
LA
2235 ff_init_cabac_decoder( &h->cabac,
2236 s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15 2237 (get_bits_left(&s->gb) + 7)/8);
cc51b282
MN
2238
2239 ff_h264_init_cabac_states(h);
95c26348 2240
e5017ab8 2241 for(;;){
851ded89 2242//START_TIMER
cc51b282 2243 int ret = ff_h264_decode_mb_cabac(h);
6867a90b 2244 int eos;
851ded89 2245//STOP_TIMER("decode_mb_cabac")
0da71265 2246
903d58f6 2247 if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265 2248
5d18eaad 2249 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
LA
2250 s->mb_y++;
2251
cc51b282 2252 ret = ff_h264_decode_mb_cabac(h);
e5017ab8 2253
903d58f6 2254 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2255 s->mb_y--;
2256 }
6867a90b 2257 eos = get_cabac_terminate( &h->cabac );
e5017ab8 2258
5659b509 2259 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af 2260 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
LA
2261 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2262 return -1;
2263 }
2264
2265 if( ++s->mb_x >= s->mb_width ) {
2266 s->mb_x = 0;
c988f975 2267 loop_filter(h);
e5017ab8 2268 ff_draw_horiz_band(s, 16*s->mb_y, 16);
5175b937 2269 ++s->mb_y;
f3e53d9f 2270 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
LLL
2271 ++s->mb_y;
2272 }
0da71265 2273 }
0da71265 2274
e5017ab8 2275 if( eos || s->mb_y >= s->mb_height ) {
a9c9a240 2276 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8 2277 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0da71265 2278 return 0;
e5017ab8 2279 }
e5017ab8
LA
2280 }
2281
2282 } else {
2283 for(;;){
e1e94902 2284 int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2285
903d58f6 2286 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8 2287
5d18eaad 2288 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8 2289 s->mb_y++;
e1e94902 2290 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2291
903d58f6 2292 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2293 s->mb_y--;
2294 }
2295
2296 if(ret<0){
2297 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2298 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2299
2300 return -1;
2301 }
e5017ab8
LA
2302
2303 if(++s->mb_x >= s->mb_width){
2304 s->mb_x=0;
c988f975 2305 loop_filter(h);
e5017ab8 2306 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6867a90b 2307 ++s->mb_y;
f3e53d9f 2308 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
LLL
2309 ++s->mb_y;
2310 }
2311 if(s->mb_y >= s->mb_height){
a9c9a240 2312 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2313
2314 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2315 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2316
2317 return 0;
2318 }else{
2319 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2320
2321 return -1;
2322 }
2323 }
2324 }
2325
2326 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240 2327 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2328 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2329 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2330
2331 return 0;
2332 }else{
2333 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2334
2335 return -1;
2336 }
2337 }
0da71265
MN
2338 }
2339 }
e5017ab8 2340
0da71265
MN
2341#if 0
2342 for(;s->mb_y < s->mb_height; s->mb_y++){
2343 for(;s->mb_x < s->mb_width; s->mb_x++){
2344 int ret= decode_mb(h);
115329f1 2345
903d58f6 2346 ff_h264_hl_decode_mb(h);
0da71265
MN
2347
2348 if(ret<0){
267f7edc 2349 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2350 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2351
2352 return -1;
2353 }
115329f1 2354
0da71265
MN
2355 if(++s->mb_x >= s->mb_width){
2356 s->mb_x=0;
2357 if(++s->mb_y >= s->mb_height){
2358 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2359 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2360
2361 return 0;
2362 }else{
2363 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2364
2365 return -1;
2366 }
2367 }
2368 }
115329f1 2369
0da71265
MN
2370 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2371 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2372 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2373
2374 return 0;
2375 }else{
2376 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2377
2378 return -1;
2379 }
2380 }
2381 }
2382 s->mb_x=0;
2383 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2384 }
2385#endif
2386 return -1; //not reached
2387}
2388
afebe2f7
2389/**
2390 * Call decode_slice() for each context.
2391 *
2392 * @param h h264 master context
2393 * @param context_count number of contexts to execute
2394 */
2395static void execute_decode_slices(H264Context *h, int context_count){
2396 MpegEncContext * const s = &h->s;
2397 AVCodecContext * const avctx= s->avctx;
2398 H264Context *hx;
2399 int i;
2400
40e5d31b
GB
2401 if (s->avctx->hwaccel)
2402 return;
0d3d172f 2403 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2404 return;
afebe2f7 2405 if(context_count == 1) {
74e8b78b 2406 decode_slice(avctx, &h);
afebe2f7
2407 } else {
2408 for(i = 1; i < context_count; i++) {
2409 hx = h->thread_context[i];
047599a4 2410 hx->s.error_recognition = avctx->error_recognition;
afebe2f7
2411 hx->s.error_count = 0;
2412 }
2413
2414 avctx->execute(avctx, (void *)decode_slice,
01418506 2415 h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
2416
2417 /* pull back stuff from slices to master context */
2418 hx = h->thread_context[context_count - 1];
2419 s->mb_x = hx->s.mb_x;
2420 s->mb_y = hx->s.mb_y;
12d96de3
JD
2421 s->dropable = hx->s.dropable;
2422 s->picture_structure = hx->s.picture_structure;
afebe2f7
2423 for(i = 1; i < context_count; i++)
2424 h->s.error_count += h->thread_context[i]->s.error_count;
2425 }
2426}
2427
2428
30317501 2429static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
MN
2430 MpegEncContext * const s = &h->s;
2431 AVCodecContext * const avctx= s->avctx;
2432 int buf_index=0;
afebe2f7
2433 H264Context *hx; ///< thread context
2434 int context_count = 0;
74b14aac 2435 int next_avc= h->is_avc ? 0 : buf_size;
afebe2f7
2436
2437 h->max_contexts = avctx->thread_count;
377ec888 2438#if 0
eb60dddc 2439 int i;
96b6ace2
MN
2440 for(i=0; i<50; i++){
2441 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
0da71265
MN
2442 }
2443#endif
66a4b2c1 2444 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7 2445 h->current_slice = 0;
12d96de3 2446 if (!s->first_field)
f6e3c460 2447 s->current_picture_ptr= NULL;
9c095463 2448 ff_h264_reset_sei(h);
66a4b2c1
MN
2449 }
2450
0da71265
MN
2451 for(;;){
2452 int consumed;
2453 int dst_length;
2454 int bit_length;
30317501 2455 const uint8_t *ptr;
4770b1b4 2456 int i, nalsize = 0;
afebe2f7 2457 int err;
115329f1 2458
74b14aac 2459 if(buf_index >= next_avc) {
1c48415b
2460 if(buf_index >= buf_size) break;
2461 nalsize = 0;
2462 for(i = 0; i < h->nal_length_size; i++)
2463 nalsize = (nalsize << 8) | buf[buf_index++];
8d8409ca 2464 if(nalsize <= 1 || nalsize > buf_size - buf_index){
1c48415b
2465 if(nalsize == 1){
2466 buf_index++;
2467 continue;
2468 }else{
2469 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2470 break;
2471 }
2472 }
74b14aac 2473 next_avc= buf_index + nalsize;
1c48415b
2474 } else {
2475 // start code prefix search
52255d17 2476 for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
2477 // This should always succeed in the first iteration.
2478 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2479 break;
8b031359 2480 }
115329f1 2481
1c48415b 2482 if(buf_index+3 >= buf_size) break;
115329f1 2483
1c48415b 2484 buf_index+=3;
52255d17 2485 if(buf_index >= next_avc) continue;
1c48415b 2486 }
115329f1 2487
afebe2f7
2488 hx = h->thread_context[context_count];
2489
74b14aac 2490 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429 2491 if (ptr==NULL || dst_length < 0){
ac658be5
FOL
2492 return -1;
2493 }
6ac9696e 2494 while(ptr[dst_length - 1] == 0 && dst_length > 0)
c4da83fb 2495 dst_length--;
1790a5e9 2496 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
MN
2497
2498 if(s->avctx->debug&FF_DEBUG_STARTCODE){
afebe2f7 2499 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
0da71265 2500 }
115329f1 2501
74b14aac 2502 if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d 2503 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1 2504 }
4770b1b4 2505
0da71265
MN
2506 buf_index += consumed;
2507
755bfeab 2508 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
8c3eba7c 2509 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
0da71265 2510 continue;
115329f1 2511
afebe2f7
2512 again:
2513 err = 0;
2514 switch(hx->nal_unit_type){
0da71265 2515 case NAL_IDR_SLICE:
afebe2f7
2516 if (h->nal_unit_type != NAL_IDR_SLICE) {
2517 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2518 return -1;
2519 }
3b66c4c5 2520 idr(h); //FIXME ensure we don't loose some frames if there is reordering
0da71265 2521 case NAL_SLICE:
afebe2f7
2522 init_get_bits(&hx->s.gb, ptr, bit_length);
2523 hx->intra_gb_ptr=
2524 hx->inter_gb_ptr= &hx->s.gb;
2525 hx->s.data_partitioning = 0;
2526
2527 if((err = decode_slice_header(hx, h)))
2528 break;
2529
dd0cd3d2
RC
2530 avctx->profile = hx->sps.profile_idc;
2531 avctx->level = hx->sps.level_idc;
2532
6026a096
GB
2533 if (s->avctx->hwaccel && h->current_slice == 1) {
2534 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
2535 return -1;
2536 }
2537
37a558fe
IS
2538 s->current_picture_ptr->key_frame |=
2539 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2540 (h->sei_recovery_frame_cnt >= 0);
afebe2f7
2541 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
2542 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2543 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2544 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
369122dd 2545 && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
MN
2546 if(avctx->hwaccel) {
2547 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2548 return -1;
2549 }else
0d3d172f 2550 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd 2551 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
CEH
2552 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2553 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd 2554 }else
f2c214a1 2555 context_count++;
369122dd 2556 }
0da71265
MN
2557 break;
2558 case NAL_DPA:
afebe2f7
2559 init_get_bits(&hx->s.gb, ptr, bit_length);
2560 hx->intra_gb_ptr=
2561 hx->inter_gb_ptr= NULL;
0410ee8f
AS
2562
2563 if ((err = decode_slice_header(hx, h)) < 0)
2564 break;
2565
dd0cd3d2
RC
2566 avctx->profile = hx->sps.profile_idc;
2567 avctx->level = hx->sps.level_idc;
2568
afebe2f7 2569 hx->s.data_partitioning = 1;
115329f1 2570
0da71265
MN
2571 break;
2572 case NAL_DPB:
afebe2f7
2573 init_get_bits(&hx->intra_gb, ptr, bit_length);
2574 hx->intra_gb_ptr= &hx->intra_gb;
0da71265
MN
2575 break;
2576 case NAL_DPC:
afebe2f7
2577 init_get_bits(&hx->inter_gb, ptr, bit_length);
2578 hx->inter_gb_ptr= &hx->inter_gb;
8b92b792 2579
afebe2f7 2580 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03 2581 && s->context_initialized
e0111b32 2582 && s->hurry_up < 5
afebe2f7 2583 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2584 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2585 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
e0111b32 2586 && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7 2587 context_count++;
0da71265
MN
2588 break;
2589 case NAL_SEI:
cdd10689 2590 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2591 ff_h264_decode_sei(h);
0da71265
MN
2592 break;
2593 case NAL_SPS:
2594 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2595 ff_h264_decode_seq_parameter_set(h);
115329f1 2596
0da71265
MN
2597 if(s->flags& CODEC_FLAG_LOW_DELAY)
2598 s->low_delay=1;
115329f1 2599
a18030bb
LM
2600 if(avctx->has_b_frames < 2)
2601 avctx->has_b_frames= !s->low_delay;
0da71265
MN
2602 break;
2603 case NAL_PPS:
2604 init_get_bits(&s->gb, ptr, bit_length);
115329f1 2605
1790a5e9 2606 ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
MN
2607
2608 break;
ab470fa7
LM
2609 case NAL_AUD:
2610 case NAL_END_SEQUENCE:
2611 case NAL_END_STREAM:
2612 case NAL_FILLER_DATA:
2613 case NAL_SPS_EXT:
2614 case NAL_AUXILIARY_SLICE:
0da71265 2615 break;
bb270c08 2616 default:
4ad04da2 2617 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1 2618 }
115329f1 2619
afebe2f7
2620 if(context_count == h->max_contexts) {
2621 execute_decode_slices(h, context_count);
2622 context_count = 0;
2623 }
2624
2625 if (err < 0)
2626 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2627 else if(err == 1) {
2628 /* Slice could not be decoded in parallel mode, copy down
2629 * NAL unit stuff to context 0 and restart. Note that
1412060e 2630 * rbsp_buffer is not transferred, but since we no longer
afebe2f7
2631 * run in parallel mode this should not be an issue. */
2632 h->nal_unit_type = hx->nal_unit_type;
2633 h->nal_ref_idc = hx->nal_ref_idc;
2634 hx = h;
2635 goto again;
2636 }
2637 }
2638 if(context_count)
2639 execute_decode_slices(h, context_count);
0da71265
MN
2640 return buf_index;
2641}
2642
/**
 * Returns the number of bytes consumed for building the current frame.
 *
 * @param s        codec context; not used by this function
 * @param pos      position reached in the input buffer after decoding
 * @param buf_size total size of the input buffer
 * @return pos, raised to 1 when it is 0 and raised to buf_size when fewer
 *         than 10 bytes would be left after pos
 */
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
        (void)s; /* kept for interface compatibility only */

        if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
        if(pos+10>buf_size) pos=buf_size; // oops ;)

        return pos;
}
2652
115329f1 2653static int decode_frame(AVCodecContext *avctx,
0da71265 2654 void *data, int *data_size,
7a00bbad 2655 AVPacket *avpkt)
0da71265 2656{
7a00bbad
TB
2657 const uint8_t *buf = avpkt->data;
2658 int buf_size = avpkt->size;
0da71265
MN
2659 H264Context *h = avctx->priv_data;
2660 MpegEncContext *s = &h->s;
115329f1 2661 AVFrame *pict = data;
0da71265 2662 int buf_index;
115329f1 2663
0da71265 2664 s->flags= avctx->flags;
303e50e6 2665 s->flags2= avctx->flags2;
0da71265 2666
1412060e 2667 /* end of stream, output what is still in the buffers */
0da71265 2668 if (buf_size == 0) {
97bbb885
MN
2669 Picture *out;
2670 int i, out_idx;
2671
2672//FIXME factorize this with the output code below
2673 out = h->delayed_pic[0];
2674 out_idx = 0;
c173a088 2675 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
MN
2676 if(h->delayed_pic[i]->poc < out->poc){
2677 out = h->delayed_pic[i];
2678 out_idx = i;
2679 }
2680
2681 for(i=out_idx; h->delayed_pic[i]; i++)
2682 h->delayed_pic[i] = h->delayed_pic[i+1];
2683
2684 if(out){
2685 *data_size = sizeof(AVFrame);
2686 *pict= *(AVFrame*)out;
2687 }
2688
0da71265
MN
2689 return 0;
2690 }
115329f1 2691
4770b1b4
RT
2692 if(h->is_avc && !h->got_avcC) {
2693 int i, cnt, nalsize;
2694 unsigned char *p = avctx->extradata;
2695 if(avctx->extradata_size < 7) {
2696 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
2697 return -1;
2698 }
2699 if(*p != 1) {
2700 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
2701 return -1;
2702 }
2703 /* sps and pps in the avcC always have length coded with 2 bytes,
2704 so put a fake nal_length_size = 2 while parsing them */
2705 h->nal_length_size = 2;
2706 // Decode sps from avcC
2707 cnt = *(p+5) & 0x1f; // Number of sps
2708 p += 6;
2709 for (i = 0; i < cnt; i++) {
fead30d4 2710 nalsize = AV_RB16(p) + 2;
96b6ace2 2711 if(decode_nal_units(h, p, nalsize) < 0) {
4770b1b4
RT
2712 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
2713 return -1;
2714 }
2715 p += nalsize;
115329f1 2716 }
4770b1b4
RT
2717 // Decode pps from avcC
2718 cnt = *(p++); // Number of pps
2719 for (i = 0; i < cnt; i++) {
fead30d4 2720 nalsize = AV_RB16(p) + 2;
4770b1b4
RT
2721 if(decode_nal_units(h, p, nalsize) != nalsize) {
2722 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
2723 return -1;
2724 }
2725 p += nalsize;
115329f1 2726 }
4770b1b4
RT
2727 // Now store right nal length size, that will be use to parse all other nals
2728 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
2729 // Do not reparse avcC
2730 h->got_avcC = 1;
2731 }
2732
d464bcef 2733 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
115329f1 2734 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
0da71265 2735 return -1;
d464bcef 2736 h->got_avcC = 1;
0da71265
MN
2737 }
2738
2739 buf_index=decode_nal_units(h, buf, buf_size);
115329f1 2740 if(buf_index < 0)
0da71265
MN
2741 return -1;
2742
56c70e1d 2743 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
1c746a49 2744 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
56c70e1d
MN
2745 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
2746 return -1;
2747 }
2748
66a4b2c1
MN
2749 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
2750 Picture *out = s->current_picture_ptr;
2751 Picture *cur = s->current_picture_ptr;
44be1d64 2752 int i, pics, out_of_order, out_idx;
115329f1 2753
256299d3 2754 field_end(h);
66a4b2c1 2755
357282c6 2756 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
12d96de3
JD
2757 /* Wait for second field. */
2758 *data_size = 0;
2759
2760 } else {
b19d493f 2761 cur->interlaced_frame = 0;
b09a7c05
2762 cur->repeat_pict = 0;
2763
2764 /* Signal interlacing information externally. */
2765 /* Prioritize picture timing SEI information over used decoding process if it exists. */
70e01da3 2766
b09a7c05
2767 if(h->sps.pic_struct_present_flag){
2768 switch (h->sei_pic_struct)
2769 {
b19d493f
HY
2770 case SEI_PIC_STRUCT_FRAME:
2771 break;
2772 case SEI_PIC_STRUCT_TOP_FIELD:
2773 case SEI_PIC_STRUCT_BOTTOM_FIELD:
2774 cur->interlaced_frame = 1;
2775 break;
2776 case SEI_PIC_STRUCT_TOP_BOTTOM:
2777 case SEI_PIC_STRUCT_BOTTOM_TOP:
2778 if (FIELD_OR_MBAFF_PICTURE)
2779 cur->interlaced_frame = 1;
2780 else
2781 // try to flag soft telecine progressive
2782 cur->interlaced_frame = h->prev_interlaced_frame;
2783 break;
b09a7c05
2784 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
2785 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
2786 // Signal the possibility of telecined film externally (pic_struct 5,6)
2787 // From these hints, let the applications decide if they apply deinterlacing.
2788 cur->repeat_pict = 1;
b09a7c05
2789 break;
2790 case SEI_PIC_STRUCT_FRAME_DOUBLING:
2791 // Force progressive here, as doubling interlaced frame is a bad idea.
b09a7c05
2792 cur->repeat_pict = 2;
2793 break;
2794 case SEI_PIC_STRUCT_FRAME_TRIPLING:
b09a7c05
2795 cur->repeat_pict = 4;
2796 break;
2797 }
b19d493f
HY
2798
2799 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
2800 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
b09a7c05
2801 }else{
2802 /* Derive interlacing flag from used decoding process. */
2803 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
2804 }
b19d493f 2805 h->prev_interlaced_frame = cur->interlaced_frame;
b09a7c05
2806
2807 if (cur->field_poc[0] != cur->field_poc[1]){
2808 /* Derive top_field_first from field pocs. */
2809 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
2810 }else{
2811 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
2812 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
2813 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
2814 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
2815 cur->top_field_first = 1;
2816 else
2817 cur->top_field_first = 0;
2818 }else{
2819 /* Most likely progressive */
2820 cur->top_field_first = 0;
2821 }
2822 }
84a8596d 2823
f6e3c460 2824 //FIXME do something with unavailable reference frames
8b92b792 2825
f6e3c460 2826 /* Sort B-frames into display order */
2f944356 2827
f6e3c460
2828 if(h->sps.bitstream_restriction_flag
2829 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
2830 s->avctx->has_b_frames = h->sps.num_reorder_frames;
2831 s->low_delay = 0;
2832 }
9170e345 2833
fb19e144
MN
2834 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
2835 && !h->sps.bitstream_restriction_flag){
2836 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
2837 s->low_delay= 0;
2838 }
2839
f6e3c460
2840 pics = 0;
2841 while(h->delayed_pic[pics]) pics++;
9170e345 2842
64b9d48f 2843 assert(pics <= MAX_DELAYED_PIC_COUNT);
4e4d983e 2844
f6e3c460
2845 h->delayed_pic[pics++] = cur;
2846 if(cur->reference == 0)
2847 cur->reference = DELAYED_PIC_REF;
2f944356 2848
f6e3c460
2849 out = h->delayed_pic[0];
2850 out_idx = 0;
c173a088 2851 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
f6e3c460
2852 if(h->delayed_pic[i]->poc < out->poc){
2853 out = h->delayed_pic[i];
2854 out_idx = i;
2855 }
44be1d64
MN
2856 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
2857 h->outputed_poc= INT_MIN;
2858 out_of_order = out->poc < h->outputed_poc;
1b547aba 2859
f6e3c460
2860 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
2861 { }
2a811db2 2862 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
f6e3c460 2863 || (s->low_delay &&
44be1d64 2864 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
9701840b 2865 || cur->pict_type == FF_B_TYPE)))
f6e3c460
2866 {
2867 s->low_delay = 0;
2868 s->avctx->has_b_frames++;
f6e3c460 2869 }
f6e3c460
2870
2871 if(out_of_order || pics > s->avctx->has_b_frames){
3eaa6d0e 2872 out->reference &= ~DELAYED_PIC_REF;
f6e3c460
2873 for(i=out_idx; h->delayed_pic[i]; i++)
2874 h->delayed_pic[i] = h->delayed_pic[i+1];
2875 }
3eaa6d0e 2876 if(!out_of_order && pics > s->avctx->has_b_frames){
f6e3c460 2877 *data_size = sizeof(AVFrame);
df8a7dff 2878
44be1d64
MN
2879 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
2880 h->outputed_poc = INT_MIN;
2881 } else
67e362ca 2882 h->outputed_poc = out->poc;
f6e3c460 2883 *pict= *(AVFrame*)out;
3eaa6d0e 2884 }else{
f6e3c460 2885 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3eaa6d0e 2886 }
12d96de3 2887 }
a4dae92b
LM
2888 }
2889
3165e258 2890 assert(pict->data[0] || !*data_size);
4e4d983e 2891 ff_print_debug_info(s, pict);
0da71265 2892//printf("out %d\n", (int)pict->data[0]);
0da71265 2893
0da71265
MN
2894 return get_consumed_bytes(s, buf_index, buf_size);
2895}
#if 0
/**
 * Fills h->mb_avail[] for the current macroblock (disabled code).
 * Entries 0..2 describe the row above (x-1, x, x+1) and entry 3 the
 * macroblock to the left; a neighbour counts as available when its
 * slice_table entry equals h->slice_num.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        /* first row: nothing above */
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
2915
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test, built only when TEST is defined.
 * Writes COUNT unsigned and then COUNT signed exp-golomb codes into a
 * buffer, reads them back and prints every value that does not round-trip.
 * The DCT, quantizer and NAL tests further down are compiled out.
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    /* zeroed so that dsputil_init() is never handed indeterminate fields */
    AVCodecContext avctx = {0};

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* report the value that was actually written, i - COUNT/2 */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        s->dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
0da71265
MN
3090
3091
cbf1eae9 3092av_cold void ff_h264_free_context(H264Context *h)
0da71265 3093{
5f129a05 3094 int i;
115329f1 3095
0da71265 3096 free_tables(h); //FIXME cleanup init stuff perhaps
5f129a05
MN
3097
3098 for(i = 0; i < MAX_SPS_COUNT; i++)
3099 av_freep(h->sps_buffers + i);
3100
3101 for(i = 0; i < MAX_PPS_COUNT; i++)
3102 av_freep(h->pps_buffers + i);
15861962
RD
3103}
3104
903d58f6 3105av_cold int ff_h264_decode_end(AVCodecContext *avctx)
15861962
RD
3106{
3107 H264Context *h = avctx->priv_data;
3108 MpegEncContext *s = &h->s;
3109
3110 ff_h264_free_context(h);
5f129a05 3111
0da71265
MN
3112 MPV_common_end(s);
3113
3114// memset(h, 0, sizeof(H264Context));
115329f1 3115
0da71265
MN
3116 return 0;
3117}
3118
3119
3120AVCodec h264_decoder = {
3121 "h264",
3122 CODEC_TYPE_VIDEO,
3123 CODEC_ID_H264,
3124 sizeof(H264Context),
903d58f6 3125 ff_h264_decode_init,
0da71265 3126 NULL,
903d58f6 3127 ff_h264_decode_end,
0da71265 3128 decode_frame,
f3ba9db4 3129 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7c33ad19 3130 .flush= flush_dpb,
fe4bf374 3131 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
6026a096 3132 .pix_fmts= ff_hwaccel_pixfmt_list_420,
0da71265
MN
3133};
3134
b250f9c6 3135#if CONFIG_H264_VDPAU_DECODER
369122dd
NC
3136AVCodec h264_vdpau_decoder = {