Drop if(0) code.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
188d3c51 34#include "h264_mvpred.h"
26b4fe82 35#include "h264_parser.h"
0da71265 36#include "golomb.h"
199436b9 37#include "mathops.h"
626464fb 38#include "rectangle.h"
369122dd 39#include "vdpau_internal.h"
0da71265 40
e5017ab8
LA
41#include "cabac.h"
42
2848ce84 43//#undef NDEBUG
0da71265
MN
44#include <assert.h>
45
/* rem6[q] == q % 6 for every q in 0..51 (one row per full cycle of six). */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
49
/* div6[q] == q / 6 for every q in 0..51 (one row per quotient value). */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
53
903d58f6 54void ff_h264_write_back_intra_pred_mode(H264Context *h){
64514ee8 55 const int mb_xy= h->mb_xy;
0da71265
MN
56
57 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
58 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
59 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
60 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
61 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
62 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
63 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
64}
65
66/**
67 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
68 */
2bedc0e8
MN
69int ff_h264_check_intra4x4_pred_mode(H264Context *h){
70 MpegEncContext * const s = &h->s;
71 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
72 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
73 int i;
74
75 if(!(h->top_samples_available&0x8000)){
76 for(i=0; i<4; i++){
77 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
78 if(status<0){
79 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
80 return -1;
81 } else if(status){
82 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
83 }
84 }
85 }
86
87 if((h->left_samples_available&0x8888)!=0x8888){
88 static const int mask[4]={0x8000,0x2000,0x80,0x20};
89 for(i=0; i<4; i++){
90 if(!(h->left_samples_available&mask[i])){
91 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
92 if(status<0){
93 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
94 return -1;
95 } else if(status){
96 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
97 }
98 }
99 }
100 }
101
102 return 0;
103} //FIXME cleanup like ff_h264_check_intra_pred_mode
104
105/**
106 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
107 */
903d58f6 108int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
0da71265
MN
109 MpegEncContext * const s = &h->s;
110 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
111 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 112
43ff0714 113 if(mode > 6U) {
5175b937 114 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 115 return -1;
5175b937 116 }
115329f1 117
0da71265
MN
118 if(!(h->top_samples_available&0x8000)){
119 mode= top[ mode ];
120 if(mode<0){
9b879566 121 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
122 return -1;
123 }
124 }
115329f1 125
d1d10e91 126 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 127 mode= left[ mode ];
d1d10e91
MN
128 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
129 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
130 }
0da71265 131 if(mode<0){
9b879566 132 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 133 return -1;
115329f1 134 }
0da71265
MN
135 }
136
137 return mode;
138}
139
1790a5e9 140const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
141 int i, si, di;
142 uint8_t *dst;
24456882 143 int bufidx;
0da71265 144
bb270c08 145// src[0]&0x80; //forbidden bit
0da71265
MN
146 h->nal_ref_idc= src[0]>>5;
147 h->nal_unit_type= src[0]&0x1F;
148
149 src++; length--;
115329f1 150#if 0
0da71265
MN
151 for(i=0; i<length; i++)
152 printf("%2X ", src[i]);
153#endif
e08715d3 154
b250f9c6
AJ
155#if HAVE_FAST_UNALIGNED
156# if HAVE_FAST_64BIT
e08715d3
MN
157# define RS 7
158 for(i=0; i+1<length; i+=9){
3878be31 159 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
160# else
161# define RS 3
162 for(i=0; i+1<length; i+=5){
3878be31 163 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
164# endif
165 continue;
166 if(i>0 && !src[i]) i--;
167 while(src[i]) i++;
168#else
169# define RS 0
0da71265
MN
170 for(i=0; i+1<length; i+=2){
171 if(src[i]) continue;
172 if(i>0 && src[i-1]==0) i--;
e08715d3 173#endif
0da71265
MN
174 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
175 if(src[i+2]!=3){
176 /* startcode, so we must be past the end */
177 length=i;
178 }
179 break;
180 }
abb27cfb 181 i-= RS;
0da71265
MN
182 }
183
184 if(i>=length-1){ //no escaped 0
185 *dst_length= length;
186 *consumed= length+1; //+1 for the header
115329f1 187 return src;
0da71265
MN
188 }
189
24456882 190 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 191 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 192 dst= h->rbsp_buffer[bufidx];
0da71265 193
ac658be5
FOL
194 if (dst == NULL){
195 return NULL;
196 }
197
3b66c4c5 198//printf("decoding esc\n");
593af7cd
MN
199 memcpy(dst, src, i);
200 si=di=i;
201 while(si+2<length){
0da71265 202 //remove escapes (very rare 1:2^22)
593af7cd
MN
203 if(src[si+2]>3){
204 dst[di++]= src[si++];
205 dst[di++]= src[si++];
206 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
207 if(src[si+2]==3){ //escape
208 dst[di++]= 0;
209 dst[di++]= 0;
210 si+=3;
c8470cc1 211 continue;
0da71265 212 }else //next start code
593af7cd 213 goto nsc;
0da71265
MN
214 }
215
216 dst[di++]= src[si++];
217 }
593af7cd
MN
218 while(si<length)
219 dst[di++]= src[si++];
220nsc:
0da71265 221
d4369630
AS
222 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
223
0da71265
MN
224 *dst_length= di;
225 *consumed= si + 1;//+1 for the header
90b5b51e 226//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
227 return dst;
228}
229
1790a5e9 230int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
231 int v= *src;
232 int r;
233
a9c9a240 234 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
235
236 for(r=1; r<9; r++){
237 if(v&1) return r;
238 v>>=1;
239 }
240 return 0;
241}
242
243/**
1412060e 244 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
245 * @param qp quantization parameter
246 */
239ea04c 247static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
248#define stride 16
249 int i;
250 int temp[16]; //FIXME check if this is a good idea
251 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
252 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
253
254//memset(block, 64, 2*256);
255//return;
256 for(i=0; i<4; i++){
257 const int offset= y_offset[i];
258 const int z0= block[offset+stride*0] + block[offset+stride*4];
259 const int z1= block[offset+stride*0] - block[offset+stride*4];
260 const int z2= block[offset+stride*1] - block[offset+stride*5];
261 const int z3= block[offset+stride*1] + block[offset+stride*5];
262
263 temp[4*i+0]= z0+z3;
264 temp[4*i+1]= z1+z2;
265 temp[4*i+2]= z1-z2;
266 temp[4*i+3]= z0-z3;
267 }
268
269 for(i=0; i<4; i++){
270 const int offset= x_offset[i];
271 const int z0= temp[4*0+i] + temp[4*2+i];
272 const int z1= temp[4*0+i] - temp[4*2+i];
273 const int z2= temp[4*1+i] - temp[4*3+i];
274 const int z3= temp[4*1+i] + temp[4*3+i];
275
1412060e 276 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
277 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
278 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
279 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
280 }
281}
282
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; relies on the stride macro still being defined.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    /* first pass: butterflies into temp[] */
    for (i = 0; i < 4; i++) {
        const DCTELEM *in = block + y_offset[i];
        const int z0 = in[stride*0] + in[stride*4];
        const int z1 = in[stride*0] - in[stride*4];
        const int z2 = in[stride*1] - in[stride*5];
        const int z3 = in[stride*1] + in[stride*5];

        temp[4*i+0] = z0 + z3;
        temp[4*i+1] = z1 + z2;
        temp[4*i+2] = z1 - z2;
        temp[4*i+3] = z0 - z3;
    }

    /* second pass: butterflies over temp[], halve and store */
    for (i = 0; i < 4; i++) {
        DCTELEM *out = block + x_offset[i];
        const int z0 = temp[4*0+i] + temp[4*2+i];
        const int z1 = temp[4*0+i] - temp[4*2+i];
        const int z2 = temp[4*1+i] - temp[4*3+i];
        const int z3 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ] = (z0 + z3) >> 1;
        out[stride*2 ] = (z1 + z2) >> 1;
        out[stride*8 ] = (z1 - z2) >> 1;
        out[stride*10] = (z0 - z3) >> 1;
    }
}
#endif
322
0da71265
MN
323#undef xStride
324#undef stride
325
239ea04c 326static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
327 const int stride= 16*2;
328 const int xStride= 16;
329 int a,b,c,d,e;
330
331 a= block[stride*0 + xStride*0];
332 b= block[stride*0 + xStride*1];
333 c= block[stride*1 + xStride*0];
334 d= block[stride*1 + xStride*1];
335
336 e= a-b;
337 a= a+b;
338 b= c-d;
339 c= c+d;
340
239ea04c
LM
341 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
342 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
343 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
344 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
345}
346
#if 0
/**
 * 2x2 transform of the four chroma dc values at block[0], block[16],
 * block[32] and block[48], stored back in place without scaling.
 * Disabled code.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int row_step= 16*2;
    const int col_step= 16;

    const int in00 = block[row_step*0 + col_step*0];
    const int in01 = block[row_step*0 + col_step*1];
    const int in10 = block[row_step*1 + col_step*0];
    const int in11 = block[row_step*1 + col_step*1];

    /* horizontal butterflies of the two rows */
    const int top_sum  = in00 + in01;
    const int top_diff = in00 - in01;
    const int bot_sum  = in10 + in11;
    const int bot_diff = in10 - in11;

    /* vertical butterflies */
    block[row_step*0 + col_step*0] = (top_sum  + bot_sum );
    block[row_step*0 + col_step*1] = (top_diff + bot_diff);
    block[row_step*1 + col_step*0] = (top_sum  - bot_sum );
    block[row_step*1 + col_step*1] = (top_diff - bot_diff);
}
#endif
0da71265 369
0da71265
MN
370static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
371 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
372 int src_x_offset, int src_y_offset,
373 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
374 MpegEncContext * const s = &h->s;
375 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 376 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 377 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
378 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
379 uint8_t * src_cb, * src_cr;
380 int extra_width= h->emu_edge_width;
381 int extra_height= h->emu_edge_height;
0da71265
MN
382 int emu=0;
383 const int full_mx= mx>>2;
384 const int full_my= my>>2;
fbd312fd 385 const int pic_width = 16*s->mb_width;
0d43dd8c 386 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 387
0da71265
MN
388 if(mx&7) extra_width -= 3;
389 if(my&7) extra_height -= 3;
115329f1
DB
390
391 if( full_mx < 0-extra_width
392 || full_my < 0-extra_height
393 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 394 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
395 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
396 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
397 emu=1;
398 }
115329f1 399
5d18eaad 400 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 401 if(!square){
5d18eaad 402 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 403 }
115329f1 404
49fb20cb 405 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 406
0d43dd8c 407 if(MB_FIELD){
5d18eaad 408 // chroma offset when predicting from a field of opposite parity
2143b118 409 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
410 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
411 }
412 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
413 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
414
0da71265 415 if(emu){
5d18eaad 416 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
417 src_cb= s->edge_emu_buffer;
418 }
5d18eaad 419 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
420
421 if(emu){
5d18eaad 422 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
423 src_cr= s->edge_emu_buffer;
424 }
5d18eaad 425 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
426}
427
9f2d1b4f 428static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
429 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
430 int x_offset, int y_offset,
431 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
432 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
433 int list0, int list1){
434 MpegEncContext * const s = &h->s;
435 qpel_mc_func *qpix_op= qpix_put;
436 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 437
5d18eaad
LM
438 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
439 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
440 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 441 x_offset += 8*s->mb_x;
0d43dd8c 442 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 443
0da71265 444 if(list0){
1924f3ce 445 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
446 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
447 dest_y, dest_cb, dest_cr, x_offset, y_offset,
448 qpix_op, chroma_op);
449
450 qpix_op= qpix_avg;
451 chroma_op= chroma_avg;
452 }
453
454 if(list1){
1924f3ce 455 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
456 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
457 dest_y, dest_cb, dest_cr, x_offset, y_offset,
458 qpix_op, chroma_op);
459 }
460}
461
9f2d1b4f
LM
462static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
463 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
464 int x_offset, int y_offset,
465 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
466 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
467 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
468 int list0, int list1){
469 MpegEncContext * const s = &h->s;
470
5d18eaad
LM
471 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
472 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
473 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 474 x_offset += 8*s->mb_x;
0d43dd8c 475 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 476
9f2d1b4f
LM
477 if(list0 && list1){
478 /* don't optimize for luma-only case, since B-frames usually
479 * use implicit weights => chroma too. */
480 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
481 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
482 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
483 int refn0 = h->ref_cache[0][ scan8[n] ];
484 int refn1 = h->ref_cache[1][ scan8[n] ];
485
486 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
487 dest_y, dest_cb, dest_cr,
488 x_offset, y_offset, qpix_put, chroma_put);
489 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
490 tmp_y, tmp_cb, tmp_cr,
491 x_offset, y_offset, qpix_put, chroma_put);
492
493 if(h->use_weight == 2){
494 int weight0 = h->implicit_weight[refn0][refn1];
495 int weight1 = 64 - weight0;
5d18eaad
LM
496 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
497 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
498 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 499 }else{
5d18eaad 500 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 501 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 502 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 503 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 504 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 505 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 506 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 507 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 508 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
509 }
510 }else{
511 int list = list1 ? 1 : 0;
512 int refn = h->ref_cache[list][ scan8[n] ];
513 Picture *ref= &h->ref_list[list][refn];
514 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
515 dest_y, dest_cb, dest_cr, x_offset, y_offset,
516 qpix_put, chroma_put);
517
5d18eaad 518 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
519 h->luma_weight[list][refn], h->luma_offset[list][refn]);
520 if(h->use_weight_chroma){
5d18eaad 521 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 522 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 523 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
524 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
525 }
526 }
527}
528
529static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
530 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
531 int x_offset, int y_offset,
532 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
533 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 534 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
535 int list0, int list1){
536 if((h->use_weight==2 && list0 && list1
537 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
538 || h->use_weight==1)
539 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
540 x_offset, y_offset, qpix_put, chroma_put,
541 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
542 else
543 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
544 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
545}
546
513fbd8e
LM
547static inline void prefetch_motion(H264Context *h, int list){
548 /* fetch pixels for estimated mv 4 macroblocks ahead
549 * optimized for 64byte cache lines */
550 MpegEncContext * const s = &h->s;
551 const int refn = h->ref_cache[list][scan8[0]];
552 if(refn >= 0){
553 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
554 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
555 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 556 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
557 s->dsp.prefetch(src[0]+off, s->linesize, 4);
558 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
559 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
560 }
561}
562
0da71265
MN
563static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
564 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
565 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
566 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 567 MpegEncContext * const s = &h->s;
64514ee8 568 const int mb_xy= h->mb_xy;
0da71265 569 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 570
0da71265 571 assert(IS_INTER(mb_type));
115329f1 572
513fbd8e
LM
573 prefetch_motion(h, 0);
574
0da71265
MN
575 if(IS_16X16(mb_type)){
576 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
577 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
7231ccf4 578 weight_op, weight_avg,
0da71265
MN
579 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
580 }else if(IS_16X8(mb_type)){
581 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
582 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 583 &weight_op[1], &weight_avg[1],
0da71265
MN
584 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
585 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
586 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 587 &weight_op[1], &weight_avg[1],
0da71265
MN
588 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
589 }else if(IS_8X16(mb_type)){
5d18eaad 590 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 591 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 592 &weight_op[2], &weight_avg[2],
0da71265 593 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 594 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 595 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 596 &weight_op[2], &weight_avg[2],
0da71265
MN
597 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
598 }else{
599 int i;
115329f1 600
0da71265
MN
601 assert(IS_8X8(mb_type));
602
603 for(i=0; i<4; i++){
604 const int sub_mb_type= h->sub_mb_type[i];
605 const int n= 4*i;
606 int x_offset= (i&1)<<2;
607 int y_offset= (i&2)<<1;
608
609 if(IS_SUB_8X8(sub_mb_type)){
610 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
611 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 612 &weight_op[3], &weight_avg[3],
0da71265
MN
613 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
614 }else if(IS_SUB_8X4(sub_mb_type)){
615 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
616 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 617 &weight_op[4], &weight_avg[4],
0da71265
MN
618 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
619 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
620 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 621 &weight_op[4], &weight_avg[4],
0da71265
MN
622 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
623 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 624 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 625 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 626 &weight_op[5], &weight_avg[5],
0da71265 627 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 628 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 629 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 630 &weight_op[5], &weight_avg[5],
0da71265
MN
631 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
632 }else{
633 int j;
634 assert(IS_SUB_4X4(sub_mb_type));
635 for(j=0; j<4; j++){
636 int sub_x_offset= x_offset + 2*(j&1);
637 int sub_y_offset= y_offset + (j&2);
638 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
639 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 640 &weight_op[6], &weight_avg[6],
0da71265
MN
641 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
642 }
643 }
644 }
645 }
513fbd8e
LM
646
647 prefetch_motion(h, 1);
0da71265
MN
648}
649
0da71265 650
0da71265 651static void free_tables(H264Context *h){
7978debd 652 int i;
afebe2f7 653 H264Context *hx;
0da71265 654 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
655 av_freep(&h->chroma_pred_mode_table);
656 av_freep(&h->cbp_table);
9e528114
LA
657 av_freep(&h->mvd_table[0]);
658 av_freep(&h->mvd_table[1]);
5ad984c9 659 av_freep(&h->direct_table);
0da71265
MN
660 av_freep(&h->non_zero_count);
661 av_freep(&h->slice_table_base);
662 h->slice_table= NULL;
c988f975 663 av_freep(&h->list_counts);
e5017ab8 664
0da71265
MN
665 av_freep(&h->mb2b_xy);
666 av_freep(&h->mb2b8_xy);
9f2d1b4f 667
6752dd5a 668 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
669 hx = h->thread_context[i];
670 if(!hx) continue;
671 av_freep(&hx->top_borders[1]);
672 av_freep(&hx->top_borders[0]);
673 av_freep(&hx->s.obmc_scratchpad);
d2d5e067
AS
674 av_freep(&hx->rbsp_buffer[1]);
675 av_freep(&hx->rbsp_buffer[0]);
eda4ea4e
MS
676 hx->rbsp_buffer_size[0] = 0;
677 hx->rbsp_buffer_size[1] = 0;
d2d5e067 678 if (i) av_freep(&h->thread_context[i]);
afebe2f7 679 }
0da71265
MN
680}
681
239ea04c
LM
682static void init_dequant8_coeff_table(H264Context *h){
683 int i,q,x;
548a1c8a 684 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
685 h->dequant8_coeff[0] = h->dequant8_buffer[0];
686 h->dequant8_coeff[1] = h->dequant8_buffer[1];
687
688 for(i=0; i<2; i++ ){
689 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
690 h->dequant8_coeff[1] = h->dequant8_buffer[0];
691 break;
692 }
693
694 for(q=0; q<52; q++){
d9ec210b
DP
695 int shift = div6[q];
696 int idx = rem6[q];
239ea04c 697 for(x=0; x<64; x++)
548a1c8a
LM
698 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
699 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
700 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
701 }
702 }
703}
704
705static void init_dequant4_coeff_table(H264Context *h){
706 int i,j,q,x;
ab2e3e2c 707 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
708 for(i=0; i<6; i++ ){
709 h->dequant4_coeff[i] = h->dequant4_buffer[i];
710 for(j=0; j<i; j++){
711 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
712 h->dequant4_coeff[i] = h->dequant4_buffer[j];
713 break;
714 }
715 }
716 if(j<i)
717 continue;
718
719 for(q=0; q<52; q++){
d9ec210b
DP
720 int shift = div6[q] + 2;
721 int idx = rem6[q];
239ea04c 722 for(x=0; x<16; x++)
ab2e3e2c
LM
723 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
724 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
725 h->pps.scaling_matrix4[i][x]) << shift;
726 }
727 }
728}
729
730static void init_dequant_tables(H264Context *h){
731 int i,x;
732 init_dequant4_coeff_table(h);
733 if(h->pps.transform_8x8_mode)
734 init_dequant8_coeff_table(h);
735 if(h->sps.transform_bypass){
736 for(i=0; i<6; i++)
737 for(x=0; x<16; x++)
738 h->dequant4_coeff[i][0][x] = 1<<6;
739 if(h->pps.transform_8x8_mode)
740 for(i=0; i<2; i++)
741 for(x=0; x<64; x++)
742 h->dequant8_coeff[i][0][x] = 1<<6;
743 }
744}
745
746
903d58f6 747int ff_h264_alloc_tables(H264Context *h){
0da71265 748 MpegEncContext * const s = &h->s;
7bc9090a 749 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 750 int x,y;
0da71265 751
d31dbec3 752 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
e5017ab8 753
c988f975 754 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
d31dbec3
RP
755 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
756 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265 757
d31dbec3
RP
758 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
759 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
760 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
761 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
c988f975 762 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
e5017ab8 763
b735aeea 764 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 765 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 766
d31dbec3
RP
767 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
768 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
MN
769 for(y=0; y<s->mb_height; y++){
770 for(x=0; x<s->mb_width; x++){
7bc9090a 771 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
772 const int b_xy = 4*x + 4*y*h->b_stride;
773 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 774
0da71265
MN
775 h->mb2b_xy [mb_xy]= b_xy;
776 h->mb2b8_xy[mb_xy]= b8_xy;
777 }
778 }
9f2d1b4f 779
9c6221ae
GV
780 s->obmc_scratchpad = NULL;
781
56edbd81
LM
782 if(!h->dequant4_coeff[0])
783 init_dequant_tables(h);
784
0da71265
MN
785 return 0;
786fail:
787 free_tables(h);
788 return -1;
789}
790
afebe2f7
791/**
792 * Mimic alloc_tables(), but for every context thread.
793 */
794static void clone_tables(H264Context *dst, H264Context *src){
795 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
796 dst->non_zero_count = src->non_zero_count;
797 dst->slice_table = src->slice_table;
798 dst->cbp_table = src->cbp_table;
799 dst->mb2b_xy = src->mb2b_xy;
800 dst->mb2b8_xy = src->mb2b8_xy;
801 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
802 dst->mvd_table[0] = src->mvd_table[0];
803 dst->mvd_table[1] = src->mvd_table[1];
804 dst->direct_table = src->direct_table;
fb823b77 805 dst->list_counts = src->list_counts;
afebe2f7 806
afebe2f7
807 dst->s.obmc_scratchpad = NULL;
808 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
809}
810
811/**
812 * Init context
813 * Allocate buffers which are not shared amongst multiple threads.
814 */
815static int context_init(H264Context *h){
d31dbec3
RP
816 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
817 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
afebe2f7 818
afebe2f7
819 return 0;
820fail:
821 return -1; // free_tables will clean up for us
822}
823
98a6fff9 824static av_cold void common_init(H264Context *h){
0da71265 825 MpegEncContext * const s = &h->s;
0da71265
MN
826
827 s->width = s->avctx->width;
828 s->height = s->avctx->height;
829 s->codec_id= s->avctx->codec->id;
115329f1 830
c92a30bb 831 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 832
239ea04c 833 h->dequant_coeff_pps= -1;
9a41c2c7 834 s->unrestricted_mv=1;
0da71265 835 s->decode=1; //FIXME
56edbd81 836
a5805aa9
MN
837 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
838
56edbd81
LM
839 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
840 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
841}
842
903d58f6 843av_cold int ff_h264_decode_init(AVCodecContext *avctx){
0da71265
MN
844 H264Context *h= avctx->priv_data;
845 MpegEncContext * const s = &h->s;
846
3edcacde 847 MPV_decode_defaults(s);
115329f1 848
0da71265
MN
849 s->avctx = avctx;
850 common_init(h);
851
852 s->out_format = FMT_H264;
853 s->workaround_bugs= avctx->workaround_bugs;
854
855 // set defaults
0da71265 856// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 857 s->quarter_sample = 1;
47cd974a 858 if(!avctx->has_b_frames)
0da71265 859 s->low_delay= 1;
7a9dba3c 860
580a7465 861 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
0da71265 862
e1e94902 863 ff_h264_decode_init_vlc();
115329f1 864
26165f99
MR
865 if(avctx->extradata_size > 0 && avctx->extradata &&
866 *(char *)avctx->extradata == 1){
4770b1b4
RT
867 h->is_avc = 1;
868 h->got_avcC = 0;
26165f99
MR
869 } else {
870 h->is_avc = 0;
4770b1b4
RT
871 }
872
afebe2f7 873 h->thread_context[0] = h;
18c7be65 874 h->outputed_poc = INT_MIN;
e4b8f1fa 875 h->prev_poc_msb= 1<<16;
055a6aa7 876 h->x264_build = -1;
9c095463 877 ff_h264_reset_sei(h);
efd8c1f6
MN
878 if(avctx->codec_id == CODEC_ID_H264){
879 if(avctx->ticks_per_frame == 1){
880 s->avctx->time_base.den *=2;
881 }
19df37a8 882 avctx->ticks_per_frame = 2;
efd8c1f6 883 }
0da71265
MN
884 return 0;
885}
886
903d58f6 887int ff_h264_frame_start(H264Context *h){
0da71265
MN
888 MpegEncContext * const s = &h->s;
889 int i;
890
af8aa846
MN
891 if(MPV_frame_start(s, s->avctx) < 0)
892 return -1;
0da71265 893 ff_er_frame_start(s);
3a22d7fa
JD
894 /*
895 * MPV_frame_start uses pict_type to derive key_frame.
896 * This is incorrect for H.264; IDR markings must be used.
1412060e 897 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
898 * See decode_nal_units().
899 */
900 s->current_picture_ptr->key_frame= 0;
c173a088 901 s->current_picture_ptr->mmco_reset= 0;
0da71265
MN
902
903 assert(s->linesize && s->uvlinesize);
904
905 for(i=0; i<16; i++){
906 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 907 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
908 }
909 for(i=0; i<4; i++){
910 h->block_offset[16+i]=
911 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
912 h->block_offset[24+16+i]=
913 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
914 }
915
934b0821
LM
916 /* can't be in alloc_tables because linesize isn't known there.
917 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
918 for(i = 0; i < s->avctx->thread_count; i++)
919 if(!h->thread_context[i]->s.obmc_scratchpad)
920 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
921
922 /* some macroblocks will be accessed before they're available */
afebe2f7 923 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 924 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 925
0da71265 926// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 927
1412060e 928 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
929 // that if we break out due to an error it can be released automatically
930 // in the next MPV_frame_start().
931 // SVQ3 as well as most other codecs have only last/next/current and thus
932 // get released even with set reference, besides SVQ3 and others do not
933 // mark frames as reference later "naturally".
934 if(s->codec_id != CODEC_ID_SVQ3)
935 s->current_picture_ptr->reference= 0;
357282c6
MN
936
937 s->current_picture_ptr->field_poc[0]=
938 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 939 assert(s->current_picture_ptr->long_ref==0);
357282c6 940
af8aa846 941 return 0;
0da71265
MN
942}
943
93cc10fa 944static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e 945 MpegEncContext * const s = &h->s;
0b69d625 946 uint8_t *top_border;
5f7f9719 947 int top_idx = 1;
115329f1 948
53c05b1e
MN
949 src_y -= linesize;
950 src_cb -= uvlinesize;
951 src_cr -= uvlinesize;
952
5f7f9719
MN
953 if(!simple && FRAME_MBAFF){
954 if(s->mb_y&1){
5f7f9719 955 if(!MB_MBAFF){
0b69d625
AS
956 top_border = h->top_borders[0][s->mb_x];
957 AV_COPY128(top_border, src_y + 15*linesize);
49fb20cb 958 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
959 AV_COPY64(top_border+16, src_cb+7*uvlinesize);
960 AV_COPY64(top_border+24, src_cr+7*uvlinesize);
5f7f9719
MN
961 }
962 }
c988f975
MN
963 }else if(MB_MBAFF){
964 top_idx = 0;
965 }else
966 return;
5f7f9719
MN
967 }
968
0b69d625 969 top_border = h->top_borders[top_idx][s->mb_x];
3b66c4c5 970 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 971 // and the line above the bottom macroblock
0b69d625 972 AV_COPY128(top_border, src_y + 16*linesize);
53c05b1e 973
49fb20cb 974 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
975 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
976 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
53c05b1e
MN
977 }
978}
979
93cc10fa 980static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e 981 MpegEncContext * const s = &h->s;
b69378e2
982 int deblock_left;
983 int deblock_top;
984 int mb_xy;
5f7f9719 985 int top_idx = 1;
1e4f1c56
AS
986 uint8_t *top_border_m1;
987 uint8_t *top_border;
5f7f9719
MN
988
989 if(!simple && FRAME_MBAFF){
990 if(s->mb_y&1){
c988f975
MN
991 if(!MB_MBAFF)
992 return;
5f7f9719 993 }else{
5f7f9719
MN
994 top_idx = MB_MBAFF ? 0 : 1;
995 }
5f7f9719 996 }
b69378e2
997
998 if(h->deblocking_filter == 2) {
64514ee8 999 mb_xy = h->mb_xy;
b69378e2
1000 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
1001 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
1002 } else {
1003 deblock_left = (s->mb_x > 0);
6c805007 1004 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 1005 }
53c05b1e
MN
1006
1007 src_y -= linesize + 1;
1008 src_cb -= uvlinesize + 1;
1009 src_cr -= uvlinesize + 1;
1010
1e4f1c56
AS
1011 top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1012 top_border = h->top_borders[top_idx][s->mb_x];
1013
0b69d625
AS
1014#define XCHG(a,b,xchg)\
1015if (xchg) AV_SWAP64(b,a);\
1016else AV_COPY64(b,a);
d89dc06a 1017
d89dc06a 1018 if(deblock_top){
c988f975 1019 if(deblock_left){
0b69d625 1020 XCHG(top_border_m1+8, src_y -7, 1);
c988f975 1021 }
0b69d625
AS
1022 XCHG(top_border+0, src_y +1, xchg);
1023 XCHG(top_border+8, src_y +9, 1);
cad4368a 1024 if(s->mb_x+1 < s->mb_width){
0b69d625 1025 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
43efd19a 1026 }
53c05b1e 1027 }
53c05b1e 1028
49fb20cb 1029 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 1030 if(deblock_top){
c988f975 1031 if(deblock_left){
0b69d625
AS
1032 XCHG(top_border_m1+16, src_cb -7, 1);
1033 XCHG(top_border_m1+24, src_cr -7, 1);
c988f975 1034 }
0b69d625
AS
1035 XCHG(top_border+16, src_cb+1, 1);
1036 XCHG(top_border+24, src_cr+1, 1);
53c05b1e 1037 }
53c05b1e
MN
1038 }
1039}
1040
/**
 * Reconstruct the current macroblock into s->current_picture.
 *
 * Intra macroblocks are predicted with the h->hpc predictors, H.264 inter
 * macroblocks with hl_motion(); the luma and chroma residuals held in h->mb
 * are then added, and h->mb is cleared at the end when the macroblock is
 * intra or h->cbp is non-zero.
 *
 * @param h      decoder context; the macroblock is selected by s->mb_x,
 *               s->mb_y and h->mb_xy
 * @param simple when non-zero, the MB_FIELD, intra PCM, transform-bypass,
 *               gray-only and SVQ3 code paths are not taken;
 *               hl_decode_mb_simple() passes 1, hl_decode_mb_complex() passes 0
 */
5a6a6cc7 1041static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
1042 MpegEncContext * const s = &h->s;
1043 const int mb_x= s->mb_x;
1044 const int mb_y= s->mb_y;
64514ee8 1045 const int mb_xy= h->mb_xy;
0da71265
MN
1046 const int mb_type= s->current_picture.mb_type[mb_xy];
1047 uint8_t *dest_y, *dest_cb, *dest_cr;
1048 int linesize, uvlinesize /*dct_offset*/;
1049 int i;
6867a90b 1050 int *block_offset = &h->block_offset[0];
41e4055b 1051 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 1052 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 1053 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 1054 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 1055 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 1056
6120a343
MN
1057 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1058 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1059 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 1060
a957c27b
LM
1061 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1062 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1063
c988f975
MN
1064 h->list_counts[mb_xy]= h->list_count;
1065
/* field macroblock: use doubled line strides and the block offsets stored from index 24 on */
bd91fee3 1066 if (!simple && MB_FIELD) {
5d18eaad
LM
1067 linesize = h->mb_linesize = s->linesize * 2;
1068 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 1069 block_offset = &h->block_offset[24];
1412060e 1070 if(mb_y&1){ //FIXME move out of this function?
0da71265 1071 dest_y -= s->linesize*15;
6867a90b
LLL
1072 dest_cb-= s->uvlinesize*7;
1073 dest_cr-= s->uvlinesize*7;
0da71265 1074 }
5d18eaad
LM
1075 if(FRAME_MBAFF) {
1076 int list;
3425501d 1077 for(list=0; list<h->list_count; list++){
5d18eaad
LM
1078 if(!USES_LIST(mb_type, list))
1079 continue;
1080 if(IS_16X16(mb_type)){
1081 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 1082 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
1083 }else{
1084 for(i=0; i<16; i+=4){
5d18eaad
LM
1085 int ref = h->ref_cache[list][scan8[i]];
1086 if(ref >= 0)
1710856c 1087 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
1088 }
1089 }
1090 }
1091 }
0da71265 1092 } else {
5d18eaad
LM
1093 linesize = h->mb_linesize = s->linesize;
1094 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
1095// dct_offset = s->linesize * 16;
1096 }
115329f1 1097
/* intra PCM: copy the samples held in h->mb directly to the destination planes */
bd91fee3 1098 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
1099 for (i=0; i<16; i++) {
1100 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 1101 }
c1708e8d
MN
1102 for (i=0; i<8; i++) {
1103 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1104 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 1105 }
e7e09b49
LLL
1106 } else {
1107 if(IS_INTRA(mb_type)){
/* with deblocking enabled, exchange the rows above the macroblock with h->top_borders
 * before predicting (xchg=1); the matching call after prediction passes xchg=0 */
5f7f9719 1108 if(h->deblocking_filter)
93cc10fa 1109 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 1110
49fb20cb 1111 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
1112 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1113 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 1114 }
0da71265 1115
e7e09b49 1116 if(IS_INTRA4x4(mb_type)){
bd91fee3 1117 if(simple || !s->encoding){
43efd19a 1118 if(IS_8x8DCT(mb_type)){
1eb96035
MN
1119 if(transform_bypass){
1120 idct_dc_add =
1121 idct_add = s->dsp.add_pixels8;
dae006d7 1122 }else{
1eb96035
MN
1123 idct_dc_add = s->dsp.h264_idct8_dc_add;
1124 idct_add = s->dsp.h264_idct8_add;
1125 }
43efd19a
LM
1126 for(i=0; i<16; i+=4){
1127 uint8_t * const ptr= dest_y + block_offset[i];
1128 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
1129 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1130 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1131 }else{
ac0623b2
MN
1132 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1133 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1134 (h->topright_samples_available<<i)&0x4000, linesize);
1135 if(nnz){
1136 if(nnz == 1 && h->mb[i*16])
1137 idct_dc_add(ptr, h->mb + i*16, linesize);
1138 else
1139 idct_add (ptr, h->mb + i*16, linesize);
1140 }
41e4055b 1141 }
43efd19a 1142 }
1eb96035
MN
1143 }else{
1144 if(transform_bypass){
1145 idct_dc_add =
1146 idct_add = s->dsp.add_pixels4;
1147 }else{
1148 idct_dc_add = s->dsp.h264_idct_dc_add;
1149 idct_add = s->dsp.h264_idct_add;
1150 }
aebb5d6d
MN
1151 for(i=0; i<16; i++){
1152 uint8_t * const ptr= dest_y + block_offset[i];
1153 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 1154
aebb5d6d
MN
1155 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1156 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1157 }else{
1158 uint8_t *topright;
1159 int nnz, tr;
1160 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1161 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1162 assert(mb_y || linesize <= block_offset[i]);
1163 if(!topright_avail){
1164 tr= ptr[3 - linesize]*0x01010101;
1165 topright= (uint8_t*) &tr;
1166 }else
1167 topright= ptr + 4 - linesize;
ac0623b2 1168 }else
aebb5d6d
MN
1169 topright= NULL;
1170
1171 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1172 nnz = h->non_zero_count_cache[ scan8[i] ];
1173 if(nnz){
1174 if(is_h264){
1175 if(nnz == 1 && h->mb[i*16])
1176 idct_dc_add(ptr, h->mb + i*16, linesize);
1177 else
1178 idct_add (ptr, h->mb + i*16, linesize);
1179 }else
881b5b80 1180 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
aebb5d6d 1181 }
ac0623b2 1182 }
41e4055b 1183 }
8b82a956 1184 }
0da71265 1185 }
e7e09b49 1186 }else{
c92a30bb 1187 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 1188 if(is_h264){
36940eca 1189 if(!transform_bypass)
93f0c0a4 1190 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 1191 }else
881b5b80 1192 ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 1193 }
5f7f9719 1194 if(h->deblocking_filter)
93cc10fa 1195 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 1196 }else if(is_h264){
e7e09b49 1197 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
1198 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1199 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 1200 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 1201 }
e7e09b49
LLL
1202
1203
/* luma residual for everything except intra 4x4, which is handled block by block above */
1204 if(!IS_INTRA4x4(mb_type)){
bd91fee3 1205 if(is_h264){
ef9d1d15 1206 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
1207 if(transform_bypass){
1208 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
1209 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1210 }else{
1211 for(i=0; i<16; i++){
1212 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 1213 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1214 }
2fd1f0e0
MN
1215 }
1216 }else{
1217 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 1218 }
49c084a7 1219 }else if(h->cbp&15){
2fd1f0e0 1220 if(transform_bypass){
0a8ca22f 1221 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 1222 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 1223 for(i=0; i<16; i+=di){
62bc966f 1224 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 1225 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1226 }
ef9d1d15 1227 }
2fd1f0e0
MN
1228 }else{
1229 if(IS_8x8DCT(mb_type)){
1230 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1231 }else{
1232 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1233 }
1234 }
4704097a 1235 }
e7e09b49
LLL
1236 }else{
1237 for(i=0; i<16; i++){
1238 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 1239 uint8_t * const ptr= dest_y + block_offset[i];
881b5b80 1240 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
e7e09b49 1241 }
4704097a 1242 }
0da71265
MN
1243 }
1244 }
0da71265 1245
/* chroma residual: only when bits 0x30 of h->cbp are set and gray-only decoding is not in effect */
49fb20cb 1246 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
1247 uint8_t *dest[2] = {dest_cb, dest_cr};
1248 if(transform_bypass){
96465b90
MN
1249 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1250 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1251 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1252 }else{
c25ac15a 1253 idct_add = s->dsp.add_pixels4;
96465b90
MN
1254 for(i=16; i<16+8; i++){
1255 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1256 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1257 }
1258 }
ef9d1d15 1259 }else{
4691a77d
1260 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1261 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 1262 if(is_h264){
c25ac15a
MN
1263 idct_add = s->dsp.h264_idct_add;
1264 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
1265 for(i=16; i<16+8; i++){
1266 if(h->non_zero_count_cache[ scan8[i] ])
1267 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1268 else if(h->mb[i*16])
1269 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1270 }
aebb5d6d
MN
1271 }else{
1272 for(i=16; i<16+8; i++){
1273 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
1274 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
881b5b80 1275 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
aebb5d6d 1276 }
e7e09b49 1277 }
4704097a 1278 }
0da71265
MN
1279 }
1280 }
1281 }
/* clear the coefficient buffer h->mb once it has been consumed */
c212fb0c
MN
1282 if(h->cbp || IS_INTRA(mb_type))
1283 s->dsp.clear_blocks(h->mb);
0da71265
MN
1284}
1285
0da71265 1286/**
bd91fee3
AS
1287 * Process a macroblock; this case avoids checks for expensive uncommon cases.
1288 */
1289static void hl_decode_mb_simple(H264Context *h){
1290 hl_decode_mb_internal(h, 1);
1291}
1292
1293/**
1294 * Process a macroblock; this handles edge cases, such as interlacing.
1295 */
1296static void av_noinline hl_decode_mb_complex(H264Context *h){
1297 hl_decode_mb_internal(h, 0);
1298}
1299
903d58f6 1300void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3 1301 MpegEncContext * const s = &h->s;
64514ee8 1302 const int mb_xy= h->mb_xy;
bd91fee3 1303 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 1304 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 1305
bd91fee3
AS
1306 if (is_complex)
1307 hl_decode_mb_complex(h);
1308 else hl_decode_mb_simple(h);
1309}
1310
0da71265
MN
1311static int pred_weight_table(H264Context *h){
1312 MpegEncContext * const s = &h->s;
1313 int list, i;
9f2d1b4f 1314 int luma_def, chroma_def;
115329f1 1315
9f2d1b4f
LM
1316 h->use_weight= 0;
1317 h->use_weight_chroma= 0;
0da71265
MN
1318 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
1319 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
1320 luma_def = 1<<h->luma_log2_weight_denom;
1321 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
1322
1323 for(list=0; list<2; list++){
cb99c652
GB
1324 h->luma_weight_flag[list] = 0;
1325 h->chroma_weight_flag[list] = 0;
0da71265
MN
1326 for(i=0; i<h->ref_count[list]; i++){
1327 int luma_weight_flag, chroma_weight_flag;
115329f1 1328
0da71265
MN
1329 luma_weight_flag= get_bits1(&s->gb);
1330 if(luma_weight_flag){
1331 h->luma_weight[list][i]= get_se_golomb(&s->gb);
1332 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f 1333 if( h->luma_weight[list][i] != luma_def
cb99c652 1334 || h->luma_offset[list][i] != 0) {
9f2d1b4f 1335 h->use_weight= 1;
cb99c652
GB
1336 h->luma_weight_flag[list]= 1;
1337 }
9f2d1b4f
LM
1338 }else{
1339 h->luma_weight[list][i]= luma_def;
1340 h->luma_offset[list][i]= 0;
0da71265
MN
1341 }
1342
0af6967e 1343 if(CHROMA){
fef744d4
MN
1344 chroma_weight_flag= get_bits1(&s->gb);
1345 if(chroma_weight_flag){
1346 int j;
1347 for(j=0; j<2; j++){
1348 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
1349 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
1350 if( h->chroma_weight[list][i][j] != chroma_def
cb99c652 1351 || h->chroma_offset[list][i][j] != 0) {
fef744d4 1352 h->use_weight_chroma= 1;
cb99c652
GB
1353 h->chroma_weight_flag[list]= 1;
1354 }
fef744d4
MN
1355 }
1356 }else{
1357 int j;
1358 for(j=0; j<2; j++){
1359 h->chroma_weight[list][i][j]= chroma_def;
1360 h->chroma_offset[list][i][j]= 0;
1361 }
0da71265
MN
1362 }
1363 }
1364 }
9f5c1037 1365 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 1366 }
9f2d1b4f 1367 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
1368 return 0;
1369}
1370
9f2d1b4f
LM
1371static void implicit_weight_table(H264Context *h){
1372 MpegEncContext * const s = &h->s;
cb99c652 1373 int ref0, ref1, i;
9f2d1b4f
LM
1374 int cur_poc = s->current_picture_ptr->poc;
1375
ce09f927
GB
1376 for (i = 0; i < 2; i++) {
1377 h->luma_weight_flag[i] = 0;
1378 h->chroma_weight_flag[i] = 0;
1379 }
1380
9f2d1b4f
LM
1381 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
1382 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1383 h->use_weight= 0;
1384 h->use_weight_chroma= 0;
1385 return;
1386 }
1387
1388 h->use_weight= 2;
1389 h->use_weight_chroma= 2;
1390 h->luma_log2_weight_denom= 5;
1391 h->chroma_log2_weight_denom= 5;
1392
9f2d1b4f
LM
1393 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
1394 int poc0 = h->ref_list[0][ref0].poc;
1395 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 1396 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 1397 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 1398 if(td){
f66e4f5f 1399 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 1400 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 1401 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
1402 if(dist_scale_factor < -64 || dist_scale_factor > 128)
1403 h->implicit_weight[ref0][ref1] = 32;
1404 else
1405 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
1406 }else
1407 h->implicit_weight[ref0][ref1] = 32;
1408 }
1409 }
1410}
1411
8fd57a66 1412/**
5175b937 1413 * instantaneous decoder refresh.
0da71265
MN
1414 */
1415static void idr(H264Context *h){
ea6f00c4 1416 ff_h264_remove_all_refs(h);
a149c1a5 1417 h->prev_frame_num= 0;
80f8e035
MN
1418 h->prev_frame_num_offset= 0;
1419 h->prev_poc_msb=
1420 h->prev_poc_lsb= 0;
0da71265
MN
1421}
1422
7c33ad19
LM
1423/* forget old pics after a seek */
1424static void flush_dpb(AVCodecContext *avctx){
1425 H264Context *h= avctx->priv_data;
1426 int i;
64b9d48f 1427 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
1428 if(h->delayed_pic[i])
1429 h->delayed_pic[i]->reference= 0;
7c33ad19 1430 h->delayed_pic[i]= NULL;
285b570f 1431 }
df8a7dff 1432 h->outputed_poc= INT_MIN;
b19d493f 1433 h->prev_interlaced_frame = 1;
7c33ad19 1434 idr(h);
ca159196
MR
1435 if(h->s.current_picture_ptr)
1436 h->s.current_picture_ptr->reference= 0;
12d96de3 1437 h->s.first_field= 0;
9c095463 1438 ff_h264_reset_sei(h);
e240f898 1439 ff_mpeg_flush(avctx);
7c33ad19
LM
1440}
1441
0da71265
MN
1442static int init_poc(H264Context *h){
1443 MpegEncContext * const s = &h->s;
1444 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1445 int field_poc[2];
357282c6 1446 Picture *cur = s->current_picture_ptr;
0da71265 1447
b78a6baa 1448 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 1449 if(h->frame_num < h->prev_frame_num)
b78a6baa 1450 h->frame_num_offset += max_frame_num;
0da71265
MN
1451
1452 if(h->sps.poc_type==0){
1453 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1454
1455 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1456 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1457 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1458 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1459 else
1460 h->poc_msb = h->prev_poc_msb;
1461//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 1462 field_poc[0] =
0da71265 1463 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 1464 if(s->picture_structure == PICT_FRAME)
0da71265
MN
1465 field_poc[1] += h->delta_poc_bottom;
1466 }else if(h->sps.poc_type==1){
1467 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1468 int i;
1469
1470 if(h->sps.poc_cycle_length != 0)
1471 abs_frame_num = h->frame_num_offset + h->frame_num;
1472 else
1473 abs_frame_num = 0;
1474
1475 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1476 abs_frame_num--;
115329f1 1477
0da71265
MN
1478 expected_delta_per_poc_cycle = 0;
1479 for(i=0; i < h->sps.poc_cycle_length; i++)
1480 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1481
1482 if(abs_frame_num > 0){
1483 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1484 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1485
1486 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1487 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1488 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1489 } else
1490 expectedpoc = 0;
1491
115329f1 1492 if(h->nal_ref_idc == 0)
0da71265 1493 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 1494
0da71265
MN
1495 field_poc[0] = expectedpoc + h->delta_poc[0];
1496 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1497
1498 if(s->picture_structure == PICT_FRAME)
1499 field_poc[1] += h->delta_poc[1];
1500 }else{
b78a6baa 1501 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 1502
b78a6baa
MN
1503 if(!h->nal_ref_idc)
1504 poc--;
5710b371 1505
0da71265
MN
1506 field_poc[0]= poc;
1507 field_poc[1]= poc;
1508 }
115329f1 1509
357282c6 1510 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 1511 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 1512 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 1513 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 1514 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
1515
1516 return 0;
1517}
1518
b41c1db3
1519
1520/**
1521 * initialize scan tables
1522 */
1523static void init_scan_tables(H264Context *h){
1524 MpegEncContext * const s = &h->s;
1525 int i;
1526 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
1527 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
1528 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
1529 }else{
1530 for(i=0; i<16; i++){
1531#define T(x) (x>>2) | ((x<<2) & 0xF)
1532 h->zigzag_scan[i] = T(zigzag_scan[i]);
1533 h-> field_scan[i] = T( field_scan[i]);
1534#undef T
1535 }
1536 }
1537 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
45beb850 1538 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
b41c1db3
1539 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
1540 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
1541 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
1542 }else{
1543 for(i=0; i<64; i++){
1544#define T(x) (x>>3) | ((x&7)<<3)
45beb850 1545 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
b41c1db3
1546 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1547 h->field_scan8x8[i] = T(field_scan8x8[i]);
1548 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
1549#undef T
1550 }
1551 }
1552 if(h->sps.transform_bypass){ //FIXME same ugly
1553 h->zigzag_scan_q0 = zigzag_scan;
45beb850 1554 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
1555 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1556 h->field_scan_q0 = field_scan;
1557 h->field_scan8x8_q0 = field_scan8x8;
1558 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1559 }else{
1560 h->zigzag_scan_q0 = h->zigzag_scan;
1561 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1562 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1563 h->field_scan_q0 = h->field_scan;
1564 h->field_scan8x8_q0 = h->field_scan8x8;
1565 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
1566 }
1567}
afebe2f7 1568
256299d3
MN
1569static void field_end(H264Context *h){
1570 MpegEncContext * const s = &h->s;
1571 AVCodecContext * const avctx= s->avctx;
1572 s->mb_y= 0;
1573
1574 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1575 s->current_picture_ptr->pict_type= s->pict_type;
1576
1577 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1578 ff_vdpau_h264_set_reference_frames(s);
1579
1580 if(!s->dropable) {
ea6f00c4 1581 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
1582 h->prev_poc_msb= h->poc_msb;
1583 h->prev_poc_lsb= h->poc_lsb;
1584 }
1585 h->prev_frame_num_offset= h->frame_num_offset;
1586 h->prev_frame_num= h->frame_num;
1587
1588 if (avctx->hwaccel) {
1589 if (avctx->hwaccel->end_frame(avctx) < 0)
1590 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1591 }
1592
1593 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1594 ff_vdpau_h264_picture_complete(s);
1595
1596 /*
1597 * FIXME: Error handling code does not seem to support interlaced
1598 * when slices span multiple rows
1599 * The ff_er_add_slice calls don't work right for bottom
1600 * fields; they cause massive erroneous error concealing
1601 * Error marking covers both fields (top and bottom).
1602 * This causes a mismatched s->error_count
1603 * and a bad error table. Further, the error count goes to
1604 * INT_MAX when called for bottom field, because mb_y is
1605 * past end by one (callers fault) and resync_mb_y != 0
1606 * causes problems for the first MB line, too.
1607 */
1608 if (!FIELD_PICTURE)
1609 ff_er_frame_end(s);
1610
1611 MPV_frame_end(s);
d225a1e2
MN
1612
1613 h->current_slice=0;
256299d3
MN
1614}
1615
afebe2f7
1616/**
1617 * Replicates H264 "master" context to thread contexts.
1618 */
1619static void clone_slice(H264Context *dst, H264Context *src)
1620{
1621 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1622 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1623 dst->s.current_picture = src->s.current_picture;
1624 dst->s.linesize = src->s.linesize;
1625 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 1626 dst->s.first_field = src->s.first_field;
afebe2f7
1627
1628 dst->prev_poc_msb = src->prev_poc_msb;
1629 dst->prev_poc_lsb = src->prev_poc_lsb;
1630 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1631 dst->prev_frame_num = src->prev_frame_num;
1632 dst->short_ref_count = src->short_ref_count;
1633
1634 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1635 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1636 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1637 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
1638
1639 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1640 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
1641}
1642
0da71265
MN
1643/**
1644 * decodes a slice header.
9c852bcf 1645 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
1646 *
1647 * @param h h264context
1648 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1649 *
d9526386 1650 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 1651 */
afebe2f7 1652static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 1653 MpegEncContext * const s = &h->s;
12d96de3 1654 MpegEncContext * const s0 = &h0->s;
88e7a4d1 1655 unsigned int first_mb_in_slice;
ac658be5 1656 unsigned int pps_id;
0da71265 1657 int num_ref_idx_active_override_flag;
41f5c62f 1658 unsigned int slice_type, tmp, i, j;
0bf79634 1659 int default_ref_list_done = 0;
12d96de3 1660 int last_pic_structure;
0da71265 1661
2f944356 1662 s->dropable= h->nal_ref_idc == 0;
0da71265 1663
cf653d08
JD
1664 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1665 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1666 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1667 }else{
1668 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1669 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1670 }
1671
0da71265
MN
1672 first_mb_in_slice= get_ue_golomb(&s->gb);
1673
d225a1e2
MN
1674 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1675 if(h0->current_slice && FIELD_PICTURE){
1676 field_end(h);
1677 }
1678
afebe2f7 1679 h0->current_slice = 0;
12d96de3 1680 if (!s0->first_field)
f6e3c460 1681 s->current_picture_ptr= NULL;
66a4b2c1
MN
1682 }
1683
9963b332 1684 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 1685 if(slice_type > 9){
9b879566 1686 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 1687 return -1;
0da71265 1688 }
0bf79634
LLL
1689 if(slice_type > 4){
1690 slice_type -= 5;
0da71265
MN
1691 h->slice_type_fixed=1;
1692 }else
1693 h->slice_type_fixed=0;
115329f1 1694
ee2a957f 1695 slice_type= golomb_to_pict_type[ slice_type ];
9701840b 1696 if (slice_type == FF_I_TYPE
afebe2f7 1697 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
1698 default_ref_list_done = 1;
1699 }
1700 h->slice_type= slice_type;
e3e6f18f 1701 h->slice_type_nos= slice_type & 3;
0bf79634 1702
1412060e 1703 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
115329f1 1704
0da71265 1705 pps_id= get_ue_golomb(&s->gb);
ac658be5 1706 if(pps_id>=MAX_PPS_COUNT){
9b879566 1707 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
1708 return -1;
1709 }
afebe2f7 1710 if(!h0->pps_buffers[pps_id]) {
a0f80050 1711 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
1712 return -1;
1713 }
afebe2f7 1714 h->pps= *h0->pps_buffers[pps_id];
8b92b792 1715
afebe2f7 1716 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 1717 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
1718 return -1;
1719 }
afebe2f7 1720 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 1721
50c21814 1722 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 1723 h->dequant_coeff_pps = pps_id;
239ea04c
LM
1724 init_dequant_tables(h);
1725 }
115329f1 1726
0da71265 1727 s->mb_width= h->sps.mb_width;
6867a90b 1728 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 1729
bf4665ee
DM
1730 h->b_stride= s->mb_width*4;
1731 h->b8_stride= s->mb_width*2;
0da71265 1732
faf3dfb9 1733 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 1734 if(h->sps.frame_mbs_only_flag)
faf3dfb9 1735 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 1736 else
faf3dfb9 1737 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
115329f1
DB
1738
1739 if (s->context_initialized
5ff85f1d 1740 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
afebe2f7
1741 if(h != h0)
1742 return -1; // width / height changed during parallelized decoding
0da71265 1743 free_tables(h);
ff7f75e1 1744 flush_dpb(s->avctx);
0da71265
MN
1745 MPV_common_end(s);
1746 }
1747 if (!s->context_initialized) {
afebe2f7
1748 if(h != h0)
1749 return -1; // we cant (re-)initialize context during parallel decoding
f3bdc3da
RD
1750
1751 avcodec_set_dimensions(s->avctx, s->width, s->height);
1752 s->avctx->sample_aspect_ratio= h->sps.sar;
1753 if(!s->avctx->sample_aspect_ratio.den)
1754 s->avctx->sample_aspect_ratio.den = 1;
1755
c4dffe7e
DC
1756 if(h->sps.video_signal_type_present_flag){
1757 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1758 if(h->sps.colour_description_present_flag){
1759 s->avctx->color_primaries = h->sps.color_primaries;
1760 s->avctx->color_trc = h->sps.color_trc;
1761 s->avctx->colorspace = h->sps.colorspace;
1762 }
1763 }
1764
f3bdc3da 1765 if(h->sps.timing_info_present_flag){
3102d180 1766 int64_t den= h->sps.time_scale;
055a6aa7 1767 if(h->x264_build < 44U)
3102d180 1768 den *= 2;
f3bdc3da 1769 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180 1770 h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da
RD
1771 }
1772 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
1773 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1774
0da71265
MN
1775 if (MPV_common_init(s) < 0)
1776 return -1;
12d96de3 1777 s->first_field = 0;
b19d493f 1778 h->prev_interlaced_frame = 1;
115329f1 1779
b41c1db3 1780 init_scan_tables(h);
903d58f6 1781 ff_h264_alloc_tables(h);
0da71265 1782
afebe2f7
1783 for(i = 1; i < s->avctx->thread_count; i++) {
1784 H264Context *c;
1785 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 1786 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7
1787 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
1788 c->sps = h->sps;
1789 c->pps = h->pps;
1790 init_scan_tables(c);
1791 clone_tables(c, h);
1792 }
1793
1794 for(i = 0; i < s->avctx->thread_count; i++)
1795 if(context_init(h->thread_context[i]) < 0)
1796 return -1;
0da71265
MN
1797 }
1798
0da71265
MN
1799 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1800
5d18eaad 1801 h->mb_mbaff = 0;
6ba71fc4 1802 h->mb_aff_frame = 0;
12d96de3 1803 last_pic_structure = s0->picture_structure;
0da71265
MN
1804 if(h->sps.frame_mbs_only_flag){
1805 s->picture_structure= PICT_FRAME;
1806 }else{
6ba71fc4 1807 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 1808 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 1809 } else {
0da71265 1810 s->picture_structure= PICT_FRAME;
6ba71fc4 1811 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 1812 }
0da71265 1813 }
44e9dcf1 1814 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
1815
1816 if(h0->current_slice == 0){
26b86e47
MN
1817 while(h->frame_num != h->prev_frame_num &&
1818 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
1819 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 1820 if (ff_h264_frame_start(h) < 0)
66e6038c 1821 return -1;
26b86e47
MN
1822 h->prev_frame_num++;
1823 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1824 s->current_picture_ptr->frame_num= h->prev_frame_num;
ea6f00c4 1825 ff_h264_execute_ref_pic_marking(h, NULL, 0);
26b86e47
MN
1826 }
1827
12d96de3
JD
1828 /* See if we have a decoded first field looking for a pair... */
1829 if (s0->first_field) {
1830 assert(s0->current_picture_ptr);
1831 assert(s0->current_picture_ptr->data[0]);
1832 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1833
1834 /* figure out if we have a complementary field pair */
1835 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1836 /*
1837 * Previous field is unmatched. Don't display it, but let it
1838 * remain for reference if marked as such.
1839 */
1840 s0->current_picture_ptr = NULL;
1841 s0->first_field = FIELD_PICTURE;
1842
1843 } else {
1844 if (h->nal_ref_idc &&
1845 s0->current_picture_ptr->reference &&
1846 s0->current_picture_ptr->frame_num != h->frame_num) {
1847 /*
1848 * This and previous field were reference, but had
1849 * different frame_nums. Consider this field first in
1850 * pair. Throw away previous field except for reference
1851 * purposes.
1852 */
1853 s0->first_field = 1;
1854 s0->current_picture_ptr = NULL;
1855
1856 } else {
1857 /* Second field in complementary pair */
1858 s0->first_field = 0;
1859 }
1860 }
1861
1862 } else {
1863 /* Frame or first field in a potentially complementary pair */
1864 assert(!s0->current_picture_ptr);
1865 s0->first_field = FIELD_PICTURE;
1866 }
1867
903d58f6 1868 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 1869 s0->first_field = 0;
2ddcf84b 1870 return -1;
12d96de3 1871 }
2ddcf84b
JD
1872 }
1873 if(h != h0)
1874 clone_slice(h, h0);
1875
1876 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1877
88e7a4d1 1878 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 1879 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
1880 first_mb_in_slice >= s->mb_num){
1881 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
1882 return -1;
1883 }
88e7a4d1 1884 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
1885 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1886 if (s->picture_structure == PICT_BOTTOM_FIELD)
1887 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 1888 assert(s->mb_y < s->mb_height);
115329f1 1889
0da71265
MN
1890 if(s->picture_structure==PICT_FRAME){
1891 h->curr_pic_num= h->frame_num;
1892 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1893 }else{
f57e2af6 1894 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
1895 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1896 }
115329f1 1897
0da71265 1898 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 1899 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 1900 }
115329f1 1901
0da71265
MN
1902 if(h->sps.poc_type==0){
1903 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 1904
0da71265
MN
1905 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1906 h->delta_poc_bottom= get_se_golomb(&s->gb);
1907 }
1908 }
115329f1 1909
0da71265
MN
1910 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
1911 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 1912
0da71265
MN
1913 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
1914 h->delta_poc[1]= get_se_golomb(&s->gb);
1915 }
115329f1 1916
0da71265 1917 init_poc(h);
115329f1 1918
0da71265
MN
1919 if(h->pps.redundant_pic_cnt_present){
1920 h->redundant_pic_count= get_ue_golomb(&s->gb);
1921 }
1922
1412060e 1923 //set defaults, might be overridden a few lines later
0da71265
MN
1924 h->ref_count[0]= h->pps.ref_count[0];
1925 h->ref_count[1]= h->pps.ref_count[1];
1926
e3e6f18f 1927 if(h->slice_type_nos != FF_I_TYPE){
9f5c1037 1928 if(h->slice_type_nos == FF_B_TYPE){
0da71265
MN
1929 h->direct_spatial_mv_pred= get_bits1(&s->gb);
1930 }
1931 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 1932
0da71265
MN
1933 if(num_ref_idx_active_override_flag){
1934 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
9f5c1037 1935 if(h->slice_type_nos==FF_B_TYPE)
0da71265
MN
1936 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
1937
187696fa 1938 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 1939 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 1940 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
1941 return -1;
1942 }
1943 }
9f5c1037 1944 if(h->slice_type_nos == FF_B_TYPE)
187696fa
MN
1945 h->list_count= 2;
1946 else
1947 h->list_count= 1;
1948 }else
1949 h->list_count= 0;
0da71265 1950
0bf79634 1951 if(!default_ref_list_done){
ea6f00c4 1952 ff_h264_fill_default_ref_list(h);
0da71265
MN
1953 }
1954
ea6f00c4 1955 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 1956 return -1;
0da71265 1957
07dff5c7
MN
1958 if(h->slice_type_nos!=FF_I_TYPE){
1959 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 1960 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
MN
1961 }
1962 if(h->slice_type_nos==FF_B_TYPE){
1963 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 1964 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
1965 }
1966
932f396f 1967 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
9f5c1037 1968 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
0da71265 1969 pred_weight_table(h);
9f5c1037 1970 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
9f2d1b4f 1971 implicit_weight_table(h);
cb99c652 1972 else {
9f2d1b4f 1973 h->use_weight = 0;
cb99c652
GB
1974 for (i = 0; i < 2; i++) {
1975 h->luma_weight_flag[i] = 0;
1976 h->chroma_weight_flag[i] = 0;
1977 }
1978 }
115329f1 1979
2ddcf84b 1980 if(h->nal_ref_idc)
ea6f00c4 1981 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 1982
5d18eaad 1983 if(FRAME_MBAFF)
ea6f00c4 1984 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 1985
8f56e219 1986 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
943f69a6
MN
1987 ff_h264_direct_dist_scale_factor(h);
1988 ff_h264_direct_ref_list_init(h);
8f56e219 1989
e3e6f18f 1990 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
9963b332 1991 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
1992 if(tmp > 2){
1993 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
1994 return -1;
1995 }
1996 h->cabac_init_idc= tmp;
1997 }
e5017ab8
LA
1998
1999 h->last_qscale_diff = 0;
88e7a4d1
MN
2000 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2001 if(tmp>51){
2002 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2003 return -1;
2004 }
88e7a4d1 2005 s->qscale= tmp;
4691a77d
2006 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2007 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2008 //FIXME qscale / qp ... stuff
9701840b 2009 if(h->slice_type == FF_SP_TYPE){
1df1df0b 2010 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2011 }
9701840b 2012 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
1df1df0b 2013 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2014 }
2015
53c05b1e 2016 h->deblocking_filter = 1;
0c32e19d
MN
2017 h->slice_alpha_c0_offset = 52;
2018 h->slice_beta_offset = 52;
0da71265 2019 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2020 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2021 if(tmp > 2){
2022 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2023 return -1;
2024 }
2025 h->deblocking_filter= tmp;
115329f1 2026 if(h->deblocking_filter < 2)
53c05b1e
MN
2027 h->deblocking_filter^= 1; // 1<->0
2028
2029 if( h->deblocking_filter ) {
0c32e19d
MN
2030 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2031 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2032 if( h->slice_alpha_c0_offset > 104U
2033 || h->slice_beta_offset > 104U){
2034 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2035 return -1;
2036 }
0da71265 2037 }
980a82b7 2038 }
afebe2f7 2039
61858a76 2040 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4b30289e 2041 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
9f5c1037 2042 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
61858a76
RD
2043 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2044 h->deblocking_filter= 0;
2045
afebe2f7 2046 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2047 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2048 /* Cheat slightly for speed:
5d81d641 2049 Do not bother to deblock across slices. */
ec970c21
2050 h->deblocking_filter = 2;
2051 } else {
7ae94d52
2052 h0->max_contexts = 1;
2053 if(!h0->single_decode_warning) {
2054 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2055 h0->single_decode_warning = 1;
2056 }
2057 if(h != h0)
2058 return 1; // deblocking switched inside frame
ec970c21 2059 }
afebe2f7 2060 }
0c32e19d 2061 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7 2062
0da71265
MN
2063#if 0 //FMO
2064 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2065 slice_group_change_cycle= get_bits(&s->gb, ?);
2066#endif
2067
afebe2f7
2068 h0->last_slice_type = slice_type;
2069 h->slice_num = ++h0->current_slice;
b735aeea
MN
2070 if(h->slice_num >= MAX_SLICES){
2071 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2072 }
5175b937 2073
c32867b5 2074 for(j=0; j<2; j++){
6d7e6b26 2075 int id_list[16];
b735aeea 2076 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
MN
2077 for(i=0; i<16; i++){
2078 id_list[i]= 60;
2079 if(h->ref_list[j][i].data[0]){
2080 int k;
2081 uint8_t *base= h->ref_list[j][i].base[0];
2082 for(k=0; k<h->short_ref_count; k++)
2083 if(h->short_ref[k]->base[0] == base){
2084 id_list[i]= k;
2085 break;
2086 }
2087 for(k=0; k<h->long_ref_count; k++)
2088 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2089 id_list[i]= h->short_ref_count + k;
2090 break;
2091 }
2092 }
2093 }
2094
c32867b5
MN
2095 ref2frm[0]=
2096 ref2frm[1]= -1;
d50cdd82 2097 for(i=0; i<16; i++)
6d7e6b26 2098 ref2frm[i+2]= 4*id_list[i]
c32867b5 2099 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2100 ref2frm[18+0]=
2101 ref2frm[18+1]= -1;
2102 for(i=16; i<48; i++)
6d7e6b26 2103 ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82 2104 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2105 }
2106
5d18eaad 2107 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2108 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2109
802e9146
MN
2110 s->avctx->refs= h->sps.ref_frame_count;
2111
0da71265 2112 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2113 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2114 h->slice_num,
2115 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2116 first_mb_in_slice,
49573a87 2117 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2118 pps_id, h->frame_num,
2119 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2120 h->ref_count[0], h->ref_count[1],
2121 s->qscale,
0c32e19d 2122 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f 2123 h->use_weight,
4806b922
MN
2124 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2125 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2126 );
2127 }
2128
2129 return 0;
2130}
2131
0dc343d4 2132int ff_h264_get_slice_type(const H264Context *h)
75dd6938
LA
2133{
2134 switch (h->slice_type) {
2135 case FF_P_TYPE: return 0;
2136 case FF_B_TYPE: return 1;
2137 case FF_I_TYPE: return 2;
2138 case FF_SP_TYPE: return 3;
2139 case FF_SI_TYPE: return 4;
2140 default: return -1;
2141 }
2142}
2143
c988f975
MN
2144static void loop_filter(H264Context *h){
2145 MpegEncContext * const s = &h->s;
2146 uint8_t *dest_y, *dest_cb, *dest_cr;
2147 int linesize, uvlinesize, mb_x, mb_y;
2148 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2149 const int old_slice_type= h->slice_type;
2150
2151 if(h->deblocking_filter) {
2152 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2153 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
78998bf2 2154 int mb_xy, mb_type;
c988f975
MN
2155 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2156 h->slice_num= h->slice_table[mb_xy];
2157 mb_type= s->current_picture.mb_type[mb_xy];
2158 h->list_count= h->list_counts[mb_xy];
c988f975
MN
2159
2160 if(FRAME_MBAFF)
2161 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
2162
c988f975
MN
2163 s->mb_x= mb_x;
2164 s->mb_y= mb_y;
2165 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2166 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2167 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2168 //FIXME simplify above
2169
2170 if (MB_FIELD) {
2171 linesize = h->mb_linesize = s->linesize * 2;
2172 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2173 if(mb_y&1){ //FIXME move out of this function?
2174 dest_y -= s->linesize*15;
2175 dest_cb-= s->uvlinesize*7;
2176 dest_cr-= s->uvlinesize*7;
2177 }
2178 } else {
2179 linesize = h->mb_linesize = s->linesize;
2180 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2181 }
77d40dce 2182 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
aaa995d7 2183 if(fill_filter_caches(h, mb_type))
44a5e7b6 2184 continue;
c988f975
MN
2185 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2186 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2187
77d40dce 2188 if (FRAME_MBAFF) {
c988f975
MN
2189 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2190 } else {
2191 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2192 }
2193 }
2194 }
2195 }
2196 h->slice_type= old_slice_type;
2197 s->mb_x= 0;
2198 s->mb_y= end_mb_y - FRAME_MBAFF;
2199}
2200
3a84713a
RS
2201static int decode_slice(struct AVCodecContext *avctx, void *arg){
2202 H264Context *h = *(void**)arg;
0da71265
MN
2203 MpegEncContext * const s = &h->s;
2204 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2205
2206 s->mb_skip_run= -1;
0da71265 2207
89db0bae 2208 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b 2209 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae 2210
e5017ab8 2211 if( h->pps.cabac ) {
e5017ab8
LA
2212 /* realign */
2213 align_get_bits( &s->gb );
2214
2215 /* init cabac */
d61c4e73 2216 ff_init_cabac_states( &h->cabac);
e5017ab8
LA
2217 ff_init_cabac_decoder( &h->cabac,
2218 s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15 2219 (get_bits_left(&s->gb) + 7)/8);
cc51b282
MN
2220
2221 ff_h264_init_cabac_states(h);
95c26348 2222
e5017ab8 2223 for(;;){
851ded89 2224//START_TIMER
cc51b282 2225 int ret = ff_h264_decode_mb_cabac(h);
6867a90b 2226 int eos;
851ded89 2227//STOP_TIMER("decode_mb_cabac")
0da71265 2228
903d58f6 2229 if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265 2230
5d18eaad 2231 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
LA
2232 s->mb_y++;
2233
cc51b282 2234 ret = ff_h264_decode_mb_cabac(h);
e5017ab8 2235
903d58f6 2236 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2237 s->mb_y--;
2238 }
6867a90b 2239 eos = get_cabac_terminate( &h->cabac );
e5017ab8 2240
3566042a
MN
2241 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
2242 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2243 return 0;
2244 }
5659b509 2245 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af 2246 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
LA
2247 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2248 return -1;
2249 }
2250
2251 if( ++s->mb_x >= s->mb_width ) {
2252 s->mb_x = 0;
c988f975 2253 loop_filter(h);
e5017ab8 2254 ff_draw_horiz_band(s, 16*s->mb_y, 16);
5175b937 2255 ++s->mb_y;
f3e53d9f 2256 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
LLL
2257 ++s->mb_y;
2258 }
0da71265 2259 }
0da71265 2260
e5017ab8 2261 if( eos || s->mb_y >= s->mb_height ) {
a9c9a240 2262 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8 2263 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0da71265 2264 return 0;
e5017ab8 2265 }
e5017ab8
LA
2266 }
2267
2268 } else {
2269 for(;;){
e1e94902 2270 int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2271
903d58f6 2272 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8 2273
5d18eaad 2274 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8 2275 s->mb_y++;
e1e94902 2276 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2277
903d58f6 2278 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2279 s->mb_y--;
2280 }
2281
2282 if(ret<0){
2283 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2284 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2285
2286 return -1;
2287 }
e5017ab8
LA
2288
2289 if(++s->mb_x >= s->mb_width){
2290 s->mb_x=0;
c988f975 2291 loop_filter(h);
e5017ab8 2292 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6867a90b 2293 ++s->mb_y;
f3e53d9f 2294 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
LLL
2295 ++s->mb_y;
2296 }
2297 if(s->mb_y >= s->mb_height){
a9c9a240 2298 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2299
2300 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2301 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2302
2303 return 0;
2304 }else{
2305 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2306
2307 return -1;
2308 }
2309 }
2310 }
2311
2312 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240 2313 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2314 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2315 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2316
2317 return 0;
2318 }else{
2319 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2320
2321 return -1;
2322 }
2323 }
0da71265
MN
2324 }
2325 }
e5017ab8 2326
0da71265
MN
2327#if 0
2328 for(;s->mb_y < s->mb_height; s->mb_y++){
2329 for(;s->mb_x < s->mb_width; s->mb_x++){
2330 int ret= decode_mb(h);
115329f1 2331
903d58f6 2332 ff_h264_hl_decode_mb(h);
0da71265
MN
2333
2334 if(ret<0){
267f7edc 2335 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2336 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2337
2338 return -1;
2339 }
115329f1 2340
0da71265
MN
2341 if(++s->mb_x >= s->mb_width){
2342 s->mb_x=0;
2343 if(++s->mb_y >= s->mb_height){
2344 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2345 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2346
2347 return 0;
2348 }else{
2349 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2350
2351 return -1;
2352 }
2353 }
2354 }
115329f1 2355
0da71265
MN
2356 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2357 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2358 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2359
2360 return 0;
2361 }else{
2362 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2363
2364 return -1;
2365 }
2366 }
2367 }
2368 s->mb_x=0;
2369 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2370 }
2371#endif
2372 return -1; //not reached
2373}
2374
afebe2f7
2375/**
2376 * Call decode_slice() for each context.
2377 *
2378 * @param h h264 master context
2379 * @param context_count number of contexts to execute
2380 */
2381static void execute_decode_slices(H264Context *h, int context_count){
2382 MpegEncContext * const s = &h->s;
2383 AVCodecContext * const avctx= s->avctx;
2384 H264Context *hx;
2385 int i;
2386
40e5d31b
GB
2387 if (s->avctx->hwaccel)
2388 return;
0d3d172f 2389 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2390 return;
afebe2f7 2391 if(context_count == 1) {
74e8b78b 2392 decode_slice(avctx, &h);
afebe2f7
2393 } else {
2394 for(i = 1; i < context_count; i++) {
2395 hx = h->thread_context[i];
047599a4 2396 hx->s.error_recognition = avctx->error_recognition;
afebe2f7
2397 hx->s.error_count = 0;
2398 }
2399
2400 avctx->execute(avctx, (void *)decode_slice,
01418506 2401 h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
2402
2403 /* pull back stuff from slices to master context */
2404 hx = h->thread_context[context_count - 1];
2405 s->mb_x = hx->s.mb_x;
2406 s->mb_y = hx->s.mb_y;
12d96de3
JD
2407 s->dropable = hx->s.dropable;
2408 s->picture_structure = hx->s.picture_structure;
afebe2f7
2409 for(i = 1; i < context_count; i++)
2410 h->s.error_count += h->thread_context[i]->s.error_count;
2411 }
2412}
2413
2414
30317501 2415static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
MN
2416 MpegEncContext * const s = &h->s;
2417 AVCodecContext * const avctx= s->avctx;
2418 int buf_index=0;
afebe2f7
2419 H264Context *hx; ///< thread context
2420 int context_count = 0;
74b14aac 2421 int next_avc= h->is_avc ? 0 : buf_size;
afebe2f7
2422
2423 h->max_contexts = avctx->thread_count;
377ec888 2424#if 0
eb60dddc 2425 int i;
96b6ace2
MN
2426 for(i=0; i<50; i++){
2427 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
0da71265
MN
2428 }
2429#endif
66a4b2c1 2430 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7 2431 h->current_slice = 0;
12d96de3 2432 if (!s->first_field)
f6e3c460 2433 s->current_picture_ptr= NULL;
9c095463 2434 ff_h264_reset_sei(h);
66a4b2c1
MN
2435 }
2436
0da71265
MN
2437 for(;;){
2438 int consumed;
2439 int dst_length;
2440 int bit_length;
30317501 2441 const uint8_t *ptr;
4770b1b4 2442 int i, nalsize = 0;
afebe2f7 2443 int err;
115329f1 2444
74b14aac 2445 if(buf_index >= next_avc) {
1c48415b
2446 if(buf_index >= buf_size) break;
2447 nalsize = 0;
2448 for(i = 0; i < h->nal_length_size; i++)
2449 nalsize = (nalsize << 8) | buf[buf_index++];
8d8409ca 2450 if(nalsize <= 1 || nalsize > buf_size - buf_index){
1c48415b
2451 if(nalsize == 1){
2452 buf_index++;
2453 continue;
2454 }else{
2455 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2456 break;
2457 }
2458 }
74b14aac 2459 next_avc= buf_index + nalsize;
1c48415b
2460 } else {
2461 // start code prefix search
52255d17 2462 for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
2463 // This should always succeed in the first iteration.
2464 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2465 break;
8b031359 2466 }
115329f1 2467
1c48415b 2468 if(buf_index+3 >= buf_size) break;
115329f1 2469
1c48415b 2470 buf_index+=3;
52255d17 2471 if(buf_index >= next_avc) continue;
1c48415b 2472 }
115329f1 2473
afebe2f7
2474 hx = h->thread_context[context_count];
2475
74b14aac 2476 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429 2477 if (ptr==NULL || dst_length < 0){
ac658be5
FOL
2478 return -1;
2479 }
3566042a
MN
2480 i= buf_index + consumed;
2481 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
2482 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
2483 s->workaround_bugs |= FF_BUG_TRUNCATED;
2484
2485 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
6ac9696e 2486 while(ptr[dst_length - 1] == 0 && dst_length > 0)
c4da83fb 2487 dst_length--;
3566042a 2488 }
1790a5e9 2489 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
MN
2490
2491 if(s->avctx->debug&FF_DEBUG_STARTCODE){
afebe2f7 2492 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
0da71265 2493 }
115329f1 2494
74b14aac 2495 if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d 2496 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1 2497 }
4770b1b4 2498
0da71265
MN
2499 buf_index += consumed;
2500
755bfeab 2501 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
8c3eba7c 2502 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
0da71265 2503 continue;
115329f1 2504
afebe2f7
2505 again:
2506 err = 0;
2507 switch(hx->nal_unit_type){
0da71265 2508 case NAL_IDR_SLICE:
afebe2f7
2509 if (h->nal_unit_type != NAL_IDR_SLICE) {
2510 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2511 return -1;
2512 }
3b66c4c5 2513 idr(h); //FIXME ensure we don't loose some frames if there is reordering
0da71265 2514 case NAL_SLICE:
afebe2f7
2515 init_get_bits(&hx->s.gb, ptr, bit_length);
2516 hx->intra_gb_ptr=
2517 hx->inter_gb_ptr= &hx->s.gb;
2518 hx->s.data_partitioning = 0;
2519
2520 if((err = decode_slice_header(hx, h)))
2521 break;
2522
dd0cd3d2
RC
2523 avctx->profile = hx->sps.profile_idc;
2524 avctx->level = hx->sps.level_idc;
2525
6026a096
GB
2526 if (s->avctx->hwaccel && h->current_slice == 1) {
2527 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
2528 return -1;
2529 }
2530
37a558fe
IS
2531 s->current_picture_ptr->key_frame |=
2532 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2533 (h->sei_recovery_frame_cnt >= 0);
afebe2f7
2534 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
2535 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2536 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2537 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
369122dd 2538 && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
MN
2539 if(avctx->hwaccel) {
2540 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2541 return -1;
2542 }else
0d3d172f 2543 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd 2544 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
CEH
2545 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2546 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd 2547 }else
f2c214a1 2548 context_count++;
369122dd 2549 }
0da71265
MN
2550 break;
2551 case NAL_DPA:
afebe2f7
2552 init_get_bits(&hx->s.gb, ptr, bit_length);
2553 hx->intra_gb_ptr=
2554 hx->inter_gb_ptr= NULL;
0410ee8f
AS
2555
2556 if ((err = decode_slice_header(hx, h)) < 0)
2557 break;
2558
dd0cd3d2
RC
2559 avctx->profile = hx->sps.profile_idc;
2560 avctx->level = hx->sps.level_idc;
2561
afebe2f7 2562 hx->s.data_partitioning = 1;
115329f1 2563
0da71265
MN
2564 break;
2565 case NAL_DPB:
afebe2f7
2566 init_get_bits(&hx->intra_gb, ptr, bit_length);
2567 hx->intra_gb_ptr= &hx->intra_gb;
0da71265
MN
2568 break;
2569 case NAL_DPC:
afebe2f7
2570 init_get_bits(&hx->inter_gb, ptr, bit_length);
2571 hx->inter_gb_ptr= &hx->inter_gb;
8b92b792 2572
afebe2f7 2573 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03 2574 && s->context_initialized
e0111b32 2575 && s->hurry_up < 5
afebe2f7 2576 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2577 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2578 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
e0111b32 2579 && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7 2580 context_count++;
0da71265
MN
2581 break;
2582 case NAL_SEI:
cdd10689 2583 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2584 ff_h264_decode_sei(h);
0da71265
MN
2585 break;
2586 case NAL_SPS:
2587 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2588 ff_h264_decode_seq_parameter_set(h);
115329f1 2589
0da71265
MN
2590 if(s->flags& CODEC_FLAG_LOW_DELAY)
2591 s->low_delay=1;
115329f1 2592
a18030bb
LM
2593 if(avctx->has_b_frames < 2)
2594 avctx->has_b_frames= !s->low_delay;
0da71265
MN
2595 break;
2596 case NAL_PPS:
2597 init_get_bits(&s->gb, ptr, bit_length);
115329f1 2598
1790a5e9 2599 ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
MN
2600
2601 break;
ab470fa7
LM
2602 case NAL_AUD:
2603 case NAL_END_SEQUENCE:
2604 case NAL_END_STREAM:
2605 case NAL_FILLER_DATA:
2606 case NAL_SPS_EXT:
2607 case NAL_AUXILIARY_SLICE:
0da71265 2608 break;
bb270c08 2609 default:
4ad04da2 2610 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1 2611 }
115329f1 2612
afebe2f7
2613 if(context_count == h->max_contexts) {
2614 execute_decode_slices(h, context_count);
2615 context_count = 0;
2616 }
2617
2618 if (err < 0)
2619 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2620 else if(err == 1) {
2621 /* Slice could not be decoded in parallel mode, copy down
2622 * NAL unit stuff to context 0 and restart. Note that
1412060e 2623 * rbsp_buffer is not transferred, but since we no longer
afebe2f7
2624 * run in parallel mode this should not be an issue. */
2625 h->nal_unit_type = hx->nal_unit_type;
2626 h->nal_ref_idc = hx->nal_ref_idc;
2627 hx = h;
2628 goto again;
2629 }
2630 }
2631 if(context_count)
2632 execute_decode_slices(h, context_count);
0da71265
MN
2633 return buf_index;
2634}
2635
2636/**
3b66c4c5 2637 * returns the number of bytes consumed for building the current frame
0da71265
MN
2638 */
2639static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
755bfeab 2640 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
0da71265
MN
2641 if(pos+10>buf_size) pos=buf_size; // oops ;)
2642
2643 return pos;
0da71265
MN
2644}
2645
115329f1 2646static int decode_frame(AVCodecContext *avctx,
0da71265 2647 void *data, int *data_size,
7a00bbad 2648 AVPacket *avpkt)
0da71265 2649{
7a00bbad
TB
2650 const uint8_t *buf = avpkt->data;
2651 int buf_size = avpkt->size;
0da71265
MN
2652 H264Context *h = avctx->priv_data;
2653 MpegEncContext *s = &h->s;
115329f1 2654 AVFrame *pict = data;
0da71265 2655 int buf_index;
115329f1 2656
0da71265 2657 s->flags= avctx->flags;
303e50e6 2658 s->flags2= avctx->flags2;
0da71265 2659
1412060e 2660 /* end of stream, output what is still in the buffers */
0da71265 2661 if (buf_size == 0) {
97bbb885
MN
2662 Picture *out;
2663 int i, out_idx;
2664
2665//FIXME factorize this with the output code below
2666 out = h->delayed_pic[0];
2667 out_idx = 0;
c173a088 2668 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
MN
2669 if(h->delayed_pic[i]->poc < out->poc){
2670 out = h->delayed_pic[i];
2671 out_idx = i;
2672 }
2673
2674 for(i=out_idx; h->delayed_pic[i]; i++)
2675 h->delayed_pic[i] = h->delayed_pic[i+1];
2676
2677 if(out){
2678 *data_size = sizeof(AVFrame);
2679 *pict= *(AVFrame*)out;
2680 }
2681
0da71265
MN
2682 return 0;
2683 }
115329f1 2684
4770b1b4
RT
2685 if(h->is_avc && !h->got_avcC) {
2686 int i, cnt, nalsize;
2687 unsigned char *p = avctx->extradata;
2688 if(avctx->extradata_size < 7) {
2689 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
2690 return -1;
2691 }
2692 if(*p != 1) {
2693 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
2694 return -1;
2695 }
2696 /* sps and pps in the avcC always have length coded with 2 bytes,
2697 so put a fake nal_length_size = 2 while parsing them */
2698 h->nal_length_size = 2;
2699 // Decode sps from avcC
2700 cnt = *(p+5) & 0x1f; // Number of sps
2701 p += 6;
2702 for (i = 0; i < cnt; i++) {
fead30d4 2703 nalsize = AV_RB16(p) + 2;
96b6ace2 2704 if(decode_nal_units(h, p, nalsize) < 0) {
4770b1b4
RT
2705 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
2706 return -1;
2707 }
2708 p += nalsize;
115329f1 2709 }
4770b1b4
RT
2710 // Decode pps from avcC
2711 cnt = *(p++); // Number of pps
2712 for (i = 0; i < cnt; i++) {
fead30d4 2713 nalsize = AV_RB16(p) + 2;
4770b1b4
RT
2714 if(decode_nal_units(h, p, nalsize) != nalsize) {
2715 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
2716 return -1;
2717 }
2718 p += nalsize;
115329f1 2719 }
4770b1b4
RT
2720 // Now store right nal length size, that will be use to parse all other nals
2721 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
2722 // Do not reparse avcC
2723 h->got_avcC = 1;
2724 }
2725
d464bcef 2726 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
115329f1 2727 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
0da71265 2728 return -1;
d464bcef 2729 h->got_avcC = 1;
0da71265
MN
2730 }
2731
2732 buf_index=decode_nal_units(h, buf, buf_size);
115329f1 2733 if(buf_index < 0)
0da71265
MN
2734 return -1;
2735
56c70e1d 2736 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
1c746a49 2737 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
56c70e1d
MN
2738 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
2739 return -1;
2740 }
2741
66a4b2c1
MN
2742 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
2743 Picture *out = s->current_picture_ptr;
2744 Picture *cur = s->current_picture_ptr;
44be1d64 2745 int i, pics, out_of_order, out_idx;
115329f1 2746
256299d3 2747 field_end(h);
66a4b2c1 2748
357282c6 2749 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
12d96de3
JD
2750 /* Wait for second field. */
2751 *data_size = 0;
2752
2753 } else {
b19d493f 2754 cur->interlaced_frame = 0;
b09a7c05
2755 cur->repeat_pict = 0;
2756
2757 /* Signal interlacing information externally. */
2758 /* Prioritize picture timing SEI information over used decoding process if it exists. */
70e01da3 2759
b09a7c05
2760 if(h->sps.pic_struct_present_flag){
2761 switch (h->sei_pic_struct)
2762 {
b19d493f
HY
2763 case SEI_PIC_STRUCT_FRAME:
2764 break;
2765 case SEI_PIC_STRUCT_TOP_FIELD:
2766 case SEI_PIC_STRUCT_BOTTOM_FIELD:
2767 cur->interlaced_frame = 1;
2768 break;
2769 case SEI_PIC_STRUCT_TOP_BOTTOM:
2770 case SEI_PIC_STRUCT_BOTTOM_TOP:
2771 if (FIELD_OR_MBAFF_PICTURE)
2772 cur->interlaced_frame = 1;
2773 else
2774 // try to flag soft telecine progressive
2775 cur->interlaced_frame = h->prev_interlaced_frame;
2776 break;
b09a7c05
2777 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
2778 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
2779 // Signal the possibility of telecined film externally (pic_struct 5,6)
2780 // From these hints, let the applications decide if they apply deinterlacing.
2781 cur->repeat_pict = 1;
b09a7c05
2782 break;
2783 case SEI_PIC_STRUCT_FRAME_DOUBLING:
2784 // Force progressive here, as doubling interlaced frame is a bad idea.
b09a7c05
2785 cur->repeat_pict = 2;
2786 break;
2787 case SEI_PIC_STRUCT_FRAME_TRIPLING:
b09a7c05
2788 cur->repeat_pict = 4;
2789 break;
2790 }
b19d493f
HY
2791
2792 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
2793 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
b09a7c05
2794 }else{
2795 /* Derive interlacing flag from used decoding process. */
2796 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
2797 }
b19d493f 2798 h->prev_interlaced_frame = cur->interlaced_frame;
b09a7c05
2799
2800 if (cur->field_poc[0] != cur->field_poc[1]){
2801 /* Derive top_field_first from field pocs. */
2802 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
2803 }else{
2804 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
2805 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
2806 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
2807 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
2808 cur->top_field_first = 1;
2809 else
2810 cur->top_field_first = 0;
2811 }else{
2812 /* Most likely progressive */
2813 cur->top_field_first = 0;
2814 }
2815 }
84a8596d 2816
f6e3c460 2817 //FIXME do something with unavailable reference frames
8b92b792 2818
f6e3c460 2819 /* Sort B-frames into display order */
2f944356 2820
f6e3c460
2821 if(h->sps.bitstream_restriction_flag
2822 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
2823 s->avctx->has_b_frames = h->sps.num_reorder_frames;
2824 s->low_delay = 0;
2825 }
9170e345 2826
fb19e144
MN
2827 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
2828 && !h->sps.bitstream_restriction_flag){
2829 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
2830 s->low_delay= 0;
2831 }
2832
f6e3c460
2833 pics = 0;
2834 while(h->delayed_pic[pics]) pics++;
9170e345 2835
64b9d48f 2836 assert(pics <= MAX_DELAYED_PIC_COUNT);
4e4d983e 2837
f6e3c460
2838 h->delayed_pic[pics++] = cur;
2839 if(cur->reference == 0)
2840 cur->reference = DELAYED_PIC_REF;
2f944356 2841
f6e3c460
2842 out = h->delayed_pic[0];
2843 out_idx = 0;
c173a088 2844 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
f6e3c460
2845 if(h->delayed_pic[i]->poc < out->poc){
2846 out = h->delayed_pic[i];
2847 out_idx = i;
2848 }
44be1d64
MN
2849 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
2850 h->outputed_poc= INT_MIN;
2851 out_of_order = out->poc < h->outputed_poc;
1b547aba 2852
f6e3c460
2853 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
2854 { }
2a811db2 2855 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
f6e3c460 2856 || (s->low_delay &&
44be1d64 2857 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
9701840b 2858 || cur->pict_type == FF_B_TYPE)))
f6e3c460
2859 {
2860 s->low_delay = 0;
2861 s->avctx->has_b_frames++;
f6e3c460 2862 }
f6e3c460
2863
2864 if(out_of_order || pics > s->avctx->has_b_frames){
3eaa6d0e 2865 out->reference &= ~DELAYED_PIC_REF;
f6e3c460
2866 for(i=out_idx; h->delayed_pic[i]; i++)
2867 h->delayed_pic[i] = h->delayed_pic[i+1];
2868 }
3eaa6d0e 2869 if(!out_of_order && pics > s->avctx->has_b_frames){
f6e3c460 2870 *data_size = sizeof(AVFrame);
df8a7dff 2871
44be1d64
MN
2872 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
2873 h->outputed_poc = INT_MIN;
2874 } else
67e362ca 2875 h->outputed_poc = out->poc;
f6e3c460 2876 *pict= *(AVFrame*)out;
3eaa6d0e 2877 }else{
f6e3c460 2878 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3eaa6d0e 2879 }
12d96de3 2880 }
a4dae92b
LM
2881 }
2882
3165e258 2883 assert(pict->data[0] || !*data_size);
4e4d983e 2884 ff_print_debug_info(s, pict);
0da71265 2885//printf("out %d\n", (int)pict->data[0]);
0da71265 2886
0da71265
MN
2887 return get_consumed_bytes(s, buf_index, buf_size);
2888}
#if 0
/**
 * Fills h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 * Entries 0..2 describe the row above (left-above, above, right-above) and
 * entry 3 the macroblock to the left; each is 1 only if the position passes
 * the bounds test done here and its slice_table entry equals h->slice_num.
 * Entries 4 and 5 are set to the constants 1 and 0.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int cur_xy  = s->mb_x + s->mb_y*s->mb_stride;
    const int top_xy  = cur_xy - s->mb_stride;
    const int has_top = s->mb_y != 0;

    /* has_top short-circuits the slice_table reads for the first row */
    h->mb_avail[0]= has_top && s->mb_x                 && h->slice_table[top_xy - 1] == h->slice_num;
    h->mb_avail[1]= has_top                            && h->slice_table[top_xy    ] == h->slice_num;
    h->mb_avail[2]= has_top && s->mb_x+1 < s->mb_width && h->slice_table[top_xy + 1] == h->slice_num;

    h->mb_avail[3]= s->mb_x && h->slice_table[cur_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
2908
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test, built only when TEST is defined.
 * Writes COUNT values with the unsigned and then the signed exp-golomb
 * writer, reads them back and prints a line for every value that does not
 * round-trip. Mismatches are reported but do not change the exit status.
 * @return 0
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx = {0}; /* zeroed so dsputil_init() does not read indeterminate fields */

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        /* remember the upcoming bits so a mismatch can show what was read */
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* the value written was i - COUNT/2, so that is the expected one */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
0da71265
MN
3083
3084
cbf1eae9 3085av_cold void ff_h264_free_context(H264Context *h)
0da71265 3086{
5f129a05 3087 int i;
115329f1 3088
0da71265 3089 free_tables(h); //FIXME cleanup init stuff perhaps
5f129a05
MN
3090
3091 for(i = 0; i < MAX_SPS_COUNT; i++)
3092 av_freep(h->sps_buffers + i);
3093
3094 for(i = 0; i < MAX_PPS_COUNT; i++)
3095 av_freep(h->pps_buffers + i);
15861962
RD
3096}
3097
903d58f6 3098av_cold int ff_h264_decode_end(AVCodecContext *avctx)
15861962
RD
3099{
3100 H264Context *h = avctx->priv_data;
3101 MpegEncContext *s = &h->s;
3102
3103 ff_h264_free_context(h);
5f129a05 3104
0da71265
MN
3105 MPV_common_end(s);
3106
3107// memset(h, 0, sizeof(H264Context));
115329f1 3108
0da71265
MN
3109 return 0;
3110}
3111
3112
3113AVCodec h264_decoder = {
3114 "h264",
3115 CODEC_TYPE_VIDEO,
3116 CODEC_ID_H264,
3117 sizeof(H264Context),
903d58f6 3118 ff_h264_decode_init,
0da71265 3119 NULL,
903d58f6 3120 ff_h264_decode_end,
0da71265 3121 decode_frame,
f3ba9db4 3122 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7c33ad19 3123 .flush= flush_dpb,
fe4bf374 3124 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
6026a096 3125 .pix_fmts= ff_hwaccel_pixfmt_list_420,
0da71265
MN
3126};
3127
b250f9c6 3128#if CONFIG_H264_VDPAU_DECODER
369122dd
NC
3129AVCodec h264_vdpau_decoder = {
3130 "h264_vdpau",
3131 CODEC_TYPE_VIDEO,
0d3d172f 3132 CODEC_ID_H264,
369122dd 3133 sizeof(H264Context),
903d58f6 3134 ff_h264_decode_init,
369122dd 3135 NULL,
903d58f6 3136 ff_h264_decode_end,
369122dd
NC
3137 decode_frame,
3138 CODEC_CAP_DR1 |