Always reset slice_table.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
188d3c51 34#include "h264_mvpred.h"
26b4fe82 35#include "h264_parser.h"
0da71265 36#include "golomb.h"
199436b9 37#include "mathops.h"
626464fb 38#include "rectangle.h"
369122dd 39#include "vdpau_internal.h"
0da71265 40
e5017ab8
LA
41#include "cabac.h"
42
2848ce84 43//#undef NDEBUG
0da71265
MN
44#include <assert.h>
45
/** rem6[q] is q % 6 for every quantiser index q in 0..51. */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
49
/** div6[q] is q / 6 for every quantiser index q in 0..51. */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
53
903d58f6 54void ff_h264_write_back_intra_pred_mode(H264Context *h){
5b0fb524 55 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
0da71265 56
662a5b23
MN
57 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
58 mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
59 mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
60 mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
0da71265
MN
61}
62
63/**
64 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
65 */
2bedc0e8
MN
66int ff_h264_check_intra4x4_pred_mode(H264Context *h){
67 MpegEncContext * const s = &h->s;
68 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
69 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
70 int i;
71
72 if(!(h->top_samples_available&0x8000)){
73 for(i=0; i<4; i++){
74 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
75 if(status<0){
76 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
77 return -1;
78 } else if(status){
79 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
80 }
81 }
82 }
83
84 if((h->left_samples_available&0x8888)!=0x8888){
85 static const int mask[4]={0x8000,0x2000,0x80,0x20};
86 for(i=0; i<4; i++){
87 if(!(h->left_samples_available&mask[i])){
88 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
89 if(status<0){
90 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
91 return -1;
92 } else if(status){
93 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
94 }
95 }
96 }
97 }
98
99 return 0;
100} //FIXME cleanup like ff_h264_check_intra_pred_mode
101
102/**
103 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
104 */
903d58f6 105int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
0da71265
MN
106 MpegEncContext * const s = &h->s;
107 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
108 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 109
43ff0714 110 if(mode > 6U) {
5175b937 111 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 112 return -1;
5175b937 113 }
115329f1 114
0da71265
MN
115 if(!(h->top_samples_available&0x8000)){
116 mode= top[ mode ];
117 if(mode<0){
9b879566 118 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
119 return -1;
120 }
121 }
115329f1 122
d1d10e91 123 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 124 mode= left[ mode ];
d1d10e91
MN
125 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
126 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
127 }
0da71265 128 if(mode<0){
9b879566 129 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 130 return -1;
115329f1 131 }
0da71265
MN
132 }
133
134 return mode;
135}
136
/**
 * Parse the one-byte NAL header and return a pointer to the unescaped payload.
 *
 * The first byte of src yields nal_ref_idc (src[0]>>5) and nal_unit_type
 * (src[0]&0x1F), both stored in h. The bytes after it are scanned for a
 * 00 00 xx sequence with xx <= 3: xx == 3 is an escape that has to be
 * removed, any other value is treated as the next start code and cuts
 * length down to that position.
 *
 * @param dst_length set to the number of payload bytes returned
 * @param consumed   set to the number of input bytes used, header included
 * @param length     number of bytes available at src, header included
 * @return the input pointer advanced past the header when nothing had to be
 *         unescaped; otherwise h->rbsp_buffer[bufidx] holding the unescaped
 *         copy followed by FF_INPUT_BUFFER_PADDING_SIZE zero bytes;
 *         NULL if that buffer could not be allocated
 */
1790a5e9 137const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
138 int i, si, di;
139 uint8_t *dst;
24456882 140 int bufidx;
0da71265 141
bb270c08 142// src[0]&0x80; //forbidden bit
0da71265
MN
143 h->nal_ref_idc= src[0]>>5;
144 h->nal_unit_type= src[0]&0x1F;
145
146 src++; length--;
115329f1 147#if 0
0da71265
MN
148 for(i=0; i<length; i++)
149 printf("%2X ", src[i]);
150#endif
e08715d3 151
/* Search for the first 00 00 xx (xx <= 3) sequence. The three preprocessor
 * variants differ only in how they skip ahead to the next zero byte; RS is
 * subtracted again at the bottom of the shared loop body. */
b250f9c6
AJ
152#if HAVE_FAST_UNALIGNED
153# if HAVE_FAST_64BIT
e08715d3
MN
154# define RS 7
155 for(i=0; i+1<length; i+=9){
19769ece 156 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
157# else
158# define RS 3
159 for(i=0; i+1<length; i+=5){
19769ece 160 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
161# endif
162 continue;
163 if(i>0 && !src[i]) i--;
164 while(src[i]) i++;
165#else
166# define RS 0
0da71265
MN
167 for(i=0; i+1<length; i+=2){
168 if(src[i]) continue;
169 if(i>0 && src[i-1]==0) i--;
e08715d3 170#endif
0da71265
MN
171 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
172 if(src[i+2]!=3){
173 /* startcode, so we must be past the end */
174 length=i;
175 }
176 break;
177 }
abb27cfb 178 i-= RS;
0da71265
MN
179 }
180
181 if(i>=length-1){ //no escaped 0
182 *dst_length= length;
183 *consumed= length+1; //+1 for the header
115329f1 184 return src;
0da71265
MN
185 }
186
24456882 187 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 188 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 189 dst= h->rbsp_buffer[bufidx];
0da71265 190
ac658be5
FOL
191 if (dst == NULL){
192 return NULL;
193 }
194
3b66c4c5 195//printf("decoding esc\n");
/* the first i bytes precede the zero sequence found above and are copied as is */
593af7cd
MN
196 memcpy(dst, src, i);
197 si=di=i;
198 while(si+2<length){
0da71265 199 //remove escapes (very rare 1:2^22)
593af7cd
MN
200 if(src[si+2]>3){
201 dst[di++]= src[si++];
202 dst[di++]= src[si++];
203 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
204 if(src[si+2]==3){ //escape
205 dst[di++]= 0;
206 dst[di++]= 0;
207 si+=3;
c8470cc1 208 continue;
0da71265 209 }else //next start code
593af7cd 210 goto nsc;
0da71265
MN
211 }
212
213 dst[di++]= src[si++];
214 }
/* fewer than three bytes remain, so no further escape can occur: copy the tail */
593af7cd
MN
215 while(si<length)
216 dst[di++]= src[si++];
217nsc:
0da71265 218
d4369630
AS
219 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
220
0da71265
MN
221 *dst_length= di;
222 *consumed= si + 1;//+1 for the header
90b5b51e 223//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
224 return dst;
225}
226
1790a5e9 227int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
228 int v= *src;
229 int r;
230
a9c9a240 231 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
232
233 for(r=1; r<9; r++){
234 if(v&1) return r;
235 v>>=1;
236 }
237 return 0;
238}
239
240/**
1412060e 241 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
242 * @param qp quantization parameter
243 */
239ea04c 244static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
245#define stride 16
246 int i;
247 int temp[16]; //FIXME check if this is a good idea
248 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
249 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
250
251//memset(block, 64, 2*256);
252//return;
253 for(i=0; i<4; i++){
254 const int offset= y_offset[i];
255 const int z0= block[offset+stride*0] + block[offset+stride*4];
256 const int z1= block[offset+stride*0] - block[offset+stride*4];
257 const int z2= block[offset+stride*1] - block[offset+stride*5];
258 const int z3= block[offset+stride*1] + block[offset+stride*5];
259
260 temp[4*i+0]= z0+z3;
261 temp[4*i+1]= z1+z2;
262 temp[4*i+2]= z1-z2;
263 temp[4*i+3]= z0-z3;
264 }
265
266 for(i=0; i<4; i++){
267 const int offset= x_offset[i];
268 const int z0= temp[4*0+i] + temp[4*2+i];
269 const int z1= temp[4*0+i] - temp[4*2+i];
270 const int z2= temp[4*1+i] - temp[4*3+i];
271 const int z3= temp[4*1+i] + temp[4*3+i];
272
1412060e 273 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
274 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
275 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
276 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
277 }
278}
279
#if 0
/**
 * Forward-transform the 16 luma DC values in place; every output is halved.
 * Disabled code, kept for reference. Relies on the stride macro still being
 * defined at this point.
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    for(i=0; i<4; i++){
        const DCTELEM *in= block + y_offset[i];
        int *out= temp + 4*i;
        const int s04= in[stride*0] + in[stride*4];
        const int d04= in[stride*0] - in[stride*4];
        const int d15= in[stride*1] - in[stride*5];
        const int s15= in[stride*1] + in[stride*5];

        out[0]= s04+s15;
        out[1]= d04+d15;
        out[2]= d04-d15;
        out[3]= s04-s15;
    }

    for(i=0; i<4; i++){
        DCTELEM *out= block + x_offset[i];
        const int *in= temp + i;
        const int s02= in[4*0] + in[4*2];
        const int d02= in[4*0] - in[4*2];
        const int d13= in[4*1] - in[4*3];
        const int s13= in[4*1] + in[4*3];

        out[stride*0 ]= (s02 + s13)>>1;
        out[stride*2 ]= (d02 + d13)>>1;
        out[stride*8 ]= (d02 - d13)>>1;
        out[stride*10]= (s02 - s13)>>1;
    }
}
#endif
319
0da71265
MN
320#undef xStride
321#undef stride
322
239ea04c 323static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
324 const int stride= 16*2;
325 const int xStride= 16;
326 int a,b,c,d,e;
327
328 a= block[stride*0 + xStride*0];
329 b= block[stride*0 + xStride*1];
330 c= block[stride*1 + xStride*0];
331 d= block[stride*1 + xStride*1];
332
333 e= a-b;
334 a= a+b;
335 b= c-d;
336 c= c+d;
337
239ea04c
LM
338 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
339 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
340 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
341 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
342}
343
#if 0
/**
 * Forward-transform the 2x2 chroma DC values in place, without scaling.
 * Disabled code, kept for reference.
 */
static void chroma_dc_dct_c(DCTELEM *block)
{
    enum { COL = 16, ROW = 16*2 };
    const int p00 = block[0];
    const int p01 = block[COL];
    const int p10 = block[ROW];
    const int p11 = block[ROW + COL];

    const int top_sum  = p00 + p01;
    const int top_diff = p00 - p01;
    const int bot_sum  = p10 + p11;
    const int bot_diff = p10 - p11;

    block[0]         = top_sum  + bot_sum;
    block[COL]       = top_diff + bot_diff;
    block[ROW]       = top_sum  - bot_sum;
    block[ROW + COL] = top_diff - bot_diff;
}
#endif
0da71265 366
0da71265
MN
/**
 * Motion-compensate one partition from a single reference picture.
 *
 * The motion vector of block n for the given list is read from h->mv_cache
 * and offset by 8*src_x_offset / 8*src_y_offset. Luma is predicted through
 * qpix_op[], indexed by the two low bits of each vector component; unless
 * gray-only decoding is active, both chroma planes are then predicted through
 * chroma_op. When the referenced area reaches further outside the picture
 * than the emu_edge margins allow, the source is first rebuilt in
 * s->edge_emu_buffer by ff_emulated_edge_mc().
 *
 * @param square        if zero, luma is predicted a second time at offset delta
 * @param chroma_height height passed on to chroma_op
 * @param delta         byte offset between the two luma calls of a non-square partition
 */
367static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
368 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
369 int src_x_offset, int src_y_offset,
370 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
371 MpegEncContext * const s = &h->s;
372 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 373 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 374 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
375 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
376 uint8_t * src_cb, * src_cr;
377 int extra_width= h->emu_edge_width;
378 int extra_height= h->emu_edge_height;
0da71265
MN
379 int emu=0;
380 const int full_mx= mx>>2;
381 const int full_my= my>>2;
fbd312fd 382 const int pic_width = 16*s->mb_width;
0d43dd8c 383 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 384
/* a fractional vector component shrinks the usable edge margin by 3 */
0da71265
MN
385 if(mx&7) extra_width -= 3;
386 if(my&7) extra_height -= 3;
115329f1
DB
387
388 if( full_mx < 0-extra_width
389 || full_my < 0-extra_height
390 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 391 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
392 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
393 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
394 emu=1;
395 }
115329f1 396
5d18eaad 397 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 398 if(!square){
5d18eaad 399 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 400 }
115329f1 401
/* gray-only decoding stops after luma */
49fb20cb 402 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 403
0d43dd8c 404 if(MB_FIELD){
5d18eaad 405 // chroma offset when predicting from a field of opposite parity
2143b118 406 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
407 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
408 }
409 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
410 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
411
/* both chroma planes reuse the same emulation buffer, so each one is
 * emulated and consumed before the next is set up */
0da71265 412 if(emu){
5d18eaad 413 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
414 src_cb= s->edge_emu_buffer;
415 }
5d18eaad 416 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
417
418 if(emu){
5d18eaad 419 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
420 src_cr= s->edge_emu_buffer;
421 }
5d18eaad 422 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
423}
424
9f2d1b4f 425static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
426 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
427 int x_offset, int y_offset,
428 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
429 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
430 int list0, int list1){
431 MpegEncContext * const s = &h->s;
432 qpel_mc_func *qpix_op= qpix_put;
433 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 434
5d18eaad
LM
435 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
436 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
437 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 438 x_offset += 8*s->mb_x;
0d43dd8c 439 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 440
0da71265 441 if(list0){
1924f3ce 442 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
443 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
444 dest_y, dest_cb, dest_cr, x_offset, y_offset,
445 qpix_op, chroma_op);
446
447 qpix_op= qpix_avg;
448 chroma_op= chroma_avg;
449 }
450
451 if(list1){
1924f3ce 452 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
453 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
454 dest_y, dest_cb, dest_cr, x_offset, y_offset,
455 qpix_op, chroma_op);
456 }
457}
458
9f2d1b4f
LM
459static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
460 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
461 int x_offset, int y_offset,
462 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
463 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
464 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
465 int list0, int list1){
466 MpegEncContext * const s = &h->s;
467
5d18eaad
LM
468 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
469 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
470 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 471 x_offset += 8*s->mb_x;
0d43dd8c 472 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 473
9f2d1b4f
LM
474 if(list0 && list1){
475 /* don't optimize for luma-only case, since B-frames usually
476 * use implicit weights => chroma too. */
477 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
478 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
479 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
480 int refn0 = h->ref_cache[0][ scan8[n] ];
481 int refn1 = h->ref_cache[1][ scan8[n] ];
482
483 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
484 dest_y, dest_cb, dest_cr,
485 x_offset, y_offset, qpix_put, chroma_put);
486 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
487 tmp_y, tmp_cb, tmp_cr,
488 x_offset, y_offset, qpix_put, chroma_put);
489
490 if(h->use_weight == 2){
491 int weight0 = h->implicit_weight[refn0][refn1];
492 int weight1 = 64 - weight0;
5d18eaad
LM
493 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
494 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
495 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 496 }else{
5d18eaad 497 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
3d9137c8
MN
498 h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
499 h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
5d18eaad 500 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8
MN
501 h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
502 h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
5d18eaad 503 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8
MN
504 h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
505 h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
9f2d1b4f
LM
506 }
507 }else{
508 int list = list1 ? 1 : 0;
509 int refn = h->ref_cache[list][ scan8[n] ];
510 Picture *ref= &h->ref_list[list][refn];
511 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
512 dest_y, dest_cb, dest_cr, x_offset, y_offset,
513 qpix_put, chroma_put);
514
5d18eaad 515 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3d9137c8 516 h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
9f2d1b4f 517 if(h->use_weight_chroma){
5d18eaad 518 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8 519 h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
5d18eaad 520 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8 521 h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
9f2d1b4f
LM
522 }
523 }
524}
525
526static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
527 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
528 int x_offset, int y_offset,
529 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
530 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 531 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
532 int list0, int list1){
533 if((h->use_weight==2 && list0 && list1
534 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
535 || h->use_weight==1)
536 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
537 x_offset, y_offset, qpix_put, chroma_put,
538 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
539 else
540 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
541 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
542}
543
513fbd8e
LM
544static inline void prefetch_motion(H264Context *h, int list){
545 /* fetch pixels for estimated mv 4 macroblocks ahead
546 * optimized for 64byte cache lines */
547 MpegEncContext * const s = &h->s;
548 const int refn = h->ref_cache[list][scan8[0]];
549 if(refn >= 0){
550 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
551 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
552 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 553 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
554 s->dsp.prefetch(src[0]+off, s->linesize, 4);
555 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
556 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
557 }
558}
559
0da71265
MN
/**
 * Run inter prediction for the current macroblock.
 *
 * Dispatches on the macroblock type (16x16, 16x8, 8x16, or 8x8 with one
 * sub-macroblock type per 8x8 quadrant) and calls mc_part() once per
 * partition. The list0/list1 flags of every call come from IS_DIR() on the
 * (sub-)macroblock type. prefetch_motion() is called for list 0 before and
 * for list 1 after the prediction.
 *
 * @param qpix_put, qpix_avg     luma function tables, selected per partition size
 * @param chroma_put, chroma_avg chroma function tables, selected per partition size
 * @param weight_op, weight_avg  weighting function tables; each partition
 *                               shape passes a different start offset into them
 */
560static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
561 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
562 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
563 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 564 MpegEncContext * const s = &h->s;
64514ee8 565 const int mb_xy= h->mb_xy;
0da71265 566 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 567
0da71265 568 assert(IS_INTER(mb_type));
115329f1 569
513fbd8e
LM
570 prefetch_motion(h, 0);
571
0da71265
MN
572 if(IS_16X16(mb_type)){
573 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
574 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
7231ccf4 575 weight_op, weight_avg,
0da71265
MN
576 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
577 }else if(IS_16X8(mb_type)){
578 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
579 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 580 &weight_op[1], &weight_avg[1],
0da71265
MN
581 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
582 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
583 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 584 &weight_op[1], &weight_avg[1],
0da71265
MN
585 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
586 }else if(IS_8X16(mb_type)){
5d18eaad 587 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 588 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 589 &weight_op[2], &weight_avg[2],
0da71265 590 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 591 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 592 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 593 &weight_op[2], &weight_avg[2],
0da71265
MN
594 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
595 }else{
596 int i;
115329f1 597
0da71265
MN
598 assert(IS_8X8(mb_type));
599
/* one iteration per 8x8 quadrant: n is its first 4x4 block, the offsets
 * are its position inside the macroblock */
600 for(i=0; i<4; i++){
601 const int sub_mb_type= h->sub_mb_type[i];
602 const int n= 4*i;
603 int x_offset= (i&1)<<2;
604 int y_offset= (i&2)<<1;
605
606 if(IS_SUB_8X8(sub_mb_type)){
607 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
608 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 609 &weight_op[3], &weight_avg[3],
0da71265
MN
610 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
611 }else if(IS_SUB_8X4(sub_mb_type)){
612 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
613 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 614 &weight_op[4], &weight_avg[4],
0da71265
MN
615 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
616 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
617 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 618 &weight_op[4], &weight_avg[4],
0da71265
MN
619 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
620 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 621 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 622 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 623 &weight_op[5], &weight_avg[5],
0da71265 624 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 625 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 626 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 627 &weight_op[5], &weight_avg[5],
0da71265
MN
628 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
629 }else{
630 int j;
631 assert(IS_SUB_4X4(sub_mb_type));
632 for(j=0; j<4; j++){
633 int sub_x_offset= x_offset + 2*(j&1);
634 int sub_y_offset= y_offset + (j&2);
635 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
636 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 637 &weight_op[6], &weight_avg[6],
0da71265
MN
638 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
639 }
640 }
641 }
642 }
513fbd8e
LM
643
644 prefetch_motion(h, 1);
0da71265
MN
645}
646
0da71265 647
0da71265 648static void free_tables(H264Context *h){
7978debd 649 int i;
afebe2f7 650 H264Context *hx;
0da71265 651 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
652 av_freep(&h->chroma_pred_mode_table);
653 av_freep(&h->cbp_table);
9e528114
LA
654 av_freep(&h->mvd_table[0]);
655 av_freep(&h->mvd_table[1]);
5ad984c9 656 av_freep(&h->direct_table);
0da71265
MN
657 av_freep(&h->non_zero_count);
658 av_freep(&h->slice_table_base);
659 h->slice_table= NULL;
c988f975 660 av_freep(&h->list_counts);
e5017ab8 661
0da71265 662 av_freep(&h->mb2b_xy);
d43c1922 663 av_freep(&h->mb2br_xy);
9f2d1b4f 664
6752dd5a 665 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
666 hx = h->thread_context[i];
667 if(!hx) continue;
668 av_freep(&hx->top_borders[1]);
669 av_freep(&hx->top_borders[0]);
670 av_freep(&hx->s.obmc_scratchpad);
d2d5e067
AS
671 av_freep(&hx->rbsp_buffer[1]);
672 av_freep(&hx->rbsp_buffer[0]);
eda4ea4e
MS
673 hx->rbsp_buffer_size[0] = 0;
674 hx->rbsp_buffer_size[1] = 0;
d2d5e067 675 if (i) av_freep(&h->thread_context[i]);
afebe2f7 676 }
0da71265
MN
677}
678
239ea04c
LM
679static void init_dequant8_coeff_table(H264Context *h){
680 int i,q,x;
548a1c8a 681 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
682 h->dequant8_coeff[0] = h->dequant8_buffer[0];
683 h->dequant8_coeff[1] = h->dequant8_buffer[1];
684
685 for(i=0; i<2; i++ ){
686 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
687 h->dequant8_coeff[1] = h->dequant8_buffer[0];
688 break;
689 }
690
691 for(q=0; q<52; q++){
d9ec210b
DP
692 int shift = div6[q];
693 int idx = rem6[q];
239ea04c 694 for(x=0; x<64; x++)
548a1c8a
LM
695 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
696 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
697 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
698 }
699 }
700}
701
702static void init_dequant4_coeff_table(H264Context *h){
703 int i,j,q,x;
ab2e3e2c 704 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
705 for(i=0; i<6; i++ ){
706 h->dequant4_coeff[i] = h->dequant4_buffer[i];
707 for(j=0; j<i; j++){
708 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
709 h->dequant4_coeff[i] = h->dequant4_buffer[j];
710 break;
711 }
712 }
713 if(j<i)
714 continue;
715
716 for(q=0; q<52; q++){
d9ec210b
DP
717 int shift = div6[q] + 2;
718 int idx = rem6[q];
239ea04c 719 for(x=0; x<16; x++)
ab2e3e2c
LM
720 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
721 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
722 h->pps.scaling_matrix4[i][x]) << shift;
723 }
724 }
725}
726
727static void init_dequant_tables(H264Context *h){
728 int i,x;
729 init_dequant4_coeff_table(h);
730 if(h->pps.transform_8x8_mode)
731 init_dequant8_coeff_table(h);
732 if(h->sps.transform_bypass){
733 for(i=0; i<6; i++)
734 for(x=0; x<16; x++)
735 h->dequant4_coeff[i][0][x] = 1<<6;
736 if(h->pps.transform_8x8_mode)
737 for(i=0; i<2; i++)
738 for(x=0; x<64; x++)
739 h->dequant8_coeff[i][0][x] = 1<<6;
740 }
741}
742
743
903d58f6 744int ff_h264_alloc_tables(H264Context *h){
0da71265 745 MpegEncContext * const s = &h->s;
7bc9090a 746 const int big_mb_num= s->mb_stride * (s->mb_height+1);
145061a1 747 const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
239ea04c 748 int x,y;
0da71265 749
145061a1 750 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)
e5017ab8 751
c988f975 752 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
d31dbec3
RP
753 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
754 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265 755
d31dbec3 756 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
145061a1
MN
757 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
758 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
36b54927 759 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
c988f975 760 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
e5017ab8 761
b735aeea 762 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 763 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 764
d31dbec3 765 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
d43c1922 766 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
MN
767 for(y=0; y<s->mb_height; y++){
768 for(x=0; x<s->mb_width; x++){
7bc9090a 769 const int mb_xy= x + y*s->mb_stride;
0da71265 770 const int b_xy = 4*x + 4*y*h->b_stride;
115329f1 771
0da71265 772 h->mb2b_xy [mb_xy]= b_xy;
e1c88a21 773 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
0da71265
MN
774 }
775 }
9f2d1b4f 776
9c6221ae
GV
777 s->obmc_scratchpad = NULL;
778
56edbd81
LM
779 if(!h->dequant4_coeff[0])
780 init_dequant_tables(h);
781
0da71265
MN
782 return 0;
783fail:
784 free_tables(h);
785 return -1;
786}
787
afebe2f7
788/**
789 * Mimic alloc_tables(), but for every context thread.
790 */
145061a1
MN
791static void clone_tables(H264Context *dst, H264Context *src, int i){
792 MpegEncContext * const s = &src->s;
793 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
afebe2f7
794 dst->non_zero_count = src->non_zero_count;
795 dst->slice_table = src->slice_table;
796 dst->cbp_table = src->cbp_table;
797 dst->mb2b_xy = src->mb2b_xy;
d43c1922 798 dst->mb2br_xy = src->mb2br_xy;
afebe2f7 799 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
145061a1
MN
800 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
801 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
afebe2f7 802 dst->direct_table = src->direct_table;
fb823b77 803 dst->list_counts = src->list_counts;
afebe2f7 804
afebe2f7
805 dst->s.obmc_scratchpad = NULL;
806 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
807}
808
809/**
810 * Init context
811 * Allocate buffers which are not shared amongst multiple threads.
812 */
813static int context_init(H264Context *h){
d31dbec3
RP
814 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
815 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
afebe2f7 816
145061a1
MN
817 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
818 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
819
afebe2f7
820 return 0;
821fail:
822 return -1; // free_tables will clean up for us
823}
824
9855b2e3
MN
825static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
826
98a6fff9 827static av_cold void common_init(H264Context *h){
0da71265 828 MpegEncContext * const s = &h->s;
0da71265
MN
829
830 s->width = s->avctx->width;
831 s->height = s->avctx->height;
832 s->codec_id= s->avctx->codec->id;
115329f1 833
c92a30bb 834 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 835
239ea04c 836 h->dequant_coeff_pps= -1;
9a41c2c7 837 s->unrestricted_mv=1;
0da71265 838 s->decode=1; //FIXME
56edbd81 839
a5805aa9
MN
840 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
841
56edbd81
LM
842 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
843 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
844}
845
903d58f6 846av_cold int ff_h264_decode_init(AVCodecContext *avctx){
0da71265
MN
847 H264Context *h= avctx->priv_data;
848 MpegEncContext * const s = &h->s;
849
3edcacde 850 MPV_decode_defaults(s);
115329f1 851
0da71265
MN
852 s->avctx = avctx;
853 common_init(h);
854
855 s->out_format = FMT_H264;
856 s->workaround_bugs= avctx->workaround_bugs;
857
858 // set defaults
0da71265 859// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 860 s->quarter_sample = 1;
47cd974a 861 if(!avctx->has_b_frames)
0da71265 862 s->low_delay= 1;
7a9dba3c 863
580a7465 864 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
0da71265 865
e1e94902 866 ff_h264_decode_init_vlc();
115329f1 867
afebe2f7 868 h->thread_context[0] = h;
18c7be65 869 h->outputed_poc = INT_MIN;
e4b8f1fa 870 h->prev_poc_msb= 1<<16;
055a6aa7 871 h->x264_build = -1;
9c095463 872 ff_h264_reset_sei(h);
efd8c1f6
MN
873 if(avctx->codec_id == CODEC_ID_H264){
874 if(avctx->ticks_per_frame == 1){
875 s->avctx->time_base.den *=2;
876 }
19df37a8 877 avctx->ticks_per_frame = 2;
efd8c1f6 878 }
9855b2e3
MN
879
880 if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){
881 int i, cnt, nalsize;
882 unsigned char *p = avctx->extradata;
883
884 h->is_avc = 1;
885
886 if(avctx->extradata_size < 7) {
887 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
888 return -1;
889 }
890 /* sps and pps in the avcC always have length coded with 2 bytes,
891 so put a fake nal_length_size = 2 while parsing them */
892 h->nal_length_size = 2;
893 // Decode sps from avcC
894 cnt = *(p+5) & 0x1f; // Number of sps
895 p += 6;
896 for (i = 0; i < cnt; i++) {
897 nalsize = AV_RB16(p) + 2;
898 if(decode_nal_units(h, p, nalsize) < 0) {
899 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
900 return -1;
901 }
902 p += nalsize;
903 }
904 // Decode pps from avcC
905 cnt = *(p++); // Number of pps
906 for (i = 0; i < cnt; i++) {
907 nalsize = AV_RB16(p) + 2;
908 if(decode_nal_units(h, p, nalsize) != nalsize) {
909 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
910 return -1;
911 }
912 p += nalsize;
913 }
914 // Now store right nal length size, that will be use to parse all other nals
915 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
916 } else {
917 h->is_avc = 0;
918 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
919 return -1;
920 }
db8cb47d
MN
921 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
922 s->avctx->has_b_frames = h->sps.num_reorder_frames;
923 s->low_delay = 0;
924 }
9855b2e3 925
0da71265
MN
926 return 0;
927}
928
903d58f6 929int ff_h264_frame_start(H264Context *h){
0da71265
MN
930 MpegEncContext * const s = &h->s;
931 int i;
932
af8aa846
MN
933 if(MPV_frame_start(s, s->avctx) < 0)
934 return -1;
0da71265 935 ff_er_frame_start(s);
3a22d7fa
JD
936 /*
937 * MPV_frame_start uses pict_type to derive key_frame.
938 * This is incorrect for H.264; IDR markings must be used.
1412060e 939 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
940 * See decode_nal_units().
941 */
942 s->current_picture_ptr->key_frame= 0;
c173a088 943 s->current_picture_ptr->mmco_reset= 0;
0da71265
MN
944
945 assert(s->linesize && s->uvlinesize);
946
947 for(i=0; i<16; i++){
948 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 949 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
950 }
951 for(i=0; i<4; i++){
952 h->block_offset[16+i]=
953 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
954 h->block_offset[24+16+i]=
955 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
956 }
957
934b0821
LM
958 /* can't be in alloc_tables because linesize isn't known there.
959 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
960 for(i = 0; i < s->avctx->thread_count; i++)
961 if(!h->thread_context[i]->s.obmc_scratchpad)
962 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad 963
2ce1c2e0 964 /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
b735aeea 965 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 966
0da71265 967// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 968
1412060e 969 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
970 // that if we break out due to an error it can be released automatically
971 // in the next MPV_frame_start().
972 // SVQ3 as well as most other codecs have only last/next/current and thus
973 // get released even with set reference, besides SVQ3 and others do not
974 // mark frames as reference later "naturally".
975 if(s->codec_id != CODEC_ID_SVQ3)
976 s->current_picture_ptr->reference= 0;
357282c6
MN
977
978 s->current_picture_ptr->field_poc[0]=
979 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 980 assert(s->current_picture_ptr->long_ref==0);
357282c6 981
af8aa846 982 return 0;
0da71265
MN
983}
984
93cc10fa 985static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e 986 MpegEncContext * const s = &h->s;
0b69d625 987 uint8_t *top_border;
5f7f9719 988 int top_idx = 1;
115329f1 989
53c05b1e
MN
990 src_y -= linesize;
991 src_cb -= uvlinesize;
992 src_cr -= uvlinesize;
993
5f7f9719
MN
994 if(!simple && FRAME_MBAFF){
995 if(s->mb_y&1){
5f7f9719 996 if(!MB_MBAFF){
0b69d625
AS
997 top_border = h->top_borders[0][s->mb_x];
998 AV_COPY128(top_border, src_y + 15*linesize);
49fb20cb 999 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
1000 AV_COPY64(top_border+16, src_cb+7*uvlinesize);
1001 AV_COPY64(top_border+24, src_cr+7*uvlinesize);
5f7f9719
MN
1002 }
1003 }
c988f975
MN
1004 }else if(MB_MBAFF){
1005 top_idx = 0;
1006 }else
1007 return;
5f7f9719
MN
1008 }
1009
0b69d625 1010 top_border = h->top_borders[top_idx][s->mb_x];
3b66c4c5 1011 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 1012 // and the line above the bottom macroblock
0b69d625 1013 AV_COPY128(top_border, src_y + 16*linesize);
53c05b1e 1014
49fb20cb 1015 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
1016 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
1017 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
53c05b1e
MN
1018 }
1019}
1020
93cc10fa 1021static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e 1022 MpegEncContext * const s = &h->s;
b69378e2
1023 int deblock_left;
1024 int deblock_top;
5f7f9719 1025 int top_idx = 1;
1e4f1c56
AS
1026 uint8_t *top_border_m1;
1027 uint8_t *top_border;
5f7f9719
MN
1028
1029 if(!simple && FRAME_MBAFF){
1030 if(s->mb_y&1){
c988f975
MN
1031 if(!MB_MBAFF)
1032 return;
5f7f9719 1033 }else{
5f7f9719
MN
1034 top_idx = MB_MBAFF ? 0 : 1;
1035 }
5f7f9719 1036 }
b69378e2
1037
1038 if(h->deblocking_filter == 2) {
024bf79f
MN
1039 deblock_left = h->left_type[0];
1040 deblock_top = h->top_type;
b69378e2
1041 } else {
1042 deblock_left = (s->mb_x > 0);
6c805007 1043 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 1044 }
53c05b1e
MN
1045
1046 src_y -= linesize + 1;
1047 src_cb -= uvlinesize + 1;
1048 src_cr -= uvlinesize + 1;
1049
1e4f1c56
AS
1050 top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1051 top_border = h->top_borders[top_idx][s->mb_x];
1052
0b69d625
AS
1053#define XCHG(a,b,xchg)\
1054if (xchg) AV_SWAP64(b,a);\
1055else AV_COPY64(b,a);
d89dc06a 1056
d89dc06a 1057 if(deblock_top){
c988f975 1058 if(deblock_left){
0b69d625 1059 XCHG(top_border_m1+8, src_y -7, 1);
c988f975 1060 }
0b69d625
AS
1061 XCHG(top_border+0, src_y +1, xchg);
1062 XCHG(top_border+8, src_y +9, 1);
cad4368a 1063 if(s->mb_x+1 < s->mb_width){
0b69d625 1064 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
43efd19a 1065 }
53c05b1e 1066 }
53c05b1e 1067
49fb20cb 1068 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 1069 if(deblock_top){
c988f975 1070 if(deblock_left){
0b69d625
AS
1071 XCHG(top_border_m1+16, src_cb -7, 1);
1072 XCHG(top_border_m1+24, src_cr -7, 1);
c988f975 1073 }
0b69d625
AS
1074 XCHG(top_border+16, src_cb+1, 1);
1075 XCHG(top_border+24, src_cr+1, 1);
53c05b1e 1076 }
53c05b1e
MN
1077 }
1078}
1079
5a6a6cc7 1080static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
1081 MpegEncContext * const s = &h->s;
1082 const int mb_x= s->mb_x;
1083 const int mb_y= s->mb_y;
64514ee8 1084 const int mb_xy= h->mb_xy;
0da71265
MN
1085 const int mb_type= s->current_picture.mb_type[mb_xy];
1086 uint8_t *dest_y, *dest_cb, *dest_cr;
1087 int linesize, uvlinesize /*dct_offset*/;
1088 int i;
6867a90b 1089 int *block_offset = &h->block_offset[0];
41e4055b 1090 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 1091 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 1092 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 1093 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 1094 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 1095
6120a343
MN
1096 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1097 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1098 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 1099
a957c27b
LM
1100 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1101 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1102
c988f975
MN
1103 h->list_counts[mb_xy]= h->list_count;
1104
bd91fee3 1105 if (!simple && MB_FIELD) {
5d18eaad
LM
1106 linesize = h->mb_linesize = s->linesize * 2;
1107 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 1108 block_offset = &h->block_offset[24];
1412060e 1109 if(mb_y&1){ //FIXME move out of this function?
0da71265 1110 dest_y -= s->linesize*15;
6867a90b
LLL
1111 dest_cb-= s->uvlinesize*7;
1112 dest_cr-= s->uvlinesize*7;
0da71265 1113 }
5d18eaad
LM
1114 if(FRAME_MBAFF) {
1115 int list;
3425501d 1116 for(list=0; list<h->list_count; list++){
5d18eaad
LM
1117 if(!USES_LIST(mb_type, list))
1118 continue;
1119 if(IS_16X16(mb_type)){
1120 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 1121 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
1122 }else{
1123 for(i=0; i<16; i+=4){
5d18eaad
LM
1124 int ref = h->ref_cache[list][scan8[i]];
1125 if(ref >= 0)
1710856c 1126 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
1127 }
1128 }
1129 }
1130 }
0da71265 1131 } else {
5d18eaad
LM
1132 linesize = h->mb_linesize = s->linesize;
1133 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
1134// dct_offset = s->linesize * 16;
1135 }
115329f1 1136
bd91fee3 1137 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
1138 for (i=0; i<16; i++) {
1139 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 1140 }
c1708e8d
MN
1141 for (i=0; i<8; i++) {
1142 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1143 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 1144 }
e7e09b49
LLL
1145 } else {
1146 if(IS_INTRA(mb_type)){
5f7f9719 1147 if(h->deblocking_filter)
93cc10fa 1148 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 1149
49fb20cb 1150 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
1151 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1152 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 1153 }
0da71265 1154
e7e09b49 1155 if(IS_INTRA4x4(mb_type)){
bd91fee3 1156 if(simple || !s->encoding){
43efd19a 1157 if(IS_8x8DCT(mb_type)){
1eb96035
MN
1158 if(transform_bypass){
1159 idct_dc_add =
1160 idct_add = s->dsp.add_pixels8;
dae006d7 1161 }else{
1eb96035
MN
1162 idct_dc_add = s->dsp.h264_idct8_dc_add;
1163 idct_add = s->dsp.h264_idct8_add;
1164 }
43efd19a
LM
1165 for(i=0; i<16; i+=4){
1166 uint8_t * const ptr= dest_y + block_offset[i];
1167 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
1168 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1169 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1170 }else{
ac0623b2
MN
1171 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1172 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1173 (h->topright_samples_available<<i)&0x4000, linesize);
1174 if(nnz){
1175 if(nnz == 1 && h->mb[i*16])
1176 idct_dc_add(ptr, h->mb + i*16, linesize);
1177 else
1178 idct_add (ptr, h->mb + i*16, linesize);
1179 }
41e4055b 1180 }
43efd19a 1181 }
1eb96035
MN
1182 }else{
1183 if(transform_bypass){
1184 idct_dc_add =
1185 idct_add = s->dsp.add_pixels4;
1186 }else{
1187 idct_dc_add = s->dsp.h264_idct_dc_add;
1188 idct_add = s->dsp.h264_idct_add;
1189 }
aebb5d6d
MN
1190 for(i=0; i<16; i++){
1191 uint8_t * const ptr= dest_y + block_offset[i];
1192 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 1193
aebb5d6d
MN
1194 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1195 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1196 }else{
1197 uint8_t *topright;
1198 int nnz, tr;
1199 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1200 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1201 assert(mb_y || linesize <= block_offset[i]);
1202 if(!topright_avail){
1203 tr= ptr[3 - linesize]*0x01010101;
1204 topright= (uint8_t*) &tr;
1205 }else
1206 topright= ptr + 4 - linesize;
ac0623b2 1207 }else
aebb5d6d
MN
1208 topright= NULL;
1209
1210 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1211 nnz = h->non_zero_count_cache[ scan8[i] ];
1212 if(nnz){
1213 if(is_h264){
1214 if(nnz == 1 && h->mb[i*16])
1215 idct_dc_add(ptr, h->mb + i*16, linesize);
1216 else
1217 idct_add (ptr, h->mb + i*16, linesize);
1218 }else
881b5b80 1219 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
aebb5d6d 1220 }
ac0623b2 1221 }
41e4055b 1222 }
8b82a956 1223 }
0da71265 1224 }
e7e09b49 1225 }else{
c92a30bb 1226 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 1227 if(is_h264){
36940eca 1228 if(!transform_bypass)
93f0c0a4 1229 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 1230 }else
881b5b80 1231 ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 1232 }
5f7f9719 1233 if(h->deblocking_filter)
93cc10fa 1234 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 1235 }else if(is_h264){
e7e09b49 1236 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
1237 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1238 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 1239 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 1240 }
e7e09b49
LLL
1241
1242
1243 if(!IS_INTRA4x4(mb_type)){
bd91fee3 1244 if(is_h264){
ef9d1d15 1245 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
1246 if(transform_bypass){
1247 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
1248 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1249 }else{
1250 for(i=0; i<16; i++){
1251 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 1252 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1253 }
2fd1f0e0
MN
1254 }
1255 }else{
1256 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 1257 }
49c084a7 1258 }else if(h->cbp&15){
2fd1f0e0 1259 if(transform_bypass){
0a8ca22f 1260 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 1261 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 1262 for(i=0; i<16; i+=di){
62bc966f 1263 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 1264 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1265 }
ef9d1d15 1266 }
2fd1f0e0
MN
1267 }else{
1268 if(IS_8x8DCT(mb_type)){
1269 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1270 }else{
1271 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1272 }
1273 }
4704097a 1274 }
e7e09b49
LLL
1275 }else{
1276 for(i=0; i<16; i++){
1277 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 1278 uint8_t * const ptr= dest_y + block_offset[i];
881b5b80 1279 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
e7e09b49 1280 }
4704097a 1281 }
0da71265
MN
1282 }
1283 }
0da71265 1284
49fb20cb 1285 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
1286 uint8_t *dest[2] = {dest_cb, dest_cr};
1287 if(transform_bypass){
96465b90
MN
1288 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1289 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1290 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1291 }else{
c25ac15a 1292 idct_add = s->dsp.add_pixels4;
96465b90
MN
1293 for(i=16; i<16+8; i++){
1294 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1295 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1296 }
1297 }
ef9d1d15 1298 }else{
4691a77d
1299 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1300 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 1301 if(is_h264){
c25ac15a
MN
1302 idct_add = s->dsp.h264_idct_add;
1303 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
1304 for(i=16; i<16+8; i++){
1305 if(h->non_zero_count_cache[ scan8[i] ])
1306 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1307 else if(h->mb[i*16])
1308 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1309 }
aebb5d6d
MN
1310 }else{
1311 for(i=16; i<16+8; i++){
1312 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
1313 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
881b5b80 1314 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
aebb5d6d 1315 }
e7e09b49 1316 }
4704097a 1317 }
0da71265
MN
1318 }
1319 }
1320 }
c212fb0c
MN
1321 if(h->cbp || IS_INTRA(mb_type))
1322 s->dsp.clear_blocks(h->mb);
0da71265
MN
1323}
1324
0da71265 1325/**
bd91fee3
AS
1326 * Process a macroblock; this case avoids checks for expensive uncommon cases.
1327 */
1328static void hl_decode_mb_simple(H264Context *h){
1329 hl_decode_mb_internal(h, 1);
1330}
1331
1332/**
1333 * Process a macroblock; this handles edge cases, such as interlacing.
1334 */
1335static void av_noinline hl_decode_mb_complex(H264Context *h){
1336 hl_decode_mb_internal(h, 0);
1337}
1338
903d58f6 1339void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3 1340 MpegEncContext * const s = &h->s;
64514ee8 1341 const int mb_xy= h->mb_xy;
bd91fee3 1342 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 1343 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 1344
bd91fee3
AS
1345 if (is_complex)
1346 hl_decode_mb_complex(h);
1347 else hl_decode_mb_simple(h);
1348}
1349
0da71265
MN
1350static int pred_weight_table(H264Context *h){
1351 MpegEncContext * const s = &h->s;
1352 int list, i;
9f2d1b4f 1353 int luma_def, chroma_def;
115329f1 1354
9f2d1b4f
LM
1355 h->use_weight= 0;
1356 h->use_weight_chroma= 0;
0da71265
MN
1357 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
1358 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
1359 luma_def = 1<<h->luma_log2_weight_denom;
1360 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
1361
1362 for(list=0; list<2; list++){
cb99c652
GB
1363 h->luma_weight_flag[list] = 0;
1364 h->chroma_weight_flag[list] = 0;
0da71265
MN
1365 for(i=0; i<h->ref_count[list]; i++){
1366 int luma_weight_flag, chroma_weight_flag;
115329f1 1367
0da71265
MN
1368 luma_weight_flag= get_bits1(&s->gb);
1369 if(luma_weight_flag){
3d9137c8
MN
1370 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
1371 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
1372 if( h->luma_weight[i][list][0] != luma_def
1373 || h->luma_weight[i][list][1] != 0) {
9f2d1b4f 1374 h->use_weight= 1;
cb99c652
GB
1375 h->luma_weight_flag[list]= 1;
1376 }
9f2d1b4f 1377 }else{
3d9137c8
MN
1378 h->luma_weight[i][list][0]= luma_def;
1379 h->luma_weight[i][list][1]= 0;
0da71265
MN
1380 }
1381
0af6967e 1382 if(CHROMA){
fef744d4
MN
1383 chroma_weight_flag= get_bits1(&s->gb);
1384 if(chroma_weight_flag){
1385 int j;
1386 for(j=0; j<2; j++){
3d9137c8
MN
1387 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
1388 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
1389 if( h->chroma_weight[i][list][j][0] != chroma_def
1390 || h->chroma_weight[i][list][j][1] != 0) {
fef744d4 1391 h->use_weight_chroma= 1;
cb99c652
GB
1392 h->chroma_weight_flag[list]= 1;
1393 }
fef744d4
MN
1394 }
1395 }else{
1396 int j;
1397 for(j=0; j<2; j++){
3d9137c8
MN
1398 h->chroma_weight[i][list][j][0]= chroma_def;
1399 h->chroma_weight[i][list][j][1]= 0;
fef744d4 1400 }
0da71265
MN
1401 }
1402 }
1403 }
9f5c1037 1404 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 1405 }
9f2d1b4f 1406 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
1407 return 0;
1408}
1409
9f2d1b4f
LM
1410static void implicit_weight_table(H264Context *h){
1411 MpegEncContext * const s = &h->s;
cb99c652 1412 int ref0, ref1, i;
9f2d1b4f
LM
1413 int cur_poc = s->current_picture_ptr->poc;
1414
ce09f927
GB
1415 for (i = 0; i < 2; i++) {
1416 h->luma_weight_flag[i] = 0;
1417 h->chroma_weight_flag[i] = 0;
1418 }
1419
9f2d1b4f
LM
1420 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
1421 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1422 h->use_weight= 0;
1423 h->use_weight_chroma= 0;
1424 return;
1425 }
1426
1427 h->use_weight= 2;
1428 h->use_weight_chroma= 2;
1429 h->luma_log2_weight_denom= 5;
1430 h->chroma_log2_weight_denom= 5;
1431
9f2d1b4f
LM
1432 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
1433 int poc0 = h->ref_list[0][ref0].poc;
1434 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 1435 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 1436 int td = av_clip(poc1 - poc0, -128, 127);
72f86ec0 1437 h->implicit_weight[ref0][ref1] = 32;
9f2d1b4f 1438 if(td){
f66e4f5f 1439 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 1440 int tx = (16384 + (FFABS(td) >> 1)) / td;
72f86ec0
MN
1441 int dist_scale_factor = (tb*tx + 32) >> 8;
1442 if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
9f2d1b4f 1443 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
72f86ec0 1444 }
9f2d1b4f
LM
1445 }
1446 }
1447}
1448
8fd57a66 1449/**
5175b937 1450 * instantaneous decoder refresh.
0da71265
MN
1451 */
1452static void idr(H264Context *h){
ea6f00c4 1453 ff_h264_remove_all_refs(h);
a149c1a5 1454 h->prev_frame_num= 0;
80f8e035
MN
1455 h->prev_frame_num_offset= 0;
1456 h->prev_poc_msb=
1457 h->prev_poc_lsb= 0;
0da71265
MN
1458}
1459
7c33ad19
LM
1460/* forget old pics after a seek */
1461static void flush_dpb(AVCodecContext *avctx){
1462 H264Context *h= avctx->priv_data;
1463 int i;
64b9d48f 1464 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
1465 if(h->delayed_pic[i])
1466 h->delayed_pic[i]->reference= 0;
7c33ad19 1467 h->delayed_pic[i]= NULL;
285b570f 1468 }
df8a7dff 1469 h->outputed_poc= INT_MIN;
b19d493f 1470 h->prev_interlaced_frame = 1;
7c33ad19 1471 idr(h);
ca159196
MR
1472 if(h->s.current_picture_ptr)
1473 h->s.current_picture_ptr->reference= 0;
12d96de3 1474 h->s.first_field= 0;
9c095463 1475 ff_h264_reset_sei(h);
e240f898 1476 ff_mpeg_flush(avctx);
7c33ad19
LM
1477}
1478
0da71265
MN
1479static int init_poc(H264Context *h){
1480 MpegEncContext * const s = &h->s;
1481 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1482 int field_poc[2];
357282c6 1483 Picture *cur = s->current_picture_ptr;
0da71265 1484
b78a6baa 1485 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 1486 if(h->frame_num < h->prev_frame_num)
b78a6baa 1487 h->frame_num_offset += max_frame_num;
0da71265
MN
1488
1489 if(h->sps.poc_type==0){
1490 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1491
1492 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1493 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1494 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1495 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1496 else
1497 h->poc_msb = h->prev_poc_msb;
1498//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 1499 field_poc[0] =
0da71265 1500 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 1501 if(s->picture_structure == PICT_FRAME)
0da71265
MN
1502 field_poc[1] += h->delta_poc_bottom;
1503 }else if(h->sps.poc_type==1){
1504 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1505 int i;
1506
1507 if(h->sps.poc_cycle_length != 0)
1508 abs_frame_num = h->frame_num_offset + h->frame_num;
1509 else
1510 abs_frame_num = 0;
1511
1512 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1513 abs_frame_num--;
115329f1 1514
0da71265
MN
1515 expected_delta_per_poc_cycle = 0;
1516 for(i=0; i < h->sps.poc_cycle_length; i++)
1517 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1518
1519 if(abs_frame_num > 0){
1520 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1521 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1522
1523 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1524 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1525 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1526 } else
1527 expectedpoc = 0;
1528
115329f1 1529 if(h->nal_ref_idc == 0)
0da71265 1530 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 1531
0da71265
MN
1532 field_poc[0] = expectedpoc + h->delta_poc[0];
1533 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1534
1535 if(s->picture_structure == PICT_FRAME)
1536 field_poc[1] += h->delta_poc[1];
1537 }else{
b78a6baa 1538 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 1539
b78a6baa
MN
1540 if(!h->nal_ref_idc)
1541 poc--;
5710b371 1542
0da71265
MN
1543 field_poc[0]= poc;
1544 field_poc[1]= poc;
1545 }
115329f1 1546
357282c6 1547 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 1548 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 1549 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 1550 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 1551 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
1552
1553 return 0;
1554}
1555
b41c1db3
1556
1557/**
1558 * initialize scan tables
1559 */
1560static void init_scan_tables(H264Context *h){
1561 MpegEncContext * const s = &h->s;
1562 int i;
1563 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
1564 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
1565 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
1566 }else{
1567 for(i=0; i<16; i++){
1568#define T(x) (x>>2) | ((x<<2) & 0xF)
1569 h->zigzag_scan[i] = T(zigzag_scan[i]);
1570 h-> field_scan[i] = T( field_scan[i]);
1571#undef T
1572 }
1573 }
1574 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
45beb850 1575 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
b41c1db3
1576 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
1577 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
1578 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
1579 }else{
1580 for(i=0; i<64; i++){
1581#define T(x) (x>>3) | ((x&7)<<3)
45beb850 1582 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
b41c1db3
1583 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1584 h->field_scan8x8[i] = T(field_scan8x8[i]);
1585 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
1586#undef T
1587 }
1588 }
1589 if(h->sps.transform_bypass){ //FIXME same ugly
1590 h->zigzag_scan_q0 = zigzag_scan;
45beb850 1591 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
1592 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1593 h->field_scan_q0 = field_scan;
1594 h->field_scan8x8_q0 = field_scan8x8;
1595 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1596 }else{
1597 h->zigzag_scan_q0 = h->zigzag_scan;
1598 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1599 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1600 h->field_scan_q0 = h->field_scan;
1601 h->field_scan8x8_q0 = h->field_scan8x8;
1602 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
1603 }
1604}
afebe2f7 1605
256299d3
MN
1606static void field_end(H264Context *h){
1607 MpegEncContext * const s = &h->s;
1608 AVCodecContext * const avctx= s->avctx;
1609 s->mb_y= 0;
1610
1611 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1612 s->current_picture_ptr->pict_type= s->pict_type;
1613
1614 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1615 ff_vdpau_h264_set_reference_frames(s);
1616
1617 if(!s->dropable) {
ea6f00c4 1618 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
1619 h->prev_poc_msb= h->poc_msb;
1620 h->prev_poc_lsb= h->poc_lsb;
1621 }
1622 h->prev_frame_num_offset= h->frame_num_offset;
1623 h->prev_frame_num= h->frame_num;
1624
1625 if (avctx->hwaccel) {
1626 if (avctx->hwaccel->end_frame(avctx) < 0)
1627 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1628 }
1629
1630 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1631 ff_vdpau_h264_picture_complete(s);
1632
1633 /*
1634 * FIXME: Error handling code does not seem to support interlaced
1635 * when slices span multiple rows
1636 * The ff_er_add_slice calls don't work right for bottom
1637 * fields; they cause massive erroneous error concealing
1638 * Error marking covers both fields (top and bottom).
1639 * This causes a mismatched s->error_count
1640 * and a bad error table. Further, the error count goes to
1641 * INT_MAX when called for bottom field, because mb_y is
1642 * past end by one (callers fault) and resync_mb_y != 0
1643 * causes problems for the first MB line, too.
1644 */
1645 if (!FIELD_PICTURE)
1646 ff_er_frame_end(s);
1647
1648 MPV_frame_end(s);
d225a1e2
MN
1649
1650 h->current_slice=0;
256299d3
MN
1651}
1652
afebe2f7
1653/**
1654 * Replicates H264 "master" context to thread contexts.
1655 */
1656static void clone_slice(H264Context *dst, H264Context *src)
1657{
1658 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1659 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1660 dst->s.current_picture = src->s.current_picture;
1661 dst->s.linesize = src->s.linesize;
1662 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 1663 dst->s.first_field = src->s.first_field;
afebe2f7
1664
1665 dst->prev_poc_msb = src->prev_poc_msb;
1666 dst->prev_poc_lsb = src->prev_poc_lsb;
1667 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1668 dst->prev_frame_num = src->prev_frame_num;
1669 dst->short_ref_count = src->short_ref_count;
1670
1671 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1672 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1673 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1674 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
1675
1676 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1677 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
1678}
1679
0da71265
MN
1680/**
1681 * decodes a slice header.
9c852bcf 1682 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
1683 *
1684 * @param h h264context
1685 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1686 *
d9526386 1687 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 1688 */
afebe2f7 1689static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 1690 MpegEncContext * const s = &h->s;
12d96de3 1691 MpegEncContext * const s0 = &h0->s;
88e7a4d1 1692 unsigned int first_mb_in_slice;
ac658be5 1693 unsigned int pps_id;
0da71265 1694 int num_ref_idx_active_override_flag;
41f5c62f 1695 unsigned int slice_type, tmp, i, j;
0bf79634 1696 int default_ref_list_done = 0;
12d96de3 1697 int last_pic_structure;
0da71265 1698
2f944356 1699 s->dropable= h->nal_ref_idc == 0;
0da71265 1700
cf653d08
JD
1701 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1702 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1703 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1704 }else{
1705 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1706 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1707 }
1708
0da71265
MN
1709 first_mb_in_slice= get_ue_golomb(&s->gb);
1710
d225a1e2
MN
1711 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1712 if(h0->current_slice && FIELD_PICTURE){
1713 field_end(h);
1714 }
1715
afebe2f7 1716 h0->current_slice = 0;
12d96de3 1717 if (!s0->first_field)
f6e3c460 1718 s->current_picture_ptr= NULL;
66a4b2c1
MN
1719 }
1720
9963b332 1721 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 1722 if(slice_type > 9){
9b879566 1723 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 1724 return -1;
0da71265 1725 }
0bf79634
LLL
1726 if(slice_type > 4){
1727 slice_type -= 5;
0da71265
MN
1728 h->slice_type_fixed=1;
1729 }else
1730 h->slice_type_fixed=0;
115329f1 1731
ee2a957f 1732 slice_type= golomb_to_pict_type[ slice_type ];
9701840b 1733 if (slice_type == FF_I_TYPE
afebe2f7 1734 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
1735 default_ref_list_done = 1;
1736 }
1737 h->slice_type= slice_type;
e3e6f18f 1738 h->slice_type_nos= slice_type & 3;
0bf79634 1739
1412060e 1740 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
115329f1 1741
0da71265 1742 pps_id= get_ue_golomb(&s->gb);
ac658be5 1743 if(pps_id>=MAX_PPS_COUNT){
9b879566 1744 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
1745 return -1;
1746 }
afebe2f7 1747 if(!h0->pps_buffers[pps_id]) {
a0f80050 1748 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
1749 return -1;
1750 }
afebe2f7 1751 h->pps= *h0->pps_buffers[pps_id];
8b92b792 1752
afebe2f7 1753 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 1754 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
1755 return -1;
1756 }
afebe2f7 1757 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 1758
50c21814 1759 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 1760 h->dequant_coeff_pps = pps_id;
239ea04c
LM
1761 init_dequant_tables(h);
1762 }
115329f1 1763
0da71265 1764 s->mb_width= h->sps.mb_width;
6867a90b 1765 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 1766
bf4665ee 1767 h->b_stride= s->mb_width*4;
0da71265 1768
faf3dfb9 1769 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 1770 if(h->sps.frame_mbs_only_flag)
faf3dfb9 1771 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 1772 else
faf3dfb9 1773 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
115329f1
DB
1774
1775 if (s->context_initialized
bc997376 1776 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
afebe2f7
1777 if(h != h0)
1778 return -1; // width / height changed during parallelized decoding
0da71265 1779 free_tables(h);
ff7f75e1 1780 flush_dpb(s->avctx);
0da71265
MN
1781 MPV_common_end(s);
1782 }
1783 if (!s->context_initialized) {
afebe2f7
1784 if(h != h0)
1785 return -1; // we cant (re-)initialize context during parallel decoding
f3bdc3da
RD
1786
1787 avcodec_set_dimensions(s->avctx, s->width, s->height);
1788 s->avctx->sample_aspect_ratio= h->sps.sar;
1789 if(!s->avctx->sample_aspect_ratio.den)
1790 s->avctx->sample_aspect_ratio.den = 1;
1791
c4dffe7e
DC
1792 if(h->sps.video_signal_type_present_flag){
1793 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1794 if(h->sps.colour_description_present_flag){
1795 s->avctx->color_primaries = h->sps.color_primaries;
1796 s->avctx->color_trc = h->sps.color_trc;
1797 s->avctx->colorspace = h->sps.colorspace;
1798 }
1799 }
1800
f3bdc3da 1801 if(h->sps.timing_info_present_flag){
3102d180 1802 int64_t den= h->sps.time_scale;
055a6aa7 1803 if(h->x264_build < 44U)
3102d180 1804 den *= 2;
f3bdc3da 1805 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180 1806 h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da
RD
1807 }
1808 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
1809 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1810
0da71265
MN
1811 if (MPV_common_init(s) < 0)
1812 return -1;
12d96de3 1813 s->first_field = 0;
b19d493f 1814 h->prev_interlaced_frame = 1;
115329f1 1815
b41c1db3 1816 init_scan_tables(h);
903d58f6 1817 ff_h264_alloc_tables(h);
0da71265 1818
afebe2f7
1819 for(i = 1; i < s->avctx->thread_count; i++) {
1820 H264Context *c;
1821 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 1822 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7
1823 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
1824 c->sps = h->sps;
1825 c->pps = h->pps;
1826 init_scan_tables(c);
145061a1 1827 clone_tables(c, h, i);
afebe2f7
1828 }
1829
1830 for(i = 0; i < s->avctx->thread_count; i++)
1831 if(context_init(h->thread_context[i]) < 0)
1832 return -1;
0da71265
MN
1833 }
1834
0da71265
MN
1835 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1836
5d18eaad 1837 h->mb_mbaff = 0;
6ba71fc4 1838 h->mb_aff_frame = 0;
12d96de3 1839 last_pic_structure = s0->picture_structure;
0da71265
MN
1840 if(h->sps.frame_mbs_only_flag){
1841 s->picture_structure= PICT_FRAME;
1842 }else{
6ba71fc4 1843 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 1844 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 1845 } else {
0da71265 1846 s->picture_structure= PICT_FRAME;
6ba71fc4 1847 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 1848 }
0da71265 1849 }
44e9dcf1 1850 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
1851
1852 if(h0->current_slice == 0){
26b86e47
MN
1853 while(h->frame_num != h->prev_frame_num &&
1854 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
1855 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 1856 if (ff_h264_frame_start(h) < 0)
66e6038c 1857 return -1;
26b86e47
MN
1858 h->prev_frame_num++;
1859 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1860 s->current_picture_ptr->frame_num= h->prev_frame_num;
ea6f00c4 1861 ff_h264_execute_ref_pic_marking(h, NULL, 0);
26b86e47
MN
1862 }
1863
12d96de3
JD
1864 /* See if we have a decoded first field looking for a pair... */
1865 if (s0->first_field) {
1866 assert(s0->current_picture_ptr);
1867 assert(s0->current_picture_ptr->data[0]);
1868 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1869
1870 /* figure out if we have a complementary field pair */
1871 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1872 /*
1873 * Previous field is unmatched. Don't display it, but let it
1874 * remain for reference if marked as such.
1875 */
1876 s0->current_picture_ptr = NULL;
1877 s0->first_field = FIELD_PICTURE;
1878
1879 } else {
1880 if (h->nal_ref_idc &&
1881 s0->current_picture_ptr->reference &&
1882 s0->current_picture_ptr->frame_num != h->frame_num) {
1883 /*
1884 * This and previous field were reference, but had
1885 * different frame_nums. Consider this field first in
1886 * pair. Throw away previous field except for reference
1887 * purposes.
1888 */
1889 s0->first_field = 1;
1890 s0->current_picture_ptr = NULL;
1891
1892 } else {
1893 /* Second field in complementary pair */
1894 s0->first_field = 0;
1895 }
1896 }
1897
1898 } else {
1899 /* Frame or first field in a potentially complementary pair */
1900 assert(!s0->current_picture_ptr);
1901 s0->first_field = FIELD_PICTURE;
1902 }
1903
903d58f6 1904 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 1905 s0->first_field = 0;
2ddcf84b 1906 return -1;
12d96de3 1907 }
2ddcf84b
JD
1908 }
1909 if(h != h0)
1910 clone_slice(h, h0);
1911
1912 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1913
88e7a4d1 1914 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 1915 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
1916 first_mb_in_slice >= s->mb_num){
1917 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
1918 return -1;
1919 }
88e7a4d1 1920 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
1921 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1922 if (s->picture_structure == PICT_BOTTOM_FIELD)
1923 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 1924 assert(s->mb_y < s->mb_height);
115329f1 1925
0da71265
MN
1926 if(s->picture_structure==PICT_FRAME){
1927 h->curr_pic_num= h->frame_num;
1928 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1929 }else{
f57e2af6 1930 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
1931 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1932 }
115329f1 1933
0da71265 1934 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 1935 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 1936 }
115329f1 1937
0da71265
MN
1938 if(h->sps.poc_type==0){
1939 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 1940
0da71265
MN
1941 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1942 h->delta_poc_bottom= get_se_golomb(&s->gb);
1943 }
1944 }
115329f1 1945
0da71265
MN
1946 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
1947 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 1948
0da71265
MN
1949 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
1950 h->delta_poc[1]= get_se_golomb(&s->gb);
1951 }
115329f1 1952
0da71265 1953 init_poc(h);
115329f1 1954
0da71265
MN
1955 if(h->pps.redundant_pic_cnt_present){
1956 h->redundant_pic_count= get_ue_golomb(&s->gb);
1957 }
1958
1412060e 1959 //set defaults, might be overridden a few lines later
0da71265
MN
1960 h->ref_count[0]= h->pps.ref_count[0];
1961 h->ref_count[1]= h->pps.ref_count[1];
1962
e3e6f18f 1963 if(h->slice_type_nos != FF_I_TYPE){
9f5c1037 1964 if(h->slice_type_nos == FF_B_TYPE){
0da71265
MN
1965 h->direct_spatial_mv_pred= get_bits1(&s->gb);
1966 }
1967 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 1968
0da71265
MN
1969 if(num_ref_idx_active_override_flag){
1970 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
9f5c1037 1971 if(h->slice_type_nos==FF_B_TYPE)
0da71265
MN
1972 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
1973
187696fa 1974 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 1975 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 1976 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
1977 return -1;
1978 }
1979 }
9f5c1037 1980 if(h->slice_type_nos == FF_B_TYPE)
187696fa
MN
1981 h->list_count= 2;
1982 else
1983 h->list_count= 1;
1984 }else
1985 h->list_count= 0;
0da71265 1986
0bf79634 1987 if(!default_ref_list_done){
ea6f00c4 1988 ff_h264_fill_default_ref_list(h);
0da71265
MN
1989 }
1990
ea6f00c4 1991 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 1992 return -1;
0da71265 1993
07dff5c7
MN
1994 if(h->slice_type_nos!=FF_I_TYPE){
1995 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 1996 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
MN
1997 }
1998 if(h->slice_type_nos==FF_B_TYPE){
1999 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 2000 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
2001 }
2002
932f396f 2003 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
9f5c1037 2004 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
0da71265 2005 pred_weight_table(h);
1a29c6a0 2006 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
9f2d1b4f 2007 implicit_weight_table(h);
1a29c6a0 2008 }else {
9f2d1b4f 2009 h->use_weight = 0;
cb99c652
GB
2010 for (i = 0; i < 2; i++) {
2011 h->luma_weight_flag[i] = 0;
2012 h->chroma_weight_flag[i] = 0;
2013 }
2014 }
115329f1 2015
2ddcf84b 2016 if(h->nal_ref_idc)
ea6f00c4 2017 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 2018
5d18eaad 2019 if(FRAME_MBAFF)
ea6f00c4 2020 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 2021
8f56e219 2022 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
943f69a6
MN
2023 ff_h264_direct_dist_scale_factor(h);
2024 ff_h264_direct_ref_list_init(h);
8f56e219 2025
e3e6f18f 2026 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
9963b332 2027 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2028 if(tmp > 2){
2029 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
2030 return -1;
2031 }
2032 h->cabac_init_idc= tmp;
2033 }
e5017ab8
LA
2034
2035 h->last_qscale_diff = 0;
88e7a4d1
MN
2036 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2037 if(tmp>51){
2038 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2039 return -1;
2040 }
88e7a4d1 2041 s->qscale= tmp;
4691a77d
2042 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2043 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2044 //FIXME qscale / qp ... stuff
9701840b 2045 if(h->slice_type == FF_SP_TYPE){
1df1df0b 2046 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2047 }
9701840b 2048 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
1df1df0b 2049 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2050 }
2051
53c05b1e 2052 h->deblocking_filter = 1;
0c32e19d
MN
2053 h->slice_alpha_c0_offset = 52;
2054 h->slice_beta_offset = 52;
0da71265 2055 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2056 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2057 if(tmp > 2){
2058 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2059 return -1;
2060 }
2061 h->deblocking_filter= tmp;
115329f1 2062 if(h->deblocking_filter < 2)
53c05b1e
MN
2063 h->deblocking_filter^= 1; // 1<->0
2064
2065 if( h->deblocking_filter ) {
0c32e19d
MN
2066 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2067 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2068 if( h->slice_alpha_c0_offset > 104U
2069 || h->slice_beta_offset > 104U){
2070 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2071 return -1;
2072 }
0da71265 2073 }
980a82b7 2074 }
afebe2f7 2075
61858a76 2076 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4b30289e 2077 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
9f5c1037 2078 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
61858a76
RD
2079 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2080 h->deblocking_filter= 0;
2081
afebe2f7 2082 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2083 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2084 /* Cheat slightly for speed:
5d81d641 2085 Do not bother to deblock across slices. */
ec970c21
2086 h->deblocking_filter = 2;
2087 } else {
7ae94d52
2088 h0->max_contexts = 1;
2089 if(!h0->single_decode_warning) {
2090 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2091 h0->single_decode_warning = 1;
2092 }
2093 if(h != h0)
2094 return 1; // deblocking switched inside frame
ec970c21 2095 }
afebe2f7 2096 }
0c32e19d 2097 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7 2098
0da71265
MN
2099#if 0 //FMO
2100 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2101 slice_group_change_cycle= get_bits(&s->gb, ?);
2102#endif
2103
afebe2f7
2104 h0->last_slice_type = slice_type;
2105 h->slice_num = ++h0->current_slice;
b735aeea
MN
2106 if(h->slice_num >= MAX_SLICES){
2107 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2108 }
5175b937 2109
c32867b5 2110 for(j=0; j<2; j++){
6d7e6b26 2111 int id_list[16];
b735aeea 2112 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
MN
2113 for(i=0; i<16; i++){
2114 id_list[i]= 60;
2115 if(h->ref_list[j][i].data[0]){
2116 int k;
2117 uint8_t *base= h->ref_list[j][i].base[0];
2118 for(k=0; k<h->short_ref_count; k++)
2119 if(h->short_ref[k]->base[0] == base){
2120 id_list[i]= k;
2121 break;
2122 }
2123 for(k=0; k<h->long_ref_count; k++)
2124 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2125 id_list[i]= h->short_ref_count + k;
2126 break;
2127 }
2128 }
2129 }
2130
c32867b5
MN
2131 ref2frm[0]=
2132 ref2frm[1]= -1;
d50cdd82 2133 for(i=0; i<16; i++)
6d7e6b26 2134 ref2frm[i+2]= 4*id_list[i]
c32867b5 2135 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2136 ref2frm[18+0]=
2137 ref2frm[18+1]= -1;
2138 for(i=16; i<48; i++)
6d7e6b26 2139 ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82 2140 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2141 }
2142
5d18eaad 2143 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2144 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2145
802e9146
MN
2146 s->avctx->refs= h->sps.ref_frame_count;
2147
0da71265 2148 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2149 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2150 h->slice_num,
2151 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2152 first_mb_in_slice,
49573a87 2153 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2154 pps_id, h->frame_num,
2155 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2156 h->ref_count[0], h->ref_count[1],
2157 s->qscale,
0c32e19d 2158 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f 2159 h->use_weight,
4806b922
MN
2160 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2161 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2162 );
2163 }
2164
2165 return 0;
2166}
2167
0dc343d4 2168int ff_h264_get_slice_type(const H264Context *h)
75dd6938
LA
2169{
2170 switch (h->slice_type) {
2171 case FF_P_TYPE: return 0;
2172 case FF_B_TYPE: return 1;
2173 case FF_I_TYPE: return 2;
2174 case FF_SP_TYPE: return 3;
2175 case FF_SI_TYPE: return 4;
2176 default: return -1;
2177 }
2178}
2179
c988f975
MN
2180static void loop_filter(H264Context *h){
2181 MpegEncContext * const s = &h->s;
2182 uint8_t *dest_y, *dest_cb, *dest_cr;
2183 int linesize, uvlinesize, mb_x, mb_y;
2184 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2185 const int old_slice_type= h->slice_type;
2186
2187 if(h->deblocking_filter) {
2188 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2189 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
78998bf2 2190 int mb_xy, mb_type;
c988f975
MN
2191 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2192 h->slice_num= h->slice_table[mb_xy];
2193 mb_type= s->current_picture.mb_type[mb_xy];
2194 h->list_count= h->list_counts[mb_xy];
c988f975
MN
2195
2196 if(FRAME_MBAFF)
2197 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
2198
c988f975
MN
2199 s->mb_x= mb_x;
2200 s->mb_y= mb_y;
2201 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2202 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2203 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2204 //FIXME simplify above
2205
2206 if (MB_FIELD) {
2207 linesize = h->mb_linesize = s->linesize * 2;
2208 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2209 if(mb_y&1){ //FIXME move out of this function?
2210 dest_y -= s->linesize*15;
2211 dest_cb-= s->uvlinesize*7;
2212 dest_cr-= s->uvlinesize*7;
2213 }
2214 } else {
2215 linesize = h->mb_linesize = s->linesize;
2216 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2217 }
77d40dce 2218 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
aaa995d7 2219 if(fill_filter_caches(h, mb_type))
44a5e7b6 2220 continue;
c988f975
MN
2221 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2222 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2223
77d40dce 2224 if (FRAME_MBAFF) {
c988f975
MN
2225 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2226 } else {
2227 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2228 }
2229 }
2230 }
2231 }
2232 h->slice_type= old_slice_type;
2233 s->mb_x= 0;
2234 s->mb_y= end_mb_y - FRAME_MBAFF;
f4b8b825
MN
2235 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2236 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
c988f975
MN
2237}
2238
69a28f3e
MN
2239static void predict_field_decoding_flag(H264Context *h){
2240 MpegEncContext * const s = &h->s;
2241 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2242 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
2243 ? s->current_picture.mb_type[mb_xy-1]
2244 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
2245 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
2246 : 0;
2247 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
2248}
2249
3a84713a
RS
2250static int decode_slice(struct AVCodecContext *avctx, void *arg){
2251 H264Context *h = *(void**)arg;
0da71265
MN
2252 MpegEncContext * const s = &h->s;
2253 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2254
2255 s->mb_skip_run= -1;
0da71265 2256
89db0bae 2257 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b 2258 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae 2259
e5017ab8 2260 if( h->pps.cabac ) {
e5017ab8
LA
2261 /* realign */
2262 align_get_bits( &s->gb );
2263
2264 /* init cabac */
d61c4e73 2265 ff_init_cabac_states( &h->cabac);
e5017ab8
LA
2266 ff_init_cabac_decoder( &h->cabac,
2267 s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15 2268 (get_bits_left(&s->gb) + 7)/8);
cc51b282
MN
2269
2270 ff_h264_init_cabac_states(h);
95c26348 2271
e5017ab8 2272 for(;;){
851ded89 2273//START_TIMER
cc51b282 2274 int ret = ff_h264_decode_mb_cabac(h);
6867a90b 2275 int eos;
851ded89 2276//STOP_TIMER("decode_mb_cabac")
0da71265 2277
903d58f6 2278 if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265 2279
5d18eaad 2280 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
LA
2281 s->mb_y++;
2282
cc51b282 2283 ret = ff_h264_decode_mb_cabac(h);
e5017ab8 2284
903d58f6 2285 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2286 s->mb_y--;
2287 }
6867a90b 2288 eos = get_cabac_terminate( &h->cabac );
e5017ab8 2289
3566042a
MN
2290 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
2291 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2292 return 0;
2293 }
5659b509 2294 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af 2295 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
LA
2296 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2297 return -1;
2298 }
2299
2300 if( ++s->mb_x >= s->mb_width ) {
2301 s->mb_x = 0;
c988f975 2302 loop_filter(h);
e5017ab8 2303 ff_draw_horiz_band(s, 16*s->mb_y, 16);
5175b937 2304 ++s->mb_y;
f3e53d9f 2305 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2306 ++s->mb_y;
69cc3183
MN
2307 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2308 predict_field_decoding_flag(h);
6867a90b 2309 }
0da71265 2310 }
0da71265 2311
e5017ab8 2312 if( eos || s->mb_y >= s->mb_height ) {
a9c9a240 2313 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8 2314 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0da71265 2315 return 0;
e5017ab8 2316 }
e5017ab8
LA
2317 }
2318
2319 } else {
2320 for(;;){
e1e94902 2321 int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2322
903d58f6 2323 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8 2324
5d18eaad 2325 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8 2326 s->mb_y++;
e1e94902 2327 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2328
903d58f6 2329 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2330 s->mb_y--;
2331 }
2332
2333 if(ret<0){
2334 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2335 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2336
2337 return -1;
2338 }
e5017ab8
LA
2339
2340 if(++s->mb_x >= s->mb_width){
2341 s->mb_x=0;
c988f975 2342 loop_filter(h);
e5017ab8 2343 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6867a90b 2344 ++s->mb_y;
f3e53d9f 2345 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2346 ++s->mb_y;
69cc3183
MN
2347 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2348 predict_field_decoding_flag(h);
6867a90b
LLL
2349 }
2350 if(s->mb_y >= s->mb_height){
a9c9a240 2351 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2352
2353 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2354 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2355
2356 return 0;
2357 }else{
2358 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2359
2360 return -1;
2361 }
2362 }
2363 }
2364
2365 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240 2366 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2367 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2368 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2369
2370 return 0;
2371 }else{
2372 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2373
2374 return -1;
2375 }
2376 }
0da71265
MN
2377 }
2378 }
e5017ab8 2379
0da71265
MN
2380#if 0
2381 for(;s->mb_y < s->mb_height; s->mb_y++){
2382 for(;s->mb_x < s->mb_width; s->mb_x++){
2383 int ret= decode_mb(h);
115329f1 2384
903d58f6 2385 ff_h264_hl_decode_mb(h);
0da71265
MN
2386
2387 if(ret<0){
267f7edc 2388 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2389 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2390
2391 return -1;
2392 }
115329f1 2393
0da71265
MN
2394 if(++s->mb_x >= s->mb_width){
2395 s->mb_x=0;
2396 if(++s->mb_y >= s->mb_height){
2397 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2398 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2399
2400 return 0;
2401 }else{
2402 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2403
2404 return -1;
2405 }
2406 }
2407 }
115329f1 2408
0da71265
MN
2409 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2410 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2411 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2412
2413 return 0;
2414 }else{
2415 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2416
2417 return -1;
2418 }
2419 }
2420 }
2421 s->mb_x=0;
2422 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2423 }
2424#endif
2425 return -1; //not reached
2426}
2427
afebe2f7
2428/**
2429 * Call decode_slice() for each context.
2430 *
2431 * @param h h264 master context
2432 * @param context_count number of contexts to execute
2433 */
2434static void execute_decode_slices(H264Context *h, int context_count){
2435 MpegEncContext * const s = &h->s;
2436 AVCodecContext * const avctx= s->avctx;
2437 H264Context *hx;
2438 int i;
2439
40e5d31b
GB
2440 if (s->avctx->hwaccel)
2441 return;
0d3d172f 2442 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2443 return;
afebe2f7 2444 if(context_count == 1) {
74e8b78b 2445 decode_slice(avctx, &h);
afebe2f7
2446 } else {
2447 for(i = 1; i < context_count; i++) {
2448 hx = h->thread_context[i];
047599a4 2449 hx->s.error_recognition = avctx->error_recognition;
afebe2f7
2450 hx->s.error_count = 0;
2451 }
2452
2453 avctx->execute(avctx, (void *)decode_slice,
01418506 2454 h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
2455
2456 /* pull back stuff from slices to master context */
2457 hx = h->thread_context[context_count - 1];
2458 s->mb_x = hx->s.mb_x;
2459 s->mb_y = hx->s.mb_y;
12d96de3
JD
2460 s->dropable = hx->s.dropable;
2461 s->picture_structure = hx->s.picture_structure;
afebe2f7
2462 for(i = 1; i < context_count; i++)
2463 h->s.error_count += h->thread_context[i]->s.error_count;
2464 }
2465}
2466
2467
30317501 2468static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
MN
2469 MpegEncContext * const s = &h->s;
2470 AVCodecContext * const avctx= s->avctx;
2471 int buf_index=0;
afebe2f7
2472 H264Context *hx; ///< thread context
2473 int context_count = 0;
74b14aac 2474 int next_avc= h->is_avc ? 0 : buf_size;
afebe2f7
2475
2476 h->max_contexts = avctx->thread_count;
377ec888 2477#if 0
eb60dddc 2478 int i;
96b6ace2
MN
2479 for(i=0; i<50; i++){
2480 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
0da71265
MN
2481 }
2482#endif
66a4b2c1 2483 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7 2484 h->current_slice = 0;
12d96de3 2485 if (!s->first_field)
f6e3c460 2486 s->current_picture_ptr= NULL;
9c095463 2487 ff_h264_reset_sei(h);
66a4b2c1
MN
2488 }
2489
0da71265
MN
2490 for(;;){
2491 int consumed;
2492 int dst_length;
2493 int bit_length;
30317501 2494 const uint8_t *ptr;
4770b1b4 2495 int i, nalsize = 0;
afebe2f7 2496 int err;
115329f1 2497
74b14aac 2498 if(buf_index >= next_avc) {
1c48415b
2499 if(buf_index >= buf_size) break;
2500 nalsize = 0;
2501 for(i = 0; i < h->nal_length_size; i++)
2502 nalsize = (nalsize << 8) | buf[buf_index++];
8d8409ca 2503 if(nalsize <= 1 || nalsize > buf_size - buf_index){
1c48415b
2504 if(nalsize == 1){
2505 buf_index++;
2506 continue;
2507 }else{
2508 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2509 break;
2510 }
2511 }
74b14aac 2512 next_avc= buf_index + nalsize;
1c48415b
2513 } else {
2514 // start code prefix search
52255d17 2515 for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
2516 // This should always succeed in the first iteration.
2517 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2518 break;
8b031359 2519 }
115329f1 2520
1c48415b 2521 if(buf_index+3 >= buf_size) break;
115329f1 2522
1c48415b 2523 buf_index+=3;
52255d17 2524 if(buf_index >= next_avc) continue;
1c48415b 2525 }
115329f1 2526
afebe2f7
2527 hx = h->thread_context[context_count];
2528
74b14aac 2529 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429 2530 if (ptr==NULL || dst_length < 0){
ac658be5
FOL
2531 return -1;
2532 }
3566042a
MN
2533 i= buf_index + consumed;
2534 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
2535 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
2536 s->workaround_bugs |= FF_BUG_TRUNCATED;
2537
2538 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
6ac9696e 2539 while(ptr[dst_length - 1] == 0 && dst_length > 0)
c4da83fb 2540 dst_length--;
3566042a 2541 }
1790a5e9 2542 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
MN
2543
2544 if(s->avctx->debug&FF_DEBUG_STARTCODE){
afebe2f7 2545 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
0da71265 2546 }
115329f1 2547
74b14aac 2548 if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d 2549 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1 2550 }
4770b1b4 2551
0da71265
MN
2552 buf_index += consumed;
2553
755bfeab 2554 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
8c3eba7c 2555 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
0da71265 2556 continue;
115329f1 2557
afebe2f7
2558 again:
2559 err = 0;
2560 switch(hx->nal_unit_type){
0da71265 2561 case NAL_IDR_SLICE:
afebe2f7
2562 if (h->nal_unit_type != NAL_IDR_SLICE) {
2563 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2564 return -1;
2565 }
3b66c4c5 2566 idr(h); //FIXME ensure we don't loose some frames if there is reordering
0da71265 2567 case NAL_SLICE:
afebe2f7
2568 init_get_bits(&hx->s.gb, ptr, bit_length);
2569 hx->intra_gb_ptr=
2570 hx->inter_gb_ptr= &hx->s.gb;
2571 hx->s.data_partitioning = 0;
2572
2573 if((err = decode_slice_header(hx, h)))
2574 break;
2575
dd0cd3d2
RC
2576 avctx->profile = hx->sps.profile_idc;
2577 avctx->level = hx->sps.level_idc;
2578
6026a096
GB
2579 if (s->avctx->hwaccel && h->current_slice == 1) {
2580 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
2581 return -1;
2582 }
2583
37a558fe
IS
2584 s->current_picture_ptr->key_frame |=
2585 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2586 (h->sei_recovery_frame_cnt >= 0);
afebe2f7
2587 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
2588 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2589 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2590 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
369122dd 2591 && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
MN
2592 if(avctx->hwaccel) {
2593 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2594 return -1;
2595 }else
0d3d172f 2596 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd 2597 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
CEH
2598 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2599 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd 2600 }else
f2c214a1 2601 context_count++;
369122dd 2602 }
0da71265
MN
2603 break;
2604 case NAL_DPA:
afebe2f7
2605 init_get_bits(&hx->s.gb, ptr, bit_length);
2606 hx->intra_gb_ptr=
2607 hx->inter_gb_ptr= NULL;
0410ee8f
AS
2608
2609 if ((err = decode_slice_header(hx, h)) < 0)
2610 break;
2611
dd0cd3d2
RC
2612 avctx->profile = hx->sps.profile_idc;
2613 avctx->level = hx->sps.level_idc;
2614
afebe2f7 2615 hx->s.data_partitioning = 1;
115329f1 2616
0da71265
MN
2617 break;
2618 case NAL_DPB:
afebe2f7
2619 init_get_bits(&hx->intra_gb, ptr, bit_length);
2620 hx->intra_gb_ptr= &hx->intra_gb;
0da71265
MN
2621 break;
2622 case NAL_DPC:
afebe2f7
2623 init_get_bits(&hx->inter_gb, ptr, bit_length);
2624 hx->inter_gb_ptr= &hx->inter_gb;
8b92b792 2625
afebe2f7 2626 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03 2627 && s->context_initialized
e0111b32 2628 && s->hurry_up < 5
afebe2f7 2629 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2630 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2631 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
e0111b32 2632 && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7 2633 context_count++;
0da71265
MN
2634 break;
2635 case NAL_SEI:
cdd10689 2636 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2637 ff_h264_decode_sei(h);
0da71265
MN
2638 break;
2639 case NAL_SPS:
2640 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2641 ff_h264_decode_seq_parameter_set(h);
115329f1 2642
0da71265
MN
2643 if(s->flags& CODEC_FLAG_LOW_DELAY)
2644 s->low_delay=1;
115329f1 2645
a18030bb
LM
2646 if(avctx->has_b_frames < 2)
2647 avctx->has_b_frames= !s->low_delay;
0da71265
MN
2648 break;
2649 case NAL_PPS:
2650 init_get_bits(&s->gb, ptr, bit_length);
115329f1 2651
1790a5e9 2652 ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
MN
2653
2654 break;
ab470fa7
LM
2655 case NAL_AUD:
2656 case NAL_END_SEQUENCE:
2657 case NAL_END_STREAM:
2658 case NAL_FILLER_DATA:
2659 case NAL_SPS_EXT:
2660 case NAL_AUXILIARY_SLICE:
0da71265 2661 break;
bb270c08 2662 default:
4ad04da2 2663 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1 2664 }
115329f1 2665
afebe2f7
2666 if(context_count == h->max_contexts) {
2667 execute_decode_slices(h, context_count);
2668 context_count = 0;
2669 }
2670
2671 if (err < 0)
2672 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2673 else if(err == 1) {
2674 /* Slice could not be decoded in parallel mode, copy down
2675 * NAL unit stuff to context 0 and restart. Note that
1412060e 2676 * rbsp_buffer is not transferred, but since we no longer
afebe2f7
2677 * run in parallel mode this should not be an issue. */
2678 h->nal_unit_type = hx->nal_unit_type;
2679 h->nal_ref_idc = hx->nal_ref_idc;
2680 hx = h;
2681 goto again;
2682 }
2683 }
2684 if(context_count)
2685 execute_decode_slices(h, context_count);
0da71265
MN
2686 return buf_index;
2687}
2688
2689/**
3b66c4c5 2690 * returns the number of bytes consumed for building the current frame
0da71265
MN
2691 */
2692static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
755bfeab 2693 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
0da71265
MN
2694 if(pos+10>buf_size) pos=buf_size; // oops ;)
2695
2696 return pos;
0da71265
MN
2697}
2698
115329f1 2699static int decode_frame(AVCodecContext *avctx,
0da71265 2700 void *data, int *data_size,
7a00bbad 2701 AVPacket *avpkt)
0da71265 2702{
7a00bbad
TB
2703 const uint8_t *buf = avpkt->data;
2704 int buf_size = avpkt->size;
0da71265
MN
2705 H264Context *h = avctx->priv_data;
2706 MpegEncContext *s = &h->s;
115329f1 2707 AVFrame *pict = data;
0da71265 2708 int buf_index;
115329f1 2709
0da71265 2710 s->flags= avctx->flags;
303e50e6 2711 s->flags2= avctx->flags2;
0da71265 2712
1412060e 2713 /* end of stream, output what is still in the buffers */
0da71265 2714 if (buf_size == 0) {
97bbb885
MN
2715 Picture *out;
2716 int i, out_idx;
2717
2718//FIXME factorize this with the output code below
2719 out = h->delayed_pic[0];
2720 out_idx = 0;
c173a088 2721 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
MN
2722 if(h->delayed_pic[i]->poc < out->poc){
2723 out = h->delayed_pic[i];
2724 out_idx = i;
2725 }
2726
2727 for(i=out_idx; h->delayed_pic[i]; i++)
2728 h->delayed_pic[i] = h->delayed_pic[i+1];
2729
2730 if(out){
2731 *data_size = sizeof(AVFrame);
2732 *pict= *(AVFrame*)out;
2733 }
2734
0da71265
MN
2735 return 0;
2736 }
115329f1 2737
0da71265 2738 buf_index=decode_nal_units(h, buf, buf_size);
115329f1 2739 if(buf_index < 0)
0da71265
MN
2740 return -1;
2741
56c70e1d 2742 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
1c746a49 2743 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
56c70e1d
MN
2744 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
2745 return -1;
2746 }
2747
66a4b2c1
MN
2748 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
2749 Picture *out = s->current_picture_ptr;
2750 Picture *cur = s->current_picture_ptr;
44be1d64 2751 int i, pics, out_of_order, out_idx;
115329f1 2752
256299d3 2753 field_end(h);
66a4b2c1 2754
357282c6 2755 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
12d96de3
JD
2756 /* Wait for second field. */
2757 *data_size = 0;
2758
2759 } else {
b19d493f 2760 cur->interlaced_frame = 0;
b09a7c05
2761 cur->repeat_pict = 0;
2762
2763 /* Signal interlacing information externally. */
2764 /* Prioritize picture timing SEI information over used decoding process if it exists. */
70e01da3 2765
b09a7c05
2766 if(h->sps.pic_struct_present_flag){
2767 switch (h->sei_pic_struct)
2768 {
b19d493f
HY
2769 case SEI_PIC_STRUCT_FRAME:
2770 break;
2771 case SEI_PIC_STRUCT_TOP_FIELD:
2772 case SEI_PIC_STRUCT_BOTTOM_FIELD:
2773 cur->interlaced_frame = 1;
2774 break;
2775 case SEI_PIC_STRUCT_TOP_BOTTOM:
2776 case SEI_PIC_STRUCT_BOTTOM_TOP:
2777 if (FIELD_OR_MBAFF_PICTURE)
2778 cur->interlaced_frame = 1;
2779 else
2780 // try to flag soft telecine progressive
2781 cur->interlaced_frame = h->prev_interlaced_frame;
2782 break;
b09a7c05
2783 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
2784 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
2785 // Signal the possibility of telecined film externally (pic_struct 5,6)
2786 // From these hints, let the applications decide if they apply deinterlacing.
2787 cur->repeat_pict = 1;
b09a7c05
2788 break;
2789 case SEI_PIC_STRUCT_FRAME_DOUBLING:
2790 // Force progressive here, as doubling interlaced frame is a bad idea.
b09a7c05
2791 cur->repeat_pict = 2;
2792 break;
2793 case SEI_PIC_STRUCT_FRAME_TRIPLING:
b09a7c05
2794 cur->repeat_pict = 4;
2795 break;
2796 }
b19d493f
HY
2797
2798 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
2799 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
b09a7c05
2800 }else{
2801 /* Derive interlacing flag from used decoding process. */
2802 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
2803 }
b19d493f 2804 h->prev_interlaced_frame = cur->interlaced_frame;
b09a7c05
2805
2806 if (cur->field_poc[0] != cur->field_poc[1]){
2807 /* Derive top_field_first from field pocs. */
2808 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
2809 }else{
2810 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
2811 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
2812 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
2813 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
2814 cur->top_field_first = 1;
2815 else
2816 cur->top_field_first = 0;
2817 }else{
2818 /* Most likely progressive */
2819 cur->top_field_first = 0;
2820 }
2821 }
84a8596d 2822
f6e3c460 2823 //FIXME do something with unavailable reference frames
8b92b792 2824
f6e3c460 2825 /* Sort B-frames into display order */
2f944356 2826
f6e3c460
2827 if(h->sps.bitstream_restriction_flag
2828 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
2829 s->avctx->has_b_frames = h->sps.num_reorder_frames;
2830 s->low_delay = 0;
2831 }
9170e345 2832
fb19e144
MN
2833 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
2834 && !h->sps.bitstream_restriction_flag){
2835 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
2836 s->low_delay= 0;
2837 }
2838
f6e3c460
2839 pics = 0;
2840 while(h->delayed_pic[pics]) pics++;
9170e345 2841
64b9d48f 2842 assert(pics <= MAX_DELAYED_PIC_COUNT);
4e4d983e 2843
f6e3c460
2844 h->delayed_pic[pics++] = cur;
2845 if(cur->reference == 0)
2846 cur->reference = DELAYED_PIC_REF;
2f944356 2847
f6e3c460
2848 out = h->delayed_pic[0];
2849 out_idx = 0;
c173a088 2850 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
f6e3c460
2851 if(h->delayed_pic[i]->poc < out->poc){
2852 out = h->delayed_pic[i];
2853 out_idx = i;
2854 }
44be1d64
MN
2855 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
2856 h->outputed_poc= INT_MIN;
2857 out_of_order = out->poc < h->outputed_poc;
1b547aba 2858
f6e3c460
2859 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
2860 { }
2a811db2 2861 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
f6e3c460 2862 || (s->low_delay &&
44be1d64 2863 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
9701840b 2864 || cur->pict_type == FF_B_TYPE)))
f6e3c460
2865 {
2866 s->low_delay = 0;
2867 s->avctx->has_b_frames++;
f6e3c460 2868 }
f6e3c460
2869
2870 if(out_of_order || pics > s->avctx->has_b_frames){
3eaa6d0e 2871 out->reference &= ~DELAYED_PIC_REF;
f6e3c460
2872 for(i=out_idx; h->delayed_pic[i]; i++)
2873 h->delayed_pic[i] = h->delayed_pic[i+1];
2874 }
3eaa6d0e 2875 if(!out_of_order && pics > s->avctx->has_b_frames){
f6e3c460 2876 *data_size = sizeof(AVFrame);
df8a7dff 2877
44be1d64
MN
2878 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
2879 h->outputed_poc = INT_MIN;
2880 } else
67e362ca 2881 h->outputed_poc = out->poc;
f6e3c460 2882 *pict= *(AVFrame*)out;
3eaa6d0e 2883 }else{
f6e3c460 2884 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3eaa6d0e 2885 }
12d96de3 2886 }
a4dae92b
LM
2887 }
2888
3165e258 2889 assert(pict->data[0] || !*data_size);
4e4d983e 2890 ff_print_debug_info(s, pict);
0da71265 2891//printf("out %d\n", (int)pict->data[0]);
0da71265 2892
0da71265
MN
2893 return get_consumed_bytes(s, buf_index, buf_size);
2894}
#if 0
/**
 * Fill h->mb_avail[] for the current macroblock. Entries 0..3 are set when
 * the top-left, top, top-right and left neighbours lie inside the picture
 * and carry the current slice number in h->slice_table.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int cur_xy     = s->mb_x + s->mb_y*s->mb_stride;
    const int above_xy   = cur_xy - s->mb_stride;
    const int has_above  = s->mb_y != 0;

    /* the && chains make sure slice_table is only read for existing neighbours */
    h->mb_avail[0]= has_above && s->mb_x                  && h->slice_table[above_xy - 1] == h->slice_num;
    h->mb_avail[1]= has_above                             && h->slice_table[above_xy    ] == h->slice_num;
    h->mb_avail[2]= has_above && s->mb_x+1 < s->mb_width  && h->slice_table[above_xy + 1] == h->slice_num;
    h->mb_avail[3]= s->mb_x && h->slice_table[cur_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
2914
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test, only built when TEST is defined.
 *
 * Writes COUNT unsigned and COUNT signed exp golomb codes into a buffer,
 * reads them back and prints every value that does not round-trip.
 * Mismatches are only reported, they do not change the return value.
 *
 * @return 0
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    /* NOTE(review): avctx is handed to dsputil_init() without being
     * initialized -- confirm that no field of it is read there. */
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* the value that was written is i - COUNT/2, report that as the
             * expected one instead of the bare loop index */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    /* The disabled tests below use names (s, qp, src1_block, src2_block)
     * that are not declared in this function. */
#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        s->dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
0da71265
MN
3089
3090
cbf1eae9 3091av_cold void ff_h264_free_context(H264Context *h)
0da71265 3092{
5f129a05 3093 int i;
115329f1 3094
0da71265 3095 free_tables(h); //FIXME cleanup init stuff perhaps
5f129a05
MN
3096
3097 for(i = 0; i < MAX_SPS_COUNT; i++)
3098 av_freep(h->sps_buffers + i);
3099
3100 for(i = 0; i < MAX_PPS_COUNT; i++)
3101 av_freep(h->pps_buffers + i);
15861962
RD
3102}
3103
903d58f6 3104av_cold int ff_h264_decode_end(AVCodecContext *avctx)
15861962
RD
3105{
3106 H264Context *h = avctx->priv_data;
3107 MpegEncContext *s = &h->s;
3108
3109 ff_h264_free_context(h);
5f129a05 3110
0da71265
MN
3111 MPV_common_end(s);
3112
3113// memset(h, 0, sizeof(H264Context));
115329f1 3114
0da71265
MN
3115 return 0;
3116}
3117
3118
3119AVCodec h264_decoder = {
3120 "h264",
3121 CODEC_TYPE_VIDEO,
3122 CODEC_ID_H264,
3123 sizeof(H264Context),
903d58f6 3124 ff_h264_decode_init,
0da71265 3125 NULL,
903d58f6 3126 ff_h264_decode_end,
0da71265 3127 decode_frame,
f3ba9db4 3128 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7c33ad19 3129 .flush= flush_dpb,
fe4bf374 3130 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
6026a096 3131 .pix_fmts= ff_hwaccel_pixfmt_list_420,