Make sure the EC code does not attempt to use inter based concealment if there
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
188d3c51 34#include "h264_mvpred.h"
26b4fe82 35#include "h264_parser.h"
0da71265 36#include "golomb.h"
199436b9 37#include "mathops.h"
626464fb 38#include "rectangle.h"
369122dd 39#include "vdpau_internal.h"
0da71265 40
e5017ab8
LA
41#include "cabac.h"
42
2848ce84 43//#undef NDEBUG
0da71265
MN
44#include <assert.h>
45
/* Lookup table: rem6[q] == q % 6 for q in 0..51. */
static const uint8_t rem6[52] = {
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
49
/* Lookup table: div6[q] == q / 6 for q in 0..51. */
static const uint8_t div6[52] = {
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
53
903d58f6 54void ff_h264_write_back_intra_pred_mode(H264Context *h){
5b0fb524 55 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
0da71265 56
662a5b23
MN
57 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
58 mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
59 mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
60 mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
0da71265
MN
61}
62
63/**
64 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
65 */
2bedc0e8
MN
66int ff_h264_check_intra4x4_pred_mode(H264Context *h){
67 MpegEncContext * const s = &h->s;
68 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
69 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
70 int i;
71
72 if(!(h->top_samples_available&0x8000)){
73 for(i=0; i<4; i++){
74 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
75 if(status<0){
76 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
77 return -1;
78 } else if(status){
79 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
80 }
81 }
82 }
83
84 if((h->left_samples_available&0x8888)!=0x8888){
85 static const int mask[4]={0x8000,0x2000,0x80,0x20};
86 for(i=0; i<4; i++){
87 if(!(h->left_samples_available&mask[i])){
88 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
89 if(status<0){
90 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
91 return -1;
92 } else if(status){
93 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
94 }
95 }
96 }
97 }
98
99 return 0;
100} //FIXME cleanup like ff_h264_check_intra_pred_mode
101
102/**
103 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
104 */
903d58f6 105int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
0da71265
MN
106 MpegEncContext * const s = &h->s;
107 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
108 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 109
43ff0714 110 if(mode > 6U) {
5175b937 111 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 112 return -1;
5175b937 113 }
115329f1 114
0da71265
MN
115 if(!(h->top_samples_available&0x8000)){
116 mode= top[ mode ];
117 if(mode<0){
9b879566 118 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
119 return -1;
120 }
121 }
115329f1 122
d1d10e91 123 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 124 mode= left[ mode ];
d1d10e91
MN
125 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
126 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
127 }
0da71265 128 if(mode<0){
9b879566 129 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 130 return -1;
115329f1 131 }
0da71265
MN
132 }
133
134 return mode;
135}
136
1790a5e9 137const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
138 int i, si, di;
139 uint8_t *dst;
24456882 140 int bufidx;
0da71265 141
bb270c08 142// src[0]&0x80; //forbidden bit
0da71265
MN
143 h->nal_ref_idc= src[0]>>5;
144 h->nal_unit_type= src[0]&0x1F;
145
146 src++; length--;
115329f1 147#if 0
0da71265
MN
148 for(i=0; i<length; i++)
149 printf("%2X ", src[i]);
150#endif
e08715d3 151
b250f9c6
AJ
152#if HAVE_FAST_UNALIGNED
153# if HAVE_FAST_64BIT
e08715d3
MN
154# define RS 7
155 for(i=0; i+1<length; i+=9){
19769ece 156 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
157# else
158# define RS 3
159 for(i=0; i+1<length; i+=5){
19769ece 160 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
161# endif
162 continue;
163 if(i>0 && !src[i]) i--;
164 while(src[i]) i++;
165#else
166# define RS 0
0da71265
MN
167 for(i=0; i+1<length; i+=2){
168 if(src[i]) continue;
169 if(i>0 && src[i-1]==0) i--;
e08715d3 170#endif
0da71265
MN
171 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
172 if(src[i+2]!=3){
173 /* startcode, so we must be past the end */
174 length=i;
175 }
176 break;
177 }
abb27cfb 178 i-= RS;
0da71265
MN
179 }
180
181 if(i>=length-1){ //no escaped 0
182 *dst_length= length;
183 *consumed= length+1; //+1 for the header
115329f1 184 return src;
0da71265
MN
185 }
186
24456882 187 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 188 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 189 dst= h->rbsp_buffer[bufidx];
0da71265 190
ac658be5
FOL
191 if (dst == NULL){
192 return NULL;
193 }
194
3b66c4c5 195//printf("decoding esc\n");
593af7cd
MN
196 memcpy(dst, src, i);
197 si=di=i;
198 while(si+2<length){
0da71265 199 //remove escapes (very rare 1:2^22)
593af7cd
MN
200 if(src[si+2]>3){
201 dst[di++]= src[si++];
202 dst[di++]= src[si++];
203 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
204 if(src[si+2]==3){ //escape
205 dst[di++]= 0;
206 dst[di++]= 0;
207 si+=3;
c8470cc1 208 continue;
0da71265 209 }else //next start code
593af7cd 210 goto nsc;
0da71265
MN
211 }
212
213 dst[di++]= src[si++];
214 }
593af7cd
MN
215 while(si<length)
216 dst[di++]= src[si++];
217nsc:
0da71265 218
d4369630
AS
219 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
220
0da71265
MN
221 *dst_length= di;
222 *consumed= si + 1;//+1 for the header
90b5b51e 223//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
224 return dst;
225}
226
1790a5e9 227int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
228 int v= *src;
229 int r;
230
a9c9a240 231 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
232
233 for(r=1; r<9; r++){
234 if(v&1) return r;
235 v>>=1;
236 }
237 return 0;
238}
239
240/**
1412060e 241 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
242 * @param qp quantization parameter
243 */
239ea04c 244static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
245#define stride 16
246 int i;
247 int temp[16]; //FIXME check if this is a good idea
248 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
249 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
250
251//memset(block, 64, 2*256);
252//return;
253 for(i=0; i<4; i++){
254 const int offset= y_offset[i];
255 const int z0= block[offset+stride*0] + block[offset+stride*4];
256 const int z1= block[offset+stride*0] - block[offset+stride*4];
257 const int z2= block[offset+stride*1] - block[offset+stride*5];
258 const int z3= block[offset+stride*1] + block[offset+stride*5];
259
260 temp[4*i+0]= z0+z3;
261 temp[4*i+1]= z1+z2;
262 temp[4*i+2]= z1-z2;
263 temp[4*i+3]= z0-z3;
264 }
265
266 for(i=0; i<4; i++){
267 const int offset= x_offset[i];
268 const int z0= temp[4*0+i] + temp[4*2+i];
269 const int z1= temp[4*0+i] - temp[4*2+i];
270 const int z2= temp[4*1+i] - temp[4*3+i];
271 const int z3= temp[4*1+i] + temp[4*3+i];
272
1412060e 273 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
274 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
275 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
276 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
277 }
278}
279
#if 0
/**
 * DCT transforms the 16 dc values.
 * Currently compiled out.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    for(i=0; i<4; i++){
        const DCTELEM *in = block + y_offset[i];
        const int sum04 = in[stride*0] + in[stride*4];
        const int dif04 = in[stride*0] - in[stride*4];
        const int dif15 = in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04+sum15;
        temp[4*i+1]= dif04+dif15;
        temp[4*i+2]= dif04-dif15;
        temp[4*i+3]= sum04-sum15;
    }

    for(i=0; i<4; i++){
        DCTELEM *out = block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int dif02 = temp[4*0+i] - temp[4*2+i];
        const int dif13 = temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif

#undef xStride
#undef stride
322
239ea04c 323static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
324 const int stride= 16*2;
325 const int xStride= 16;
326 int a,b,c,d,e;
327
328 a= block[stride*0 + xStride*0];
329 b= block[stride*0 + xStride*1];
330 c= block[stride*1 + xStride*0];
331 d= block[stride*1 + xStride*1];
332
333 e= a-b;
334 a= a+b;
335 b= c-d;
336 c= c+d;
337
239ea04c
LM
338 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
339 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
340 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
341 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
342}
343
#if 0
/**
 * Apply a 2x2 butterfly to the four chroma dc values, without scaling.
 * Currently compiled out.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    const int v00 = block[stride*0 + xStride*0];
    const int v01 = block[stride*0 + xStride*1];
    const int v10 = block[stride*1 + xStride*0];
    const int v11 = block[stride*1 + xStride*1];

    const int row0_sum  = v00 + v01;
    const int row0_diff = v00 - v01;
    const int row1_sum  = v10 + v11;
    const int row1_diff = v10 - v11;

    block[stride*0 + xStride*0]= (row0_sum  + row1_sum );
    block[stride*0 + xStride*1]= (row0_diff + row1_diff);
    block[stride*1 + xStride*0]= (row0_sum  - row1_sum );
    block[stride*1 + xStride*1]= (row0_diff - row1_diff);
}
#endif
0da71265 366
0da71265
MN
367static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
368 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
369 int src_x_offset, int src_y_offset,
370 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
371 MpegEncContext * const s = &h->s;
372 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 373 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 374 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
375 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
376 uint8_t * src_cb, * src_cr;
377 int extra_width= h->emu_edge_width;
378 int extra_height= h->emu_edge_height;
0da71265
MN
379 int emu=0;
380 const int full_mx= mx>>2;
381 const int full_my= my>>2;
fbd312fd 382 const int pic_width = 16*s->mb_width;
0d43dd8c 383 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 384
0da71265
MN
385 if(mx&7) extra_width -= 3;
386 if(my&7) extra_height -= 3;
115329f1
DB
387
388 if( full_mx < 0-extra_width
389 || full_my < 0-extra_height
390 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 391 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
392 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
393 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
394 emu=1;
395 }
115329f1 396
5d18eaad 397 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 398 if(!square){
5d18eaad 399 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 400 }
115329f1 401
49fb20cb 402 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 403
0d43dd8c 404 if(MB_FIELD){
5d18eaad 405 // chroma offset when predicting from a field of opposite parity
2143b118 406 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
407 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
408 }
409 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
410 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
411
0da71265 412 if(emu){
5d18eaad 413 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
414 src_cb= s->edge_emu_buffer;
415 }
5d18eaad 416 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
417
418 if(emu){
5d18eaad 419 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
420 src_cr= s->edge_emu_buffer;
421 }
5d18eaad 422 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
423}
424
9f2d1b4f 425static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
426 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
427 int x_offset, int y_offset,
428 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
429 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
430 int list0, int list1){
431 MpegEncContext * const s = &h->s;
432 qpel_mc_func *qpix_op= qpix_put;
433 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 434
5d18eaad
LM
435 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
436 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
437 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 438 x_offset += 8*s->mb_x;
0d43dd8c 439 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 440
0da71265 441 if(list0){
1924f3ce 442 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
443 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
444 dest_y, dest_cb, dest_cr, x_offset, y_offset,
445 qpix_op, chroma_op);
446
447 qpix_op= qpix_avg;
448 chroma_op= chroma_avg;
449 }
450
451 if(list1){
1924f3ce 452 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
453 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
454 dest_y, dest_cb, dest_cr, x_offset, y_offset,
455 qpix_op, chroma_op);
456 }
457}
458
9f2d1b4f
LM
459static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
460 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
461 int x_offset, int y_offset,
462 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
463 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
464 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
465 int list0, int list1){
466 MpegEncContext * const s = &h->s;
467
5d18eaad
LM
468 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
469 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
470 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 471 x_offset += 8*s->mb_x;
0d43dd8c 472 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 473
9f2d1b4f
LM
474 if(list0 && list1){
475 /* don't optimize for luma-only case, since B-frames usually
476 * use implicit weights => chroma too. */
477 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
478 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
479 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
480 int refn0 = h->ref_cache[0][ scan8[n] ];
481 int refn1 = h->ref_cache[1][ scan8[n] ];
482
483 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
484 dest_y, dest_cb, dest_cr,
485 x_offset, y_offset, qpix_put, chroma_put);
486 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
487 tmp_y, tmp_cb, tmp_cr,
488 x_offset, y_offset, qpix_put, chroma_put);
489
490 if(h->use_weight == 2){
491 int weight0 = h->implicit_weight[refn0][refn1];
492 int weight1 = 64 - weight0;
5d18eaad
LM
493 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
494 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
495 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 496 }else{
5d18eaad 497 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
3d9137c8
MN
498 h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
499 h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
5d18eaad 500 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8
MN
501 h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
502 h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
5d18eaad 503 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8
MN
504 h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
505 h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
9f2d1b4f
LM
506 }
507 }else{
508 int list = list1 ? 1 : 0;
509 int refn = h->ref_cache[list][ scan8[n] ];
510 Picture *ref= &h->ref_list[list][refn];
511 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
512 dest_y, dest_cb, dest_cr, x_offset, y_offset,
513 qpix_put, chroma_put);
514
5d18eaad 515 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3d9137c8 516 h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
9f2d1b4f 517 if(h->use_weight_chroma){
5d18eaad 518 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8 519 h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
5d18eaad 520 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3d9137c8 521 h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
9f2d1b4f
LM
522 }
523 }
524}
525
526static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
527 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
528 int x_offset, int y_offset,
529 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
530 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 531 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
532 int list0, int list1){
533 if((h->use_weight==2 && list0 && list1
534 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
535 || h->use_weight==1)
536 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
537 x_offset, y_offset, qpix_put, chroma_put,
538 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
539 else
540 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
541 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
542}
543
513fbd8e
LM
544static inline void prefetch_motion(H264Context *h, int list){
545 /* fetch pixels for estimated mv 4 macroblocks ahead
546 * optimized for 64byte cache lines */
547 MpegEncContext * const s = &h->s;
548 const int refn = h->ref_cache[list][scan8[0]];
549 if(refn >= 0){
550 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
551 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
552 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 553 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
554 s->dsp.prefetch(src[0]+off, s->linesize, 4);
555 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
556 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
557 }
558}
559
0da71265
MN
560static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
561 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
562 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
563 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 564 MpegEncContext * const s = &h->s;
64514ee8 565 const int mb_xy= h->mb_xy;
0da71265 566 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 567
0da71265 568 assert(IS_INTER(mb_type));
115329f1 569
513fbd8e
LM
570 prefetch_motion(h, 0);
571
0da71265
MN
572 if(IS_16X16(mb_type)){
573 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
574 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
7231ccf4 575 weight_op, weight_avg,
0da71265
MN
576 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
577 }else if(IS_16X8(mb_type)){
578 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
579 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 580 &weight_op[1], &weight_avg[1],
0da71265
MN
581 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
582 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
583 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 584 &weight_op[1], &weight_avg[1],
0da71265
MN
585 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
586 }else if(IS_8X16(mb_type)){
5d18eaad 587 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 588 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 589 &weight_op[2], &weight_avg[2],
0da71265 590 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 591 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 592 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 593 &weight_op[2], &weight_avg[2],
0da71265
MN
594 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
595 }else{
596 int i;
115329f1 597
0da71265
MN
598 assert(IS_8X8(mb_type));
599
600 for(i=0; i<4; i++){
601 const int sub_mb_type= h->sub_mb_type[i];
602 const int n= 4*i;
603 int x_offset= (i&1)<<2;
604 int y_offset= (i&2)<<1;
605
606 if(IS_SUB_8X8(sub_mb_type)){
607 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
608 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 609 &weight_op[3], &weight_avg[3],
0da71265
MN
610 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
611 }else if(IS_SUB_8X4(sub_mb_type)){
612 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
613 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 614 &weight_op[4], &weight_avg[4],
0da71265
MN
615 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
616 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
617 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 618 &weight_op[4], &weight_avg[4],
0da71265
MN
619 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
620 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 621 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 622 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 623 &weight_op[5], &weight_avg[5],
0da71265 624 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 625 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 626 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 627 &weight_op[5], &weight_avg[5],
0da71265
MN
628 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
629 }else{
630 int j;
631 assert(IS_SUB_4X4(sub_mb_type));
632 for(j=0; j<4; j++){
633 int sub_x_offset= x_offset + 2*(j&1);
634 int sub_y_offset= y_offset + (j&2);
635 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
636 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 637 &weight_op[6], &weight_avg[6],
0da71265
MN
638 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
639 }
640 }
641 }
642 }
513fbd8e
LM
643
644 prefetch_motion(h, 1);
0da71265
MN
645}
646
0da71265 647
0da71265 648static void free_tables(H264Context *h){
7978debd 649 int i;
afebe2f7 650 H264Context *hx;
0da71265 651 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
652 av_freep(&h->chroma_pred_mode_table);
653 av_freep(&h->cbp_table);
9e528114
LA
654 av_freep(&h->mvd_table[0]);
655 av_freep(&h->mvd_table[1]);
5ad984c9 656 av_freep(&h->direct_table);
0da71265
MN
657 av_freep(&h->non_zero_count);
658 av_freep(&h->slice_table_base);
659 h->slice_table= NULL;
c988f975 660 av_freep(&h->list_counts);
e5017ab8 661
0da71265 662 av_freep(&h->mb2b_xy);
d43c1922 663 av_freep(&h->mb2br_xy);
9f2d1b4f 664
6752dd5a 665 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
666 hx = h->thread_context[i];
667 if(!hx) continue;
668 av_freep(&hx->top_borders[1]);
669 av_freep(&hx->top_borders[0]);
670 av_freep(&hx->s.obmc_scratchpad);
d2d5e067
AS
671 av_freep(&hx->rbsp_buffer[1]);
672 av_freep(&hx->rbsp_buffer[0]);
eda4ea4e
MS
673 hx->rbsp_buffer_size[0] = 0;
674 hx->rbsp_buffer_size[1] = 0;
d2d5e067 675 if (i) av_freep(&h->thread_context[i]);
afebe2f7 676 }
0da71265
MN
677}
678
239ea04c
LM
679static void init_dequant8_coeff_table(H264Context *h){
680 int i,q,x;
4693b031 681 const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
682 h->dequant8_coeff[0] = h->dequant8_buffer[0];
683 h->dequant8_coeff[1] = h->dequant8_buffer[1];
684
685 for(i=0; i<2; i++ ){
686 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
687 h->dequant8_coeff[1] = h->dequant8_buffer[0];
688 break;
689 }
690
691 for(q=0; q<52; q++){
d9ec210b
DP
692 int shift = div6[q];
693 int idx = rem6[q];
239ea04c 694 for(x=0; x<64; x++)
548a1c8a
LM
695 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
696 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
697 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
698 }
699 }
700}
701
702static void init_dequant4_coeff_table(H264Context *h){
703 int i,j,q,x;
4693b031 704 const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
705 for(i=0; i<6; i++ ){
706 h->dequant4_coeff[i] = h->dequant4_buffer[i];
707 for(j=0; j<i; j++){
708 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
709 h->dequant4_coeff[i] = h->dequant4_buffer[j];
710 break;
711 }
712 }
713 if(j<i)
714 continue;
715
716 for(q=0; q<52; q++){
d9ec210b
DP
717 int shift = div6[q] + 2;
718 int idx = rem6[q];
239ea04c 719 for(x=0; x<16; x++)
ab2e3e2c
LM
720 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
721 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
722 h->pps.scaling_matrix4[i][x]) << shift;
723 }
724 }
725}
726
727static void init_dequant_tables(H264Context *h){
728 int i,x;
729 init_dequant4_coeff_table(h);
730 if(h->pps.transform_8x8_mode)
731 init_dequant8_coeff_table(h);
732 if(h->sps.transform_bypass){
733 for(i=0; i<6; i++)
734 for(x=0; x<16; x++)
735 h->dequant4_coeff[i][0][x] = 1<<6;
736 if(h->pps.transform_8x8_mode)
737 for(i=0; i<2; i++)
738 for(x=0; x<64; x++)
739 h->dequant8_coeff[i][0][x] = 1<<6;
740 }
741}
742
743
903d58f6 744int ff_h264_alloc_tables(H264Context *h){
0da71265 745 MpegEncContext * const s = &h->s;
7bc9090a 746 const int big_mb_num= s->mb_stride * (s->mb_height+1);
145061a1 747 const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
239ea04c 748 int x,y;
0da71265 749
145061a1 750 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)
e5017ab8 751
c988f975 752 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
d31dbec3
RP
753 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
754 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265 755
d31dbec3 756 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
145061a1
MN
757 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
758 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
36b54927 759 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
c988f975 760 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
e5017ab8 761
b735aeea 762 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 763 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 764
d31dbec3 765 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
d43c1922 766 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
MN
767 for(y=0; y<s->mb_height; y++){
768 for(x=0; x<s->mb_width; x++){
7bc9090a 769 const int mb_xy= x + y*s->mb_stride;
0da71265 770 const int b_xy = 4*x + 4*y*h->b_stride;
115329f1 771
0da71265 772 h->mb2b_xy [mb_xy]= b_xy;
e1c88a21 773 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
0da71265
MN
774 }
775 }
9f2d1b4f 776
9c6221ae
GV
777 s->obmc_scratchpad = NULL;
778
56edbd81
LM
779 if(!h->dequant4_coeff[0])
780 init_dequant_tables(h);
781
0da71265
MN
782 return 0;
783fail:
784 free_tables(h);
785 return -1;
786}
787
afebe2f7
788/**
789 * Mimic alloc_tables(), but for every context thread.
790 */
145061a1
MN
791static void clone_tables(H264Context *dst, H264Context *src, int i){
792 MpegEncContext * const s = &src->s;
793 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
afebe2f7
794 dst->non_zero_count = src->non_zero_count;
795 dst->slice_table = src->slice_table;
796 dst->cbp_table = src->cbp_table;
797 dst->mb2b_xy = src->mb2b_xy;
d43c1922 798 dst->mb2br_xy = src->mb2br_xy;
afebe2f7 799 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
145061a1
MN
800 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
801 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
afebe2f7 802 dst->direct_table = src->direct_table;
fb823b77 803 dst->list_counts = src->list_counts;
afebe2f7 804
afebe2f7
805 dst->s.obmc_scratchpad = NULL;
806 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
807}
808
809/**
810 * Init context
811 * Allocate buffers which are not shared amongst multiple threads.
812 */
813static int context_init(H264Context *h){
d31dbec3
RP
814 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
815 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
afebe2f7 816
145061a1
MN
817 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
818 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
819
afebe2f7
820 return 0;
821fail:
822 return -1; // free_tables will clean up for us
823}
824
9855b2e3
MN
/* Forward declaration: ff_h264_decode_init() below calls this to parse the
 * parameter sets carried in avctx->extradata. */
825static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
826
98a6fff9 827static av_cold void common_init(H264Context *h){
0da71265 828 MpegEncContext * const s = &h->s;
0da71265
MN
829
830 s->width = s->avctx->width;
831 s->height = s->avctx->height;
832 s->codec_id= s->avctx->codec->id;
115329f1 833
4693b031 834 ff_h264dsp_init(&h->h264dsp);
c92a30bb 835 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 836
239ea04c 837 h->dequant_coeff_pps= -1;
9a41c2c7 838 s->unrestricted_mv=1;
0da71265 839 s->decode=1; //FIXME
56edbd81 840
a5805aa9
MN
841 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
842
56edbd81
LM
843 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
844 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
845}
846
903d58f6 847av_cold int ff_h264_decode_init(AVCodecContext *avctx){
0da71265
MN
848 H264Context *h= avctx->priv_data;
849 MpegEncContext * const s = &h->s;
850
3edcacde 851 MPV_decode_defaults(s);
115329f1 852
0da71265
MN
853 s->avctx = avctx;
854 common_init(h);
855
856 s->out_format = FMT_H264;
857 s->workaround_bugs= avctx->workaround_bugs;
858
859 // set defaults
0da71265 860// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 861 s->quarter_sample = 1;
47cd974a 862 if(!avctx->has_b_frames)
0da71265 863 s->low_delay= 1;
7a9dba3c 864
580a7465 865 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
0da71265 866
e1e94902 867 ff_h264_decode_init_vlc();
115329f1 868
afebe2f7 869 h->thread_context[0] = h;
18c7be65 870 h->outputed_poc = INT_MIN;
e4b8f1fa 871 h->prev_poc_msb= 1<<16;
055a6aa7 872 h->x264_build = -1;
9c095463 873 ff_h264_reset_sei(h);
efd8c1f6
MN
874 if(avctx->codec_id == CODEC_ID_H264){
875 if(avctx->ticks_per_frame == 1){
876 s->avctx->time_base.den *=2;
877 }
19df37a8 878 avctx->ticks_per_frame = 2;
efd8c1f6 879 }
9855b2e3
MN
880
881 if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){
882 int i, cnt, nalsize;
883 unsigned char *p = avctx->extradata;
884
885 h->is_avc = 1;
886
887 if(avctx->extradata_size < 7) {
888 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
889 return -1;
890 }
891 /* sps and pps in the avcC always have length coded with 2 bytes,
892 so put a fake nal_length_size = 2 while parsing them */
893 h->nal_length_size = 2;
894 // Decode sps from avcC
895 cnt = *(p+5) & 0x1f; // Number of sps
896 p += 6;
897 for (i = 0; i < cnt; i++) {
898 nalsize = AV_RB16(p) + 2;
899 if(decode_nal_units(h, p, nalsize) < 0) {
900 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
901 return -1;
902 }
903 p += nalsize;
904 }
905 // Decode pps from avcC
906 cnt = *(p++); // Number of pps
907 for (i = 0; i < cnt; i++) {
908 nalsize = AV_RB16(p) + 2;
909 if(decode_nal_units(h, p, nalsize) != nalsize) {
910 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
911 return -1;
912 }
913 p += nalsize;
914 }
915 // Now store right nal length size, that will be use to parse all other nals
916 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
917 } else {
918 h->is_avc = 0;
919 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
920 return -1;
921 }
db8cb47d
MN
922 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
923 s->avctx->has_b_frames = h->sps.num_reorder_frames;
924 s->low_delay = 0;
925 }
9855b2e3 926
0da71265
MN
927 return 0;
928}
929
903d58f6 930int ff_h264_frame_start(H264Context *h){
0da71265
MN
931 MpegEncContext * const s = &h->s;
932 int i;
933
af8aa846
MN
934 if(MPV_frame_start(s, s->avctx) < 0)
935 return -1;
0da71265 936 ff_er_frame_start(s);
3a22d7fa
JD
937 /*
938 * MPV_frame_start uses pict_type to derive key_frame.
939 * This is incorrect for H.264; IDR markings must be used.
1412060e 940 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
941 * See decode_nal_units().
942 */
943 s->current_picture_ptr->key_frame= 0;
c173a088 944 s->current_picture_ptr->mmco_reset= 0;
0da71265
MN
945
946 assert(s->linesize && s->uvlinesize);
947
948 for(i=0; i<16; i++){
949 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 950 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
951 }
952 for(i=0; i<4; i++){
953 h->block_offset[16+i]=
954 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
955 h->block_offset[24+16+i]=
956 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
957 }
958
934b0821
LM
959 /* can't be in alloc_tables because linesize isn't known there.
960 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
961 for(i = 0; i < s->avctx->thread_count; i++)
962 if(!h->thread_context[i]->s.obmc_scratchpad)
963 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad 964
2ce1c2e0 965 /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
5820b90d 966 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 967
0da71265 968// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 969
1412060e 970 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
971 // that if we break out due to an error it can be released automatically
972 // in the next MPV_frame_start().
973 // SVQ3 as well as most other codecs have only last/next/current and thus
974 // get released even with set reference, besides SVQ3 and others do not
975 // mark frames as reference later "naturally".
976 if(s->codec_id != CODEC_ID_SVQ3)
977 s->current_picture_ptr->reference= 0;
357282c6
MN
978
979 s->current_picture_ptr->field_poc[0]=
980 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 981 assert(s->current_picture_ptr->long_ref==0);
357282c6 982
af8aa846 983 return 0;
0da71265
MN
984}
985
93cc10fa 986static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e 987 MpegEncContext * const s = &h->s;
0b69d625 988 uint8_t *top_border;
5f7f9719 989 int top_idx = 1;
115329f1 990
53c05b1e
MN
991 src_y -= linesize;
992 src_cb -= uvlinesize;
993 src_cr -= uvlinesize;
994
5f7f9719
MN
995 if(!simple && FRAME_MBAFF){
996 if(s->mb_y&1){
5f7f9719 997 if(!MB_MBAFF){
0b69d625
AS
998 top_border = h->top_borders[0][s->mb_x];
999 AV_COPY128(top_border, src_y + 15*linesize);
49fb20cb 1000 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
1001 AV_COPY64(top_border+16, src_cb+7*uvlinesize);
1002 AV_COPY64(top_border+24, src_cr+7*uvlinesize);
5f7f9719
MN
1003 }
1004 }
c988f975
MN
1005 }else if(MB_MBAFF){
1006 top_idx = 0;
1007 }else
1008 return;
5f7f9719
MN
1009 }
1010
0b69d625 1011 top_border = h->top_borders[top_idx][s->mb_x];
3b66c4c5 1012 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 1013 // and the line above the bottom macroblock
0b69d625 1014 AV_COPY128(top_border, src_y + 16*linesize);
53c05b1e 1015
49fb20cb 1016 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
1017 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
1018 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
53c05b1e
MN
1019 }
1020}
1021
93cc10fa 1022static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e 1023 MpegEncContext * const s = &h->s;
b69378e2
1024 int deblock_left;
1025 int deblock_top;
5f7f9719 1026 int top_idx = 1;
1e4f1c56
AS
1027 uint8_t *top_border_m1;
1028 uint8_t *top_border;
5f7f9719
MN
1029
1030 if(!simple && FRAME_MBAFF){
1031 if(s->mb_y&1){
c988f975
MN
1032 if(!MB_MBAFF)
1033 return;
5f7f9719 1034 }else{
5f7f9719
MN
1035 top_idx = MB_MBAFF ? 0 : 1;
1036 }
5f7f9719 1037 }
b69378e2
1038
1039 if(h->deblocking_filter == 2) {
024bf79f
MN
1040 deblock_left = h->left_type[0];
1041 deblock_top = h->top_type;
b69378e2
1042 } else {
1043 deblock_left = (s->mb_x > 0);
6c805007 1044 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 1045 }
53c05b1e
MN
1046
1047 src_y -= linesize + 1;
1048 src_cb -= uvlinesize + 1;
1049 src_cr -= uvlinesize + 1;
1050
1e4f1c56
AS
1051 top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1052 top_border = h->top_borders[top_idx][s->mb_x];
1053
0b69d625
AS
1054#define XCHG(a,b,xchg)\
1055if (xchg) AV_SWAP64(b,a);\
1056else AV_COPY64(b,a);
d89dc06a 1057
d89dc06a 1058 if(deblock_top){
c988f975 1059 if(deblock_left){
0b69d625 1060 XCHG(top_border_m1+8, src_y -7, 1);
c988f975 1061 }
0b69d625
AS
1062 XCHG(top_border+0, src_y +1, xchg);
1063 XCHG(top_border+8, src_y +9, 1);
cad4368a 1064 if(s->mb_x+1 < s->mb_width){
0b69d625 1065 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
43efd19a 1066 }
53c05b1e 1067 }
53c05b1e 1068
49fb20cb 1069 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 1070 if(deblock_top){
c988f975 1071 if(deblock_left){
0b69d625
AS
1072 XCHG(top_border_m1+16, src_cb -7, 1);
1073 XCHG(top_border_m1+24, src_cr -7, 1);
c988f975 1074 }
0b69d625
AS
1075 XCHG(top_border+16, src_cb+1, 1);
1076 XCHG(top_border+24, src_cr+1, 1);
53c05b1e 1077 }
53c05b1e
MN
1078 }
1079}
1080
/**
 * Reconstruct the current macroblock (h->mb_xy) into s->current_picture.
 *
 * Intra macroblocks are predicted through the h->hpc function tables, H.264
 * inter macroblocks through hl_motion(); the luma and chroma residuals held
 * in h->mb are then added, and h->mb is cleared at the end when the
 * macroblock is intra or has a non-zero cbp.
 *
 * @param h      decoder context
 * @param simple the two callers pass a constant 1 or 0. When non-zero the
 *               MB_FIELD/FRAME_MBAFF, intra PCM, transform bypass,
 *               CODEC_FLAG_GRAY and SVQ3 tests below short-circuit (see the
 *               "!simple &&" and "simple ||" conditions).
 */
5a6a6cc7 1081static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
1082 MpegEncContext * const s = &h->s;
1083 const int mb_x= s->mb_x;
1084 const int mb_y= s->mb_y;
64514ee8 1085 const int mb_xy= h->mb_xy;
0da71265
MN
1086 const int mb_type= s->current_picture.mb_type[mb_xy];
1087 uint8_t *dest_y, *dest_cb, *dest_cr;
1088 int linesize, uvlinesize /*dct_offset*/;
1089 int i;
6867a90b 1090 int *block_offset = &h->block_offset[0];
41e4055b 1091 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 1092 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 1093 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 1094 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 1095 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 1096
/* Top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma). */
6120a343
MN
1097 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1098 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1099 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 1100
a957c27b
LM
1101 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1102 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1103
c988f975
MN
1104 h->list_counts[mb_xy]= h->list_count;
1105
/* Field macroblock: use doubled strides and the second half of
 * block_offset (entries from 24 on, which ff_h264_frame_start() fills
 * using doubled line sizes). */
bd91fee3 1106 if (!simple && MB_FIELD) {
5d18eaad
LM
1107 linesize = h->mb_linesize = s->linesize * 2;
1108 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 1109 block_offset = &h->block_offset[24];
/* Odd macroblock row: move the destination up by 15 luma / 7 chroma lines,
 * i.e. to the second line of the macroblock row above. */
1412060e 1110 if(mb_y&1){ //FIXME move out of this function?
0da71265 1111 dest_y -= s->linesize*15;
6867a90b
LLL
1112 dest_cb-= s->uvlinesize*7;
1113 dest_cr-= s->uvlinesize*7;
0da71265 1114 }
/* For every list this macroblock uses, rewrite the cached reference
 * indices as (16+ref)^(mb_y&1). */
5d18eaad
LM
1115 if(FRAME_MBAFF) {
1116 int list;
3425501d 1117 for(list=0; list<h->list_count; list++){
5d18eaad
LM
1118 if(!USES_LIST(mb_type, list))
1119 continue;
1120 if(IS_16X16(mb_type)){
1121 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 1122 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
1123 }else{
1124 for(i=0; i<16; i+=4){
5d18eaad
LM
1125 int ref = h->ref_cache[list][scan8[i]];
1126 if(ref >= 0)
1710856c 1127 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
1128 }
1129 }
1130 }
1131 }
0da71265 1132 } else {
5d18eaad
LM
1133 linesize = h->mb_linesize = s->linesize;
1134 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
1135// dct_offset = s->linesize * 16;
1136 }
115329f1 1137
/* Intra PCM: the samples are copied straight out of h->mb, 16 bytes per
 * luma row and 8 bytes per chroma row; no prediction or transform. */
bd91fee3 1138 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
1139 for (i=0; i<16; i++) {
1140 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 1141 }
c1708e8d
MN
1142 for (i=0; i<8; i++) {
1143 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1144 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 1145 }
e7e09b49
LLL
1146 } else {
1147 if(IS_INTRA(mb_type)){
/* With deblocking enabled, swap the row above the macroblock with
 * h->top_borders for the duration of intra prediction (presumably the
 * samples saved by backup_mb_border() -- TODO confirm against the caller).
 * The matching call with xchg=0 after prediction undoes the swap. */
5f7f9719 1148 if(h->deblocking_filter)
93cc10fa 1149 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 1150
49fb20cb 1151 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
1152 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1153 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 1154 }
0da71265 1155
e7e09b49 1156 if(IS_INTRA4x4(mb_type)){
bd91fee3 1157 if(simple || !s->encoding){
/* 8x8 transform: four 8x8 blocks, each predicted and then immediately
 * followed by its residual. */
43efd19a 1158 if(IS_8x8DCT(mb_type)){
1eb96035
MN
1159 if(transform_bypass){
1160 idct_dc_add =
1161 idct_add = s->dsp.add_pixels8;
dae006d7 1162 }else{
4693b031
MR
1163 idct_dc_add = h->h264dsp.h264_idct8_dc_add;
1164 idct_add = h->h264dsp.h264_idct8_add;
1eb96035 1165 }
43efd19a
LM
1166 for(i=0; i<16; i+=4){
1167 uint8_t * const ptr= dest_y + block_offset[i];
1168 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
1169 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1170 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1171 }else{
ac0623b2
MN
1172 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1173 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1174 (h->topright_samples_available<<i)&0x4000, linesize);
/* A single non-zero coefficient that is the DC one: use the DC-only add. */
1175 if(nnz){
1176 if(nnz == 1 && h->mb[i*16])
1177 idct_dc_add(ptr, h->mb + i*16, linesize);
1178 else
1179 idct_add (ptr, h->mb + i*16, linesize);
1180 }
41e4055b 1181 }
43efd19a 1182 }
/* 4x4 transform: sixteen 4x4 blocks, each predicted and then immediately
 * followed by its residual. */
1eb96035
MN
1183 }else{
1184 if(transform_bypass){
1185 idct_dc_add =
1186 idct_add = s->dsp.add_pixels4;
1187 }else{
4693b031
MR
1188 idct_dc_add = h->h264dsp.h264_idct_dc_add;
1189 idct_add = h->h264dsp.h264_idct_add;
1eb96035 1190 }
aebb5d6d
MN
1191 for(i=0; i<16; i++){
1192 uint8_t * const ptr= dest_y + block_offset[i];
1193 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 1194
aebb5d6d
MN
1195 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1196 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1197 }else{
1198 uint8_t *topright;
1199 int nnz, tr;
/* Only these two modes are given a top-right pointer. When the top-right
 * samples are unavailable, the last sample of the row above
 * (ptr[3 - linesize]) is replicated into the 4-byte temporary tr. */
1200 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1201 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1202 assert(mb_y || linesize <= block_offset[i]);
1203 if(!topright_avail){
1204 tr= ptr[3 - linesize]*0x01010101;
1205 topright= (uint8_t*) &tr;
1206 }else
1207 topright= ptr + 4 - linesize;
ac0623b2 1208 }else
aebb5d6d
MN
1209 topright= NULL;
1210
1211 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1212 nnz = h->non_zero_count_cache[ scan8[i] ];
1213 if(nnz){
1214 if(is_h264){
1215 if(nnz == 1 && h->mb[i*16])
1216 idct_dc_add(ptr, h->mb + i*16, linesize);
1217 else
1218 idct_add (ptr, h->mb + i*16, linesize);
1219 }else
881b5b80 1220 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
aebb5d6d 1221 }
ac0623b2 1222 }
41e4055b 1223 }
8b82a956 1224 }
0da71265 1225 }
/* Not intra 4x4: predict the whole 16x16 luma block and transform the luma
 * DC coefficients here; the residual itself is added further down. */
e7e09b49 1226 }else{
c92a30bb 1227 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 1228 if(is_h264){
36940eca 1229 if(!transform_bypass)
93f0c0a4 1230 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 1231 }else
881b5b80 1232 ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 1233 }
5f7f9719 1234 if(h->deblocking_filter)
93cc10fa 1235 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* Inter macroblock (H.264 only in this function): motion compensation. */
bd91fee3 1236 }else if(is_h264){
e7e09b49 1237 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
1238 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1239 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
4693b031 1240 h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
0da71265 1241 }
e7e09b49
LLL
1242
1243
/* Luma residual for everything except intra 4x4, whose residual was
 * already added block by block above. */
1244 if(!IS_INTRA4x4(mb_type)){
bd91fee3 1245 if(is_h264){
ef9d1d15 1246 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
1247 if(transform_bypass){
1248 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
1249 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1250 }else{
1251 for(i=0; i<16; i++){
1252 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 1253 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1254 }
2fd1f0e0
MN
1255 }
1256 }else{
4693b031 1257 h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 1258 }
/* Low four cbp bits set: some luma block carries coefficients. */
49c084a7 1259 }else if(h->cbp&15){
2fd1f0e0 1260 if(transform_bypass){
0a8ca22f 1261 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 1262 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 1263 for(i=0; i<16; i+=di){
62bc966f 1264 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 1265 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1266 }
ef9d1d15 1267 }
2fd1f0e0
MN
1268 }else{
1269 if(IS_8x8DCT(mb_type)){
4693b031 1270 h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2fd1f0e0 1271 }else{
4693b031 1272 h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2fd1f0e0
MN
1273 }
1274 }
4704097a 1275 }
/* Not H.264 (the SVQ3 path): per-block SVQ3 inverse transform. */
e7e09b49
LLL
1276 }else{
1277 for(i=0; i<16; i++){
1278 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 1279 uint8_t * const ptr= dest_y + block_offset[i];
881b5b80 1280 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
e7e09b49 1281 }
4704097a 1282 }
0da71265
MN
1283 }
1284 }
0da71265 1285
/* Chroma residual: blocks 16..19 go to dest[0] (Cb), 20..23 to dest[1]
 * (Cr), selected by (i&4)>>2. Only done when cbp bits 0x30 are set. */
49fb20cb 1286 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
1287 uint8_t *dest[2] = {dest_cb, dest_cr};
1288 if(transform_bypass){
96465b90
MN
1289 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1290 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1291 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1292 }else{
c25ac15a 1293 idct_add = s->dsp.add_pixels4;
96465b90
MN
1294 for(i=16; i<16+8; i++){
1295 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1296 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1297 }
1298 }
ef9d1d15 1299 }else{
/* Chroma DC transform: dequant table 1 (intra) or 4 (inter) for the first
 * chroma plane, 2 or 5 for the second. */
4691a77d
1300 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1301 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 1302 if(is_h264){
4693b031
MR
1303 idct_add = h->h264dsp.h264_idct_add;
1304 idct_dc_add = h->h264dsp.h264_idct_dc_add;
ac0623b2
MN
1305 for(i=16; i<16+8; i++){
1306 if(h->non_zero_count_cache[ scan8[i] ])
1307 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1308 else if(h->mb[i*16])
1309 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1310 }
aebb5d6d
MN
1311 }else{
1312 for(i=16; i<16+8; i++){
1313 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
1314 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
881b5b80 1315 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
aebb5d6d 1316 }
e7e09b49 1317 }
4704097a 1318 }
0da71265
MN
1319 }
1320 }
1321 }
/* Clear the coefficient buffer, but only when the macroblock is intra or
 * has a non-zero cbp. */
c212fb0c
MN
1322 if(h->cbp || IS_INTRA(mb_type))
1323 s->dsp.clear_blocks(h->mb);
0da71265
MN
1324}
1325
0da71265 1326/**
bd91fee3
AS
1327 * Process a macroblock; this case avoids checks for expensive uncommon cases.
1328 */
1329static void hl_decode_mb_simple(H264Context *h){
1330 hl_decode_mb_internal(h, 1);
1331}
1332
1333/**
1334 * Process a macroblock; this handles edge cases, such as interlacing.
1335 */
1336static void av_noinline hl_decode_mb_complex(H264Context *h){
1337 hl_decode_mb_internal(h, 0);
1338}
1339
903d58f6 1340void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3 1341 MpegEncContext * const s = &h->s;
64514ee8 1342 const int mb_xy= h->mb_xy;
bd91fee3 1343 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 1344 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 1345
bd91fee3
AS
1346 if (is_complex)
1347 hl_decode_mb_complex(h);
1348 else hl_decode_mb_simple(h);
1349}
1350
0da71265
MN
1351static int pred_weight_table(H264Context *h){
1352 MpegEncContext * const s = &h->s;
1353 int list, i;
9f2d1b4f 1354 int luma_def, chroma_def;
115329f1 1355
9f2d1b4f
LM
1356 h->use_weight= 0;
1357 h->use_weight_chroma= 0;
0da71265
MN
1358 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
1359 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
1360 luma_def = 1<<h->luma_log2_weight_denom;
1361 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
1362
1363 for(list=0; list<2; list++){
cb99c652
GB
1364 h->luma_weight_flag[list] = 0;
1365 h->chroma_weight_flag[list] = 0;
0da71265
MN
1366 for(i=0; i<h->ref_count[list]; i++){
1367 int luma_weight_flag, chroma_weight_flag;
115329f1 1368
0da71265
MN
1369 luma_weight_flag= get_bits1(&s->gb);
1370 if(luma_weight_flag){
3d9137c8
MN
1371 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
1372 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
1373 if( h->luma_weight[i][list][0] != luma_def
1374 || h->luma_weight[i][list][1] != 0) {
9f2d1b4f 1375 h->use_weight= 1;
cb99c652
GB
1376 h->luma_weight_flag[list]= 1;
1377 }
9f2d1b4f 1378 }else{
3d9137c8
MN
1379 h->luma_weight[i][list][0]= luma_def;
1380 h->luma_weight[i][list][1]= 0;
0da71265
MN
1381 }
1382
0af6967e 1383 if(CHROMA){
fef744d4
MN
1384 chroma_weight_flag= get_bits1(&s->gb);
1385 if(chroma_weight_flag){
1386 int j;
1387 for(j=0; j<2; j++){
3d9137c8
MN
1388 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
1389 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
1390 if( h->chroma_weight[i][list][j][0] != chroma_def
1391 || h->chroma_weight[i][list][j][1] != 0) {
fef744d4 1392 h->use_weight_chroma= 1;
cb99c652
GB
1393 h->chroma_weight_flag[list]= 1;
1394 }
fef744d4
MN
1395 }
1396 }else{
1397 int j;
1398 for(j=0; j<2; j++){
3d9137c8
MN
1399 h->chroma_weight[i][list][j][0]= chroma_def;
1400 h->chroma_weight[i][list][j][1]= 0;
fef744d4 1401 }
0da71265
MN
1402 }
1403 }
1404 }
9f5c1037 1405 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 1406 }
9f2d1b4f 1407 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
1408 return 0;
1409}
1410
9f2d1b4f
LM
1411static void implicit_weight_table(H264Context *h){
1412 MpegEncContext * const s = &h->s;
cb99c652 1413 int ref0, ref1, i;
9f2d1b4f
LM
1414 int cur_poc = s->current_picture_ptr->poc;
1415
ce09f927
GB
1416 for (i = 0; i < 2; i++) {
1417 h->luma_weight_flag[i] = 0;
1418 h->chroma_weight_flag[i] = 0;
1419 }
1420
9f2d1b4f
LM
1421 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
1422 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1423 h->use_weight= 0;
1424 h->use_weight_chroma= 0;
1425 return;
1426 }
1427
1428 h->use_weight= 2;
1429 h->use_weight_chroma= 2;
1430 h->luma_log2_weight_denom= 5;
1431 h->chroma_log2_weight_denom= 5;
1432
9f2d1b4f
LM
1433 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
1434 int poc0 = h->ref_list[0][ref0].poc;
1435 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 1436 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 1437 int td = av_clip(poc1 - poc0, -128, 127);
72f86ec0 1438 h->implicit_weight[ref0][ref1] = 32;
9f2d1b4f 1439 if(td){
f66e4f5f 1440 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 1441 int tx = (16384 + (FFABS(td) >> 1)) / td;
72f86ec0
MN
1442 int dist_scale_factor = (tb*tx + 32) >> 8;
1443 if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
9f2d1b4f 1444 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
72f86ec0 1445 }
9f2d1b4f
LM
1446 }
1447 }
1448}
1449
8fd57a66 1450/**
5175b937 1451 * instantaneous decoder refresh.
0da71265
MN
1452 */
1453static void idr(H264Context *h){
ea6f00c4 1454 ff_h264_remove_all_refs(h);
a149c1a5 1455 h->prev_frame_num= 0;
80f8e035
MN
1456 h->prev_frame_num_offset= 0;
1457 h->prev_poc_msb=
1458 h->prev_poc_lsb= 0;
0da71265
MN
1459}
1460
7c33ad19
LM
1461/* forget old pics after a seek */
1462static void flush_dpb(AVCodecContext *avctx){
1463 H264Context *h= avctx->priv_data;
1464 int i;
64b9d48f 1465 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
1466 if(h->delayed_pic[i])
1467 h->delayed_pic[i]->reference= 0;
7c33ad19 1468 h->delayed_pic[i]= NULL;
285b570f 1469 }
df8a7dff 1470 h->outputed_poc= INT_MIN;
b19d493f 1471 h->prev_interlaced_frame = 1;
7c33ad19 1472 idr(h);
ca159196
MR
1473 if(h->s.current_picture_ptr)
1474 h->s.current_picture_ptr->reference= 0;
12d96de3 1475 h->s.first_field= 0;
9c095463 1476 ff_h264_reset_sei(h);
e240f898 1477 ff_mpeg_flush(avctx);
7c33ad19
LM
1478}
1479
0da71265
MN
1480static int init_poc(H264Context *h){
1481 MpegEncContext * const s = &h->s;
1482 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1483 int field_poc[2];
357282c6 1484 Picture *cur = s->current_picture_ptr;
0da71265 1485
b78a6baa 1486 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 1487 if(h->frame_num < h->prev_frame_num)
b78a6baa 1488 h->frame_num_offset += max_frame_num;
0da71265
MN
1489
1490 if(h->sps.poc_type==0){
1491 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1492
1493 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1494 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1495 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1496 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1497 else
1498 h->poc_msb = h->prev_poc_msb;
1499//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 1500 field_poc[0] =
0da71265 1501 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 1502 if(s->picture_structure == PICT_FRAME)
0da71265
MN
1503 field_poc[1] += h->delta_poc_bottom;
1504 }else if(h->sps.poc_type==1){
1505 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1506 int i;
1507
1508 if(h->sps.poc_cycle_length != 0)
1509 abs_frame_num = h->frame_num_offset + h->frame_num;
1510 else
1511 abs_frame_num = 0;
1512
1513 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1514 abs_frame_num--;
115329f1 1515
0da71265
MN
1516 expected_delta_per_poc_cycle = 0;
1517 for(i=0; i < h->sps.poc_cycle_length; i++)
1518 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1519
1520 if(abs_frame_num > 0){
1521 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1522 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1523
1524 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1525 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1526 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1527 } else
1528 expectedpoc = 0;
1529
115329f1 1530 if(h->nal_ref_idc == 0)
0da71265 1531 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 1532
0da71265
MN
1533 field_poc[0] = expectedpoc + h->delta_poc[0];
1534 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1535
1536 if(s->picture_structure == PICT_FRAME)
1537 field_poc[1] += h->delta_poc[1];
1538 }else{
b78a6baa 1539 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 1540
b78a6baa
MN
1541 if(!h->nal_ref_idc)
1542 poc--;
5710b371 1543
0da71265
MN
1544 field_poc[0]= poc;
1545 field_poc[1]= poc;
1546 }
115329f1 1547
357282c6 1548 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 1549 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 1550 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 1551 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 1552 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
1553
1554 return 0;
1555}
1556
b41c1db3
1557
1558/**
1559 * initialize scan tables
1560 */
1561static void init_scan_tables(H264Context *h){
b41c1db3 1562 int i;
4693b031 1563 if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
b41c1db3
1564 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
1565 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
1566 }else{
1567 for(i=0; i<16; i++){
1568#define T(x) (x>>2) | ((x<<2) & 0xF)
1569 h->zigzag_scan[i] = T(zigzag_scan[i]);
1570 h-> field_scan[i] = T( field_scan[i]);
1571#undef T
1572 }
1573 }
4693b031 1574 if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){
45beb850 1575 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
b41c1db3
1576 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
1577 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
1578 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
1579 }else{
1580 for(i=0; i<64; i++){
1581#define T(x) (x>>3) | ((x&7)<<3)
45beb850 1582 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
b41c1db3
1583 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1584 h->field_scan8x8[i] = T(field_scan8x8[i]);
1585 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
1586#undef T
1587 }
1588 }
1589 if(h->sps.transform_bypass){ //FIXME same ugly
1590 h->zigzag_scan_q0 = zigzag_scan;
45beb850 1591 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
1592 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1593 h->field_scan_q0 = field_scan;
1594 h->field_scan8x8_q0 = field_scan8x8;
1595 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1596 }else{
1597 h->zigzag_scan_q0 = h->zigzag_scan;
1598 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1599 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1600 h->field_scan_q0 = h->field_scan;
1601 h->field_scan8x8_q0 = h->field_scan8x8;
1602 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
1603 }
1604}
afebe2f7 1605
256299d3
MN
1606static void field_end(H264Context *h){
1607 MpegEncContext * const s = &h->s;
1608 AVCodecContext * const avctx= s->avctx;
1609 s->mb_y= 0;
1610
1611 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1612 s->current_picture_ptr->pict_type= s->pict_type;
1613
1614 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1615 ff_vdpau_h264_set_reference_frames(s);
1616
1617 if(!s->dropable) {
ea6f00c4 1618 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
1619 h->prev_poc_msb= h->poc_msb;
1620 h->prev_poc_lsb= h->poc_lsb;
1621 }
1622 h->prev_frame_num_offset= h->frame_num_offset;
1623 h->prev_frame_num= h->frame_num;
1624
1625 if (avctx->hwaccel) {
1626 if (avctx->hwaccel->end_frame(avctx) < 0)
1627 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1628 }
1629
1630 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1631 ff_vdpau_h264_picture_complete(s);
1632
1633 /*
1634 * FIXME: Error handling code does not seem to support interlaced
1635 * when slices span multiple rows
1636 * The ff_er_add_slice calls don't work right for bottom
1637 * fields; they cause massive erroneous error concealing
1638 * Error marking covers both fields (top and bottom).
1639 * This causes a mismatched s->error_count
1640 * and a bad error table. Further, the error count goes to
1641 * INT_MAX when called for bottom field, because mb_y is
1642 * past end by one (callers fault) and resync_mb_y != 0
1643 * causes problems for the first MB line, too.
1644 */
1645 if (!FIELD_PICTURE)
1646 ff_er_frame_end(s);
1647
1648 MPV_frame_end(s);
d225a1e2
MN
1649
1650 h->current_slice=0;
256299d3
MN
1651}
1652
afebe2f7
1653/**
1654 * Replicates H264 "master" context to thread contexts.
1655 */
1656static void clone_slice(H264Context *dst, H264Context *src)
1657{
1658 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1659 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1660 dst->s.current_picture = src->s.current_picture;
1661 dst->s.linesize = src->s.linesize;
1662 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 1663 dst->s.first_field = src->s.first_field;
afebe2f7
1664
1665 dst->prev_poc_msb = src->prev_poc_msb;
1666 dst->prev_poc_lsb = src->prev_poc_lsb;
1667 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1668 dst->prev_frame_num = src->prev_frame_num;
1669 dst->short_ref_count = src->short_ref_count;
1670
1671 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1672 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1673 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1674 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
1675
1676 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1677 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
1678}
1679
0da71265
MN
1680/**
1681 * decodes a slice header.
9c852bcf 1682 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
1683 *
1684 * @param h h264context
1685 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1686 *
d9526386 1687 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 1688 */
afebe2f7 1689static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 1690 MpegEncContext * const s = &h->s;
12d96de3 1691 MpegEncContext * const s0 = &h0->s;
88e7a4d1 1692 unsigned int first_mb_in_slice;
ac658be5 1693 unsigned int pps_id;
0da71265 1694 int num_ref_idx_active_override_flag;
41f5c62f 1695 unsigned int slice_type, tmp, i, j;
0bf79634 1696 int default_ref_list_done = 0;
12d96de3 1697 int last_pic_structure;
0da71265 1698
2f944356 1699 s->dropable= h->nal_ref_idc == 0;
0da71265 1700
cf653d08
JD
1701 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1702 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1703 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1704 }else{
1705 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1706 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1707 }
1708
0da71265
MN
1709 first_mb_in_slice= get_ue_golomb(&s->gb);
1710
d225a1e2
MN
1711 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1712 if(h0->current_slice && FIELD_PICTURE){
1713 field_end(h);
1714 }
1715
afebe2f7 1716 h0->current_slice = 0;
12d96de3 1717 if (!s0->first_field)
f6e3c460 1718 s->current_picture_ptr= NULL;
66a4b2c1
MN
1719 }
1720
9963b332 1721 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 1722 if(slice_type > 9){
9b879566 1723 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 1724 return -1;
0da71265 1725 }
0bf79634
LLL
1726 if(slice_type > 4){
1727 slice_type -= 5;
0da71265
MN
1728 h->slice_type_fixed=1;
1729 }else
1730 h->slice_type_fixed=0;
115329f1 1731
ee2a957f 1732 slice_type= golomb_to_pict_type[ slice_type ];
9701840b 1733 if (slice_type == FF_I_TYPE
afebe2f7 1734 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
1735 default_ref_list_done = 1;
1736 }
1737 h->slice_type= slice_type;
e3e6f18f 1738 h->slice_type_nos= slice_type & 3;
0bf79634 1739
1412060e 1740 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
115329f1 1741
0da71265 1742 pps_id= get_ue_golomb(&s->gb);
ac658be5 1743 if(pps_id>=MAX_PPS_COUNT){
9b879566 1744 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
1745 return -1;
1746 }
afebe2f7 1747 if(!h0->pps_buffers[pps_id]) {
a0f80050 1748 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
1749 return -1;
1750 }
afebe2f7 1751 h->pps= *h0->pps_buffers[pps_id];
8b92b792 1752
afebe2f7 1753 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 1754 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
1755 return -1;
1756 }
afebe2f7 1757 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 1758
50c21814 1759 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 1760 h->dequant_coeff_pps = pps_id;
239ea04c
LM
1761 init_dequant_tables(h);
1762 }
115329f1 1763
0da71265 1764 s->mb_width= h->sps.mb_width;
6867a90b 1765 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 1766
bf4665ee 1767 h->b_stride= s->mb_width*4;
0da71265 1768
faf3dfb9 1769 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 1770 if(h->sps.frame_mbs_only_flag)
faf3dfb9 1771 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 1772 else
faf3dfb9 1773 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
115329f1
DB
1774
1775 if (s->context_initialized
5388f0b4
JK
1776 && ( s->width != s->avctx->width || s->height != s->avctx->height
1777 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
afebe2f7
1778 if(h != h0)
1779 return -1; // width / height changed during parallelized decoding
0da71265 1780 free_tables(h);
ff7f75e1 1781 flush_dpb(s->avctx);
0da71265
MN
1782 MPV_common_end(s);
1783 }
1784 if (!s->context_initialized) {
afebe2f7
1785 if(h != h0)
1786 return -1; // we cant (re-)initialize context during parallel decoding
f3bdc3da
RD
1787
1788 avcodec_set_dimensions(s->avctx, s->width, s->height);
1789 s->avctx->sample_aspect_ratio= h->sps.sar;
1790 if(!s->avctx->sample_aspect_ratio.den)
1791 s->avctx->sample_aspect_ratio.den = 1;
1792
c4dffe7e
DC
1793 if(h->sps.video_signal_type_present_flag){
1794 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1795 if(h->sps.colour_description_present_flag){
1796 s->avctx->color_primaries = h->sps.color_primaries;
1797 s->avctx->color_trc = h->sps.color_trc;
1798 s->avctx->colorspace = h->sps.colorspace;
1799 }
1800 }
1801
f3bdc3da 1802 if(h->sps.timing_info_present_flag){
3102d180 1803 int64_t den= h->sps.time_scale;
055a6aa7 1804 if(h->x264_build < 44U)
3102d180 1805 den *= 2;
f3bdc3da 1806 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180 1807 h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da
RD
1808 }
1809 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
1810 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1811
0da71265
MN
1812 if (MPV_common_init(s) < 0)
1813 return -1;
12d96de3 1814 s->first_field = 0;
b19d493f 1815 h->prev_interlaced_frame = 1;
115329f1 1816
b41c1db3 1817 init_scan_tables(h);
903d58f6 1818 ff_h264_alloc_tables(h);
0da71265 1819
afebe2f7
1820 for(i = 1; i < s->avctx->thread_count; i++) {
1821 H264Context *c;
1822 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 1823 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7
1824 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
1825 c->sps = h->sps;
1826 c->pps = h->pps;
1827 init_scan_tables(c);
145061a1 1828 clone_tables(c, h, i);
afebe2f7
1829 }
1830
1831 for(i = 0; i < s->avctx->thread_count; i++)
1832 if(context_init(h->thread_context[i]) < 0)
1833 return -1;
0da71265
MN
1834 }
1835
0da71265
MN
1836 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1837
5d18eaad 1838 h->mb_mbaff = 0;
6ba71fc4 1839 h->mb_aff_frame = 0;
12d96de3 1840 last_pic_structure = s0->picture_structure;
0da71265
MN
1841 if(h->sps.frame_mbs_only_flag){
1842 s->picture_structure= PICT_FRAME;
1843 }else{
6ba71fc4 1844 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 1845 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 1846 } else {
0da71265 1847 s->picture_structure= PICT_FRAME;
6ba71fc4 1848 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 1849 }
0da71265 1850 }
44e9dcf1 1851 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
1852
1853 if(h0->current_slice == 0){
26b86e47
MN
1854 while(h->frame_num != h->prev_frame_num &&
1855 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
1856 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 1857 if (ff_h264_frame_start(h) < 0)
66e6038c 1858 return -1;
26b86e47
MN
1859 h->prev_frame_num++;
1860 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1861 s->current_picture_ptr->frame_num= h->prev_frame_num;
ea6f00c4 1862 ff_h264_execute_ref_pic_marking(h, NULL, 0);
26b86e47
MN
1863 }
1864
12d96de3
JD
1865 /* See if we have a decoded first field looking for a pair... */
1866 if (s0->first_field) {
1867 assert(s0->current_picture_ptr);
1868 assert(s0->current_picture_ptr->data[0]);
1869 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1870
1871 /* figure out if we have a complementary field pair */
1872 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1873 /*
1874 * Previous field is unmatched. Don't display it, but let it
1875 * remain for reference if marked as such.
1876 */
1877 s0->current_picture_ptr = NULL;
1878 s0->first_field = FIELD_PICTURE;
1879
1880 } else {
1881 if (h->nal_ref_idc &&
1882 s0->current_picture_ptr->reference &&
1883 s0->current_picture_ptr->frame_num != h->frame_num) {
1884 /*
1885 * This and previous field were reference, but had
1886 * different frame_nums. Consider this field first in
1887 * pair. Throw away previous field except for reference
1888 * purposes.
1889 */
1890 s0->first_field = 1;
1891 s0->current_picture_ptr = NULL;
1892
1893 } else {
1894 /* Second field in complementary pair */
1895 s0->first_field = 0;
1896 }
1897 }
1898
1899 } else {
1900 /* Frame or first field in a potentially complementary pair */
1901 assert(!s0->current_picture_ptr);
1902 s0->first_field = FIELD_PICTURE;
1903 }
1904
903d58f6 1905 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 1906 s0->first_field = 0;
2ddcf84b 1907 return -1;
12d96de3 1908 }
2ddcf84b
JD
1909 }
1910 if(h != h0)
1911 clone_slice(h, h0);
1912
1913 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1914
88e7a4d1 1915 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 1916 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
1917 first_mb_in_slice >= s->mb_num){
1918 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
1919 return -1;
1920 }
88e7a4d1 1921 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
1922 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1923 if (s->picture_structure == PICT_BOTTOM_FIELD)
1924 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 1925 assert(s->mb_y < s->mb_height);
115329f1 1926
0da71265
MN
1927 if(s->picture_structure==PICT_FRAME){
1928 h->curr_pic_num= h->frame_num;
1929 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1930 }else{
f57e2af6 1931 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
1932 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1933 }
115329f1 1934
0da71265 1935 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 1936 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 1937 }
115329f1 1938
0da71265
MN
1939 if(h->sps.poc_type==0){
1940 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 1941
0da71265
MN
1942 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1943 h->delta_poc_bottom= get_se_golomb(&s->gb);
1944 }
1945 }
115329f1 1946
0da71265
MN
1947 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
1948 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 1949
0da71265
MN
1950 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
1951 h->delta_poc[1]= get_se_golomb(&s->gb);
1952 }
115329f1 1953
0da71265 1954 init_poc(h);
115329f1 1955
0da71265
MN
1956 if(h->pps.redundant_pic_cnt_present){
1957 h->redundant_pic_count= get_ue_golomb(&s->gb);
1958 }
1959
1412060e 1960 //set defaults, might be overridden a few lines later
0da71265
MN
1961 h->ref_count[0]= h->pps.ref_count[0];
1962 h->ref_count[1]= h->pps.ref_count[1];
1963
e3e6f18f 1964 if(h->slice_type_nos != FF_I_TYPE){
9f5c1037 1965 if(h->slice_type_nos == FF_B_TYPE){
0da71265
MN
1966 h->direct_spatial_mv_pred= get_bits1(&s->gb);
1967 }
1968 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 1969
0da71265
MN
1970 if(num_ref_idx_active_override_flag){
1971 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
9f5c1037 1972 if(h->slice_type_nos==FF_B_TYPE)
0da71265
MN
1973 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
1974
187696fa 1975 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 1976 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 1977 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
1978 return -1;
1979 }
1980 }
9f5c1037 1981 if(h->slice_type_nos == FF_B_TYPE)
187696fa
MN
1982 h->list_count= 2;
1983 else
1984 h->list_count= 1;
1985 }else
1986 h->list_count= 0;
0da71265 1987
0bf79634 1988 if(!default_ref_list_done){
ea6f00c4 1989 ff_h264_fill_default_ref_list(h);
0da71265
MN
1990 }
1991
ea6f00c4 1992 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 1993 return -1;
0da71265 1994
07dff5c7
MN
1995 if(h->slice_type_nos!=FF_I_TYPE){
1996 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 1997 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
MN
1998 }
1999 if(h->slice_type_nos==FF_B_TYPE){
2000 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 2001 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
2002 }
2003
932f396f 2004 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
9f5c1037 2005 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
0da71265 2006 pred_weight_table(h);
1a29c6a0 2007 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
9f2d1b4f 2008 implicit_weight_table(h);
1a29c6a0 2009 }else {
9f2d1b4f 2010 h->use_weight = 0;
cb99c652
GB
2011 for (i = 0; i < 2; i++) {
2012 h->luma_weight_flag[i] = 0;
2013 h->chroma_weight_flag[i] = 0;
2014 }
2015 }
115329f1 2016
2ddcf84b 2017 if(h->nal_ref_idc)
ea6f00c4 2018 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 2019
5d18eaad 2020 if(FRAME_MBAFF)
ea6f00c4 2021 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 2022
8f56e219 2023 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
943f69a6
MN
2024 ff_h264_direct_dist_scale_factor(h);
2025 ff_h264_direct_ref_list_init(h);
8f56e219 2026
e3e6f18f 2027 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
9963b332 2028 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2029 if(tmp > 2){
2030 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
2031 return -1;
2032 }
2033 h->cabac_init_idc= tmp;
2034 }
e5017ab8
LA
2035
2036 h->last_qscale_diff = 0;
88e7a4d1
MN
2037 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2038 if(tmp>51){
2039 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2040 return -1;
2041 }
88e7a4d1 2042 s->qscale= tmp;
4691a77d
2043 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2044 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2045 //FIXME qscale / qp ... stuff
9701840b 2046 if(h->slice_type == FF_SP_TYPE){
1df1df0b 2047 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2048 }
9701840b 2049 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
1df1df0b 2050 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2051 }
2052
53c05b1e 2053 h->deblocking_filter = 1;
0c32e19d
MN
2054 h->slice_alpha_c0_offset = 52;
2055 h->slice_beta_offset = 52;
0da71265 2056 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2057 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2058 if(tmp > 2){
2059 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2060 return -1;
2061 }
2062 h->deblocking_filter= tmp;
115329f1 2063 if(h->deblocking_filter < 2)
53c05b1e
MN
2064 h->deblocking_filter^= 1; // 1<->0
2065
2066 if( h->deblocking_filter ) {
0c32e19d
MN
2067 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2068 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2069 if( h->slice_alpha_c0_offset > 104U
2070 || h->slice_beta_offset > 104U){
2071 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2072 return -1;
2073 }
0da71265 2074 }
980a82b7 2075 }
afebe2f7 2076
61858a76 2077 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4b30289e 2078 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
9f5c1037 2079 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
61858a76
RD
2080 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2081 h->deblocking_filter= 0;
2082
afebe2f7 2083 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2084 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2085 /* Cheat slightly for speed:
5d81d641 2086 Do not bother to deblock across slices. */
ec970c21
2087 h->deblocking_filter = 2;
2088 } else {
7ae94d52
2089 h0->max_contexts = 1;
2090 if(!h0->single_decode_warning) {
2091 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2092 h0->single_decode_warning = 1;
2093 }
2094 if(h != h0)
2095 return 1; // deblocking switched inside frame
ec970c21 2096 }
afebe2f7 2097 }
0c32e19d 2098 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7 2099
0da71265
MN
2100#if 0 //FMO
2101 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2102 slice_group_change_cycle= get_bits(&s->gb, ?);
2103#endif
2104
afebe2f7
2105 h0->last_slice_type = slice_type;
2106 h->slice_num = ++h0->current_slice;
b735aeea
MN
2107 if(h->slice_num >= MAX_SLICES){
2108 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2109 }
5175b937 2110
c32867b5 2111 for(j=0; j<2; j++){
6d7e6b26 2112 int id_list[16];
b735aeea 2113 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
MN
2114 for(i=0; i<16; i++){
2115 id_list[i]= 60;
2116 if(h->ref_list[j][i].data[0]){
2117 int k;
2118 uint8_t *base= h->ref_list[j][i].base[0];
2119 for(k=0; k<h->short_ref_count; k++)
2120 if(h->short_ref[k]->base[0] == base){
2121 id_list[i]= k;
2122 break;
2123 }
2124 for(k=0; k<h->long_ref_count; k++)
2125 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2126 id_list[i]= h->short_ref_count + k;
2127 break;
2128 }
2129 }
2130 }
2131
c32867b5
MN
2132 ref2frm[0]=
2133 ref2frm[1]= -1;
d50cdd82 2134 for(i=0; i<16; i++)
6d7e6b26 2135 ref2frm[i+2]= 4*id_list[i]
c32867b5 2136 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2137 ref2frm[18+0]=
2138 ref2frm[18+1]= -1;
2139 for(i=16; i<48; i++)
6d7e6b26 2140 ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82 2141 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2142 }
2143
5d18eaad 2144 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2145 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2146
802e9146
MN
2147 s->avctx->refs= h->sps.ref_frame_count;
2148
0da71265 2149 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2150 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2151 h->slice_num,
2152 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2153 first_mb_in_slice,
49573a87 2154 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2155 pps_id, h->frame_num,
2156 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2157 h->ref_count[0], h->ref_count[1],
2158 s->qscale,
0c32e19d 2159 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f 2160 h->use_weight,
4806b922
MN
2161 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2162 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2163 );
2164 }
2165
2166 return 0;
2167}
2168
0dc343d4 2169int ff_h264_get_slice_type(const H264Context *h)
75dd6938
LA
2170{
2171 switch (h->slice_type) {
2172 case FF_P_TYPE: return 0;
2173 case FF_B_TYPE: return 1;
2174 case FF_I_TYPE: return 2;
2175 case FF_SP_TYPE: return 3;
2176 case FF_SI_TYPE: return 4;
2177 default: return -1;
2178 }
2179}
2180
c988f975
MN
2181static void loop_filter(H264Context *h){
2182 MpegEncContext * const s = &h->s;
2183 uint8_t *dest_y, *dest_cb, *dest_cr;
2184 int linesize, uvlinesize, mb_x, mb_y;
2185 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2186 const int old_slice_type= h->slice_type;
2187
2188 if(h->deblocking_filter) {
2189 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2190 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
78998bf2 2191 int mb_xy, mb_type;
c988f975
MN
2192 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2193 h->slice_num= h->slice_table[mb_xy];
2194 mb_type= s->current_picture.mb_type[mb_xy];
2195 h->list_count= h->list_counts[mb_xy];
c988f975
MN
2196
2197 if(FRAME_MBAFF)
2198 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
2199
c988f975
MN
2200 s->mb_x= mb_x;
2201 s->mb_y= mb_y;
2202 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2203 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2204 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2205 //FIXME simplify above
2206
2207 if (MB_FIELD) {
2208 linesize = h->mb_linesize = s->linesize * 2;
2209 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2210 if(mb_y&1){ //FIXME move out of this function?
2211 dest_y -= s->linesize*15;
2212 dest_cb-= s->uvlinesize*7;
2213 dest_cr-= s->uvlinesize*7;
2214 }
2215 } else {
2216 linesize = h->mb_linesize = s->linesize;
2217 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2218 }
77d40dce 2219 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
aaa995d7 2220 if(fill_filter_caches(h, mb_type))
44a5e7b6 2221 continue;
c988f975
MN
2222 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2223 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2224
77d40dce 2225 if (FRAME_MBAFF) {
c988f975
MN
2226 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2227 } else {
2228 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2229 }
2230 }
2231 }
2232 }
2233 h->slice_type= old_slice_type;
2234 s->mb_x= 0;
2235 s->mb_y= end_mb_y - FRAME_MBAFF;
f4b8b825
MN
2236 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2237 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
c988f975
MN
2238}
2239
69a28f3e
MN
2240static void predict_field_decoding_flag(H264Context *h){
2241 MpegEncContext * const s = &h->s;
2242 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2243 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
2244 ? s->current_picture.mb_type[mb_xy-1]
2245 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
2246 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
2247 : 0;
2248 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
2249}
2250
3a84713a
RS
2251static int decode_slice(struct AVCodecContext *avctx, void *arg){
2252 H264Context *h = *(void**)arg;
0da71265
MN
2253 MpegEncContext * const s = &h->s;
2254 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2255
2256 s->mb_skip_run= -1;
0da71265 2257
89db0bae 2258 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b 2259 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae 2260
e5017ab8 2261 if( h->pps.cabac ) {
e5017ab8
LA
2262 /* realign */
2263 align_get_bits( &s->gb );
2264
2265 /* init cabac */
d61c4e73 2266 ff_init_cabac_states( &h->cabac);
e5017ab8
LA
2267 ff_init_cabac_decoder( &h->cabac,
2268 s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15 2269 (get_bits_left(&s->gb) + 7)/8);
cc51b282
MN
2270
2271 ff_h264_init_cabac_states(h);
95c26348 2272
e5017ab8 2273 for(;;){
851ded89 2274//START_TIMER
cc51b282 2275 int ret = ff_h264_decode_mb_cabac(h);
6867a90b 2276 int eos;
851ded89 2277//STOP_TIMER("decode_mb_cabac")
0da71265 2278
903d58f6 2279 if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265 2280
5d18eaad 2281 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
LA
2282 s->mb_y++;
2283
cc51b282 2284 ret = ff_h264_decode_mb_cabac(h);
e5017ab8 2285
903d58f6 2286 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2287 s->mb_y--;
2288 }
6867a90b 2289 eos = get_cabac_terminate( &h->cabac );
e5017ab8 2290
3566042a
MN
2291 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
2292 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2293 return 0;
2294 }
5659b509 2295 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af 2296 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
LA
2297 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2298 return -1;
2299 }
2300
2301 if( ++s->mb_x >= s->mb_width ) {
2302 s->mb_x = 0;
c988f975 2303 loop_filter(h);
e5017ab8 2304 ff_draw_horiz_band(s, 16*s->mb_y, 16);
5175b937 2305 ++s->mb_y;
f3e53d9f 2306 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2307 ++s->mb_y;
69cc3183
MN
2308 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2309 predict_field_decoding_flag(h);
6867a90b 2310 }
0da71265 2311 }
0da71265 2312
e5017ab8 2313 if( eos || s->mb_y >= s->mb_height ) {
a9c9a240 2314 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8 2315 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0da71265 2316 return 0;
e5017ab8 2317 }
e5017ab8
LA
2318 }
2319
2320 } else {
2321 for(;;){
e1e94902 2322 int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2323
903d58f6 2324 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8 2325
5d18eaad 2326 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8 2327 s->mb_y++;
e1e94902 2328 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2329
903d58f6 2330 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2331 s->mb_y--;
2332 }
2333
2334 if(ret<0){
2335 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2336 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2337
2338 return -1;
2339 }
e5017ab8
LA
2340
2341 if(++s->mb_x >= s->mb_width){
2342 s->mb_x=0;
c988f975 2343 loop_filter(h);
e5017ab8 2344 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6867a90b 2345 ++s->mb_y;
f3e53d9f 2346 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b 2347 ++s->mb_y;
69cc3183
MN
2348 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2349 predict_field_decoding_flag(h);
6867a90b
LLL
2350 }
2351 if(s->mb_y >= s->mb_height){
a9c9a240 2352 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2353
2354 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2355 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2356
2357 return 0;
2358 }else{
2359 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2360
2361 return -1;
2362 }
2363 }
2364 }
2365
2366 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240 2367 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2368 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2369 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2370
2371 return 0;
2372 }else{
2373 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2374
2375 return -1;
2376 }
2377 }
0da71265
MN
2378 }
2379 }
e5017ab8 2380
0da71265
MN
2381#if 0
2382 for(;s->mb_y < s->mb_height; s->mb_y++){
2383 for(;s->mb_x < s->mb_width; s->mb_x++){
2384 int ret= decode_mb(h);
115329f1 2385
903d58f6 2386 ff_h264_hl_decode_mb(h);
0da71265
MN
2387
2388 if(ret<0){
267f7edc 2389 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2390 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2391
2392 return -1;
2393 }
115329f1 2394
0da71265
MN
2395 if(++s->mb_x >= s->mb_width){
2396 s->mb_x=0;
2397 if(++s->mb_y >= s->mb_height){
2398 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2399 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2400
2401 return 0;
2402 }else{
2403 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2404
2405 return -1;
2406 }
2407 }
2408 }
115329f1 2409
0da71265
MN
2410 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2411 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2412 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2413
2414 return 0;
2415 }else{
2416 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2417
2418 return -1;
2419 }
2420 }
2421 }
2422 s->mb_x=0;
2423 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2424 }
2425#endif
2426 return -1; //not reached
2427}
2428
afebe2f7
2429/**
2430 * Call decode_slice() for each context.
2431 *
2432 * @param h h264 master context
2433 * @param context_count number of contexts to execute
2434 */
2435static void execute_decode_slices(H264Context *h, int context_count){
2436 MpegEncContext * const s = &h->s;
2437 AVCodecContext * const avctx= s->avctx;
2438 H264Context *hx;
2439 int i;
2440
40e5d31b
GB
2441 if (s->avctx->hwaccel)
2442 return;
0d3d172f 2443 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2444 return;
afebe2f7 2445 if(context_count == 1) {
74e8b78b 2446 decode_slice(avctx, &h);
afebe2f7
2447 } else {
2448 for(i = 1; i < context_count; i++) {
2449 hx = h->thread_context[i];
047599a4 2450 hx->s.error_recognition = avctx->error_recognition;
afebe2f7
2451 hx->s.error_count = 0;
2452 }
2453
2454 avctx->execute(avctx, (void *)decode_slice,
01418506 2455 h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
2456
2457 /* pull back stuff from slices to master context */
2458 hx = h->thread_context[context_count - 1];
2459 s->mb_x = hx->s.mb_x;
2460 s->mb_y = hx->s.mb_y;
12d96de3
JD
2461 s->dropable = hx->s.dropable;
2462 s->picture_structure = hx->s.picture_structure;
afebe2f7
2463 for(i = 1; i < context_count; i++)
2464 h->s.error_count += h->thread_context[i]->s.error_count;
2465 }
2466}
2467
2468
30317501 2469static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
MN
2470 MpegEncContext * const s = &h->s;
2471 AVCodecContext * const avctx= s->avctx;
2472 int buf_index=0;
afebe2f7
2473 H264Context *hx; ///< thread context
2474 int context_count = 0;
74b14aac 2475 int next_avc= h->is_avc ? 0 : buf_size;
afebe2f7
2476
2477 h->max_contexts = avctx->thread_count;
377ec888 2478#if 0
eb60dddc 2479 int i;
96b6ace2
MN
2480 for(i=0; i<50; i++){
2481 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
0da71265
MN
2482 }
2483#endif
66a4b2c1 2484 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7 2485 h->current_slice = 0;
12d96de3 2486 if (!s->first_field)
f6e3c460 2487 s->current_picture_ptr= NULL;
9c095463 2488 ff_h264_reset_sei(h);
66a4b2c1
MN
2489 }
2490
0da71265
MN
2491 for(;;){
2492 int consumed;
2493 int dst_length;
2494 int bit_length;
30317501 2495 const uint8_t *ptr;
4770b1b4 2496 int i, nalsize = 0;
afebe2f7 2497 int err;
115329f1 2498
74b14aac 2499 if(buf_index >= next_avc) {
1c48415b
2500 if(buf_index >= buf_size) break;
2501 nalsize = 0;
2502 for(i = 0; i < h->nal_length_size; i++)
2503 nalsize = (nalsize << 8) | buf[buf_index++];
8d8409ca 2504 if(nalsize <= 1 || nalsize > buf_size - buf_index){
1c48415b
2505 if(nalsize == 1){
2506 buf_index++;
2507 continue;
2508 }else{
2509 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2510 break;
2511 }
2512 }
74b14aac 2513 next_avc= buf_index + nalsize;
1c48415b
2514 } else {
2515 // start code prefix search
52255d17 2516 for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
2517 // This should always succeed in the first iteration.
2518 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2519 break;
8b031359 2520 }
115329f1 2521
1c48415b 2522 if(buf_index+3 >= buf_size) break;
115329f1 2523
1c48415b 2524 buf_index+=3;
52255d17 2525 if(buf_index >= next_avc) continue;
1c48415b 2526 }
115329f1 2527
afebe2f7
2528 hx = h->thread_context[context_count];
2529
74b14aac 2530 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429 2531 if (ptr==NULL || dst_length < 0){
ac658be5
FOL
2532 return -1;
2533 }
3566042a
MN
2534 i= buf_index + consumed;
2535 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
2536 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
2537 s->workaround_bugs |= FF_BUG_TRUNCATED;
2538
2539 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
6ac9696e 2540 while(ptr[dst_length - 1] == 0 && dst_length > 0)
c4da83fb 2541 dst_length--;
3566042a 2542 }
1790a5e9 2543 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
MN
2544
2545 if(s->avctx->debug&FF_DEBUG_STARTCODE){
afebe2f7 2546 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
0da71265 2547 }
115329f1 2548
74b14aac 2549 if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d 2550 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1 2551 }
4770b1b4 2552
0da71265
MN
2553 buf_index += consumed;
2554
755bfeab 2555 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
8c3eba7c 2556 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
0da71265 2557 continue;
115329f1 2558
afebe2f7
2559 again:
2560 err = 0;
2561 switch(hx->nal_unit_type){
0da71265 2562 case NAL_IDR_SLICE:
afebe2f7
2563 if (h->nal_unit_type != NAL_IDR_SLICE) {
2564 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2565 return -1;
2566 }
3b66c4c5 2567 idr(h); //FIXME ensure we don't loose some frames if there is reordering
0da71265 2568 case NAL_SLICE:
afebe2f7
2569 init_get_bits(&hx->s.gb, ptr, bit_length);
2570 hx->intra_gb_ptr=
2571 hx->inter_gb_ptr= &hx->s.gb;
2572 hx->s.data_partitioning = 0;
2573
2574 if((err = decode_slice_header(hx, h)))
2575 break;
2576
dd0cd3d2
RC
2577 avctx->profile = hx->sps.profile_idc;
2578 avctx->level = hx->sps.level_idc;
2579
3bccd93a
SW
2580 if (h->current_slice == 1) {
2581 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
6026a096 2582 return -1;
3bccd93a
SW
2583 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2584 ff_vdpau_h264_picture_start(s);
6026a096
GB
2585 }
2586
37a558fe
IS
2587 s->current_picture_ptr->key_frame |=
2588 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2589 (h->sei_recovery_frame_cnt >= 0);
afebe2f7
2590 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
2591 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2592 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2593 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
369122dd 2594 && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
MN
2595 if(avctx->hwaccel) {
2596 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2597 return -1;
2598 }else
0d3d172f 2599 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd 2600 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
CEH
2601 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2602 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd 2603 }else
f2c214a1 2604 context_count++;
369122dd 2605 }
0da71265
MN
2606 break;
2607 case NAL_DPA:
afebe2f7
2608 init_get_bits(&hx->s.gb, ptr, bit_length);
2609 hx->intra_gb_ptr=
2610 hx->inter_gb_ptr= NULL;
0410ee8f
AS
2611
2612 if ((err = decode_slice_header(hx, h)) < 0)
2613 break;
2614
dd0cd3d2
RC
2615 avctx->profile = hx->sps.profile_idc;
2616 avctx->level = hx->sps.level_idc;
2617
afebe2f7 2618 hx->s.data_partitioning = 1;
115329f1 2619
0da71265
MN
2620 break;
2621 case NAL_DPB:
afebe2f7
2622 init_get_bits(&hx->intra_gb, ptr, bit_length);
2623 hx->intra_gb_ptr= &hx->intra_gb;
0da71265
MN
2624 break;
2625 case NAL_DPC:
afebe2f7
2626 init_get_bits(&hx->inter_gb, ptr, bit_length);
2627 hx->inter_gb_ptr= &hx->inter_gb;
8b92b792 2628
afebe2f7 2629 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03 2630 && s->context_initialized
e0111b32 2631 && s->hurry_up < 5
afebe2f7 2632 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2633 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2634 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
e0111b32 2635 && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7 2636 context_count++;
0da71265
MN
2637 break;
2638 case NAL_SEI:
cdd10689 2639 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2640 ff_h264_decode_sei(h);
0da71265
MN
2641 break;
2642 case NAL_SPS:
2643 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2644 ff_h264_decode_seq_parameter_set(h);
115329f1 2645
0da71265
MN
2646 if(s->flags& CODEC_FLAG_LOW_DELAY)
2647 s->low_delay=1;
115329f1 2648
a18030bb
LM
2649 if(avctx->has_b_frames < 2)
2650 avctx->has_b_frames= !s->low_delay;
0da71265
MN
2651 break;
2652 case NAL_PPS:
2653 init_get_bits(&s->gb, ptr, bit_length);
115329f1 2654
1790a5e9 2655 ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
MN
2656
2657 break;
ab470fa7
LM
2658 case NAL_AUD:
2659 case NAL_END_SEQUENCE:
2660 case NAL_END_STREAM:
2661 case NAL_FILLER_DATA:
2662 case NAL_SPS_EXT:
2663 case NAL_AUXILIARY_SLICE:
0da71265 2664 break;
bb270c08 2665 default:
4ad04da2 2666 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1 2667 }
115329f1 2668
afebe2f7
2669 if(context_count == h->max_contexts) {
2670 execute_decode_slices(h, context_count);
2671 context_count = 0;
2672 }
2673
2674 if (err < 0)
2675 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2676 else if(err == 1) {
2677 /* Slice could not be decoded in parallel mode, copy down
2678 * NAL unit stuff to context 0 and restart. Note that
1412060e 2679 * rbsp_buffer is not transferred, but since we no longer
afebe2f7
2680 * run in parallel mode this should not be an issue. */
2681 h->nal_unit_type = hx->nal_unit_type;
2682 h->nal_ref_idc = hx->nal_ref_idc;
2683 hx = h;
2684 goto again;
2685 }
2686 }
2687 if(context_count)
2688 execute_decode_slices(h, context_count);
0da71265
MN
2689 return buf_index;
2690}
2691
2692/**
3b66c4c5 2693 * returns the number of bytes consumed for building the current frame
0da71265
MN
2694 */
2695static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
755bfeab 2696 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
0da71265
MN
2697 if(pos+10>buf_size) pos=buf_size; // oops ;)
2698
2699 return pos;
0da71265
MN
2700}
2701
115329f1 2702static int decode_frame(AVCodecContext *avctx,
0da71265 2703 void *data, int *data_size,
7a00bbad 2704 AVPacket *avpkt)
0da71265 2705{
7a00bbad
TB
2706 const uint8_t *buf = avpkt->data;
2707 int buf_size = avpkt->size;
0da71265
MN
2708 H264Context *h = avctx->priv_data;
2709 MpegEncContext *s = &h->s;
115329f1 2710 AVFrame *pict = data;
0da71265 2711 int buf_index;
115329f1 2712
0da71265 2713 s->flags= avctx->flags;
303e50e6 2714 s->flags2= avctx->flags2;
0da71265 2715
1412060e 2716 /* end of stream, output what is still in the buffers */
0da71265 2717 if (buf_size == 0) {
97bbb885
MN
2718 Picture *out;
2719 int i, out_idx;
2720
2721//FIXME factorize this with the output code below
2722 out = h->delayed_pic[0];
2723 out_idx = 0;
c173a088 2724 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
MN
2725 if(h->delayed_pic[i]->poc < out->poc){
2726 out = h->delayed_pic[i];
2727 out_idx = i;
2728 }
2729
2730 for(i=out_idx; h->delayed_pic[i]; i++)
2731 h->delayed_pic[i] = h->delayed_pic[i+1];
2732
2733 if(out){
2734 *data_size = sizeof(AVFrame);
2735 *pict= *(AVFrame*)out;
2736 }
2737
0da71265
MN
2738 return 0;
2739 }
115329f1 2740
0da71265 2741 buf_index=decode_nal_units(h, buf, buf_size);
115329f1 2742 if(buf_index < 0)
0da71265
MN
2743 return -1;
2744
56c70e1d 2745 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
1c746a49 2746 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
56c70e1d
MN
2747 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
2748 return -1;
2749 }
2750
66a4b2c1
MN
2751 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
2752 Picture *out = s->current_picture_ptr;
2753 Picture *cur = s->current_picture_ptr;
44be1d64 2754 int i, pics, out_of_order, out_idx;
115329f1 2755
256299d3 2756 field_end(h);
66a4b2c1 2757
357282c6 2758 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
12d96de3
JD
2759 /* Wait for second field. */
2760 *data_size = 0;
2761
2762 } else {
b19d493f 2763 cur->interlaced_frame = 0;
b09a7c05
2764 cur->repeat_pict = 0;
2765
2766 /* Signal interlacing information externally. */
2767 /* Prioritize picture timing SEI information over used decoding process if it exists. */
70e01da3 2768
b09a7c05
2769 if(h->sps.pic_struct_present_flag){
2770 switch (h->sei_pic_struct)
2771 {
b19d493f
HY
2772 case SEI_PIC_STRUCT_FRAME:
2773 break;
2774 case SEI_PIC_STRUCT_TOP_FIELD:
2775 case SEI_PIC_STRUCT_BOTTOM_FIELD:
2776 cur->interlaced_frame = 1;
2777 break;
2778 case SEI_PIC_STRUCT_TOP_BOTTOM:
2779 case SEI_PIC_STRUCT_BOTTOM_TOP:
2780 if (FIELD_OR_MBAFF_PICTURE)
2781 cur->interlaced_frame = 1;
2782 else
2783 // try to flag soft telecine progressive
2784 cur->interlaced_frame = h->prev_interlaced_frame;
2785 break;
b09a7c05
2786 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
2787 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
2788 // Signal the possibility of telecined film externally (pic_struct 5,6)
2789 // From these hints, let the applications decide if they apply deinterlacing.
2790 cur->repeat_pict = 1;
b09a7c05
2791 break;
2792 case SEI_PIC_STRUCT_FRAME_DOUBLING:
2793 // Force progressive here, as doubling interlaced frame is a bad idea.
b09a7c05
2794 cur->repeat_pict = 2;
2795 break;
2796 case SEI_PIC_STRUCT_FRAME_TRIPLING:
b09a7c05
2797 cur->repeat_pict = 4;
2798 break;
2799 }
b19d493f
HY
2800
2801 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
2802 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
b09a7c05
2803 }else{
2804 /* Derive interlacing flag from used decoding process. */
2805 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
2806 }
b19d493f 2807 h->prev_interlaced_frame = cur->interlaced_frame;
b09a7c05
2808
2809 if (cur->field_poc[0] != cur->field_poc[1]){
2810 /* Derive top_field_first from field pocs. */
2811 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
2812 }else{
2813 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
2814 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
2815 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
2816 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
2817 cur->top_field_first = 1;
2818 else
2819 cur->top_field_first = 0;
2820 }else{
2821 /* Most likely progressive */
2822 cur->top_field_first = 0;
2823 }
2824 }
84a8596d 2825
f6e3c460 2826 //FIXME do something with unavailable reference frames
8b92b792 2827
f6e3c460 2828 /* Sort B-frames into display order */
2f944356 2829
f6e3c460
2830 if(h->sps.bitstream_restriction_flag
2831 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
2832 s->avctx->has_b_frames = h->sps.num_reorder_frames;
2833 s->low_delay = 0;
2834 }
9170e345 2835
fb19e144
MN
2836 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
2837 && !h->sps.bitstream_restriction_flag){
2838 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
2839 s->low_delay= 0;
2840 }
2841
f6e3c460
2842 pics = 0;
2843 while(h->delayed_pic[pics]) pics++;
9170e345 2844
64b9d48f 2845 assert(pics <= MAX_DELAYED_PIC_COUNT);
4e4d983e 2846
f6e3c460
2847 h->delayed_pic[pics++] = cur;
2848 if(cur->reference == 0)
2849 cur->reference = DELAYED_PIC_REF;
2f944356 2850
f6e3c460
2851 out = h->delayed_pic[0];
2852 out_idx = 0;
c173a088 2853 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
f6e3c460
2854 if(h->delayed_pic[i]->poc < out->poc){
2855 out = h->delayed_pic[i];
2856 out_idx = i;
2857 }
44be1d64
MN
2858 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
2859 h->outputed_poc= INT_MIN;
2860 out_of_order = out->poc < h->outputed_poc;
1b547aba 2861
f6e3c460
2862 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
2863 { }
2a811db2 2864 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
f6e3c460 2865 || (s->low_delay &&
44be1d64 2866 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
9701840b 2867 || cur->pict_type == FF_B_TYPE)))
f6e3c460
2868 {
2869 s->low_delay = 0;
2870 s->avctx->has_b_frames++;
f6e3c460 2871 }
f6e3c460
2872
2873 if(out_of_order || pics > s->avctx->has_b_frames){
3eaa6d0e 2874 out->reference &= ~DELAYED_PIC_REF;
f6e3c460
2875 for(i=out_idx; h->delayed_pic[i]; i++)
2876 h->delayed_pic[i] = h->delayed_pic[i+1];
2877 }
3eaa6d0e 2878 if(!out_of_order && pics > s->avctx->has_b_frames){
f6e3c460 2879 *data_size = sizeof(AVFrame);
df8a7dff 2880
44be1d64
MN
2881 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
2882 h->outputed_poc = INT_MIN;
2883 } else
67e362ca 2884 h->outputed_poc = out->poc;
f6e3c460 2885 *pict= *(AVFrame*)out;
3eaa6d0e 2886 }else{
f6e3c460 2887 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3eaa6d0e 2888 }
12d96de3 2889 }
a4dae92b
LM
2890 }
2891
3165e258 2892 assert(pict->data[0] || !*data_size);
4e4d983e 2893 ff_print_debug_info(s, pict);
0da71265 2894//printf("out %d\n", (int)pict->data[0]);
0da71265 2895
0da71265
MN
2896 return get_consumed_bytes(s, buf_index, buf_size);
2897}
#if 0
/**
 * Fill h->mb_avail[] for the current macroblock (disabled code).
 *
 * Entries 0..2 are the top-left, top and top-right neighbours, entry 3 the
 * left neighbour. A neighbour is available when it lies inside the picture
 * and its slice_table entry equals h->slice_num.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int cur_xy = s->mb_x + s->mb_y*s->mb_stride;
    const int top_xy = cur_xy - s->mb_stride;

    if (!s->mb_y) {
        /* first row: nothing above */
        h->mb_avail[0] = h->mb_avail[1] = h->mb_avail[2] = 0;
    } else {
        h->mb_avail[0] = s->mb_x                 && h->slice_table[top_xy - 1] == h->slice_num;
        h->mb_avail[1] =                            h->slice_table[top_xy    ] == h->slice_num;
        h->mb_avail[2] = s->mb_x+1 < s->mb_width && h->slice_table[top_xy + 1] == h->slice_num;
    }
    h->mb_avail[3] = s->mb_x && h->slice_table[cur_xy - 1] == h->slice_num;
    h->mb_avail[4] = 1; //FIXME move out
    h->mb_avail[5] = 0; //FIXME move out
}
#endif
2917
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test, built only when TEST is defined.
 *
 * Writes COUNT unsigned and COUNT signed exp-Golomb codes into a buffer,
 * reads them back and reports every value that does not round-trip.
 * The (I)DCT, quantizer and NAL layer tests are disabled with #if 0.
 *
 * @return 0
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* the value written was i - COUNT/2, so that is the expected one */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h->h264dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
0da71265
MN
3092
3093
cbf1eae9 3094av_cold void ff_h264_free_context(H264Context *h)
0da71265 3095{
5f129a05 3096 int i;
115329f1 3097
0da71265 3098 free_tables(h); //FIXME cleanup init stuff perhaps
5f129a05
MN
3099
3100 for(i = 0; i < MAX_SPS_COUNT; i++)
3101 av_freep(h->sps_buffers + i);
3102
3103 for(i = 0; i < MAX_PPS_COUNT; i++)
3104 av_freep(h->pps_buffers + i);
15861962
RD
3105}
3106
903d58f6 3107av_cold int ff_h264_decode_end(AVCodecContext *avctx)
15861962
RD
3108{
3109 H264Context *h = avctx->priv_data;
3110 MpegEncContext *s = &h->s;
3111
3112 ff_h264_free_context(h);
5f129a05 3113
0da71265
MN
3114 MPV_common_end(s);
3115
3116// memset(h, 0, sizeof(H264Context));
115329f1 3117
0da71265
MN
3118 return 0;
3119}
3120
3121
3122AVCodec h264_decoder = {
3123 "h264",
312