Merge decode_cabac_mb_dqp() with surrounding code.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
188d3c51 34#include "h264_mvpred.h"
26b4fe82 35#include "h264_parser.h"
0da71265 36#include "golomb.h"
199436b9 37#include "mathops.h"
626464fb 38#include "rectangle.h"
369122dd 39#include "vdpau_internal.h"
0da71265 40
e5017ab8
LA
41#include "cabac.h"
42
2848ce84 43//#undef NDEBUG
0da71265
MN
44#include <assert.h>
45
/** rem6[q] == q % 6 for every index q in 0..51. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3,
};
49
/** div6[q] == q / 6 for every index q in 0..51. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7,
8, 8, 8, 8,
};
53
903d58f6 54void ff_h264_write_back_intra_pred_mode(H264Context *h){
64514ee8 55 const int mb_xy= h->mb_xy;
0da71265
MN
56
57 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
58 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
59 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
60 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
61 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
62 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
63 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
64}
65
66/**
67 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
68 */
2bedc0e8
MN
69int ff_h264_check_intra4x4_pred_mode(H264Context *h){
70 MpegEncContext * const s = &h->s;
71 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
72 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
73 int i;
74
75 if(!(h->top_samples_available&0x8000)){
76 for(i=0; i<4; i++){
77 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
78 if(status<0){
79 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
80 return -1;
81 } else if(status){
82 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
83 }
84 }
85 }
86
87 if((h->left_samples_available&0x8888)!=0x8888){
88 static const int mask[4]={0x8000,0x2000,0x80,0x20};
89 for(i=0; i<4; i++){
90 if(!(h->left_samples_available&mask[i])){
91 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
92 if(status<0){
93 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
94 return -1;
95 } else if(status){
96 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
97 }
98 }
99 }
100 }
101
102 return 0;
103} //FIXME cleanup like ff_h264_check_intra_pred_mode
104
105/**
106 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
107 */
903d58f6 108int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
0da71265
MN
109 MpegEncContext * const s = &h->s;
110 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
111 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 112
43ff0714 113 if(mode > 6U) {
5175b937 114 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 115 return -1;
5175b937 116 }
115329f1 117
0da71265
MN
118 if(!(h->top_samples_available&0x8000)){
119 mode= top[ mode ];
120 if(mode<0){
9b879566 121 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
122 return -1;
123 }
124 }
115329f1 125
d1d10e91 126 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 127 mode= left[ mode ];
d1d10e91
MN
128 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
129 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
130 }
0da71265 131 if(mode<0){
9b879566 132 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 133 return -1;
115329f1 134 }
0da71265
MN
135 }
136
137 return mode;
138}
139
1790a5e9 140const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
141 int i, si, di;
142 uint8_t *dst;
24456882 143 int bufidx;
0da71265 144
bb270c08 145// src[0]&0x80; //forbidden bit
0da71265
MN
146 h->nal_ref_idc= src[0]>>5;
147 h->nal_unit_type= src[0]&0x1F;
148
149 src++; length--;
115329f1 150#if 0
0da71265
MN
151 for(i=0; i<length; i++)
152 printf("%2X ", src[i]);
153#endif
e08715d3 154
b250f9c6
AJ
155#if HAVE_FAST_UNALIGNED
156# if HAVE_FAST_64BIT
e08715d3
MN
157# define RS 7
158 for(i=0; i+1<length; i+=9){
3878be31 159 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
160# else
161# define RS 3
162 for(i=0; i+1<length; i+=5){
3878be31 163 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
164# endif
165 continue;
166 if(i>0 && !src[i]) i--;
167 while(src[i]) i++;
168#else
169# define RS 0
0da71265
MN
170 for(i=0; i+1<length; i+=2){
171 if(src[i]) continue;
172 if(i>0 && src[i-1]==0) i--;
e08715d3 173#endif
0da71265
MN
174 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
175 if(src[i+2]!=3){
176 /* startcode, so we must be past the end */
177 length=i;
178 }
179 break;
180 }
abb27cfb 181 i-= RS;
0da71265
MN
182 }
183
184 if(i>=length-1){ //no escaped 0
185 *dst_length= length;
186 *consumed= length+1; //+1 for the header
115329f1 187 return src;
0da71265
MN
188 }
189
24456882 190 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 191 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 192 dst= h->rbsp_buffer[bufidx];
0da71265 193
ac658be5
FOL
194 if (dst == NULL){
195 return NULL;
196 }
197
3b66c4c5 198//printf("decoding esc\n");
593af7cd
MN
199 memcpy(dst, src, i);
200 si=di=i;
201 while(si+2<length){
0da71265 202 //remove escapes (very rare 1:2^22)
593af7cd
MN
203 if(src[si+2]>3){
204 dst[di++]= src[si++];
205 dst[di++]= src[si++];
206 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
207 if(src[si+2]==3){ //escape
208 dst[di++]= 0;
209 dst[di++]= 0;
210 si+=3;
c8470cc1 211 continue;
0da71265 212 }else //next start code
593af7cd 213 goto nsc;
0da71265
MN
214 }
215
216 dst[di++]= src[si++];
217 }
593af7cd
MN
218 while(si<length)
219 dst[di++]= src[si++];
220nsc:
0da71265 221
d4369630
AS
222 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
223
0da71265
MN
224 *dst_length= di;
225 *consumed= si + 1;//+1 for the header
90b5b51e 226//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
227 return dst;
228}
229
1790a5e9 230int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
231 int v= *src;
232 int r;
233
a9c9a240 234 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
235
236 for(r=1; r<9; r++){
237 if(v&1) return r;
238 v>>=1;
239 }
240 return 0;
241}
242
243/**
1412060e 244 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
245 * @param qp quantization parameter
246 */
239ea04c 247static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
248#define stride 16
249 int i;
250 int temp[16]; //FIXME check if this is a good idea
251 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
252 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
253
254//memset(block, 64, 2*256);
255//return;
256 for(i=0; i<4; i++){
257 const int offset= y_offset[i];
258 const int z0= block[offset+stride*0] + block[offset+stride*4];
259 const int z1= block[offset+stride*0] - block[offset+stride*4];
260 const int z2= block[offset+stride*1] - block[offset+stride*5];
261 const int z3= block[offset+stride*1] + block[offset+stride*5];
262
263 temp[4*i+0]= z0+z3;
264 temp[4*i+1]= z1+z2;
265 temp[4*i+2]= z1-z2;
266 temp[4*i+3]= z0-z3;
267 }
268
269 for(i=0; i<4; i++){
270 const int offset= x_offset[i];
271 const int z0= temp[4*0+i] + temp[4*2+i];
272 const int z1= temp[4*0+i] - temp[4*2+i];
273 const int z2= temp[4*1+i] - temp[4*3+i];
274 const int z3= temp[4*1+i] + temp[4*3+i];
275
1412060e 276 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
277 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
278 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
279 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
280 }
281}
282
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; relies on the stride macro still being defined here.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    for(i=0; i<4; i++){
        const DCTELEM *in= block + y_offset[i];
        int *out= temp + 4*i;
        const int s04= in[stride*0] + in[stride*4];
        const int d04= in[stride*0] - in[stride*4];
        const int d15= in[stride*1] - in[stride*5];
        const int s15= in[stride*1] + in[stride*5];

        out[0]= s04+s15;
        out[1]= d04+d15;
        out[2]= d04-d15;
        out[3]= s04-s15;
    }

    for(i=0; i<4; i++){
        DCTELEM *out= block + x_offset[i];
        const int s02= temp[4*0+i] + temp[4*2+i];
        const int d02= temp[4*0+i] - temp[4*2+i];
        const int d13= temp[4*1+i] - temp[4*3+i];
        const int s13= temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (s02 + s13)>>1;
        out[stride*2 ]= (d02 + d13)>>1;
        out[stride*8 ]= (d02 - d13)>>1;
        out[stride*10]= (s02 - s13)>>1;
    }
}
#endif
322
0da71265
MN
323#undef xStride
324#undef stride
325
239ea04c 326static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
327 const int stride= 16*2;
328 const int xStride= 16;
329 int a,b,c,d,e;
330
331 a= block[stride*0 + xStride*0];
332 b= block[stride*0 + xStride*1];
333 c= block[stride*1 + xStride*0];
334 d= block[stride*1 + xStride*1];
335
336 e= a-b;
337 a= a+b;
338 b= c-d;
339 c= c+d;
340
239ea04c
LM
341 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
342 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
343 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
344 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
345}
346
#if 0
/**
 * 2x2 transform of four dc values at block[0], block[16], block[32] and
 * block[48], in place and without scaling. Disabled code.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    DCTELEM * const row0= block;
    DCTELEM * const row1= block + stride;

    const int top_sum = row0[0] + row0[xStride];
    const int top_diff= row0[0] - row0[xStride];
    const int bot_sum = row1[0] + row1[xStride];
    const int bot_diff= row1[0] - row1[xStride];

    row0[0      ]= (top_sum  + bot_sum );
    row0[xStride]= (top_diff + bot_diff);
    row1[0      ]= (top_sum  - bot_sum );
    row1[xStride]= (top_diff - bot_diff);
}
#endif
0da71265 369
0da71265
MN
370static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
371 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
372 int src_x_offset, int src_y_offset,
373 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
374 MpegEncContext * const s = &h->s;
375 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 376 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 377 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
378 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
379 uint8_t * src_cb, * src_cr;
380 int extra_width= h->emu_edge_width;
381 int extra_height= h->emu_edge_height;
0da71265
MN
382 int emu=0;
383 const int full_mx= mx>>2;
384 const int full_my= my>>2;
fbd312fd 385 const int pic_width = 16*s->mb_width;
0d43dd8c 386 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 387
0da71265
MN
388 if(mx&7) extra_width -= 3;
389 if(my&7) extra_height -= 3;
115329f1
DB
390
391 if( full_mx < 0-extra_width
392 || full_my < 0-extra_height
393 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 394 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
395 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
396 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
397 emu=1;
398 }
115329f1 399
5d18eaad 400 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 401 if(!square){
5d18eaad 402 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 403 }
115329f1 404
49fb20cb 405 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 406
0d43dd8c 407 if(MB_FIELD){
5d18eaad 408 // chroma offset when predicting from a field of opposite parity
2143b118 409 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
410 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
411 }
412 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
413 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
414
0da71265 415 if(emu){
5d18eaad 416 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
417 src_cb= s->edge_emu_buffer;
418 }
5d18eaad 419 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
420
421 if(emu){
5d18eaad 422 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
423 src_cr= s->edge_emu_buffer;
424 }
5d18eaad 425 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
426}
427
9f2d1b4f 428static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
429 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
430 int x_offset, int y_offset,
431 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
432 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
433 int list0, int list1){
434 MpegEncContext * const s = &h->s;
435 qpel_mc_func *qpix_op= qpix_put;
436 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 437
5d18eaad
LM
438 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
439 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
440 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 441 x_offset += 8*s->mb_x;
0d43dd8c 442 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 443
0da71265 444 if(list0){
1924f3ce 445 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
446 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
447 dest_y, dest_cb, dest_cr, x_offset, y_offset,
448 qpix_op, chroma_op);
449
450 qpix_op= qpix_avg;
451 chroma_op= chroma_avg;
452 }
453
454 if(list1){
1924f3ce 455 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
456 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
457 dest_y, dest_cb, dest_cr, x_offset, y_offset,
458 qpix_op, chroma_op);
459 }
460}
461
9f2d1b4f
LM
462static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
463 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
464 int x_offset, int y_offset,
465 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
466 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
467 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
468 int list0, int list1){
469 MpegEncContext * const s = &h->s;
470
5d18eaad
LM
471 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
472 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
473 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 474 x_offset += 8*s->mb_x;
0d43dd8c 475 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 476
9f2d1b4f
LM
477 if(list0 && list1){
478 /* don't optimize for luma-only case, since B-frames usually
479 * use implicit weights => chroma too. */
480 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
481 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
482 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
483 int refn0 = h->ref_cache[0][ scan8[n] ];
484 int refn1 = h->ref_cache[1][ scan8[n] ];
485
486 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
487 dest_y, dest_cb, dest_cr,
488 x_offset, y_offset, qpix_put, chroma_put);
489 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
490 tmp_y, tmp_cb, tmp_cr,
491 x_offset, y_offset, qpix_put, chroma_put);
492
493 if(h->use_weight == 2){
494 int weight0 = h->implicit_weight[refn0][refn1];
495 int weight1 = 64 - weight0;
5d18eaad
LM
496 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
497 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
498 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 499 }else{
5d18eaad 500 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 501 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 502 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 503 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 504 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 505 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 506 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 507 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 508 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
509 }
510 }else{
511 int list = list1 ? 1 : 0;
512 int refn = h->ref_cache[list][ scan8[n] ];
513 Picture *ref= &h->ref_list[list][refn];
514 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
515 dest_y, dest_cb, dest_cr, x_offset, y_offset,
516 qpix_put, chroma_put);
517
5d18eaad 518 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
519 h->luma_weight[list][refn], h->luma_offset[list][refn]);
520 if(h->use_weight_chroma){
5d18eaad 521 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 522 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 523 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
524 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
525 }
526 }
527}
528
529static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
530 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
531 int x_offset, int y_offset,
532 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
533 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 534 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
535 int list0, int list1){
536 if((h->use_weight==2 && list0 && list1
537 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
538 || h->use_weight==1)
539 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
540 x_offset, y_offset, qpix_put, chroma_put,
541 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
542 else
543 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
544 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
545}
546
513fbd8e
LM
547static inline void prefetch_motion(H264Context *h, int list){
548 /* fetch pixels for estimated mv 4 macroblocks ahead
549 * optimized for 64byte cache lines */
550 MpegEncContext * const s = &h->s;
551 const int refn = h->ref_cache[list][scan8[0]];
552 if(refn >= 0){
553 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
554 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
555 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 556 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
557 s->dsp.prefetch(src[0]+off, s->linesize, 4);
558 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
559 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
560 }
561}
562
0da71265
MN
563static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
564 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
565 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
566 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 567 MpegEncContext * const s = &h->s;
64514ee8 568 const int mb_xy= h->mb_xy;
0da71265 569 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 570
0da71265 571 assert(IS_INTER(mb_type));
115329f1 572
513fbd8e
LM
573 prefetch_motion(h, 0);
574
0da71265
MN
575 if(IS_16X16(mb_type)){
576 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
577 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
7231ccf4 578 weight_op, weight_avg,
0da71265
MN
579 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
580 }else if(IS_16X8(mb_type)){
581 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
582 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 583 &weight_op[1], &weight_avg[1],
0da71265
MN
584 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
585 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
586 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 587 &weight_op[1], &weight_avg[1],
0da71265
MN
588 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
589 }else if(IS_8X16(mb_type)){
5d18eaad 590 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 591 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 592 &weight_op[2], &weight_avg[2],
0da71265 593 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 594 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 595 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 596 &weight_op[2], &weight_avg[2],
0da71265
MN
597 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
598 }else{
599 int i;
115329f1 600
0da71265
MN
601 assert(IS_8X8(mb_type));
602
603 for(i=0; i<4; i++){
604 const int sub_mb_type= h->sub_mb_type[i];
605 const int n= 4*i;
606 int x_offset= (i&1)<<2;
607 int y_offset= (i&2)<<1;
608
609 if(IS_SUB_8X8(sub_mb_type)){
610 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
611 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 612 &weight_op[3], &weight_avg[3],
0da71265
MN
613 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
614 }else if(IS_SUB_8X4(sub_mb_type)){
615 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
616 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 617 &weight_op[4], &weight_avg[4],
0da71265
MN
618 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
619 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
620 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 621 &weight_op[4], &weight_avg[4],
0da71265
MN
622 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
623 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 624 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 625 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 626 &weight_op[5], &weight_avg[5],
0da71265 627 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 628 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 629 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 630 &weight_op[5], &weight_avg[5],
0da71265
MN
631 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
632 }else{
633 int j;
634 assert(IS_SUB_4X4(sub_mb_type));
635 for(j=0; j<4; j++){
636 int sub_x_offset= x_offset + 2*(j&1);
637 int sub_y_offset= y_offset + (j&2);
638 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
639 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 640 &weight_op[6], &weight_avg[6],
0da71265
MN
641 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
642 }
643 }
644 }
645 }
513fbd8e
LM
646
647 prefetch_motion(h, 1);
0da71265
MN
648}
649
0da71265 650
0da71265 651static void free_tables(H264Context *h){
7978debd 652 int i;
afebe2f7 653 H264Context *hx;
0da71265 654 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
655 av_freep(&h->chroma_pred_mode_table);
656 av_freep(&h->cbp_table);
9e528114
LA
657 av_freep(&h->mvd_table[0]);
658 av_freep(&h->mvd_table[1]);
5ad984c9 659 av_freep(&h->direct_table);
0da71265
MN
660 av_freep(&h->non_zero_count);
661 av_freep(&h->slice_table_base);
662 h->slice_table= NULL;
c988f975 663 av_freep(&h->list_counts);
e5017ab8 664
0da71265
MN
665 av_freep(&h->mb2b_xy);
666 av_freep(&h->mb2b8_xy);
9f2d1b4f 667
6752dd5a 668 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
669 hx = h->thread_context[i];
670 if(!hx) continue;
671 av_freep(&hx->top_borders[1]);
672 av_freep(&hx->top_borders[0]);
673 av_freep(&hx->s.obmc_scratchpad);
d2d5e067
AS
674 av_freep(&hx->rbsp_buffer[1]);
675 av_freep(&hx->rbsp_buffer[0]);
eda4ea4e
MS
676 hx->rbsp_buffer_size[0] = 0;
677 hx->rbsp_buffer_size[1] = 0;
d2d5e067 678 if (i) av_freep(&h->thread_context[i]);
afebe2f7 679 }
0da71265
MN
680}
681
239ea04c
LM
682static void init_dequant8_coeff_table(H264Context *h){
683 int i,q,x;
548a1c8a 684 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
685 h->dequant8_coeff[0] = h->dequant8_buffer[0];
686 h->dequant8_coeff[1] = h->dequant8_buffer[1];
687
688 for(i=0; i<2; i++ ){
689 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
690 h->dequant8_coeff[1] = h->dequant8_buffer[0];
691 break;
692 }
693
694 for(q=0; q<52; q++){
d9ec210b
DP
695 int shift = div6[q];
696 int idx = rem6[q];
239ea04c 697 for(x=0; x<64; x++)
548a1c8a
LM
698 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
699 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
700 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
701 }
702 }
703}
704
705static void init_dequant4_coeff_table(H264Context *h){
706 int i,j,q,x;
ab2e3e2c 707 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
708 for(i=0; i<6; i++ ){
709 h->dequant4_coeff[i] = h->dequant4_buffer[i];
710 for(j=0; j<i; j++){
711 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
712 h->dequant4_coeff[i] = h->dequant4_buffer[j];
713 break;
714 }
715 }
716 if(j<i)
717 continue;
718
719 for(q=0; q<52; q++){
d9ec210b
DP
720 int shift = div6[q] + 2;
721 int idx = rem6[q];
239ea04c 722 for(x=0; x<16; x++)
ab2e3e2c
LM
723 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
724 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
725 h->pps.scaling_matrix4[i][x]) << shift;
726 }
727 }
728}
729
730static void init_dequant_tables(H264Context *h){
731 int i,x;
732 init_dequant4_coeff_table(h);
733 if(h->pps.transform_8x8_mode)
734 init_dequant8_coeff_table(h);
735 if(h->sps.transform_bypass){
736 for(i=0; i<6; i++)
737 for(x=0; x<16; x++)
738 h->dequant4_coeff[i][0][x] = 1<<6;
739 if(h->pps.transform_8x8_mode)
740 for(i=0; i<2; i++)
741 for(x=0; x<64; x++)
742 h->dequant8_coeff[i][0][x] = 1<<6;
743 }
744}
745
746
903d58f6 747int ff_h264_alloc_tables(H264Context *h){
0da71265 748 MpegEncContext * const s = &h->s;
7bc9090a 749 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 750 int x,y;
0da71265 751
d31dbec3 752 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
e5017ab8 753
c988f975 754 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
d31dbec3
RP
755 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
756 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265 757
d31dbec3
RP
758 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
759 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
760 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
761 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
c988f975 762 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
e5017ab8 763
b735aeea 764 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 765 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 766
d31dbec3
RP
767 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
768 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
MN
769 for(y=0; y<s->mb_height; y++){
770 for(x=0; x<s->mb_width; x++){
7bc9090a 771 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
772 const int b_xy = 4*x + 4*y*h->b_stride;
773 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 774
0da71265
MN
775 h->mb2b_xy [mb_xy]= b_xy;
776 h->mb2b8_xy[mb_xy]= b8_xy;
777 }
778 }
9f2d1b4f 779
9c6221ae
GV
780 s->obmc_scratchpad = NULL;
781
56edbd81
LM
782 if(!h->dequant4_coeff[0])
783 init_dequant_tables(h);
784
0da71265
MN
785 return 0;
786fail:
787 free_tables(h);
788 return -1;
789}
790
afebe2f7
791/**
792 * Mimic alloc_tables(), but for every context thread.
793 */
794static void clone_tables(H264Context *dst, H264Context *src){
795 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
796 dst->non_zero_count = src->non_zero_count;
797 dst->slice_table = src->slice_table;
798 dst->cbp_table = src->cbp_table;
799 dst->mb2b_xy = src->mb2b_xy;
800 dst->mb2b8_xy = src->mb2b8_xy;
801 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
802 dst->mvd_table[0] = src->mvd_table[0];
803 dst->mvd_table[1] = src->mvd_table[1];
804 dst->direct_table = src->direct_table;
fb823b77 805 dst->list_counts = src->list_counts;
afebe2f7 806
afebe2f7
807 dst->s.obmc_scratchpad = NULL;
808 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
809}
810
811/**
812 * Init context
813 * Allocate buffers which are not shared amongst multiple threads.
814 */
815static int context_init(H264Context *h){
d31dbec3
RP
816 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
817 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
afebe2f7 818
afebe2f7
819 return 0;
820fail:
821 return -1; // free_tables will clean up for us
822}
823
98a6fff9 824static av_cold void common_init(H264Context *h){
0da71265 825 MpegEncContext * const s = &h->s;
0da71265
MN
826
827 s->width = s->avctx->width;
828 s->height = s->avctx->height;
829 s->codec_id= s->avctx->codec->id;
115329f1 830
c92a30bb 831 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 832
239ea04c 833 h->dequant_coeff_pps= -1;
9a41c2c7 834 s->unrestricted_mv=1;
0da71265 835 s->decode=1; //FIXME
56edbd81 836
a5805aa9
MN
837 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
838
56edbd81
LM
839 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
840 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
841}
842
903d58f6 843av_cold int ff_h264_decode_init(AVCodecContext *avctx){
0da71265
MN
844 H264Context *h= avctx->priv_data;
845 MpegEncContext * const s = &h->s;
846
3edcacde 847 MPV_decode_defaults(s);
115329f1 848
0da71265
MN
849 s->avctx = avctx;
850 common_init(h);
851
852 s->out_format = FMT_H264;
853 s->workaround_bugs= avctx->workaround_bugs;
854
855 // set defaults
0da71265 856// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 857 s->quarter_sample = 1;
47cd974a 858 if(!avctx->has_b_frames)
0da71265 859 s->low_delay= 1;
7a9dba3c 860
580a7465 861 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
0da71265 862
e1e94902 863 ff_h264_decode_init_vlc();
115329f1 864
26165f99
MR
865 if(avctx->extradata_size > 0 && avctx->extradata &&
866 *(char *)avctx->extradata == 1){
4770b1b4
RT
867 h->is_avc = 1;
868 h->got_avcC = 0;
26165f99
MR
869 } else {
870 h->is_avc = 0;
4770b1b4
RT
871 }
872
afebe2f7 873 h->thread_context[0] = h;
18c7be65 874 h->outputed_poc = INT_MIN;
e4b8f1fa 875 h->prev_poc_msb= 1<<16;
055a6aa7 876 h->x264_build = -1;
9c095463 877 ff_h264_reset_sei(h);
efd8c1f6
MN
878 if(avctx->codec_id == CODEC_ID_H264){
879 if(avctx->ticks_per_frame == 1){
880 s->avctx->time_base.den *=2;
881 }
19df37a8 882 avctx->ticks_per_frame = 2;
efd8c1f6 883 }
0da71265
MN
884 return 0;
885}
886
903d58f6 887int ff_h264_frame_start(H264Context *h){
0da71265
MN
888 MpegEncContext * const s = &h->s;
889 int i;
890
af8aa846
MN
891 if(MPV_frame_start(s, s->avctx) < 0)
892 return -1;
0da71265 893 ff_er_frame_start(s);
3a22d7fa
JD
894 /*
895 * MPV_frame_start uses pict_type to derive key_frame.
896 * This is incorrect for H.264; IDR markings must be used.
1412060e 897 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
898 * See decode_nal_units().
899 */
900 s->current_picture_ptr->key_frame= 0;
c173a088 901 s->current_picture_ptr->mmco_reset= 0;
0da71265
MN
902
903 assert(s->linesize && s->uvlinesize);
904
905 for(i=0; i<16; i++){
906 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 907 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
908 }
909 for(i=0; i<4; i++){
910 h->block_offset[16+i]=
911 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
912 h->block_offset[24+16+i]=
913 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
914 }
915
934b0821
LM
916 /* can't be in alloc_tables because linesize isn't known there.
917 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
918 for(i = 0; i < s->avctx->thread_count; i++)
919 if(!h->thread_context[i]->s.obmc_scratchpad)
920 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
921
922 /* some macroblocks will be accessed before they're available */
afebe2f7 923 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 924 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 925
0da71265 926// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 927
1412060e 928 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
929 // that if we break out due to an error it can be released automatically
930 // in the next MPV_frame_start().
931 // SVQ3 as well as most other codecs have only last/next/current and thus
932 // get released even with set reference, besides SVQ3 and others do not
933 // mark frames as reference later "naturally".
934 if(s->codec_id != CODEC_ID_SVQ3)
935 s->current_picture_ptr->reference= 0;
357282c6
MN
936
937 s->current_picture_ptr->field_poc[0]=
938 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 939 assert(s->current_picture_ptr->long_ref==0);
357282c6 940
af8aa846 941 return 0;
0da71265
MN
942}
943
93cc10fa 944static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e 945 MpegEncContext * const s = &h->s;
0b69d625 946 uint8_t *top_border;
5f7f9719 947 int top_idx = 1;
115329f1 948
53c05b1e
MN
949 src_y -= linesize;
950 src_cb -= uvlinesize;
951 src_cr -= uvlinesize;
952
5f7f9719
MN
953 if(!simple && FRAME_MBAFF){
954 if(s->mb_y&1){
5f7f9719 955 if(!MB_MBAFF){
0b69d625
AS
956 top_border = h->top_borders[0][s->mb_x];
957 AV_COPY128(top_border, src_y + 15*linesize);
49fb20cb 958 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
959 AV_COPY64(top_border+16, src_cb+7*uvlinesize);
960 AV_COPY64(top_border+24, src_cr+7*uvlinesize);
5f7f9719
MN
961 }
962 }
c988f975
MN
963 }else if(MB_MBAFF){
964 top_idx = 0;
965 }else
966 return;
5f7f9719
MN
967 }
968
0b69d625 969 top_border = h->top_borders[top_idx][s->mb_x];
3b66c4c5 970 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 971 // and the line above the bottom macroblock
0b69d625 972 AV_COPY128(top_border, src_y + 16*linesize);
53c05b1e 973
49fb20cb 974 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
0b69d625
AS
975 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
976 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
53c05b1e
MN
977 }
978}
979
93cc10fa 980static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e 981 MpegEncContext * const s = &h->s;
b69378e2
982 int deblock_left;
983 int deblock_top;
984 int mb_xy;
5f7f9719 985 int top_idx = 1;
1e4f1c56
AS
986 uint8_t *top_border_m1;
987 uint8_t *top_border;
5f7f9719
MN
988
989 if(!simple && FRAME_MBAFF){
990 if(s->mb_y&1){
c988f975
MN
991 if(!MB_MBAFF)
992 return;
5f7f9719 993 }else{
5f7f9719
MN
994 top_idx = MB_MBAFF ? 0 : 1;
995 }
5f7f9719 996 }
b69378e2
997
998 if(h->deblocking_filter == 2) {
64514ee8 999 mb_xy = h->mb_xy;
b69378e2
1000 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
1001 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
1002 } else {
1003 deblock_left = (s->mb_x > 0);
6c805007 1004 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 1005 }
53c05b1e
MN
1006
1007 src_y -= linesize + 1;
1008 src_cb -= uvlinesize + 1;
1009 src_cr -= uvlinesize + 1;
1010
1e4f1c56
AS
1011 top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1012 top_border = h->top_borders[top_idx][s->mb_x];
1013
0b69d625
AS
1014#define XCHG(a,b,xchg)\
1015if (xchg) AV_SWAP64(b,a);\
1016else AV_COPY64(b,a);
d89dc06a 1017
d89dc06a 1018 if(deblock_top){
c988f975 1019 if(deblock_left){
0b69d625 1020 XCHG(top_border_m1+8, src_y -7, 1);
c988f975 1021 }
0b69d625
AS
1022 XCHG(top_border+0, src_y +1, xchg);
1023 XCHG(top_border+8, src_y +9, 1);
cad4368a 1024 if(s->mb_x+1 < s->mb_width){
0b69d625 1025 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
43efd19a 1026 }
53c05b1e 1027 }
53c05b1e 1028
49fb20cb 1029 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 1030 if(deblock_top){
c988f975 1031 if(deblock_left){
0b69d625
AS
1032 XCHG(top_border_m1+16, src_cb -7, 1);
1033 XCHG(top_border_m1+24, src_cr -7, 1);
c988f975 1034 }
0b69d625
AS
1035 XCHG(top_border+16, src_cb+1, 1);
1036 XCHG(top_border+24, src_cr+1, 1);
53c05b1e 1037 }
53c05b1e
MN
1038 }
1039}
1040
5a6a6cc7 1041static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
1042 MpegEncContext * const s = &h->s;
1043 const int mb_x= s->mb_x;
1044 const int mb_y= s->mb_y;
64514ee8 1045 const int mb_xy= h->mb_xy;
0da71265
MN
1046 const int mb_type= s->current_picture.mb_type[mb_xy];
1047 uint8_t *dest_y, *dest_cb, *dest_cr;
1048 int linesize, uvlinesize /*dct_offset*/;
1049 int i;
6867a90b 1050 int *block_offset = &h->block_offset[0];
41e4055b 1051 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 1052 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 1053 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 1054 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 1055 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 1056
6120a343
MN
1057 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1058 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1059 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 1060
a957c27b
LM
1061 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1062 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1063
c988f975
MN
1064 h->list_counts[mb_xy]= h->list_count;
1065
bd91fee3 1066 if (!simple && MB_FIELD) {
5d18eaad
LM
1067 linesize = h->mb_linesize = s->linesize * 2;
1068 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 1069 block_offset = &h->block_offset[24];
1412060e 1070 if(mb_y&1){ //FIXME move out of this function?
0da71265 1071 dest_y -= s->linesize*15;
6867a90b
LLL
1072 dest_cb-= s->uvlinesize*7;
1073 dest_cr-= s->uvlinesize*7;
0da71265 1074 }
5d18eaad
LM
1075 if(FRAME_MBAFF) {
1076 int list;
3425501d 1077 for(list=0; list<h->list_count; list++){
5d18eaad
LM
1078 if(!USES_LIST(mb_type, list))
1079 continue;
1080 if(IS_16X16(mb_type)){
1081 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 1082 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
1083 }else{
1084 for(i=0; i<16; i+=4){
5d18eaad
LM
1085 int ref = h->ref_cache[list][scan8[i]];
1086 if(ref >= 0)
1710856c 1087 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
1088 }
1089 }
1090 }
1091 }
0da71265 1092 } else {
5d18eaad
LM
1093 linesize = h->mb_linesize = s->linesize;
1094 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
1095// dct_offset = s->linesize * 16;
1096 }
115329f1 1097
bd91fee3 1098 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
1099 for (i=0; i<16; i++) {
1100 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 1101 }
c1708e8d
MN
1102 for (i=0; i<8; i++) {
1103 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1104 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 1105 }
e7e09b49
LLL
1106 } else {
1107 if(IS_INTRA(mb_type)){
5f7f9719 1108 if(h->deblocking_filter)
93cc10fa 1109 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 1110
49fb20cb 1111 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
1112 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1113 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 1114 }
0da71265 1115
e7e09b49 1116 if(IS_INTRA4x4(mb_type)){
bd91fee3 1117 if(simple || !s->encoding){
43efd19a 1118 if(IS_8x8DCT(mb_type)){
1eb96035
MN
1119 if(transform_bypass){
1120 idct_dc_add =
1121 idct_add = s->dsp.add_pixels8;
dae006d7 1122 }else{
1eb96035
MN
1123 idct_dc_add = s->dsp.h264_idct8_dc_add;
1124 idct_add = s->dsp.h264_idct8_add;
1125 }
43efd19a
LM
1126 for(i=0; i<16; i+=4){
1127 uint8_t * const ptr= dest_y + block_offset[i];
1128 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
1129 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1130 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1131 }else{
ac0623b2
MN
1132 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1133 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1134 (h->topright_samples_available<<i)&0x4000, linesize);
1135 if(nnz){
1136 if(nnz == 1 && h->mb[i*16])
1137 idct_dc_add(ptr, h->mb + i*16, linesize);
1138 else
1139 idct_add (ptr, h->mb + i*16, linesize);
1140 }
41e4055b 1141 }
43efd19a 1142 }
1eb96035
MN
1143 }else{
1144 if(transform_bypass){
1145 idct_dc_add =
1146 idct_add = s->dsp.add_pixels4;
1147 }else{
1148 idct_dc_add = s->dsp.h264_idct_dc_add;
1149 idct_add = s->dsp.h264_idct_add;
1150 }
aebb5d6d
MN
1151 for(i=0; i<16; i++){
1152 uint8_t * const ptr= dest_y + block_offset[i];
1153 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 1154
aebb5d6d
MN
1155 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1156 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1157 }else{
1158 uint8_t *topright;
1159 int nnz, tr;
1160 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1161 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1162 assert(mb_y || linesize <= block_offset[i]);
1163 if(!topright_avail){
1164 tr= ptr[3 - linesize]*0x01010101;
1165 topright= (uint8_t*) &tr;
1166 }else
1167 topright= ptr + 4 - linesize;
ac0623b2 1168 }else
aebb5d6d
MN
1169 topright= NULL;
1170
1171 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1172 nnz = h->non_zero_count_cache[ scan8[i] ];
1173 if(nnz){
1174 if(is_h264){
1175 if(nnz == 1 && h->mb[i*16])
1176 idct_dc_add(ptr, h->mb + i*16, linesize);
1177 else
1178 idct_add (ptr, h->mb + i*16, linesize);
1179 }else
881b5b80 1180 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
aebb5d6d 1181 }
ac0623b2 1182 }
41e4055b 1183 }
8b82a956 1184 }
0da71265 1185 }
e7e09b49 1186 }else{
c92a30bb 1187 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 1188 if(is_h264){
36940eca 1189 if(!transform_bypass)
93f0c0a4 1190 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 1191 }else
881b5b80 1192 ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 1193 }
5f7f9719 1194 if(h->deblocking_filter)
93cc10fa 1195 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 1196 }else if(is_h264){
e7e09b49 1197 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
1198 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1199 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 1200 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 1201 }
e7e09b49
LLL
1202
1203
1204 if(!IS_INTRA4x4(mb_type)){
bd91fee3 1205 if(is_h264){
ef9d1d15 1206 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
1207 if(transform_bypass){
1208 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
1209 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1210 }else{
1211 for(i=0; i<16; i++){
1212 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 1213 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1214 }
2fd1f0e0
MN
1215 }
1216 }else{
1217 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 1218 }
49c084a7 1219 }else if(h->cbp&15){
2fd1f0e0 1220 if(transform_bypass){
0a8ca22f 1221 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 1222 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 1223 for(i=0; i<16; i+=di){
62bc966f 1224 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 1225 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1226 }
ef9d1d15 1227 }
2fd1f0e0
MN
1228 }else{
1229 if(IS_8x8DCT(mb_type)){
1230 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1231 }else{
1232 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1233 }
1234 }
4704097a 1235 }
e7e09b49
LLL
1236 }else{
1237 for(i=0; i<16; i++){
1238 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 1239 uint8_t * const ptr= dest_y + block_offset[i];
881b5b80 1240 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
e7e09b49 1241 }
4704097a 1242 }
0da71265
MN
1243 }
1244 }
0da71265 1245
49fb20cb 1246 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
1247 uint8_t *dest[2] = {dest_cb, dest_cr};
1248 if(transform_bypass){
96465b90
MN
1249 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1250 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1251 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1252 }else{
c25ac15a 1253 idct_add = s->dsp.add_pixels4;
96465b90
MN
1254 for(i=16; i<16+8; i++){
1255 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1256 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1257 }
1258 }
ef9d1d15 1259 }else{
4691a77d
1260 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1261 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 1262 if(is_h264){
c25ac15a
MN
1263 idct_add = s->dsp.h264_idct_add;
1264 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
1265 for(i=16; i<16+8; i++){
1266 if(h->non_zero_count_cache[ scan8[i] ])
1267 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1268 else if(h->mb[i*16])
1269 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1270 }
aebb5d6d
MN
1271 }else{
1272 for(i=16; i<16+8; i++){
1273 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
1274 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
881b5b80 1275 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
aebb5d6d 1276 }
e7e09b49 1277 }
4704097a 1278 }
0da71265
MN
1279 }
1280 }
1281 }
c212fb0c
MN
1282 if(h->cbp || IS_INTRA(mb_type))
1283 s->dsp.clear_blocks(h->mb);
0da71265
MN
1284}
1285
0da71265 1286/**
bd91fee3
AS
1287 * Process a macroblock; this case avoids checks for expensive uncommon cases.
1288 */
1289static void hl_decode_mb_simple(H264Context *h){
1290 hl_decode_mb_internal(h, 1);
1291}
1292
1293/**
1294 * Process a macroblock; this handles edge cases, such as interlacing.
1295 */
1296static void av_noinline hl_decode_mb_complex(H264Context *h){
1297 hl_decode_mb_internal(h, 0);
1298}
1299
903d58f6 1300void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3 1301 MpegEncContext * const s = &h->s;
64514ee8 1302 const int mb_xy= h->mb_xy;
bd91fee3 1303 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 1304 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 1305
bd91fee3
AS
1306 if (is_complex)
1307 hl_decode_mb_complex(h);
1308 else hl_decode_mb_simple(h);
1309}
1310
0da71265
MN
1311static int pred_weight_table(H264Context *h){
1312 MpegEncContext * const s = &h->s;
1313 int list, i;
9f2d1b4f 1314 int luma_def, chroma_def;
115329f1 1315
9f2d1b4f
LM
1316 h->use_weight= 0;
1317 h->use_weight_chroma= 0;
0da71265
MN
1318 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
1319 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
1320 luma_def = 1<<h->luma_log2_weight_denom;
1321 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
1322
1323 for(list=0; list<2; list++){
cb99c652
GB
1324 h->luma_weight_flag[list] = 0;
1325 h->chroma_weight_flag[list] = 0;
0da71265
MN
1326 for(i=0; i<h->ref_count[list]; i++){
1327 int luma_weight_flag, chroma_weight_flag;
115329f1 1328
0da71265
MN
1329 luma_weight_flag= get_bits1(&s->gb);
1330 if(luma_weight_flag){
1331 h->luma_weight[list][i]= get_se_golomb(&s->gb);
1332 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f 1333 if( h->luma_weight[list][i] != luma_def
cb99c652 1334 || h->luma_offset[list][i] != 0) {
9f2d1b4f 1335 h->use_weight= 1;
cb99c652
GB
1336 h->luma_weight_flag[list]= 1;
1337 }
9f2d1b4f
LM
1338 }else{
1339 h->luma_weight[list][i]= luma_def;
1340 h->luma_offset[list][i]= 0;
0da71265
MN
1341 }
1342
0af6967e 1343 if(CHROMA){
fef744d4
MN
1344 chroma_weight_flag= get_bits1(&s->gb);
1345 if(chroma_weight_flag){
1346 int j;
1347 for(j=0; j<2; j++){
1348 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
1349 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
1350 if( h->chroma_weight[list][i][j] != chroma_def
cb99c652 1351 || h->chroma_offset[list][i][j] != 0) {
fef744d4 1352 h->use_weight_chroma= 1;
cb99c652
GB
1353 h->chroma_weight_flag[list]= 1;
1354 }
fef744d4
MN
1355 }
1356 }else{
1357 int j;
1358 for(j=0; j<2; j++){
1359 h->chroma_weight[list][i][j]= chroma_def;
1360 h->chroma_offset[list][i][j]= 0;
1361 }
0da71265
MN
1362 }
1363 }
1364 }
9f5c1037 1365 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 1366 }
9f2d1b4f 1367 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
1368 return 0;
1369}
1370
9f2d1b4f
LM
1371static void implicit_weight_table(H264Context *h){
1372 MpegEncContext * const s = &h->s;
cb99c652 1373 int ref0, ref1, i;
9f2d1b4f
LM
1374 int cur_poc = s->current_picture_ptr->poc;
1375
ce09f927
GB
1376 for (i = 0; i < 2; i++) {
1377 h->luma_weight_flag[i] = 0;
1378 h->chroma_weight_flag[i] = 0;
1379 }
1380
9f2d1b4f
LM
1381 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
1382 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1383 h->use_weight= 0;
1384 h->use_weight_chroma= 0;
1385 return;
1386 }
1387
1388 h->use_weight= 2;
1389 h->use_weight_chroma= 2;
1390 h->luma_log2_weight_denom= 5;
1391 h->chroma_log2_weight_denom= 5;
1392
9f2d1b4f
LM
1393 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
1394 int poc0 = h->ref_list[0][ref0].poc;
1395 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 1396 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 1397 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 1398 if(td){
f66e4f5f 1399 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 1400 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 1401 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
1402 if(dist_scale_factor < -64 || dist_scale_factor > 128)
1403 h->implicit_weight[ref0][ref1] = 32;
1404 else
1405 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
1406 }else
1407 h->implicit_weight[ref0][ref1] = 32;
1408 }
1409 }
1410}
1411
8fd57a66 1412/**
5175b937 1413 * instantaneous decoder refresh.
0da71265
MN
1414 */
1415static void idr(H264Context *h){
ea6f00c4 1416 ff_h264_remove_all_refs(h);
a149c1a5 1417 h->prev_frame_num= 0;
80f8e035
MN
1418 h->prev_frame_num_offset= 0;
1419 h->prev_poc_msb=
1420 h->prev_poc_lsb= 0;
0da71265
MN
1421}
1422
7c33ad19
LM
1423/* forget old pics after a seek */
1424static void flush_dpb(AVCodecContext *avctx){
1425 H264Context *h= avctx->priv_data;
1426 int i;
64b9d48f 1427 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
1428 if(h->delayed_pic[i])
1429 h->delayed_pic[i]->reference= 0;
7c33ad19 1430 h->delayed_pic[i]= NULL;
285b570f 1431 }
df8a7dff 1432 h->outputed_poc= INT_MIN;
b19d493f 1433 h->prev_interlaced_frame = 1;
7c33ad19 1434 idr(h);
ca159196
MR
1435 if(h->s.current_picture_ptr)
1436 h->s.current_picture_ptr->reference= 0;
12d96de3 1437 h->s.first_field= 0;
9c095463 1438 ff_h264_reset_sei(h);
e240f898 1439 ff_mpeg_flush(avctx);
7c33ad19
LM
1440}
1441
0da71265
MN
1442static int init_poc(H264Context *h){
1443 MpegEncContext * const s = &h->s;
1444 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1445 int field_poc[2];
357282c6 1446 Picture *cur = s->current_picture_ptr;
0da71265 1447
b78a6baa 1448 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 1449 if(h->frame_num < h->prev_frame_num)
b78a6baa 1450 h->frame_num_offset += max_frame_num;
0da71265
MN
1451
1452 if(h->sps.poc_type==0){
1453 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1454
1455 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1456 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1457 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1458 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1459 else
1460 h->poc_msb = h->prev_poc_msb;
1461//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 1462 field_poc[0] =
0da71265 1463 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 1464 if(s->picture_structure == PICT_FRAME)
0da71265
MN
1465 field_poc[1] += h->delta_poc_bottom;
1466 }else if(h->sps.poc_type==1){
1467 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1468 int i;
1469
1470 if(h->sps.poc_cycle_length != 0)
1471 abs_frame_num = h->frame_num_offset + h->frame_num;
1472 else
1473 abs_frame_num = 0;
1474
1475 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1476 abs_frame_num--;
115329f1 1477
0da71265
MN
1478 expected_delta_per_poc_cycle = 0;
1479 for(i=0; i < h->sps.poc_cycle_length; i++)
1480 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1481
1482 if(abs_frame_num > 0){
1483 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1484 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1485
1486 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1487 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1488 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1489 } else
1490 expectedpoc = 0;
1491
115329f1 1492 if(h->nal_ref_idc == 0)
0da71265 1493 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 1494
0da71265
MN
1495 field_poc[0] = expectedpoc + h->delta_poc[0];
1496 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1497
1498 if(s->picture_structure == PICT_FRAME)
1499 field_poc[1] += h->delta_poc[1];
1500 }else{
b78a6baa 1501 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 1502
b78a6baa
MN
1503 if(!h->nal_ref_idc)
1504 poc--;
5710b371 1505
0da71265
MN
1506 field_poc[0]= poc;
1507 field_poc[1]= poc;
1508 }
115329f1 1509
357282c6 1510 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 1511 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 1512 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 1513 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 1514 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
1515
1516 return 0;
1517}
1518
b41c1db3
1519
1520/**
1521 * initialize scan tables
1522 */
1523static void init_scan_tables(H264Context *h){
1524 MpegEncContext * const s = &h->s;
1525 int i;
1526 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
1527 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
1528 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
1529 }else{
1530 for(i=0; i<16; i++){
1531#define T(x) (x>>2) | ((x<<2) & 0xF)
1532 h->zigzag_scan[i] = T(zigzag_scan[i]);
1533 h-> field_scan[i] = T( field_scan[i]);
1534#undef T
1535 }
1536 }
1537 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
45beb850 1538 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
b41c1db3
1539 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
1540 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
1541 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
1542 }else{
1543 for(i=0; i<64; i++){
1544#define T(x) (x>>3) | ((x&7)<<3)
45beb850 1545 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
b41c1db3
1546 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1547 h->field_scan8x8[i] = T(field_scan8x8[i]);
1548 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
1549#undef T
1550 }
1551 }
1552 if(h->sps.transform_bypass){ //FIXME same ugly
1553 h->zigzag_scan_q0 = zigzag_scan;
45beb850 1554 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
1555 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1556 h->field_scan_q0 = field_scan;
1557 h->field_scan8x8_q0 = field_scan8x8;
1558 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1559 }else{
1560 h->zigzag_scan_q0 = h->zigzag_scan;
1561 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1562 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1563 h->field_scan_q0 = h->field_scan;
1564 h->field_scan8x8_q0 = h->field_scan8x8;
1565 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
1566 }
1567}
afebe2f7 1568
256299d3
MN
1569static void field_end(H264Context *h){
1570 MpegEncContext * const s = &h->s;
1571 AVCodecContext * const avctx= s->avctx;
1572 s->mb_y= 0;
1573
1574 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1575 s->current_picture_ptr->pict_type= s->pict_type;
1576
1577 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1578 ff_vdpau_h264_set_reference_frames(s);
1579
1580 if(!s->dropable) {
ea6f00c4 1581 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
1582 h->prev_poc_msb= h->poc_msb;
1583 h->prev_poc_lsb= h->poc_lsb;
1584 }
1585 h->prev_frame_num_offset= h->frame_num_offset;
1586 h->prev_frame_num= h->frame_num;
1587
1588 if (avctx->hwaccel) {
1589 if (avctx->hwaccel->end_frame(avctx) < 0)
1590 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1591 }
1592
1593 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1594 ff_vdpau_h264_picture_complete(s);
1595
1596 /*
1597 * FIXME: Error handling code does not seem to support interlaced
1598 * when slices span multiple rows
1599 * The ff_er_add_slice calls don't work right for bottom
1600 * fields; they cause massive erroneous error concealing
1601 * Error marking covers both fields (top and bottom).
1602 * This causes a mismatched s->error_count
1603 * and a bad error table. Further, the error count goes to
1604 * INT_MAX when called for bottom field, because mb_y is
1605 * past end by one (callers fault) and resync_mb_y != 0
1606 * causes problems for the first MB line, too.
1607 */
1608 if (!FIELD_PICTURE)
1609 ff_er_frame_end(s);
1610
1611 MPV_frame_end(s);
d225a1e2
MN
1612
1613 h->current_slice=0;
256299d3
MN
1614}
1615
afebe2f7
1616/**
1617 * Replicates H264 "master" context to thread contexts.
1618 */
1619static void clone_slice(H264Context *dst, H264Context *src)
1620{
1621 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1622 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1623 dst->s.current_picture = src->s.current_picture;
1624 dst->s.linesize = src->s.linesize;
1625 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 1626 dst->s.first_field = src->s.first_field;
afebe2f7
1627
1628 dst->prev_poc_msb = src->prev_poc_msb;
1629 dst->prev_poc_lsb = src->prev_poc_lsb;
1630 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1631 dst->prev_frame_num = src->prev_frame_num;
1632 dst->short_ref_count = src->short_ref_count;
1633
1634 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1635 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1636 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1637 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
1638
1639 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1640 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
1641}
1642
0da71265
MN
1643/**
1644 * decodes a slice header.
9c852bcf 1645 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
1646 *
1647 * @param h h264context
1648 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1649 *
d9526386 1650 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 1651 */
afebe2f7 1652static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 1653 MpegEncContext * const s = &h->s;
12d96de3 1654 MpegEncContext * const s0 = &h0->s;
88e7a4d1 1655 unsigned int first_mb_in_slice;
ac658be5 1656 unsigned int pps_id;
0da71265 1657 int num_ref_idx_active_override_flag;
41f5c62f 1658 unsigned int slice_type, tmp, i, j;
0bf79634 1659 int default_ref_list_done = 0;
12d96de3 1660 int last_pic_structure;
0da71265 1661
2f944356 1662 s->dropable= h->nal_ref_idc == 0;
0da71265 1663
cf653d08
JD
1664 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1665 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1666 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1667 }else{
1668 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1669 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1670 }
1671
0da71265
MN
1672 first_mb_in_slice= get_ue_golomb(&s->gb);
1673
d225a1e2
MN
1674 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1675 if(h0->current_slice && FIELD_PICTURE){
1676 field_end(h);
1677 }
1678
afebe2f7 1679 h0->current_slice = 0;
12d96de3 1680 if (!s0->first_field)
f6e3c460 1681 s->current_picture_ptr= NULL;
66a4b2c1
MN
1682 }
1683
9963b332 1684 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 1685 if(slice_type > 9){
9b879566 1686 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 1687 return -1;
0da71265 1688 }
0bf79634
LLL
1689 if(slice_type > 4){
1690 slice_type -= 5;
0da71265
MN
1691 h->slice_type_fixed=1;
1692 }else
1693 h->slice_type_fixed=0;
115329f1 1694
ee2a957f 1695 slice_type= golomb_to_pict_type[ slice_type ];
9701840b 1696 if (slice_type == FF_I_TYPE
afebe2f7 1697 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
1698 default_ref_list_done = 1;
1699 }
1700 h->slice_type= slice_type;
e3e6f18f 1701 h->slice_type_nos= slice_type & 3;
0bf79634 1702
1412060e 1703 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
115329f1 1704
0da71265 1705 pps_id= get_ue_golomb(&s->gb);
ac658be5 1706 if(pps_id>=MAX_PPS_COUNT){
9b879566 1707 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
1708 return -1;
1709 }
afebe2f7 1710 if(!h0->pps_buffers[pps_id]) {
a0f80050 1711 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
1712 return -1;
1713 }
afebe2f7 1714 h->pps= *h0->pps_buffers[pps_id];
8b92b792 1715
afebe2f7 1716 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 1717 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
1718 return -1;
1719 }
afebe2f7 1720 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 1721
50c21814 1722 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 1723 h->dequant_coeff_pps = pps_id;
239ea04c
LM
1724 init_dequant_tables(h);
1725 }
115329f1 1726
0da71265 1727 s->mb_width= h->sps.mb_width;
6867a90b 1728 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 1729
bf4665ee
DM
1730 h->b_stride= s->mb_width*4;
1731 h->b8_stride= s->mb_width*2;
0da71265 1732
faf3dfb9 1733 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 1734 if(h->sps.frame_mbs_only_flag)
faf3dfb9 1735 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 1736 else
faf3dfb9 1737 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
115329f1
DB
1738
1739 if (s->context_initialized
5ff85f1d 1740 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
afebe2f7
1741 if(h != h0)
1742 return -1; // width / height changed during parallelized decoding
0da71265 1743 free_tables(h);
ff7f75e1 1744 flush_dpb(s->avctx);
0da71265
MN
1745 MPV_common_end(s);
1746 }
1747 if (!s->context_initialized) {
afebe2f7
1748 if(h != h0)
1749 return -1; // we cant (re-)initialize context during parallel decoding
f3bdc3da
RD
1750
1751 avcodec_set_dimensions(s->avctx, s->width, s->height);
1752 s->avctx->sample_aspect_ratio= h->sps.sar;
1753 if(!s->avctx->sample_aspect_ratio.den)
1754 s->avctx->sample_aspect_ratio.den = 1;
1755
c4dffe7e
DC
1756 if(h->sps.video_signal_type_present_flag){
1757 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1758 if(h->sps.colour_description_present_flag){
1759 s->avctx->color_primaries = h->sps.color_primaries;
1760 s->avctx->color_trc = h->sps.color_trc;
1761 s->avctx->colorspace = h->sps.colorspace;
1762 }
1763 }
1764
f3bdc3da 1765 if(h->sps.timing_info_present_flag){
3102d180 1766 int64_t den= h->sps.time_scale;
055a6aa7 1767 if(h->x264_build < 44U)
3102d180 1768 den *= 2;
f3bdc3da 1769 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180 1770 h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da
RD
1771 }
1772 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
1773 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1774
0da71265
MN
1775 if (MPV_common_init(s) < 0)
1776 return -1;
12d96de3 1777 s->first_field = 0;
b19d493f 1778 h->prev_interlaced_frame = 1;
115329f1 1779
b41c1db3 1780 init_scan_tables(h);
903d58f6 1781 ff_h264_alloc_tables(h);
0da71265 1782
afebe2f7
1783 for(i = 1; i < s->avctx->thread_count; i++) {
1784 H264Context *c;
1785 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 1786 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7
1787 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
1788 c->sps = h->sps;
1789 c->pps = h->pps;
1790 init_scan_tables(c);
1791 clone_tables(c, h);
1792 }
1793
1794 for(i = 0; i < s->avctx->thread_count; i++)
1795 if(context_init(h->thread_context[i]) < 0)
1796 return -1;
0da71265
MN
1797 }
1798
0da71265
MN
1799 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1800
5d18eaad 1801 h->mb_mbaff = 0;
6ba71fc4 1802 h->mb_aff_frame = 0;
12d96de3 1803 last_pic_structure = s0->picture_structure;
0da71265
MN
1804 if(h->sps.frame_mbs_only_flag){
1805 s->picture_structure= PICT_FRAME;
1806 }else{
6ba71fc4 1807 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 1808 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 1809 } else {
0da71265 1810 s->picture_structure= PICT_FRAME;
6ba71fc4 1811 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 1812 }
0da71265 1813 }
44e9dcf1 1814 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
1815
1816 if(h0->current_slice == 0){
26b86e47
MN
1817 while(h->frame_num != h->prev_frame_num &&
1818 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
1819 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 1820 if (ff_h264_frame_start(h) < 0)
66e6038c 1821 return -1;
26b86e47
MN
1822 h->prev_frame_num++;
1823 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1824 s->current_picture_ptr->frame_num= h->prev_frame_num;
ea6f00c4 1825 ff_h264_execute_ref_pic_marking(h, NULL, 0);
26b86e47
MN
1826 }
1827
12d96de3
JD
1828 /* See if we have a decoded first field looking for a pair... */
1829 if (s0->first_field) {
1830 assert(s0->current_picture_ptr);
1831 assert(s0->current_picture_ptr->data[0]);
1832 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1833
1834 /* figure out if we have a complementary field pair */
1835 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1836 /*
1837 * Previous field is unmatched. Don't display it, but let it
1838 * remain for reference if marked as such.
1839 */
1840 s0->current_picture_ptr = NULL;
1841 s0->first_field = FIELD_PICTURE;
1842
1843 } else {
1844 if (h->nal_ref_idc &&
1845 s0->current_picture_ptr->reference &&
1846 s0->current_picture_ptr->frame_num != h->frame_num) {
1847 /*
1848 * This and previous field were reference, but had
1849 * different frame_nums. Consider this field first in
1850 * pair. Throw away previous field except for reference
1851 * purposes.
1852 */
1853 s0->first_field = 1;
1854 s0->current_picture_ptr = NULL;
1855
1856 } else {
1857 /* Second field in complementary pair */
1858 s0->first_field = 0;
1859 }
1860 }
1861
1862 } else {
1863 /* Frame or first field in a potentially complementary pair */
1864 assert(!s0->current_picture_ptr);
1865 s0->first_field = FIELD_PICTURE;
1866 }
1867
903d58f6 1868 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 1869 s0->first_field = 0;
2ddcf84b 1870 return -1;
12d96de3 1871 }
2ddcf84b
JD
1872 }
1873 if(h != h0)
1874 clone_slice(h, h0);
1875
1876 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1877
88e7a4d1 1878 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 1879 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
1880 first_mb_in_slice >= s->mb_num){
1881 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
1882 return -1;
1883 }
88e7a4d1 1884 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
1885 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1886 if (s->picture_structure == PICT_BOTTOM_FIELD)
1887 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 1888 assert(s->mb_y < s->mb_height);
115329f1 1889
0da71265
MN
1890 if(s->picture_structure==PICT_FRAME){
1891 h->curr_pic_num= h->frame_num;
1892 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1893 }else{
f57e2af6 1894 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
1895 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1896 }
115329f1 1897
0da71265 1898 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 1899 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 1900 }
115329f1 1901
0da71265
MN
1902 if(h->sps.poc_type==0){
1903 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 1904
0da71265
MN
1905 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1906 h->delta_poc_bottom= get_se_golomb(&s->gb);
1907 }
1908 }
115329f1 1909
0da71265
MN
1910 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
1911 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 1912
0da71265
MN
1913 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
1914 h->delta_poc[1]= get_se_golomb(&s->gb);
1915 }
115329f1 1916
0da71265 1917 init_poc(h);
115329f1 1918
0da71265
MN
1919 if(h->pps.redundant_pic_cnt_present){
1920 h->redundant_pic_count= get_ue_golomb(&s->gb);
1921 }
1922
1412060e 1923 //set defaults, might be overridden a few lines later
0da71265
MN
1924 h->ref_count[0]= h->pps.ref_count[0];
1925 h->ref_count[1]= h->pps.ref_count[1];
1926
e3e6f18f 1927 if(h->slice_type_nos != FF_I_TYPE){
9f5c1037 1928 if(h->slice_type_nos == FF_B_TYPE){
0da71265
MN
1929 h->direct_spatial_mv_pred= get_bits1(&s->gb);
1930 }
1931 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 1932
0da71265
MN
1933 if(num_ref_idx_active_override_flag){
1934 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
9f5c1037 1935 if(h->slice_type_nos==FF_B_TYPE)
0da71265
MN
1936 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
1937
187696fa 1938 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 1939 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 1940 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
1941 return -1;
1942 }
1943 }
9f5c1037 1944 if(h->slice_type_nos == FF_B_TYPE)
187696fa
MN
1945 h->list_count= 2;
1946 else
1947 h->list_count= 1;
1948 }else
1949 h->list_count= 0;
0da71265 1950
0bf79634 1951 if(!default_ref_list_done){
ea6f00c4 1952 ff_h264_fill_default_ref_list(h);
0da71265
MN
1953 }
1954
ea6f00c4 1955 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 1956 return -1;
0da71265 1957
07dff5c7
MN
1958 if(h->slice_type_nos!=FF_I_TYPE){
1959 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 1960 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
MN
1961 }
1962 if(h->slice_type_nos==FF_B_TYPE){
1963 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 1964 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
1965 }
1966
932f396f 1967 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
9f5c1037 1968 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
0da71265 1969 pred_weight_table(h);
9f5c1037 1970 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
9f2d1b4f 1971 implicit_weight_table(h);
cb99c652 1972 else {
9f2d1b4f 1973 h->use_weight = 0;
cb99c652
GB
1974 for (i = 0; i < 2; i++) {
1975 h->luma_weight_flag[i] = 0;
1976 h->chroma_weight_flag[i] = 0;
1977 }
1978 }
115329f1 1979
2ddcf84b 1980 if(h->nal_ref_idc)
ea6f00c4 1981 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 1982
5d18eaad 1983 if(FRAME_MBAFF)
ea6f00c4 1984 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 1985
8f56e219 1986 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
943f69a6
MN
1987 ff_h264_direct_dist_scale_factor(h);
1988 ff_h264_direct_ref_list_init(h);
8f56e219 1989
e3e6f18f 1990 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
9963b332 1991 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
1992 if(tmp > 2){
1993 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
1994 return -1;
1995 }
1996 h->cabac_init_idc= tmp;
1997 }
e5017ab8
LA
1998
1999 h->last_qscale_diff = 0;
88e7a4d1
MN
2000 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2001 if(tmp>51){
2002 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2003 return -1;
2004 }
88e7a4d1 2005 s->qscale= tmp;
4691a77d
2006 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2007 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2008 //FIXME qscale / qp ... stuff
9701840b 2009 if(h->slice_type == FF_SP_TYPE){
1df1df0b 2010 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2011 }
9701840b 2012 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
1df1df0b 2013 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2014 }
2015
53c05b1e 2016 h->deblocking_filter = 1;
0c32e19d
MN
2017 h->slice_alpha_c0_offset = 52;
2018 h->slice_beta_offset = 52;
0da71265 2019 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2020 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2021 if(tmp > 2){
2022 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2023 return -1;
2024 }
2025 h->deblocking_filter= tmp;
115329f1 2026 if(h->deblocking_filter < 2)
53c05b1e
MN
2027 h->deblocking_filter^= 1; // 1<->0
2028
2029 if( h->deblocking_filter ) {
0c32e19d
MN
2030 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2031 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2032 if( h->slice_alpha_c0_offset > 104U
2033 || h->slice_beta_offset > 104U){
2034 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2035 return -1;
2036 }
0da71265 2037 }
980a82b7 2038 }
afebe2f7 2039
61858a76 2040 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4b30289e 2041 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
9f5c1037 2042 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
61858a76
RD
2043 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2044 h->deblocking_filter= 0;
2045
afebe2f7 2046 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2047 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2048 /* Cheat slightly for speed:
5d81d641 2049 Do not bother to deblock across slices. */
ec970c21
2050 h->deblocking_filter = 2;
2051 } else {
7ae94d52
2052 h0->max_contexts = 1;
2053 if(!h0->single_decode_warning) {
2054 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2055 h0->single_decode_warning = 1;
2056 }
2057 if(h != h0)
2058 return 1; // deblocking switched inside frame
ec970c21 2059 }
afebe2f7 2060 }
0c32e19d 2061 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7 2062
0da71265
MN
2063#if 0 //FMO
2064 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2065 slice_group_change_cycle= get_bits(&s->gb, ?);
2066#endif
2067
afebe2f7
2068 h0->last_slice_type = slice_type;
2069 h->slice_num = ++h0->current_slice;
b735aeea
MN
2070 if(h->slice_num >= MAX_SLICES){
2071 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2072 }
5175b937 2073
c32867b5 2074 for(j=0; j<2; j++){
6d7e6b26 2075 int id_list[16];
b735aeea 2076 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
MN
2077 for(i=0; i<16; i++){
2078 id_list[i]= 60;
2079 if(h->ref_list[j][i].data[0]){
2080 int k;
2081 uint8_t *base= h->ref_list[j][i].base[0];
2082 for(k=0; k<h->short_ref_count; k++)
2083 if(h->short_ref[k]->base[0] == base){
2084 id_list[i]= k;
2085 break;
2086 }
2087 for(k=0; k<h->long_ref_count; k++)
2088 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2089 id_list[i]= h->short_ref_count + k;
2090 break;
2091 }
2092 }
2093 }
2094
c32867b5
MN
2095 ref2frm[0]=
2096 ref2frm[1]= -1;
d50cdd82 2097 for(i=0; i<16; i++)
6d7e6b26 2098 ref2frm[i+2]= 4*id_list[i]
c32867b5 2099 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2100 ref2frm[18+0]=
2101 ref2frm[18+1]= -1;
2102 for(i=16; i<48; i++)
6d7e6b26 2103 ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82 2104 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2105 }
2106
5d18eaad 2107 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2108 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2109
802e9146
MN
2110 s->avctx->refs= h->sps.ref_frame_count;
2111
0da71265 2112 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2113 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2114 h->slice_num,
2115 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2116 first_mb_in_slice,
49573a87 2117 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2118 pps_id, h->frame_num,
2119 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2120 h->ref_count[0], h->ref_count[1],
2121 s->qscale,
0c32e19d 2122 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f 2123 h->use_weight,
4806b922
MN
2124 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2125 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2126 );
2127 }
2128
2129 return 0;
2130}
2131
0dc343d4 2132int ff_h264_get_slice_type(const H264Context *h)
75dd6938
LA
2133{
2134 switch (h->slice_type) {
2135 case FF_P_TYPE: return 0;
2136 case FF_B_TYPE: return 1;
2137 case FF_I_TYPE: return 2;
2138 case FF_SP_TYPE: return 3;
2139 case FF_SI_TYPE: return 4;
2140 default: return -1;
2141 }
2142}
2143
c988f975
MN
2144static void loop_filter(H264Context *h){
2145 MpegEncContext * const s = &h->s;
2146 uint8_t *dest_y, *dest_cb, *dest_cr;
2147 int linesize, uvlinesize, mb_x, mb_y;
2148 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2149 const int old_slice_type= h->slice_type;
2150
2151 if(h->deblocking_filter) {
2152 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2153 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
78998bf2 2154 int mb_xy, mb_type;
c988f975
MN
2155 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2156 h->slice_num= h->slice_table[mb_xy];
2157 mb_type= s->current_picture.mb_type[mb_xy];
2158 h->list_count= h->list_counts[mb_xy];
c988f975
MN
2159
2160 if(FRAME_MBAFF)
2161 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
2162
c988f975
MN
2163 s->mb_x= mb_x;
2164 s->mb_y= mb_y;
2165 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2166 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2167 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2168 //FIXME simplify above
2169
2170 if (MB_FIELD) {
2171 linesize = h->mb_linesize = s->linesize * 2;
2172 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2173 if(mb_y&1){ //FIXME move out of this function?
2174 dest_y -= s->linesize*15;
2175 dest_cb-= s->uvlinesize*7;
2176 dest_cr-= s->uvlinesize*7;
2177 }
2178 } else {
2179 linesize = h->mb_linesize = s->linesize;
2180 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2181 }
77d40dce 2182 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
aaa995d7 2183 if(fill_filter_caches(h, mb_type))
44a5e7b6 2184 continue;
c988f975
MN
2185 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2186 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2187
77d40dce 2188 if (FRAME_MBAFF) {
c988f975
MN
2189 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2190 } else {
2191 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2192 }
2193 }
2194 }
2195 }
2196 h->slice_type= old_slice_type;
2197 s->mb_x= 0;
2198 s->mb_y= end_mb_y - FRAME_MBAFF;
f4b8b825
MN
2199 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2200 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
c988f975
MN
2201}
2202
3a84713a
RS
2203static int decode_slice(struct AVCodecContext *avctx, void *arg){
2204 H264Context *h = *(void**)arg;
0da71265
MN
2205 MpegEncContext * const s = &h->s;
2206 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2207
2208 s->mb_skip_run= -1;
0da71265 2209
89db0bae 2210 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b 2211 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae 2212
e5017ab8 2213 if( h->pps.cabac ) {
e5017ab8
LA
2214 /* realign */
2215 align_get_bits( &s->gb );
2216
2217 /* init cabac */
d61c4e73 2218 ff_init_cabac_states( &h->cabac);
e5017ab8
LA
2219 ff_init_cabac_decoder( &h->cabac,
2220 s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15 2221 (get_bits_left(&s->gb) + 7)/8);
cc51b282
MN
2222
2223 ff_h264_init_cabac_states(h);
95c26348 2224
e5017ab8 2225 for(;;){
851ded89 2226//START_TIMER
cc51b282 2227 int ret = ff_h264_decode_mb_cabac(h);
6867a90b 2228 int eos;
851ded89 2229//STOP_TIMER("decode_mb_cabac")
0da71265 2230
903d58f6 2231 if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265 2232
5d18eaad 2233 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
LA
2234 s->mb_y++;
2235
cc51b282 2236 ret = ff_h264_decode_mb_cabac(h);
e5017ab8 2237
903d58f6 2238 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2239 s->mb_y--;
2240 }
6867a90b 2241 eos = get_cabac_terminate( &h->cabac );
e5017ab8 2242
3566042a
MN
2243 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
2244 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2245 return 0;
2246 }
5659b509 2247 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af 2248 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
LA
2249 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2250 return -1;
2251 }
2252
2253 if( ++s->mb_x >= s->mb_width ) {
2254 s->mb_x = 0;
c988f975 2255 loop_filter(h);
e5017ab8 2256 ff_draw_horiz_band(s, 16*s->mb_y, 16);
5175b937 2257 ++s->mb_y;
f3e53d9f 2258 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
LLL
2259 ++s->mb_y;
2260 }
0da71265 2261 }
0da71265 2262
e5017ab8 2263 if( eos || s->mb_y >= s->mb_height ) {
a9c9a240 2264 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8 2265 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0da71265 2266 return 0;
e5017ab8 2267 }
e5017ab8
LA
2268 }
2269
2270 } else {
2271 for(;;){
e1e94902 2272 int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2273
903d58f6 2274 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8 2275
5d18eaad 2276 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8 2277 s->mb_y++;
e1e94902 2278 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8 2279
903d58f6 2280 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
LA
2281 s->mb_y--;
2282 }
2283
2284 if(ret<0){
2285 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2286 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2287
2288 return -1;
2289 }
e5017ab8
LA
2290
2291 if(++s->mb_x >= s->mb_width){
2292 s->mb_x=0;
c988f975 2293 loop_filter(h);
e5017ab8 2294 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6867a90b 2295 ++s->mb_y;
f3e53d9f 2296 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
LLL
2297 ++s->mb_y;
2298 }
2299 if(s->mb_y >= s->mb_height){
a9c9a240 2300 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2301
2302 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2303 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2304
2305 return 0;
2306 }else{
2307 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2308
2309 return -1;
2310 }
2311 }
2312 }
2313
2314 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240 2315 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
LA
2316 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2317 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2318
2319 return 0;
2320 }else{
2321 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2322
2323 return -1;
2324 }
2325 }
0da71265
MN
2326 }
2327 }
e5017ab8 2328
0da71265
MN
2329#if 0
2330 for(;s->mb_y < s->mb_height; s->mb_y++){
2331 for(;s->mb_x < s->mb_width; s->mb_x++){
2332 int ret= decode_mb(h);
115329f1 2333
903d58f6 2334 ff_h264_hl_decode_mb(h);
0da71265
MN
2335
2336 if(ret<0){
267f7edc 2337 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
2338 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2339
2340 return -1;
2341 }
115329f1 2342
0da71265
MN
2343 if(++s->mb_x >= s->mb_width){
2344 s->mb_x=0;
2345 if(++s->mb_y >= s->mb_height){
2346 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2347 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2348
2349 return 0;
2350 }else{
2351 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2352
2353 return -1;
2354 }
2355 }
2356 }
115329f1 2357
0da71265
MN
2358 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2359 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2360 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2361
2362 return 0;
2363 }else{
2364 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2365
2366 return -1;
2367 }
2368 }
2369 }
2370 s->mb_x=0;
2371 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2372 }
2373#endif
2374 return -1; //not reached
2375}
2376
afebe2f7
2377/**
2378 * Call decode_slice() for each context.
2379 *
2380 * @param h h264 master context
2381 * @param context_count number of contexts to execute
2382 */
2383static void execute_decode_slices(H264Context *h, int context_count){
2384 MpegEncContext * const s = &h->s;
2385 AVCodecContext * const avctx= s->avctx;
2386 H264Context *hx;
2387 int i;
2388
40e5d31b
GB
2389 if (s->avctx->hwaccel)
2390 return;
0d3d172f 2391 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2392 return;
afebe2f7 2393 if(context_count == 1) {
74e8b78b 2394 decode_slice(avctx, &h);
afebe2f7
2395 } else {
2396 for(i = 1; i < context_count; i++) {
2397 hx = h->thread_context[i];
047599a4 2398 hx->s.error_recognition = avctx->error_recognition;
afebe2f7
2399 hx->s.error_count = 0;
2400 }
2401
2402 avctx->execute(avctx, (void *)decode_slice,
01418506 2403 h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
2404
2405 /* pull back stuff from slices to master context */
2406 hx = h->thread_context[context_count - 1];
2407 s->mb_x = hx->s.mb_x;
2408 s->mb_y = hx->s.mb_y;
12d96de3
JD
2409 s->dropable = hx->s.dropable;
2410 s->picture_structure = hx->s.picture_structure;
afebe2f7
2411 for(i = 1; i < context_count; i++)
2412 h->s.error_count += h->thread_context[i]->s.error_count;
2413 }
2414}
2415
2416
30317501 2417static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
MN
2418 MpegEncContext * const s = &h->s;
2419 AVCodecContext * const avctx= s->avctx;
2420 int buf_index=0;
afebe2f7
2421 H264Context *hx; ///< thread context
2422 int context_count = 0;
74b14aac 2423 int next_avc= h->is_avc ? 0 : buf_size;
afebe2f7
2424
2425 h->max_contexts = avctx->thread_count;
377ec888 2426#if 0
eb60dddc 2427 int i;
96b6ace2
MN
2428 for(i=0; i<50; i++){
2429 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
0da71265
MN
2430 }
2431#endif
66a4b2c1 2432 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7 2433 h->current_slice = 0;
12d96de3 2434 if (!s->first_field)
f6e3c460 2435 s->current_picture_ptr= NULL;
9c095463 2436 ff_h264_reset_sei(h);
66a4b2c1
MN
2437 }
2438
0da71265
MN
2439 for(;;){
2440 int consumed;
2441 int dst_length;
2442 int bit_length;
30317501 2443 const uint8_t *ptr;
4770b1b4 2444 int i, nalsize = 0;
afebe2f7 2445 int err;
115329f1 2446
74b14aac 2447 if(buf_index >= next_avc) {
1c48415b
2448 if(buf_index >= buf_size) break;
2449 nalsize = 0;
2450 for(i = 0; i < h->nal_length_size; i++)
2451 nalsize = (nalsize << 8) | buf[buf_index++];
8d8409ca 2452 if(nalsize <= 1 || nalsize > buf_size - buf_index){
1c48415b
2453 if(nalsize == 1){
2454 buf_index++;
2455 continue;
2456 }else{
2457 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2458 break;
2459 }
2460 }
74b14aac 2461 next_avc= buf_index + nalsize;
1c48415b
2462 } else {
2463 // start code prefix search
52255d17 2464 for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
2465 // This should always succeed in the first iteration.
2466 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2467 break;
8b031359 2468 }
115329f1 2469
1c48415b 2470 if(buf_index+3 >= buf_size) break;
115329f1 2471
1c48415b 2472 buf_index+=3;
52255d17 2473 if(buf_index >= next_avc) continue;
1c48415b 2474 }
115329f1 2475
afebe2f7
2476 hx = h->thread_context[context_count];
2477
74b14aac 2478 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429 2479 if (ptr==NULL || dst_length < 0){
ac658be5
FOL
2480 return -1;
2481 }
3566042a
MN
2482 i= buf_index + consumed;
2483 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
2484 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
2485 s->workaround_bugs |= FF_BUG_TRUNCATED;
2486
2487 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
6ac9696e 2488 while(ptr[dst_length - 1] == 0 && dst_length > 0)
c4da83fb 2489 dst_length--;
3566042a 2490 }
1790a5e9 2491 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
MN
2492
2493 if(s->avctx->debug&FF_DEBUG_STARTCODE){
afebe2f7 2494 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
0da71265 2495 }
115329f1 2496
74b14aac 2497 if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d 2498 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1 2499 }
4770b1b4 2500
0da71265
MN
2501 buf_index += consumed;
2502
755bfeab 2503 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
8c3eba7c 2504 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
0da71265 2505 continue;
115329f1 2506
afebe2f7
2507 again:
2508 err = 0;
2509 switch(hx->nal_unit_type){
0da71265 2510 case NAL_IDR_SLICE:
afebe2f7
2511 if (h->nal_unit_type != NAL_IDR_SLICE) {
2512 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2513 return -1;
2514 }
3b66c4c5 2515 idr(h); //FIXME ensure we don't loose some frames if there is reordering
0da71265 2516 case NAL_SLICE:
afebe2f7
2517 init_get_bits(&hx->s.gb, ptr, bit_length);
2518 hx->intra_gb_ptr=
2519 hx->inter_gb_ptr= &hx->s.gb;
2520 hx->s.data_partitioning = 0;
2521
2522 if((err = decode_slice_header(hx, h)))
2523 break;
2524
dd0cd3d2
RC
2525 avctx->profile = hx->sps.profile_idc;
2526 avctx->level = hx->sps.level_idc;
2527
6026a096
GB
2528 if (s->avctx->hwaccel && h->current_slice == 1) {
2529 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
2530 return -1;
2531 }
2532
37a558fe
IS
2533 s->current_picture_ptr->key_frame |=
2534 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2535 (h->sei_recovery_frame_cnt >= 0);
afebe2f7
2536 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
2537 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2538 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2539 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
369122dd 2540 && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
MN
2541 if(avctx->hwaccel) {
2542 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2543 return -1;
2544 }else
0d3d172f 2545 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd 2546 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
CEH
2547 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2548 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd 2549 }else
f2c214a1 2550 context_count++;
369122dd 2551 }
0da71265
MN
2552 break;
2553 case NAL_DPA:
afebe2f7
2554 init_get_bits(&hx->s.gb, ptr, bit_length);
2555 hx->intra_gb_ptr=
2556 hx->inter_gb_ptr= NULL;
0410ee8f
AS
2557
2558 if ((err = decode_slice_header(hx, h)) < 0)
2559 break;
2560
dd0cd3d2
RC
2561 avctx->profile = hx->sps.profile_idc;
2562 avctx->level = hx->sps.level_idc;
2563
afebe2f7 2564 hx->s.data_partitioning = 1;
115329f1 2565
0da71265
MN
2566 break;
2567 case NAL_DPB:
afebe2f7
2568 init_get_bits(&hx->intra_gb, ptr, bit_length);
2569 hx->intra_gb_ptr= &hx->intra_gb;
0da71265
MN
2570 break;
2571 case NAL_DPC:
afebe2f7
2572 init_get_bits(&hx->inter_gb, ptr, bit_length);
2573 hx->inter_gb_ptr= &hx->inter_gb;
8b92b792 2574
afebe2f7 2575 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03 2576 && s->context_initialized
e0111b32 2577 && s->hurry_up < 5
afebe2f7 2578 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
9f5c1037 2579 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
4b30289e 2580 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
e0111b32 2581 && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7 2582 context_count++;
0da71265
MN
2583 break;
2584 case NAL_SEI:
cdd10689 2585 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2586 ff_h264_decode_sei(h);
0da71265
MN
2587 break;
2588 case NAL_SPS:
2589 init_get_bits(&s->gb, ptr, bit_length);
1790a5e9 2590 ff_h264_decode_seq_parameter_set(h);
115329f1 2591
0da71265
MN
2592 if(s->flags& CODEC_FLAG_LOW_DELAY)
2593 s->low_delay=1;
115329f1 2594
a18030bb
LM
2595 if(avctx->has_b_frames < 2)
2596 avctx->has_b_frames= !s->low_delay;
0da71265
MN
2597 break;
2598 case NAL_PPS:
2599 init_get_bits(&s->gb, ptr, bit_length);
115329f1 2600
1790a5e9 2601 ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
MN
2602
2603 break;
ab470fa7
LM
2604 case NAL_AUD:
2605 case NAL_END_SEQUENCE:
2606 case NAL_END_STREAM:
2607 case NAL_FILLER_DATA:
2608 case NAL_SPS_EXT:
2609 case NAL_AUXILIARY_SLICE:
0da71265 2610 break;
bb270c08 2611 default:
4ad04da2 2612 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1 2613 }
115329f1 2614
afebe2f7
2615 if(context_count == h->max_contexts) {
2616 execute_decode_slices(h, context_count);
2617 context_count = 0;
2618 }
2619
2620 if (err < 0)
2621 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2622 else if(err == 1) {
2623 /* Slice could not be decoded in parallel mode, copy down
2624 * NAL unit stuff to context 0 and restart. Note that
1412060e 2625 * rbsp_buffer is not transferred, but since we no longer
afebe2f7
2626 * run in parallel mode this should not be an issue. */
2627 h->nal_unit_type = hx->nal_unit_type;
2628 h->nal_ref_idc = hx->nal_ref_idc;
2629 hx = h;
2630 goto again;
2631 }
2632 }
2633 if(context_count)
2634 execute_decode_slices(h, context_count);
0da71265
MN
2635 return buf_index;
2636}
2637
2638/**
3b66c4c5 2639 * returns the number of bytes consumed for building the current frame
0da71265
MN
2640 */
2641static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
755bfeab 2642 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
0da71265
MN
2643 if(pos+10>buf_size) pos=buf_size; // oops ;)
2644
2645 return pos;
0da71265
MN
2646}
2647
115329f1 2648static int decode_frame(AVCodecContext *avctx,
0da71265 2649 void *data, int *data_size,
7a00bbad 2650 AVPacket *avpkt)
0da71265 2651{
7a00bbad
TB
2652 const uint8_t *buf = avpkt->data;
2653 int buf_size = avpkt->size;
0da71265
MN
2654 H264Context *h = avctx->priv_data;
2655 MpegEncContext *s = &h->s;
115329f1 2656 AVFrame *pict = data;
0da71265 2657 int buf_index;
115329f1 2658
0da71265 2659 s->flags= avctx->flags;
303e50e6 2660 s->flags2= avctx->flags2;
0da71265 2661
1412060e 2662 /* end of stream, output what is still in the buffers */
0da71265 2663 if (buf_size == 0) {
97bbb885
MN
2664 Picture *out;
2665 int i, out_idx;
2666
2667//FIXME factorize this with the output code below
2668 out = h->delayed_pic[0];
2669 out_idx = 0;
c173a088 2670 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
MN
2671 if(h->delayed_pic[i]->poc < out->poc){
2672 out = h->delayed_pic[i];
2673 out_idx = i;
2674 }
2675
2676 for(i=out_idx; h->delayed_pic[i]; i++)
2677 h->delayed_pic[i] = h->delayed_pic[i+1];
2678
2679 if(out){
2680 *data_size = sizeof(AVFrame);
2681 *pict= *(AVFrame*)out;
2682 }
2683
0da71265
MN
2684 return 0;
2685 }
115329f1 2686
4770b1b4
RT
2687 if(h->is_avc && !h->got_avcC) {
2688 int i, cnt, nalsize;
2689 unsigned char *p = avctx->extradata;
2690 if(avctx->extradata_size < 7) {
2691 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
2692 return -1;
2693 }
2694 if(*p != 1) {
2695 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
2696 return -1;
2697 }
2698 /* sps and pps in the avcC always have length coded with 2 bytes,
2699 so put a fake nal_length_size = 2 while parsing them */
2700 h->nal_length_size = 2;
2701 // Decode sps from avcC
2702 cnt = *(p+5) & 0x1f; // Number of sps
2703 p += 6;
2704 for (i = 0; i < cnt; i++) {
fead30d4 2705 nalsize = AV_RB16(p) + 2;
96b6ace2 2706 if(decode_nal_units(h, p, nalsize) < 0) {
4770b1b4
RT
2707 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
2708 return -1;
2709 }
2710 p += nalsize;
115329f1 2711 }
4770b1b4
RT
2712 // Decode pps from avcC
2713 cnt = *(p++); // Number of pps
2714 for (i = 0; i < cnt; i++) {
fead30d4 2715 nalsize = AV_RB16(p) + 2;
4770b1b4
RT
2716 if(decode_nal_units(h, p, nalsize) != nalsize) {
2717 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
2718 return -1;
2719 }
2720 p += nalsize;
115329f1 2721 }
4770b1b4
RT
2722 // Now store right nal length size, that will be use to parse all other nals
2723 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
2724 // Do not reparse avcC
2725 h->got_avcC = 1;
2726 }
2727
d464bcef 2728 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
115329f1 2729 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
0da71265 2730 return -1;
d464bcef 2731 h->got_avcC = 1;
0da71265
MN
2732 }
2733
2734 buf_index=decode_nal_units(h, buf, buf_size);
115329f1 2735 if(buf_index < 0)
0da71265
MN
2736 return -1;
2737
56c70e1d 2738 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
1c746a49 2739 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
56c70e1d
MN
2740 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
2741 return -1;
2742 }
2743
66a4b2c1
MN
2744 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
2745 Picture *out = s->current_picture_ptr;
2746 Picture *cur = s->current_picture_ptr;
44be1d64 2747 int i, pics, out_of_order, out_idx;
115329f1 2748
256299d3 2749 field_end(h);
66a4b2c1 2750
357282c6 2751 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
12d96de3
JD
2752 /* Wait for second field. */
2753 *data_size = 0;
2754
2755 } else {
b19d493f 2756 cur->interlaced_frame = 0;
b09a7c05
2757 cur->repeat_pict = 0;
2758
2759 /* Signal interlacing information externally. */
2760 /* Prioritize picture timing SEI information over used decoding process if it exists. */
70e01da3 2761
b09a7c05
2762 if(h->sps.pic_struct_present_flag){
2763 switch (h->sei_pic_struct)
2764 {
b19d493f
HY
2765 case SEI_PIC_STRUCT_FRAME:
2766 break;
2767 case SEI_PIC_STRUCT_TOP_FIELD:
2768 case SEI_PIC_STRUCT_BOTTOM_FIELD:
2769 cur->interlaced_frame = 1;
2770 break;
2771 case SEI_PIC_STRUCT_TOP_BOTTOM:
2772 case SEI_PIC_STRUCT_BOTTOM_TOP:
2773 if (FIELD_OR_MBAFF_PICTURE)
2774 cur->interlaced_frame = 1;
2775 else
2776 // try to flag soft telecine progressive
2777 cur->interlaced_frame = h->prev_interlaced_frame;
2778 break;
b09a7c05
2779 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
2780 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
2781 // Signal the possibility of telecined film externally (pic_struct 5,6)
2782 // From these hints, let the applications decide if they apply deinterlacing.
2783 cur->repeat_pict = 1;
b09a7c05
2784 break;
2785 case SEI_PIC_STRUCT_FRAME_DOUBLING:
2786 // Force progressive here, as doubling interlaced frame is a bad idea.
b09a7c05
2787 cur->repeat_pict = 2;
2788 break;
2789 case SEI_PIC_STRUCT_FRAME_TRIPLING:
b09a7c05
2790 cur->repeat_pict = 4;
2791 break;
2792 }
b19d493f
HY
2793
2794 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
2795 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
b09a7c05
2796 }else{
2797 /* Derive interlacing flag from used decoding process. */
2798 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
2799 }
b19d493f 2800 h->prev_interlaced_frame = cur->interlaced_frame;
b09a7c05
2801
2802 if (cur->field_poc[0] != cur->field_poc[1]){
2803 /* Derive top_field_first from field pocs. */
2804 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
2805 }else{
2806 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
2807 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
2808 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
2809 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
2810 cur->top_field_first = 1;
2811 else
2812 cur->top_field_first = 0;
2813 }else{
2814 /* Most likely progressive */
2815 cur->top_field_first = 0;
2816 }
2817 }
84a8596d 2818
f6e3c460 2819 //FIXME do something with unavailable reference frames
8b92b792 2820
f6e3c460 2821 /* Sort B-frames into display order */
2f944356 2822
f6e3c460
2823 if(h->sps.bitstream_restriction_flag
2824 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
2825 s->avctx->has_b_frames = h->sps.num_reorder_frames;
2826 s->low_delay = 0;
2827 }
9170e345 2828
fb19e144
MN
2829 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
2830 && !h->sps.bitstream_restriction_flag){
2831 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
2832 s->low_delay= 0;
2833 }
2834
f6e3c460
2835 pics = 0;
2836 while(h->delayed_pic[pics]) pics++;
9170e345 2837
64b9d48f 2838 assert(pics <= MAX_DELAYED_PIC_COUNT);
4e4d983e 2839
f6e3c460
2840 h->delayed_pic[pics++] = cur;
2841 if(cur->reference == 0)
2842 cur->reference = DELAYED_PIC_REF;
2f944356 2843
f6e3c460
2844 out = h->delayed_pic[0];
2845 out_idx = 0;
c173a088 2846 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
f6e3c460
2847 if(h->delayed_pic[i]->poc < out->poc){
2848 out = h->delayed_pic[i];
2849 out_idx = i;
2850 }
44be1d64
MN
2851 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
2852 h->outputed_poc= INT_MIN;
2853 out_of_order = out->poc < h->outputed_poc;
1b547aba 2854
f6e3c460
2855 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
2856 { }
2a811db2 2857 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
f6e3c460 2858 || (s->low_delay &&
44be1d64 2859 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
9701840b 2860 || cur->pict_type == FF_B_TYPE)))
f6e3c460
2861 {
2862 s->low_delay = 0;
2863 s->avctx->has_b_frames++;
f6e3c460 2864 }
f6e3c460
2865
2866 if(out_of_order || pics > s->avctx->has_b_frames){
3eaa6d0e 2867 out->reference &= ~DELAYED_PIC_REF;
f6e3c460
2868 for(i=out_idx; h->delayed_pic[i]; i++)
2869 h->delayed_pic[i] = h->delayed_pic[i+1];
2870 }
3eaa6d0e 2871 if(!out_of_order && pics > s->avctx->has_b_frames){
f6e3c460 2872 *data_size = sizeof(AVFrame);
df8a7dff 2873
44be1d64
MN
2874 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
2875 h->outputed_poc = INT_MIN;
2876 } else
67e362ca 2877 h->outputed_poc = out->poc;
f6e3c460 2878 *pict= *(AVFrame*)out;
3eaa6d0e 2879 }else{
f6e3c460 2880 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3eaa6d0e 2881 }
12d96de3 2882 }
a4dae92b
LM
2883 }
2884
3165e258 2885 assert(pict->data[0] || !*data_size);
4e4d983e 2886 ff_print_debug_info(s, pict);
0da71265 2887//printf("out %d\n", (int)pict->data[0]);
0da71265 2888
0da71265
MN
2889 return get_consumed_bytes(s, buf_index, buf_size);
2890}
2891#if 0
2892static inline void fill_mb_avail(H264Context *h){
2893 MpegEncContext * const s = &h->s;
7bc9090a 2894 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265
MN
2895
2896 if(s->mb_y){
7bc9090a
MN
2897 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
2898 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
2899 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
0da71265
MN
2900 }else{
2901 h->mb_avail[0]=
2902 h->mb_avail[1]=
2903 h->mb_avail[2]= 0;
2904 }
2905 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
2906 h->mb_avail[4]= 1; //FIXME move out
2907 h->mb_avail[5]= 0; //FIXME move out
2908}
2909#endif
2910
07e4e3ea 2911#ifdef TEST
6bf398a0 2912#undef printf
d04d5bcd 2913#undef random
0da71265
MN
2914#define COUNT 8000
2915#define SIZE (COUNT*40)
f8a80fd6 2916int main(void){
0da71265
MN
2917 int i;
2918 uint8_t temp[SIZE];
2919 PutBitContext pb;
2920 GetBitContext gb;
2921// int int_temp[10000];
2922 DSPContext dsp;
2923 AVCodecContext avctx;
115329f1 2924
0da71265
MN
2925 dsputil_init(&dsp, &avctx);
2926
ed7debda 2927 init_put_bits(&pb, temp, SIZE);
0da71265
MN
2928 printf("testing unsigned exp golomb\n");
2929 for(i=0; i<COUNT; i++){
2930 START_TIMER
2931 set_ue_golomb(&pb, i);
2932 STOP_TIMER("set_ue_golomb");
2933 }
2934 flush_put_bits(&pb);
115329f1 2935
0da71265
MN
2936 init_get_bits(&gb, temp, 8*SIZE);
2937 for(i=0; i<COUNT; i++){
2938 int j, s;
115329f1 2939
0da71265 2940 s= show_bits(&gb, 24);
115329f1 2941
0da71265
MN
2942 START_TIMER
2943 j= get_ue_golomb(&gb);
2944 if(j != i){
755bfeab 2945 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
0da71265
MN
2946// return -1;
2947 }
2948 STOP_TIMER("get_ue_golomb");
2949 }
115329f1
DB
2950
2951
c58222c5 2952 init_put_bits(&pb, temp, SIZE);
0da71265
MN
2953 printf("testing signed exp golomb\n");
2954 for(i=0; i<COUNT; i++){
2955 START_TIMER
2956 set_se_golomb(&pb, i - COUNT/2);
2957 STOP_TIMER("set_se_golomb");
2958 }
2959 flush_put_bits(&pb);
115329f1 2960
0da71265
MN
2961 init_get_bits(&gb, temp, 8*SIZE);
2962 for(i=0; i<COUNT; i++){
2963 int j, s;
115329f1 2964
0da71265 2965 s= show_bits(&gb, 24);
115329f1 2966
0da71265
MN
2967 START_TIMER
2968 j= get_se_golomb(&gb);
2969 if(j != i - COUNT/2){
755bfeab 2970 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
0da71265
MN
2971// return -1;
2972 }
2973 STOP_TIMER("get_se_golomb");
2974 }
2975
6bf398a0 2976#if 0
0da71265 2977 printf("testing 4x4 (I)DCT\n");
115329f1 2978
0da71265
MN
2979 DCTELEM block[16];
2980 uint8_t src[16], ref[16];
2981 uint64_t error= 0, max_error=0;
2982
2983 for(i=0; i<COUNT; i++){
2984 int j;
2985// printf("%d %d %d\n", r1, r2, (r2-r1)*16);
2986 for(j=0; j<16; j++){
2987 ref[j]= random()%255;
2988 src[j]= random()%255;
2989 }
2990
2991 h264_diff_dct_c(block, src, ref, 4);
115329f1 2992
0da71265
MN
2993 //normalize
2994 for(j=0; j<16; j++){
2995// printf("%d ", block[j]);
2996 block[j]= block[j]*4;
2997 if(j&1) block[j]= (block[j]*4 + 2)/5;
2998 if(j&4) block[j]= (block[j]*4 + 2)/5;
2999 }
3000// printf("\n");
115329f1 3001
0fa8158d 3002 s->dsp.h264_idct_add(ref, block, 4);
0da71265
MN
3003/* for(j=0; j<16; j++){
3004 printf("%d ", ref[j]);
3005 }
3006 printf("\n");*/
115329f1 3007
0da71265 3008 for(j=0; j<16; j++){
c26abfa5 3009 int diff= FFABS(src[j] - ref[j]);
115329f1 3010
0da71265
MN
3011 error+= diff*diff;
3012 max_error= FFMAX(max_error, diff);
3013 }
3014 }
3015 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
0da71265
MN
3016 printf("testing quantizer\n");
3017 for(qp=0; qp<52; qp++){
3018 for(i=0; i<16; i++)
3019 src1_block[i]= src2_block[i]= random()%255;
115329f1 3020
0da71265 3021 }
0da71265 3022 printf("Testing NAL layer\n");
115329f1 3023
0da71265
MN
3024 uint8_t bitstream[COUNT];
3025 uint8_t nal[COUNT*2];
3026 H264Context h;
3027 memset(&h, 0, sizeof(H264Context));
115329f1 3028
0da71265
MN
3029 for(i=0; i<COUNT; i++){
3030 int zeros= i;
3031 int nal_length;
3032 int consumed;
3033 int out_length;
3034 uint8_t *out;
3035 int j;
115329f1 3036
0da71265
MN
3037 for(j=0; j<COUNT; j++){
3038 bitstream[j]= (random() % 255) + 1;
3039 }
115329f1 3040
0da71265
MN
3041 for(j=0; j<zeros; j++){
3042 int pos= random() % COUNT;
3043 while(bitstream[pos] == 0){
3044 pos++;
3045 pos %= COUNT;
3046 }
3047 bitstream[pos]=0;
3048 }
115329f1 3049
0da71265 3050 START_TIMER
115329f1 3051
0da71265
MN
3052 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
3053 if(nal_length<0){
3054 printf("encoding failed\n");
3055 return -1;
3056 }
115329f1 3057
1790a5e9 3058 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
0da71265
MN
3059
3060 STOP_TIMER("NAL")
115329f1 3061
0da71265
MN
3062 if(out_length != COUNT){
3063 printf("incorrect length %d %d\n", out_length, COUNT);
3064 return -1;
3065 }
115329f1 3066
0da71265
MN
3067 if(consumed != nal_length){
3068 printf("incorrect consumed length %d %d\n", nal_length, consumed);
3069 return -1;
3070 }
115329f1 3071
0da71265 3072 if(memcmp(bitstream, out, COUNT)){
755bfeab 3073 printf("mismatch\n");
0da71265
MN
3074 return -1;
3075 }
3076 }
6bf398a0 3077#endif
115329f1 3078
0da71265 3079 printf("Testing RBSP\n");
115329f1
DB
3080
3081
0da71265
MN
3082 return 0;
3083}
07e4e3ea 3084#endif /* TEST */
0da71265
MN
3085
3086
cbf1eae9 3087av_cold void ff_h264_free_context(H264Context *h)
0da71265 3088{
5f129a05 3089 int i;
115329f1 3090
0da71265 3091 free_tables(h); //FIXME cleanup init stuff perhaps
5f129a05
MN
3092
3093 for(i = 0; i < MAX_SPS_COUNT; i++)
3094 av_freep(h->sps_buffers + i);
3095
3096 for(i = 0; i < MAX_PPS_COUNT; i++)
3097 av_freep(h->pps_buffers + i);
15861962
RD
3098}
3099
903d58f6 3100av_cold int ff_h264_decode_end(AVCodecContext *avctx)
15861962
RD
3101{
3102 H264Context *h = avctx->priv_data;
3103 MpegEncContext *s = &h->s;
3104
3105 ff_h264_free_context(h);
5f129a05 3106
0da71265
MN
3107 MPV_common_end(s);
3108
3109// memset(h, 0, sizeof(H264Context));
115329f1 3110
0da71265
MN
3111 return 0;
3112}
3113
3114
3115AVCodec h264_decoder = {
3116 "h264",
3117 CODEC_TYPE_VIDEO,
3118 CODEC_ID_H264,
3119 sizeof(H264Context),
903d58f6 3120 ff_h264_decode_init,
0da71265 3121 NULL,
903d58f6 3122 ff_h264_decode_end,
0da71265 3123 decode_frame,
f3ba9db4 3124 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7c33ad19 3125 .flush= flush_dpb,
fe4bf374 3126 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
6026a096 3127 .pix_fmts= ff_hwaccel_pixfmt_list_420,
0da71265
MN
3128};
3129
b250f9c6 3130#if CONFIG_H264_VDPAU_DECODER
369122dd
NC
3131AVCodec h264_vdpau_decoder = {
3132 "h264_vdpau",
3133 CODEC_TYPE_VIDEO,
0d3d172f 3134 CODEC_ID_H264,
369122dd 3135 sizeof(H264Context),
903d58f6 3136 ff_h264_decode_init,
369122dd 3137 NULL,