Commit | Line | Data |
---|---|---|
0da71265 MN |
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 | * | |
b78e7197 DB |
5 | * This file is part of FFmpeg. |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
0da71265 MN |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either | |
b78e7197 | 10 | * version 2.1 of the License, or (at your option) any later version. |
0da71265 | 11 | * |
b78e7197 | 12 | * FFmpeg is distributed in the hope that it will be useful, |
0da71265 MN |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
b78e7197 | 18 | * License along with FFmpeg; if not, write to the Free Software |
5509bffa | 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0da71265 | 20 | */ |
115329f1 | 21 | |
0da71265 | 22 | /** |
bad5537e | 23 | * @file libavcodec/h264.c |
0da71265 MN |
24 | * H.264 / AVC / MPEG4 part10 codec. |
25 | * @author Michael Niedermayer <michaelni@gmx.at> | |
26 | */ | |
27 | ||
40e5d31b | 28 | #include "internal.h" |
0da71265 MN |
29 | #include "dsputil.h" |
30 | #include "avcodec.h" | |
31 | #include "mpegvideo.h" | |
26b4fe82 | 32 | #include "h264.h" |
0da71265 | 33 | #include "h264data.h" |
188d3c51 | 34 | #include "h264_mvpred.h" |
26b4fe82 | 35 | #include "h264_parser.h" |
0da71265 | 36 | #include "golomb.h" |
199436b9 | 37 | #include "mathops.h" |
626464fb | 38 | #include "rectangle.h" |
369122dd | 39 | #include "vdpau_internal.h" |
0da71265 | 40 | |
e5017ab8 LA |
41 | #include "cabac.h" |
42 | ||
2848ce84 | 43 | //#undef NDEBUG |
0da71265 MN |
44 | #include <assert.h> |
45 | ||
/* rem6[q] holds q % 6 for every index q in 0..51. */
static const uint8_t rem6[52] = {
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
49 | ||
/* div6[q] holds q / 6 for every index q in 0..51. */
static const uint8_t div6[52] = {
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
53 | ||
903d58f6 | 54 | void ff_h264_write_back_intra_pred_mode(H264Context *h){ |
5b0fb524 | 55 | int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; |
0da71265 | 56 | |
662a5b23 MN |
57 | AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4); |
58 | mode[4]= h->intra4x4_pred_mode_cache[7+8*3]; | |
59 | mode[5]= h->intra4x4_pred_mode_cache[7+8*2]; | |
60 | mode[6]= h->intra4x4_pred_mode_cache[7+8*1]; | |
0da71265 MN |
61 | } |
62 | ||
63 | /** | |
64 | * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. | |
65 | */ | |
2bedc0e8 MN |
66 | int ff_h264_check_intra4x4_pred_mode(H264Context *h){ |
67 | MpegEncContext * const s = &h->s; | |
68 | static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; | |
69 | static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; | |
70 | int i; | |
71 | ||
72 | if(!(h->top_samples_available&0x8000)){ | |
73 | for(i=0; i<4; i++){ | |
74 | int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; | |
75 | if(status<0){ | |
76 | av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); | |
77 | return -1; | |
78 | } else if(status){ | |
79 | h->intra4x4_pred_mode_cache[scan8[0] + i]= status; | |
80 | } | |
81 | } | |
82 | } | |
83 | ||
84 | if((h->left_samples_available&0x8888)!=0x8888){ | |
85 | static const int mask[4]={0x8000,0x2000,0x80,0x20}; | |
86 | for(i=0; i<4; i++){ | |
87 | if(!(h->left_samples_available&mask[i])){ | |
88 | int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; | |
89 | if(status<0){ | |
90 | av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); | |
91 | return -1; | |
92 | } else if(status){ | |
93 | h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; | |
94 | } | |
95 | } | |
96 | } | |
97 | } | |
98 | ||
99 | return 0; | |
100 | } //FIXME cleanup like ff_h264_check_intra_pred_mode | |
101 | ||
102 | /** | |
103 | * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. | |
104 | */ | |
903d58f6 | 105 | int ff_h264_check_intra_pred_mode(H264Context *h, int mode){ |
0da71265 MN |
106 | MpegEncContext * const s = &h->s; |
107 | static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; | |
108 | static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; | |
115329f1 | 109 | |
43ff0714 | 110 | if(mode > 6U) { |
5175b937 | 111 | av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y); |
7440fe83 | 112 | return -1; |
5175b937 | 113 | } |
115329f1 | 114 | |
0da71265 MN |
115 | if(!(h->top_samples_available&0x8000)){ |
116 | mode= top[ mode ]; | |
117 | if(mode<0){ | |
9b879566 | 118 | av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); |
0da71265 MN |
119 | return -1; |
120 | } | |
121 | } | |
115329f1 | 122 | |
d1d10e91 | 123 | if((h->left_samples_available&0x8080) != 0x8080){ |
0da71265 | 124 | mode= left[ mode ]; |
d1d10e91 MN |
125 | if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred |
126 | mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8); | |
127 | } | |
0da71265 | 128 | if(mode<0){ |
9b879566 | 129 | av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); |
0da71265 | 130 | return -1; |
115329f1 | 131 | } |
0da71265 MN |
132 | } |
133 | ||
134 | return mode; | |
135 | } | |
136 | ||
/**
 * Parses the one-byte NAL header and returns the NAL payload with the
 * escape bytes (the 03 of each 00 00 03 sequence) removed.
 *
 * h->nal_ref_idc and h->nal_unit_type are set from the first byte of src.
 * If the payload contains no 00 00 0x sequence (x <= 3) the returned pointer
 * points into src (just past the header) and nothing is copied. Otherwise
 * the payload is unescaped into h->rbsp_buffer[], which is (re)allocated
 * with av_fast_malloc() and followed by FF_INPUT_BUFFER_PADDING_SIZE zero
 * bytes.
 *
 * @param src        NAL unit, starting at its header byte
 * @param dst_length set to the length of the returned payload
 * @param consumed   set to the number of bytes of src used, header included
 * @param length     number of bytes available at src
 * @return pointer to the payload, or NULL if the buffer allocation failed
 */
1790a5e9 | 137 | const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ |
0da71265 MN |
138 | int i, si, di; |
139 | uint8_t *dst; | |
24456882 | 140 | int bufidx; |
0da71265 | 141 | |
bb270c08 | 142 | // src[0]&0x80; //forbidden bit |
0da71265 MN |
143 | h->nal_ref_idc= src[0]>>5; |
144 | h->nal_unit_type= src[0]&0x1F; | |
145 | ||
/* skip the header byte; from here on src/length describe the payload only */
146 | src++; length--; | |
115329f1 | 147 | #if 0 |
0da71265 MN |
148 | for(i=0; i<length; i++) |
149 | printf("%2X ", src[i]); | |
150 | #endif | |
e08715d3 | 151 | |
/* Locate the first 00 00 0x sequence (x <= 3). Three variants of the loop
 * header are selected by the preprocessor; they share the loop body below.
 * NOTE(review): the word-sized variants look like a "has zero byte" bit
 * trick that skips words which cannot contain the pattern - confirm. RS is
 * subtracted from i at the end of each iteration, before the loop increment. */
b250f9c6 AJ |
152 | #if HAVE_FAST_UNALIGNED |
153 | # if HAVE_FAST_64BIT | |
e08715d3 MN |
154 | # define RS 7 |
155 | for(i=0; i+1<length; i+=9){ | |
19769ece | 156 | if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) |
e08715d3 MN |
157 | # else |
158 | # define RS 3 | |
159 | for(i=0; i+1<length; i+=5){ | |
19769ece | 160 | if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U)) |
e08715d3 MN |
161 | # endif |
162 | continue; | |
163 | if(i>0 && !src[i]) i--; | |
164 | while(src[i]) i++; | |
165 | #else | |
166 | # define RS 0 | |
0da71265 MN |
167 | for(i=0; i+1<length; i+=2){ |
168 | if(src[i]) continue; | |
169 | if(i>0 && src[i-1]==0) i--; | |
e08715d3 | 170 | #endif |
0da71265 MN |
171 | if(i+2<length && src[i+1]==0 && src[i+2]<=3){ |
172 | if(src[i+2]!=3){ | |
173 | /* startcode, so we must be past the end */ | |
174 | length=i; | |
175 | } | |
176 | break; | |
177 | } | |
abb27cfb | 178 | i-= RS; |
0da71265 MN |
179 | } |
180 | ||
/* nothing to unescape: hand back the input buffer itself */
181 | if(i>=length-1){ //no escaped 0 | |
182 | *dst_length= length; | |
183 | *consumed= length+1; //+1 for the header | |
115329f1 | 184 | return src; |
0da71265 MN |
185 | } |
186 | ||
24456882 | 187 | bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data |
238ef6da | 188 | av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE); |
24456882 | 189 | dst= h->rbsp_buffer[bufidx]; |
0da71265 | 190 | |
ac658be5 FOL |
191 | if (dst == NULL){ |
192 | return NULL; | |
193 | } | |
194 | ||
3b66c4c5 | 195 | //printf("decoding esc\n"); |
/* the first i bytes are known to be escape free and are copied verbatim;
 * si is the read index into src, di the write index into dst */
593af7cd MN |
196 | memcpy(dst, src, i); |
197 | si=di=i; | |
198 | while(si+2<length){ | |
0da71265 | 199 | //remove escapes (very rare 1:2^22) |
593af7cd MN |
200 | if(src[si+2]>3){ |
201 | dst[di++]= src[si++]; | |
202 | dst[di++]= src[si++]; | |
203 | }else if(src[si]==0 && src[si+1]==0){ | |
0da71265 MN |
204 | if(src[si+2]==3){ //escape |
205 | dst[di++]= 0; | |
206 | dst[di++]= 0; | |
207 | si+=3; | |
c8470cc1 | 208 | continue; |
0da71265 | 209 | }else //next start code |
593af7cd | 210 | goto nsc; |
0da71265 MN |
211 | } |
212 | ||
213 | dst[di++]= src[si++]; | |
214 | } | |
/* fewer than three bytes remain, so no further escape sequence is possible */
593af7cd MN |
215 | while(si<length) |
216 | dst[di++]= src[si++]; | |
217 | nsc: | |
0da71265 | 218 | |
d4369630 AS |
219 | memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE); |
220 | ||
0da71265 MN |
221 | *dst_length= di; |
222 | *consumed= si + 1;//+1 for the header | |
90b5b51e | 223 | //FIXME store exact number of bits in the getbitcontext (it is needed for decoding) |
0da71265 MN |
224 | return dst; |
225 | } | |
226 | ||
1790a5e9 | 227 | int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){ |
0da71265 MN |
228 | int v= *src; |
229 | int r; | |
230 | ||
a9c9a240 | 231 | tprintf(h->s.avctx, "rbsp trailing %X\n", v); |
0da71265 MN |
232 | |
233 | for(r=1; r<9; r++){ | |
234 | if(v&1) return r; | |
235 | v>>=1; | |
236 | } | |
237 | return 0; | |
238 | } | |
239 | ||
240 | /** | |
1412060e | 241 | * IDCT transforms the 16 dc values and dequantizes them. |
0da71265 MN |
242 | * @param qp quantization parameter |
243 | */ | |
239ea04c | 244 | static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
0da71265 MN |
245 | #define stride 16 |
246 | int i; | |
247 | int temp[16]; //FIXME check if this is a good idea | |
248 | static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; | |
249 | static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; | |
250 | ||
251 | //memset(block, 64, 2*256); | |
252 | //return; | |
253 | for(i=0; i<4; i++){ | |
254 | const int offset= y_offset[i]; | |
255 | const int z0= block[offset+stride*0] + block[offset+stride*4]; | |
256 | const int z1= block[offset+stride*0] - block[offset+stride*4]; | |
257 | const int z2= block[offset+stride*1] - block[offset+stride*5]; | |
258 | const int z3= block[offset+stride*1] + block[offset+stride*5]; | |
259 | ||
260 | temp[4*i+0]= z0+z3; | |
261 | temp[4*i+1]= z1+z2; | |
262 | temp[4*i+2]= z1-z2; | |
263 | temp[4*i+3]= z0-z3; | |
264 | } | |
265 | ||
266 | for(i=0; i<4; i++){ | |
267 | const int offset= x_offset[i]; | |
268 | const int z0= temp[4*0+i] + temp[4*2+i]; | |
269 | const int z1= temp[4*0+i] - temp[4*2+i]; | |
270 | const int z2= temp[4*1+i] - temp[4*3+i]; | |
271 | const int z3= temp[4*1+i] + temp[4*3+i]; | |
272 | ||
1412060e | 273 | block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual |
239ea04c LM |
274 | block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); |
275 | block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); | |
276 | block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); | |
0da71265 MN |
277 | } |
278 | } | |
279 | ||
#if 0
/**
 * Transforms the 16 luma DC values in place (disabled code, kept for
 * reference). No quantization is applied; each output is halved.
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int k;

    for(k=0; k<4; k++){
        const DCTELEM *in = block + y_offset[k];
        int *out = temp + 4*k;
        const int sum04  = in[stride*0] + in[stride*4];
        const int diff04 = in[stride*0] - in[stride*4];
        const int diff15 = in[stride*1] - in[stride*5];
        const int sum15  = in[stride*1] + in[stride*5];

        out[0]= sum04  + sum15;
        out[1]= diff04 + diff15;
        out[2]= diff04 - diff15;
        out[3]= sum04  - sum15;
    }

    for(k=0; k<4; k++){
        DCTELEM *out = block + x_offset[k];
        const int sum02  = temp[4*0+k] + temp[4*2+k];
        const int diff02 = temp[4*0+k] - temp[4*2+k];
        const int diff13 = temp[4*1+k] - temp[4*3+k];
        const int sum13  = temp[4*1+k] + temp[4*3+k];

        out[stride*0 ]= (sum02  + sum13 )>>1;
        out[stride*2 ]= (diff02 + diff13)>>1;
        out[stride*8 ]= (diff02 - diff13)>>1;
        out[stride*10]= (sum02  - sum13 )>>1;
    }
}
#endif

/* the names below are used as ordinary local variables further down */
#undef xStride
#undef stride
322 | ||
239ea04c | 323 | static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
0da71265 MN |
324 | const int stride= 16*2; |
325 | const int xStride= 16; | |
326 | int a,b,c,d,e; | |
327 | ||
328 | a= block[stride*0 + xStride*0]; | |
329 | b= block[stride*0 + xStride*1]; | |
330 | c= block[stride*1 + xStride*0]; | |
331 | d= block[stride*1 + xStride*1]; | |
332 | ||
333 | e= a-b; | |
334 | a= a+b; | |
335 | b= c-d; | |
336 | c= c+d; | |
337 | ||
239ea04c LM |
338 | block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7; |
339 | block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7; | |
340 | block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7; | |
341 | block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7; | |
0da71265 MN |
342 | } |
343 | ||
#if 0
/**
 * Applies a 2x2 butterfly to the four chroma DC values in place, without
 * scaling (disabled code, kept for reference).
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    DCTELEM * const top    = block + stride*0;
    DCTELEM * const bottom = block + stride*1;
    const int top_sum     = top[xStride*0]    + top[xStride*1];
    const int top_diff    = top[xStride*0]    - top[xStride*1];
    const int bottom_sum  = bottom[xStride*0] + bottom[xStride*1];
    const int bottom_diff = bottom[xStride*0] - bottom[xStride*1];

    top[xStride*0]   = (top_sum  + bottom_sum );
    top[xStride*1]   = (top_diff + bottom_diff);
    bottom[xStride*0]= (top_sum  - bottom_sum );
    bottom[xStride*1]= (top_diff - bottom_diff);
}
#endif
0da71265 | 366 | |
0da71265 MN |
367 | static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, |
368 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
369 | int src_x_offset, int src_y_offset, | |
370 | qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ | |
371 | MpegEncContext * const s = &h->s; | |
372 | const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; | |
5d18eaad | 373 | int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; |
0da71265 | 374 | const int luma_xy= (mx&3) + ((my&3)<<2); |
5d18eaad LM |
375 | uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; |
376 | uint8_t * src_cb, * src_cr; | |
377 | int extra_width= h->emu_edge_width; | |
378 | int extra_height= h->emu_edge_height; | |
0da71265 MN |
379 | int emu=0; |
380 | const int full_mx= mx>>2; | |
381 | const int full_my= my>>2; | |
fbd312fd | 382 | const int pic_width = 16*s->mb_width; |
0d43dd8c | 383 | const int pic_height = 16*s->mb_height >> MB_FIELD; |
115329f1 | 384 | |
0da71265 MN |
385 | if(mx&7) extra_width -= 3; |
386 | if(my&7) extra_height -= 3; | |
115329f1 DB |
387 | |
388 | if( full_mx < 0-extra_width | |
389 | || full_my < 0-extra_height | |
390 | || full_mx + 16/*FIXME*/ > pic_width + extra_width | |
fbd312fd | 391 | || full_my + 16/*FIXME*/ > pic_height + extra_height){ |
5d18eaad LM |
392 | ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); |
393 | src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; | |
0da71265 MN |
394 | emu=1; |
395 | } | |
115329f1 | 396 | |
5d18eaad | 397 | qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps? |
0da71265 | 398 | if(!square){ |
5d18eaad | 399 | qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); |
0da71265 | 400 | } |
115329f1 | 401 | |
49fb20cb | 402 | if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; |
115329f1 | 403 | |
0d43dd8c | 404 | if(MB_FIELD){ |
5d18eaad | 405 | // chroma offset when predicting from a field of opposite parity |
2143b118 | 406 | my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); |
5d18eaad LM |
407 | emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); |
408 | } | |
409 | src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; | |
410 | src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize; | |
411 | ||
0da71265 | 412 | if(emu){ |
5d18eaad | 413 | ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); |
0da71265 MN |
414 | src_cb= s->edge_emu_buffer; |
415 | } | |
5d18eaad | 416 | chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); |
0da71265 MN |
417 | |
418 | if(emu){ | |
5d18eaad | 419 | ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); |
0da71265 MN |
420 | src_cr= s->edge_emu_buffer; |
421 | } | |
5d18eaad | 422 | chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); |
0da71265 MN |
423 | } |
424 | ||
9f2d1b4f | 425 | static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, |
0da71265 MN |
426 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, |
427 | int x_offset, int y_offset, | |
428 | qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
429 | qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |
430 | int list0, int list1){ | |
431 | MpegEncContext * const s = &h->s; | |
432 | qpel_mc_func *qpix_op= qpix_put; | |
433 | h264_chroma_mc_func chroma_op= chroma_put; | |
115329f1 | 434 | |
5d18eaad LM |
435 | dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; |
436 | dest_cb += x_offset + y_offset*h->mb_uvlinesize; | |
437 | dest_cr += x_offset + y_offset*h->mb_uvlinesize; | |
0da71265 | 438 | x_offset += 8*s->mb_x; |
0d43dd8c | 439 | y_offset += 8*(s->mb_y >> MB_FIELD); |
115329f1 | 440 | |
0da71265 | 441 | if(list0){ |
1924f3ce | 442 | Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; |
0da71265 MN |
443 | mc_dir_part(h, ref, n, square, chroma_height, delta, 0, |
444 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
445 | qpix_op, chroma_op); | |
446 | ||
447 | qpix_op= qpix_avg; | |
448 | chroma_op= chroma_avg; | |
449 | } | |
450 | ||
451 | if(list1){ | |
1924f3ce | 452 | Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; |
0da71265 MN |
453 | mc_dir_part(h, ref, n, square, chroma_height, delta, 1, |
454 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
455 | qpix_op, chroma_op); | |
456 | } | |
457 | } | |
458 | ||
9f2d1b4f LM |
459 | static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, |
460 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
461 | int x_offset, int y_offset, | |
462 | qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
463 | h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, | |
464 | h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, | |
465 | int list0, int list1){ | |
466 | MpegEncContext * const s = &h->s; | |
467 | ||
5d18eaad LM |
468 | dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; |
469 | dest_cb += x_offset + y_offset*h->mb_uvlinesize; | |
470 | dest_cr += x_offset + y_offset*h->mb_uvlinesize; | |
9f2d1b4f | 471 | x_offset += 8*s->mb_x; |
0d43dd8c | 472 | y_offset += 8*(s->mb_y >> MB_FIELD); |
115329f1 | 473 | |
9f2d1b4f LM |
474 | if(list0 && list1){ |
475 | /* don't optimize for luma-only case, since B-frames usually | |
476 | * use implicit weights => chroma too. */ | |
477 | uint8_t *tmp_cb = s->obmc_scratchpad; | |
5d18eaad LM |
478 | uint8_t *tmp_cr = s->obmc_scratchpad + 8; |
479 | uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; | |
9f2d1b4f LM |
480 | int refn0 = h->ref_cache[0][ scan8[n] ]; |
481 | int refn1 = h->ref_cache[1][ scan8[n] ]; | |
482 | ||
483 | mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, | |
484 | dest_y, dest_cb, dest_cr, | |
485 | x_offset, y_offset, qpix_put, chroma_put); | |
486 | mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, | |
487 | tmp_y, tmp_cb, tmp_cr, | |
488 | x_offset, y_offset, qpix_put, chroma_put); | |
489 | ||
490 | if(h->use_weight == 2){ | |
1052b76f | 491 | int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; |
9f2d1b4f | 492 | int weight1 = 64 - weight0; |
5d18eaad LM |
493 | luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); |
494 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); | |
495 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); | |
9f2d1b4f | 496 | }else{ |
5d18eaad | 497 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, |
3d9137c8 MN |
498 | h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], |
499 | h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); | |
5d18eaad | 500 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 MN |
501 | h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], |
502 | h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); | |
5d18eaad | 503 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 MN |
504 | h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], |
505 | h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); | |
9f2d1b4f LM |
506 | } |
507 | }else{ | |
508 | int list = list1 ? 1 : 0; | |
509 | int refn = h->ref_cache[list][ scan8[n] ]; | |
510 | Picture *ref= &h->ref_list[list][refn]; | |
511 | mc_dir_part(h, ref, n, square, chroma_height, delta, list, | |
512 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
513 | qpix_put, chroma_put); | |
514 | ||
5d18eaad | 515 | luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, |
3d9137c8 | 516 | h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); |
9f2d1b4f | 517 | if(h->use_weight_chroma){ |
5d18eaad | 518 | chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 | 519 | h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); |
5d18eaad | 520 | chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 | 521 | h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); |
9f2d1b4f LM |
522 | } |
523 | } | |
524 | } | |
525 | ||
526 | static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, | |
527 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
528 | int x_offset, int y_offset, | |
529 | qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
530 | qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |
115329f1 | 531 | h264_weight_func *weight_op, h264_biweight_func *weight_avg, |
9f2d1b4f LM |
532 | int list0, int list1){ |
533 | if((h->use_weight==2 && list0 && list1 | |
1052b76f | 534 | && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) |
9f2d1b4f LM |
535 | || h->use_weight==1) |
536 | mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |
537 | x_offset, y_offset, qpix_put, chroma_put, | |
538 | weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); | |
539 | else | |
540 | mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |
541 | x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); | |
542 | } | |
543 | ||
513fbd8e LM |
544 | static inline void prefetch_motion(H264Context *h, int list){ |
545 | /* fetch pixels for estimated mv 4 macroblocks ahead | |
546 | * optimized for 64byte cache lines */ | |
547 | MpegEncContext * const s = &h->s; | |
548 | const int refn = h->ref_cache[list][scan8[0]]; | |
549 | if(refn >= 0){ | |
550 | const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; | |
551 | const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; | |
552 | uint8_t **src= h->ref_list[list][refn].data; | |
5d18eaad | 553 | int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; |
513fbd8e LM |
554 | s->dsp.prefetch(src[0]+off, s->linesize, 4); |
555 | off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; | |
556 | s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |
557 | } | |
558 | } | |
559 | ||
0da71265 MN |
560 | static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, |
561 | qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | |
9f2d1b4f LM |
562 | qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), |
563 | h264_weight_func *weight_op, h264_biweight_func *weight_avg){ | |
0da71265 | 564 | MpegEncContext * const s = &h->s; |
64514ee8 | 565 | const int mb_xy= h->mb_xy; |
0da71265 | 566 | const int mb_type= s->current_picture.mb_type[mb_xy]; |
115329f1 | 567 | |
0da71265 | 568 | assert(IS_INTER(mb_type)); |
115329f1 | 569 | |
513fbd8e LM |
570 | prefetch_motion(h, 0); |
571 | ||
0da71265 MN |
572 | if(IS_16X16(mb_type)){ |
573 | mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, | |
574 | qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], | |
7231ccf4 | 575 | weight_op, weight_avg, |
0da71265 MN |
576 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
577 | }else if(IS_16X8(mb_type)){ | |
578 | mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, | |
579 | qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | |
9f2d1b4f | 580 | &weight_op[1], &weight_avg[1], |
0da71265 MN |
581 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
582 | mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, | |
583 | qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | |
9f2d1b4f | 584 | &weight_op[1], &weight_avg[1], |
0da71265 MN |
585 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); |
586 | }else if(IS_8X16(mb_type)){ | |
5d18eaad | 587 | mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, |
0da71265 | 588 | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], |
9f2d1b4f | 589 | &weight_op[2], &weight_avg[2], |
0da71265 | 590 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
5d18eaad | 591 | mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, |
0da71265 | 592 | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], |
9f2d1b4f | 593 | &weight_op[2], &weight_avg[2], |
0da71265 MN |
594 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); |
595 | }else{ | |
596 | int i; | |
115329f1 | 597 | |
0da71265 MN |
598 | assert(IS_8X8(mb_type)); |
599 | ||
600 | for(i=0; i<4; i++){ | |
601 | const int sub_mb_type= h->sub_mb_type[i]; | |
602 | const int n= 4*i; | |
603 | int x_offset= (i&1)<<2; | |
604 | int y_offset= (i&2)<<1; | |
605 | ||
606 | if(IS_SUB_8X8(sub_mb_type)){ | |
607 | mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
608 | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | |
9f2d1b4f | 609 | &weight_op[3], &weight_avg[3], |
0da71265 MN |
610 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
611 | }else if(IS_SUB_8X4(sub_mb_type)){ | |
612 | mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
613 | qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | |
9f2d1b4f | 614 | &weight_op[4], &weight_avg[4], |
0da71265 MN |
615 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
616 | mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, | |
617 | qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | |
9f2d1b4f | 618 | &weight_op[4], &weight_avg[4], |
0da71265 MN |
619 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
620 | }else if(IS_SUB_4X8(sub_mb_type)){ | |
5d18eaad | 621 | mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, |
0da71265 | 622 | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], |
9f2d1b4f | 623 | &weight_op[5], &weight_avg[5], |
0da71265 | 624 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
5d18eaad | 625 | mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, |
0da71265 | 626 | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], |
9f2d1b4f | 627 | &weight_op[5], &weight_avg[5], |
0da71265 MN |
628 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
629 | }else{ | |
630 | int j; | |
631 | assert(IS_SUB_4X4(sub_mb_type)); | |
632 | for(j=0; j<4; j++){ | |
633 | int sub_x_offset= x_offset + 2*(j&1); | |
634 | int sub_y_offset= y_offset + (j&2); | |
635 | mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, | |
636 | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | |
9f2d1b4f | 637 | &weight_op[6], &weight_avg[6], |
0da71265 MN |
638 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
639 | } | |
640 | } | |
641 | } | |
642 | } | |
513fbd8e LM |
643 | |
644 | prefetch_motion(h, 1); | |
0da71265 MN |
645 | } |
646 | ||
0da71265 | 647 | |
0da71265 | 648 | static void free_tables(H264Context *h){ |
7978debd | 649 | int i; |
afebe2f7 | 650 | H264Context *hx; |
0da71265 | 651 | av_freep(&h->intra4x4_pred_mode); |
e5017ab8 LA |
652 | av_freep(&h->chroma_pred_mode_table); |
653 | av_freep(&h->cbp_table); | |
9e528114 LA |
654 | av_freep(&h->mvd_table[0]); |
655 | av_freep(&h->mvd_table[1]); | |
5ad984c9 | 656 | av_freep(&h->direct_table); |
0da71265 MN |
657 | av_freep(&h->non_zero_count); |
658 | av_freep(&h->slice_table_base); | |
659 | h->slice_table= NULL; | |
c988f975 | 660 | av_freep(&h->list_counts); |
e5017ab8 | 661 | |
0da71265 | 662 | av_freep(&h->mb2b_xy); |
d43c1922 | 663 | av_freep(&h->mb2br_xy); |
9f2d1b4f | 664 | |
6752dd5a | 665 | for(i = 0; i < MAX_THREADS; i++) { |
afebe2f7 AÖ |
666 | hx = h->thread_context[i]; |
667 | if(!hx) continue; | |
668 | av_freep(&hx->top_borders[1]); | |
669 | av_freep(&hx->top_borders[0]); | |
670 | av_freep(&hx->s.obmc_scratchpad); | |
d2d5e067 AS |
671 | av_freep(&hx->rbsp_buffer[1]); |
672 | av_freep(&hx->rbsp_buffer[0]); | |
eda4ea4e MS |
673 | hx->rbsp_buffer_size[0] = 0; |
674 | hx->rbsp_buffer_size[1] = 0; | |
d2d5e067 | 675 | if (i) av_freep(&h->thread_context[i]); |
afebe2f7 | 676 | } |
0da71265 MN |
677 | } |
678 | ||
239ea04c LM |
679 | static void init_dequant8_coeff_table(H264Context *h){ |
680 | int i,q,x; | |
4693b031 | 681 | const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly |
239ea04c LM |
682 | h->dequant8_coeff[0] = h->dequant8_buffer[0]; |
683 | h->dequant8_coeff[1] = h->dequant8_buffer[1]; | |
684 | ||
685 | for(i=0; i<2; i++ ){ | |
686 | if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ | |
687 | h->dequant8_coeff[1] = h->dequant8_buffer[0]; | |
688 | break; | |
689 | } | |
690 | ||
691 | for(q=0; q<52; q++){ | |
d9ec210b DP |
692 | int shift = div6[q]; |
693 | int idx = rem6[q]; | |
239ea04c | 694 | for(x=0; x<64; x++) |
548a1c8a LM |
695 | h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = |
696 | ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * | |
697 | h->pps.scaling_matrix8[i][x]) << shift; | |
239ea04c LM |
698 | } |
699 | } | |
700 | } | |
701 | ||
702 | static void init_dequant4_coeff_table(H264Context *h){ | |
703 | int i,j,q,x; | |
4693b031 | 704 | const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly |
239ea04c LM |
705 | for(i=0; i<6; i++ ){ |
706 | h->dequant4_coeff[i] = h->dequant4_buffer[i]; | |
707 | for(j=0; j<i; j++){ | |
708 | if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ | |
709 | h->dequant4_coeff[i] = h->dequant4_buffer[j]; | |
710 | break; | |
711 | } | |
712 | } | |
713 | if(j<i) | |
714 | continue; | |
715 | ||
716 | for(q=0; q<52; q++){ | |
d9ec210b DP |
717 | int shift = div6[q] + 2; |
718 | int idx = rem6[q]; | |
239ea04c | 719 | for(x=0; x<16; x++) |
ab2e3e2c LM |
720 | h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = |
721 | ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * | |
239ea04c LM |
722 | h->pps.scaling_matrix4[i][x]) << shift; |
723 | } | |
724 | } | |
725 | } | |
726 | ||
727 | static void init_dequant_tables(H264Context *h){ | |
728 | int i,x; | |
729 | init_dequant4_coeff_table(h); | |
730 | if(h->pps.transform_8x8_mode) | |
731 | init_dequant8_coeff_table(h); | |
732 | if(h->sps.transform_bypass){ | |
733 | for(i=0; i<6; i++) | |
734 | for(x=0; x<16; x++) | |
735 | h->dequant4_coeff[i][0][x] = 1<<6; | |
736 | if(h->pps.transform_8x8_mode) | |
737 | for(i=0; i<2; i++) | |
738 | for(x=0; x<64; x++) | |
739 | h->dequant8_coeff[i][0][x] = 1<<6; | |
740 | } | |
741 | } | |
742 | ||
743 | ||
903d58f6 | 744 | int ff_h264_alloc_tables(H264Context *h){ |
0da71265 | 745 | MpegEncContext * const s = &h->s; |
7bc9090a | 746 | const int big_mb_num= s->mb_stride * (s->mb_height+1); |
145061a1 | 747 | const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count; |
239ea04c | 748 | int x,y; |
0da71265 | 749 | |
145061a1 | 750 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail) |
e5017ab8 | 751 | |
c988f975 | 752 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail) |
d31dbec3 RP |
753 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail) |
754 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail) | |
0da71265 | 755 | |
d31dbec3 | 756 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail) |
145061a1 MN |
757 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail); |
758 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail); | |
36b54927 | 759 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail); |
c988f975 | 760 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail) |
e5017ab8 | 761 | |
b735aeea | 762 | memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); |
5d18eaad | 763 | h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; |
0da71265 | 764 | |
d31dbec3 | 765 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail); |
d43c1922 | 766 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail); |
0da71265 MN |
767 | for(y=0; y<s->mb_height; y++){ |
768 | for(x=0; x<s->mb_width; x++){ | |
7bc9090a | 769 | const int mb_xy= x + y*s->mb_stride; |
0da71265 | 770 | const int b_xy = 4*x + 4*y*h->b_stride; |
115329f1 | 771 | |
0da71265 | 772 | h->mb2b_xy [mb_xy]= b_xy; |
e1c88a21 | 773 | h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride))); |
0da71265 MN |
774 | } |
775 | } | |
9f2d1b4f | 776 | |
9c6221ae GV |
777 | s->obmc_scratchpad = NULL; |
778 | ||
56edbd81 LM |
779 | if(!h->dequant4_coeff[0]) |
780 | init_dequant_tables(h); | |
781 | ||
0da71265 MN |
782 | return 0; |
783 | fail: | |
784 | free_tables(h); | |
785 | return -1; | |
786 | } | |
787 | ||
afebe2f7 AÖ |
788 | /** |
789 | * Mimic alloc_tables(), but for every context thread. | |
790 | */ | |
145061a1 MN |
791 | static void clone_tables(H264Context *dst, H264Context *src, int i){ |
792 | MpegEncContext * const s = &src->s; | |
793 | dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride; | |
afebe2f7 AÖ |
794 | dst->non_zero_count = src->non_zero_count; |
795 | dst->slice_table = src->slice_table; | |
796 | dst->cbp_table = src->cbp_table; | |
797 | dst->mb2b_xy = src->mb2b_xy; | |
d43c1922 | 798 | dst->mb2br_xy = src->mb2br_xy; |
afebe2f7 | 799 | dst->chroma_pred_mode_table = src->chroma_pred_mode_table; |
145061a1 MN |
800 | dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride; |
801 | dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride; | |
afebe2f7 | 802 | dst->direct_table = src->direct_table; |
fb823b77 | 803 | dst->list_counts = src->list_counts; |
afebe2f7 | 804 | |
afebe2f7 AÖ |
805 | dst->s.obmc_scratchpad = NULL; |
806 | ff_h264_pred_init(&dst->hpc, src->s.codec_id); | |
afebe2f7 AÖ |
807 | } |
808 | ||
809 | /** | |
810 | * Init context | |
811 | * Allocate buffers which are not shared amongst multiple threads. | |
812 | */ | |
813 | static int context_init(H264Context *h){ | |
d31dbec3 RP |
814 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) |
815 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) | |
afebe2f7 | 816 | |
145061a1 MN |
817 | h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = |
818 | h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; | |
819 | ||
afebe2f7 AÖ |
820 | return 0; |
821 | fail: | |
822 | return -1; // free_tables will clean up for us | |
823 | } | |
824 | ||
9855b2e3 MN |
825 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size); |
826 | ||
98a6fff9 | 827 | static av_cold void common_init(H264Context *h){ |
0da71265 | 828 | MpegEncContext * const s = &h->s; |
0da71265 MN |
829 | |
830 | s->width = s->avctx->width; | |
831 | s->height = s->avctx->height; | |
832 | s->codec_id= s->avctx->codec->id; | |
115329f1 | 833 | |
4693b031 | 834 | ff_h264dsp_init(&h->h264dsp); |
c92a30bb | 835 | ff_h264_pred_init(&h->hpc, s->codec_id); |
0da71265 | 836 | |
239ea04c | 837 | h->dequant_coeff_pps= -1; |
9a41c2c7 | 838 | s->unrestricted_mv=1; |
0da71265 | 839 | s->decode=1; //FIXME |
56edbd81 | 840 | |
a5805aa9 MN |
841 | dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early |
842 | ||
56edbd81 LM |
843 | memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t)); |
844 | memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t)); | |
0da71265 MN |
845 | } |
846 | ||
903d58f6 | 847 | av_cold int ff_h264_decode_init(AVCodecContext *avctx){ |
0da71265 MN |
848 | H264Context *h= avctx->priv_data; |
849 | MpegEncContext * const s = &h->s; | |
850 | ||
3edcacde | 851 | MPV_decode_defaults(s); |
115329f1 | 852 | |
0da71265 MN |
853 | s->avctx = avctx; |
854 | common_init(h); | |
855 | ||
856 | s->out_format = FMT_H264; | |
857 | s->workaround_bugs= avctx->workaround_bugs; | |
858 | ||
859 | // set defaults | |
0da71265 | 860 | // s->decode_mb= ff_h263_decode_mb; |
9a5a05d0 | 861 | s->quarter_sample = 1; |
47cd974a | 862 | if(!avctx->has_b_frames) |
0da71265 | 863 | s->low_delay= 1; |
7a9dba3c | 864 | |
580a7465 | 865 | avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; |
0da71265 | 866 | |
e1e94902 | 867 | ff_h264_decode_init_vlc(); |
115329f1 | 868 | |
afebe2f7 | 869 | h->thread_context[0] = h; |
18c7be65 | 870 | h->outputed_poc = INT_MIN; |
e4b8f1fa | 871 | h->prev_poc_msb= 1<<16; |
055a6aa7 | 872 | h->x264_build = -1; |
9c095463 | 873 | ff_h264_reset_sei(h); |
efd8c1f6 MN |
874 | if(avctx->codec_id == CODEC_ID_H264){ |
875 | if(avctx->ticks_per_frame == 1){ | |
876 | s->avctx->time_base.den *=2; | |
877 | } | |
19df37a8 | 878 | avctx->ticks_per_frame = 2; |
efd8c1f6 | 879 | } |
9855b2e3 MN |
880 | |
881 | if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){ | |
882 | int i, cnt, nalsize; | |
883 | unsigned char *p = avctx->extradata; | |
884 | ||
885 | h->is_avc = 1; | |
886 | ||
887 | if(avctx->extradata_size < 7) { | |
888 | av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); | |
889 | return -1; | |
890 | } | |
891 | /* sps and pps in the avcC always have length coded with 2 bytes, | |
892 | so put a fake nal_length_size = 2 while parsing them */ | |
893 | h->nal_length_size = 2; | |
894 | // Decode sps from avcC | |
895 | cnt = *(p+5) & 0x1f; // Number of sps | |
896 | p += 6; | |
897 | for (i = 0; i < cnt; i++) { | |
898 | nalsize = AV_RB16(p) + 2; | |
899 | if(decode_nal_units(h, p, nalsize) < 0) { | |
900 | av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i); | |
901 | return -1; | |
902 | } | |
903 | p += nalsize; | |
904 | } | |
905 | // Decode pps from avcC | |
906 | cnt = *(p++); // Number of pps | |
907 | for (i = 0; i < cnt; i++) { | |
908 | nalsize = AV_RB16(p) + 2; | |
909 | if(decode_nal_units(h, p, nalsize) != nalsize) { | |
910 | av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); | |
911 | return -1; | |
912 | } | |
913 | p += nalsize; | |
914 | } | |
915 | // Now store right nal length size, that will be use to parse all other nals | |
916 | h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1; | |
917 | } else { | |
918 | h->is_avc = 0; | |
919 | if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0) | |
920 | return -1; | |
921 | } | |
db8cb47d MN |
922 | if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){ |
923 | s->avctx->has_b_frames = h->sps.num_reorder_frames; | |
924 | s->low_delay = 0; | |
925 | } | |
9855b2e3 | 926 | |
0da71265 MN |
927 | return 0; |
928 | } | |
929 | ||
903d58f6 | 930 | int ff_h264_frame_start(H264Context *h){ |
0da71265 MN |
931 | MpegEncContext * const s = &h->s; |
932 | int i; | |
933 | ||
af8aa846 MN |
934 | if(MPV_frame_start(s, s->avctx) < 0) |
935 | return -1; | |
0da71265 | 936 | ff_er_frame_start(s); |
3a22d7fa JD |
937 | /* |
938 | * MPV_frame_start uses pict_type to derive key_frame. | |
939 | * This is incorrect for H.264; IDR markings must be used. | |
1412060e | 940 | * Zero here; IDR markings per slice in frame or fields are ORed in later. |
3a22d7fa JD |
941 | * See decode_nal_units(). |
942 | */ | |
943 | s->current_picture_ptr->key_frame= 0; | |
c173a088 | 944 | s->current_picture_ptr->mmco_reset= 0; |
0da71265 MN |
945 | |
946 | assert(s->linesize && s->uvlinesize); | |
947 | ||
948 | for(i=0; i<16; i++){ | |
949 | h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); | |
6867a90b | 950 | h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); |
0da71265 MN |
951 | } |
952 | for(i=0; i<4; i++){ | |
953 | h->block_offset[16+i]= | |
954 | h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); | |
6867a90b LLL |
955 | h->block_offset[24+16+i]= |
956 | h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); | |
0da71265 MN |
957 | } |
958 | ||
934b0821 LM |
959 | /* can't be in alloc_tables because linesize isn't known there. |
960 | * FIXME: redo bipred weight to not require extra buffer? */ | |
afebe2f7 AÖ |
961 | for(i = 0; i < s->avctx->thread_count; i++) |
962 | if(!h->thread_context[i]->s.obmc_scratchpad) | |
963 | h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); | |
5d18eaad | 964 | |
2ce1c2e0 | 965 | /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/ |
5820b90d | 966 | memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table)); |
934b0821 | 967 | |
0da71265 | 968 | // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; |
28bb9eb2 | 969 | |
1412060e | 970 | // We mark the current picture as non-reference after allocating it, so |
28bb9eb2 MN |
971 | // that if we break out due to an error it can be released automatically |
972 | // in the next MPV_frame_start(). | |
973 | // SVQ3 as well as most other codecs have only last/next/current and thus | |
974 | // get released even with set reference, besides SVQ3 and others do not | |
975 | // mark frames as reference later "naturally". | |
976 | if(s->codec_id != CODEC_ID_SVQ3) | |
977 | s->current_picture_ptr->reference= 0; | |
357282c6 MN |
978 | |
979 | s->current_picture_ptr->field_poc[0]= | |
980 | s->current_picture_ptr->field_poc[1]= INT_MAX; | |
5118c6c7 | 981 | assert(s->current_picture_ptr->long_ref==0); |
357282c6 | 982 | |
af8aa846 | 983 | return 0; |
0da71265 MN |
984 | } |
985 | ||
93cc10fa | 986 | static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ |
53c05b1e | 987 | MpegEncContext * const s = &h->s; |
0b69d625 | 988 | uint8_t *top_border; |
5f7f9719 | 989 | int top_idx = 1; |
115329f1 | 990 | |
53c05b1e MN |
991 | src_y -= linesize; |
992 | src_cb -= uvlinesize; | |
993 | src_cr -= uvlinesize; | |
994 | ||
5f7f9719 MN |
995 | if(!simple && FRAME_MBAFF){ |
996 | if(s->mb_y&1){ | |
5f7f9719 | 997 | if(!MB_MBAFF){ |
0b69d625 AS |
998 | top_border = h->top_borders[0][s->mb_x]; |
999 | AV_COPY128(top_border, src_y + 15*linesize); | |
49fb20cb | 1000 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
0b69d625 AS |
1001 | AV_COPY64(top_border+16, src_cb+7*uvlinesize); |
1002 | AV_COPY64(top_border+24, src_cr+7*uvlinesize); | |
5f7f9719 MN |
1003 | } |
1004 | } | |
c988f975 MN |
1005 | }else if(MB_MBAFF){ |
1006 | top_idx = 0; | |
1007 | }else | |
1008 | return; | |
5f7f9719 MN |
1009 | } |
1010 | ||
0b69d625 | 1011 | top_border = h->top_borders[top_idx][s->mb_x]; |
3b66c4c5 | 1012 | // There are two lines saved, the line above the the top macroblock of a pair, |
6867a90b | 1013 | // and the line above the bottom macroblock |
0b69d625 | 1014 | AV_COPY128(top_border, src_y + 16*linesize); |
53c05b1e | 1015 | |
49fb20cb | 1016 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
0b69d625 AS |
1017 | AV_COPY64(top_border+16, src_cb+8*uvlinesize); |
1018 | AV_COPY64(top_border+24, src_cr+8*uvlinesize); | |
53c05b1e MN |
1019 | } |
1020 | } | |
1021 | ||
93cc10fa | 1022 | static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ |
53c05b1e | 1023 | MpegEncContext * const s = &h->s; |
b69378e2 AÖ |
1024 | int deblock_left; |
1025 | int deblock_top; | |
5f7f9719 | 1026 | int top_idx = 1; |
1e4f1c56 AS |
1027 | uint8_t *top_border_m1; |
1028 | uint8_t *top_border; | |
5f7f9719 MN |
1029 | |
1030 | if(!simple && FRAME_MBAFF){ | |
1031 | if(s->mb_y&1){ | |
c988f975 MN |
1032 | if(!MB_MBAFF) |
1033 | return; | |
5f7f9719 | 1034 | }else{ |
5f7f9719 MN |
1035 | top_idx = MB_MBAFF ? 0 : 1; |
1036 | } | |
5f7f9719 | 1037 | } |
b69378e2 AÖ |
1038 | |
1039 | if(h->deblocking_filter == 2) { | |
024bf79f MN |
1040 | deblock_left = h->left_type[0]; |
1041 | deblock_top = h->top_type; | |
b69378e2 AÖ |
1042 | } else { |
1043 | deblock_left = (s->mb_x > 0); | |
6c805007 | 1044 | deblock_top = (s->mb_y > !!MB_FIELD); |
b69378e2 | 1045 | } |
53c05b1e MN |
1046 | |
1047 | src_y -= linesize + 1; | |
1048 | src_cb -= uvlinesize + 1; | |
1049 | src_cr -= uvlinesize + 1; | |
1050 | ||
1e4f1c56 AS |
1051 | top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; |
1052 | top_border = h->top_borders[top_idx][s->mb_x]; | |
1053 | ||
0b69d625 AS |
1054 | #define XCHG(a,b,xchg)\ |
1055 | if (xchg) AV_SWAP64(b,a);\ | |
1056 | else AV_COPY64(b,a); | |
d89dc06a | 1057 | |
d89dc06a | 1058 | if(deblock_top){ |
c988f975 | 1059 | if(deblock_left){ |
0b69d625 | 1060 | XCHG(top_border_m1+8, src_y -7, 1); |
c988f975 | 1061 | } |
0b69d625 AS |
1062 | XCHG(top_border+0, src_y +1, xchg); |
1063 | XCHG(top_border+8, src_y +9, 1); | |
cad4368a | 1064 | if(s->mb_x+1 < s->mb_width){ |
0b69d625 | 1065 | XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1); |
43efd19a | 1066 | } |
53c05b1e | 1067 | } |
53c05b1e | 1068 | |
49fb20cb | 1069 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
d89dc06a | 1070 | if(deblock_top){ |
c988f975 | 1071 | if(deblock_left){ |
0b69d625 AS |
1072 | XCHG(top_border_m1+16, src_cb -7, 1); |
1073 | XCHG(top_border_m1+24, src_cr -7, 1); | |
c988f975 | 1074 | } |
0b69d625 AS |
1075 | XCHG(top_border+16, src_cb+1, 1); |
1076 | XCHG(top_border+24, src_cr+1, 1); | |
53c05b1e | 1077 | } |
53c05b1e MN |
1078 | } |
1079 | } | |
1080 | ||
/**
 * Reconstruct the current macroblock into the current picture.
 *
 * In order: locate the destination pointers, handle field macroblocks,
 * then either copy raw PCM samples or run intra prediction / inter motion
 * compensation and add the luma and chroma residuals. Finally the
 * coefficient buffer h->mb is cleared if it may have been used.
 *
 * @param h      decoder context; position and type of the macroblock are
 *               read from h->s.mb_x, h->s.mb_y and h->mb_xy
 * @param simple when nonzero, the conditions for field macroblocks,
 *               intra PCM, transform bypass, gray-only decoding and SVQ3
 *               are short-circuited so those paths are never taken
 */
5a6a6cc7 | 1081 | static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ |
0da71265 MN |
1082 | MpegEncContext * const s = &h->s; |
1083 | const int mb_x= s->mb_x; | |
1084 | const int mb_y= s->mb_y; | |
64514ee8 | 1085 | const int mb_xy= h->mb_xy; |
0da71265 MN |
1086 | const int mb_type= s->current_picture.mb_type[mb_xy]; |
1087 | uint8_t *dest_y, *dest_cb, *dest_cr; | |
1088 | int linesize, uvlinesize /*dct_offset*/; | |
1089 | int i; | |
6867a90b | 1090 | int *block_offset = &h->block_offset[0]; |
41e4055b | 1091 | const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); |
8b6871ed | 1092 | /* is_h264 should always be true if SVQ3 is disabled. */ |
49fb20cb | 1093 | const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; |
36940eca | 1094 | void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
ef9d1d15 | 1095 | void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); |
0da71265 | 1096 | |
/* top-left sample of this macroblock in each plane of the current picture */
6120a343 MN |
1097 | dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; |
1098 | dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; | |
1099 | dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; | |
0da71265 | 1100 | |
a957c27b LM |
1101 | s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); |
1102 | s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); | |
1103 | ||
c988f975 MN |
1104 | h->list_counts[mb_xy]= h->list_count; |
1105 | ||
/* field macroblock: double the line strides, switch to the second half of
 * block_offset, and for odd mb_y move the destination back up so it starts
 * on the second line of the pair */
bd91fee3 | 1106 | if (!simple && MB_FIELD) { |
5d18eaad LM |
1107 | linesize = h->mb_linesize = s->linesize * 2; |
1108 | uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | |
6867a90b | 1109 | block_offset = &h->block_offset[24]; |
1412060e | 1110 | if(mb_y&1){ //FIXME move out of this function? |
0da71265 | 1111 | dest_y -= s->linesize*15; |
6867a90b LLL |
1112 | dest_cb-= s->uvlinesize*7; |
1113 | dest_cr-= s->uvlinesize*7; | |
0da71265 | 1114 | } |
5d18eaad LM |
1115 | if(FRAME_MBAFF) { |
1116 | int list; | |
/* rewrite the cached reference indices as (16+ref)^(mb_y&1) */
3425501d | 1117 | for(list=0; list<h->list_count; list++){ |
5d18eaad LM |
1118 | if(!USES_LIST(mb_type, list)) |
1119 | continue; | |
1120 | if(IS_16X16(mb_type)){ | |
1121 | int8_t *ref = &h->ref_cache[list][scan8[0]]; | |
1710856c | 1122 | fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); |
5d18eaad LM |
1123 | }else{ |
1124 | for(i=0; i<16; i+=4){ | |
5d18eaad LM |
1125 | int ref = h->ref_cache[list][scan8[i]]; |
1126 | if(ref >= 0) | |
1710856c | 1127 | fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1); |
5d18eaad LM |
1128 | } |
1129 | } | |
1130 | } | |
1131 | } | |
0da71265 | 1132 | } else { |
5d18eaad LM |
1133 | linesize = h->mb_linesize = s->linesize; |
1134 | uvlinesize = h->mb_uvlinesize = s->uvlinesize; | |
0da71265 MN |
1135 | // dct_offset = s->linesize * 16; |
1136 | } | |
115329f1 | 1137 | |
/* intra PCM: the samples are copied from h->mb straight into the picture */
bd91fee3 | 1138 | if (!simple && IS_INTRA_PCM(mb_type)) { |
c1708e8d MN |
1139 | for (i=0; i<16; i++) { |
1140 | memcpy(dest_y + i* linesize, h->mb + i*8, 16); | |
6fbcaaa0 | 1141 | } |
c1708e8d MN |
1142 | for (i=0; i<8; i++) { |
1143 | memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); | |
1144 | memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); | |
6fbcaaa0 | 1145 | } |
e7e09b49 LLL |
1146 | } else { |
1147 | if(IS_INTRA(mb_type)){ | |
/* with deblocking enabled, the border rows are exchanged before intra
 * prediction and copied back after it (second xchg_mb_border call below) */
5f7f9719 | 1148 | if(h->deblocking_filter) |
93cc10fa | 1149 | xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); |
53c05b1e | 1150 | |
49fb20cb | 1151 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
c92a30bb KS |
1152 | h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); |
1153 | h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); | |
e7e09b49 | 1154 | } |
0da71265 | 1155 | |
e7e09b49 | 1156 | if(IS_INTRA4x4(mb_type)){ |
bd91fee3 | 1157 | if(simple || !s->encoding){ |
43efd19a | 1158 | if(IS_8x8DCT(mb_type)){ |
1eb96035 MN |
1159 | if(transform_bypass){ |
1160 | idct_dc_add = | |
1161 | idct_add = s->dsp.add_pixels8; | |
dae006d7 | 1162 | }else{ |
4693b031 MR |
1163 | idct_dc_add = h->h264dsp.h264_idct8_dc_add; |
1164 | idct_add = h->h264dsp.h264_idct8_add; | |
1eb96035 | 1165 | } |
43efd19a LM |
1166 | for(i=0; i<16; i+=4){ |
1167 | uint8_t * const ptr= dest_y + block_offset[i]; | |
1168 | const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; | |
41e4055b MN |
1169 | if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1170 | h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); | |
1171 | }else{ | |
ac0623b2 MN |
1172 | const int nnz = h->non_zero_count_cache[ scan8[i] ]; |
1173 | h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, | |
1174 | (h->topright_samples_available<<i)&0x4000, linesize); | |
1175 | if(nnz){ | |
1176 | if(nnz == 1 && h->mb[i*16]) | |
1177 | idct_dc_add(ptr, h->mb + i*16, linesize); | |
1178 | else | |
1179 | idct_add (ptr, h->mb + i*16, linesize); | |
1180 | } | |
41e4055b | 1181 | } |
43efd19a | 1182 | } |
1eb96035 MN |
1183 | }else{ |
1184 | if(transform_bypass){ | |
1185 | idct_dc_add = | |
1186 | idct_add = s->dsp.add_pixels4; | |
1187 | }else{ | |
4693b031 MR |
1188 | idct_dc_add = h->h264dsp.h264_idct_dc_add; |
1189 | idct_add = h->h264dsp.h264_idct_add; | |
1eb96035 | 1190 | } |
aebb5d6d MN |
1191 | for(i=0; i<16; i++){ |
1192 | uint8_t * const ptr= dest_y + block_offset[i]; | |
1193 | const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; | |
e7e09b49 | 1194 | |
aebb5d6d MN |
1195 | if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1196 | h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize); | |
1197 | }else{ | |
1198 | uint8_t *topright; | |
1199 | int nnz, tr; | |
1200 | if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ | |
1201 | const int topright_avail= (h->topright_samples_available<<i)&0x8000; | |
1202 | assert(mb_y || linesize <= block_offset[i]); | |
/* no top-right samples: replicate the last sample of the row above */
1203 | if(!topright_avail){ | |
1204 | tr= ptr[3 - linesize]*0x01010101; | |
1205 | topright= (uint8_t*) &tr; | |
1206 | }else | |
1207 | topright= ptr + 4 - linesize; | |
ac0623b2 | 1208 | }else |
aebb5d6d MN |
1209 | topright= NULL; |
1210 | ||
1211 | h->hpc.pred4x4[ dir ](ptr, topright, linesize); | |
1212 | nnz = h->non_zero_count_cache[ scan8[i] ]; | |
1213 | if(nnz){ | |
1214 | if(is_h264){ | |
1215 | if(nnz == 1 && h->mb[i*16]) | |
1216 | idct_dc_add(ptr, h->mb + i*16, linesize); | |
1217 | else | |
1218 | idct_add (ptr, h->mb + i*16, linesize); | |
1219 | }else | |
881b5b80 | 1220 | ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); |
aebb5d6d | 1221 | } |
ac0623b2 | 1222 | } |
41e4055b | 1223 | } |
8b82a956 | 1224 | } |
0da71265 | 1225 | } |
e7e09b49 | 1226 | }else{ |
c92a30bb | 1227 | h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); |
bd91fee3 | 1228 | if(is_h264){ |
36940eca | 1229 | if(!transform_bypass) |
93f0c0a4 | 1230 | h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]); |
36940eca | 1231 | }else |
881b5b80 | 1232 | ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); |
0da71265 | 1233 | } |
5f7f9719 | 1234 | if(h->deblocking_filter) |
93cc10fa | 1235 | xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); |
bd91fee3 | 1236 | }else if(is_h264){ |
e7e09b49 | 1237 | hl_motion(h, dest_y, dest_cb, dest_cr, |
2833fc46 LM |
1238 | s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, |
1239 | s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, | |
4693b031 | 1240 | h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); |
0da71265 | 1241 | } |
e7e09b49 LLL |
1242 | |
1243 | ||
/* luma residual for every type except intra 4x4, whose residual was
 * already added block by block above */
1244 | if(!IS_INTRA4x4(mb_type)){ | |
bd91fee3 | 1245 | if(is_h264){ |
ef9d1d15 | 1246 | if(IS_INTRA16x16(mb_type)){ |
2fd1f0e0 MN |
1247 | if(transform_bypass){ |
1248 | if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ | |
0a8ca22f MN |
1249 | h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); |
1250 | }else{ | |
1251 | for(i=0; i<16; i++){ | |
1252 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) | |
1eb96035 | 1253 | s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize); |
0a8ca22f | 1254 | } |
2fd1f0e0 MN |
1255 | } |
1256 | }else{ | |
4693b031 | 1257 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); |
41e4055b | 1258 | } |
49c084a7 | 1259 | }else if(h->cbp&15){ |
2fd1f0e0 | 1260 | if(transform_bypass){ |
0a8ca22f | 1261 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
1eb96035 | 1262 | idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; |
0a8ca22f | 1263 | for(i=0; i<16; i+=di){ |
62bc966f | 1264 | if(h->non_zero_count_cache[ scan8[i] ]){ |
ef9d1d15 | 1265 | idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); |
0a8ca22f | 1266 | } |
ef9d1d15 | 1267 | } |
2fd1f0e0 MN |
1268 | }else{ |
1269 | if(IS_8x8DCT(mb_type)){ | |
4693b031 | 1270 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); |
2fd1f0e0 | 1271 | }else{ |
4693b031 | 1272 | h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); |
2fd1f0e0 MN |
1273 | } |
1274 | } | |
4704097a | 1275 | } |
e7e09b49 LLL |
1276 | }else{ |
1277 | for(i=0; i<16; i++){ | |
1278 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below | |
6867a90b | 1279 | uint8_t * const ptr= dest_y + block_offset[i]; |
881b5b80 | 1280 | ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0); |
e7e09b49 | 1281 | } |
4704097a | 1282 | } |
0da71265 MN |
1283 | } |
1284 | } | |
0da71265 | 1285 | |
/* chroma residual: blocks 16..19 go to Cb, 20..23 to Cr (dest[(i&4)>>2]);
 * only done when cbp has any of the 0x30 bits set */
49fb20cb | 1286 | if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){ |
ef9d1d15 LM |
1287 | uint8_t *dest[2] = {dest_cb, dest_cr}; |
1288 | if(transform_bypass){ | |
96465b90 MN |
1289 | if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ |
1290 | h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize); | |
1291 | h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize); | |
1292 | }else{ | |
c25ac15a | 1293 | idct_add = s->dsp.add_pixels4; |
96465b90 MN |
1294 | for(i=16; i<16+8; i++){ |
1295 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) | |
1296 | idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); | |
1297 | } | |
1298 | } | |
ef9d1d15 | 1299 | }else{ |
4691a77d AÖ |
1300 | chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); |
1301 | chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); | |
aebb5d6d | 1302 | if(is_h264){ |
4693b031 MR |
1303 | idct_add = h->h264dsp.h264_idct_add; |
1304 | idct_dc_add = h->h264dsp.h264_idct_dc_add; | |
ac0623b2 MN |
1305 | for(i=16; i<16+8; i++){ |
1306 | if(h->non_zero_count_cache[ scan8[i] ]) | |
1307 | idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); | |
1308 | else if(h->mb[i*16]) | |
1309 | idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); | |
1310 | } | |
aebb5d6d MN |
1311 | }else{ |
1312 | for(i=16; i<16+8; i++){ | |
1313 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | |
1314 | uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; | |
881b5b80 | 1315 | ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2); |
aebb5d6d | 1316 | } |
e7e09b49 | 1317 | } |
4704097a | 1318 | } |
0da71265 MN |
1319 | } |
1320 | } | |
1321 | } | |
/* clear the coefficient buffer whenever it may hold data */
c212fb0c MN |
1322 | if(h->cbp || IS_INTRA(mb_type)) |
1323 | s->dsp.clear_blocks(h->mb); | |
0da71265 MN |
1324 | } |
1325 | ||
0da71265 | 1326 | /** |
bd91fee3 AS |
1327 | * Process a macroblock; this case avoids checks for expensive uncommon cases. |
1328 | */ | |
1329 | static void hl_decode_mb_simple(H264Context *h){ | |
1330 | hl_decode_mb_internal(h, 1); | |
1331 | } | |
1332 | ||
1333 | /** | |
1334 | * Process a macroblock; this handles edge cases, such as interlacing. | |
1335 | */ | |
1336 | static void av_noinline hl_decode_mb_complex(H264Context *h){ | |
1337 | hl_decode_mb_internal(h, 0); | |
1338 | } | |
1339 | ||
903d58f6 | 1340 | void ff_h264_hl_decode_mb(H264Context *h){ |
bd91fee3 | 1341 | MpegEncContext * const s = &h->s; |
64514ee8 | 1342 | const int mb_xy= h->mb_xy; |
bd91fee3 | 1343 | const int mb_type= s->current_picture.mb_type[mb_xy]; |
49fb20cb | 1344 | int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; |
bd91fee3 | 1345 | |
bd91fee3 AS |
1346 | if (is_complex) |
1347 | hl_decode_mb_complex(h); | |
1348 | else hl_decode_mb_simple(h); | |
1349 | } | |
1350 | ||
0da71265 MN |
1351 | static int pred_weight_table(H264Context *h){ |
1352 | MpegEncContext * const s = &h->s; | |
1353 | int list, i; | |
9f2d1b4f | 1354 | int luma_def, chroma_def; |
115329f1 | 1355 | |
9f2d1b4f LM |
1356 | h->use_weight= 0; |
1357 | h->use_weight_chroma= 0; | |
0da71265 MN |
1358 | h->luma_log2_weight_denom= get_ue_golomb(&s->gb); |
1359 | h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); | |
9f2d1b4f LM |
1360 | luma_def = 1<<h->luma_log2_weight_denom; |
1361 | chroma_def = 1<<h->chroma_log2_weight_denom; | |
0da71265 MN |
1362 | |
1363 | for(list=0; list<2; list++){ | |
cb99c652 GB |
1364 | h->luma_weight_flag[list] = 0; |
1365 | h->chroma_weight_flag[list] = 0; | |
0da71265 MN |
1366 | for(i=0; i<h->ref_count[list]; i++){ |
1367 | int luma_weight_flag, chroma_weight_flag; | |
115329f1 | 1368 | |
0da71265 MN |
1369 | luma_weight_flag= get_bits1(&s->gb); |
1370 | if(luma_weight_flag){ | |
3d9137c8 MN |
1371 | h->luma_weight[i][list][0]= get_se_golomb(&s->gb); |
1372 | h->luma_weight[i][list][1]= get_se_golomb(&s->gb); | |
1373 | if( h->luma_weight[i][list][0] != luma_def | |
1374 | || h->luma_weight[i][list][1] != 0) { | |
9f2d1b4f | 1375 | h->use_weight= 1; |
cb99c652 GB |
1376 | h->luma_weight_flag[list]= 1; |
1377 | } | |
9f2d1b4f | 1378 | }else{ |
3d9137c8 MN |
1379 | h->luma_weight[i][list][0]= luma_def; |
1380 | h->luma_weight[i][list][1]= 0; | |
0da71265 MN |
1381 | } |
1382 | ||
0af6967e | 1383 | if(CHROMA){ |
fef744d4 MN |
1384 | chroma_weight_flag= get_bits1(&s->gb); |
1385 | if(chroma_weight_flag){ | |
1386 | int j; | |
1387 | for(j=0; j<2; j++){ | |
3d9137c8 MN |
1388 | h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb); |
1389 | h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb); | |
1390 | if( h->chroma_weight[i][list][j][0] != chroma_def | |
1391 | || h->chroma_weight[i][list][j][1] != 0) { | |
fef744d4 | 1392 | h->use_weight_chroma= 1; |
cb99c652 GB |
1393 | h->chroma_weight_flag[list]= 1; |
1394 | } | |
fef744d4 MN |
1395 | } |
1396 | }else{ | |
1397 | int j; | |
1398 | for(j=0; j<2; j++){ | |
3d9137c8 MN |
1399 | h->chroma_weight[i][list][j][0]= chroma_def; |
1400 | h->chroma_weight[i][list][j][1]= 0; | |
fef744d4 | 1401 | } |
0da71265 MN |
1402 | } |
1403 | } | |
1404 | } | |
9f5c1037 | 1405 | if(h->slice_type_nos != FF_B_TYPE) break; |
0da71265 | 1406 | } |
9f2d1b4f | 1407 | h->use_weight= h->use_weight || h->use_weight_chroma; |
0da71265 MN |
1408 | return 0; |
1409 | } | |
1410 | ||
1052b76f MN |
1411 | /** |
1412 | * Initialize implicit_weight table. | |
1413 | * @param field, 0/1 initialize the weight for interlaced MBAFF | |
1414 | * -1 initializes the rest | |
1415 | */ | |
1416 | static void implicit_weight_table(H264Context *h, int field){ | |
9f2d1b4f | 1417 | MpegEncContext * const s = &h->s; |
1052b76f | 1418 | int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; |
9f2d1b4f | 1419 | |
ce09f927 GB |
1420 | for (i = 0; i < 2; i++) { |
1421 | h->luma_weight_flag[i] = 0; | |
1422 | h->chroma_weight_flag[i] = 0; | |
1423 | } | |
1424 | ||
1052b76f MN |
1425 | if(field < 0){ |
1426 | cur_poc = s->current_picture_ptr->poc; | |
1427 | if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF | |
9f2d1b4f LM |
1428 | && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){ |
1429 | h->use_weight= 0; | |
1430 | h->use_weight_chroma= 0; | |
1431 | return; | |
1432 | } | |
1052b76f MN |
1433 | ref_start= 0; |
1434 | ref_count0= h->ref_count[0]; | |
1435 | ref_count1= h->ref_count[1]; | |
1436 | }else{ | |
1437 | cur_poc = s->current_picture_ptr->field_poc[field]; | |
1438 | ref_start= 16; | |
1439 | ref_count0= 16+2*h->ref_count[0]; | |
1440 | ref_count1= 16+2*h->ref_count[1]; | |
1441 | } | |
9f2d1b4f LM |
1442 | |
1443 | h->use_weight= 2; | |
1444 | h->use_weight_chroma= 2; | |
1445 | h->luma_log2_weight_denom= 5; | |
1446 | h->chroma_log2_weight_denom= 5; | |
1447 | ||
1052b76f | 1448 | for(ref0=ref_start; ref0 < ref_count0; ref0++){ |
9f2d1b4f | 1449 | int poc0 = h->ref_list[0][ref0].poc; |
1052b76f | 1450 | for(ref1=ref_start; ref1 < ref_count1; ref1++){ |
738386a5 | 1451 | int poc1 = h->ref_list[1][ref1].poc; |
f66e4f5f | 1452 | int td = av_clip(poc1 - poc0, -128, 127); |
1052b76f | 1453 | int w= 32; |
9f2d1b4f | 1454 | if(td){ |
f66e4f5f | 1455 | int tb = av_clip(cur_poc - poc0, -128, 127); |
c26abfa5 | 1456 | int tx = (16384 + (FFABS(td) >> 1)) / td; |
72f86ec0 MN |
1457 | int dist_scale_factor = (tb*tx + 32) >> 8; |
1458 | if(dist_scale_factor >= -64 && dist_scale_factor <= 128) | |
1052b76f MN |
1459 | w = 64 - dist_scale_factor; |
1460 | } | |
1461 | if(field<0){ | |
1462 | h->implicit_weight[ref0][ref1][0]= | |
1463 | h->implicit_weight[ref0][ref1][1]= w; | |
1464 | }else{ | |
1465 | h->implicit_weight[ref0][ref1][field]=w; | |
72f86ec0 | 1466 | } |
9f2d1b4f LM |
1467 | } |
1468 | } | |
1469 | } | |
1470 | ||
8fd57a66 | 1471 | /** |
5175b937 | 1472 | * instantaneous decoder refresh. |
0da71265 MN |
1473 | */ |
1474 | static void idr(H264Context *h){ | |
ea6f00c4 | 1475 | ff_h264_remove_all_refs(h); |
a149c1a5 | 1476 | h->prev_frame_num= 0; |
80f8e035 MN |
1477 | h->prev_frame_num_offset= 0; |
1478 | h->prev_poc_msb= | |
1479 | h->prev_poc_lsb= 0; | |
0da71265 MN |
1480 | } |
1481 | ||
7c33ad19 LM |
1482 | /* forget old pics after a seek */ |
1483 | static void flush_dpb(AVCodecContext *avctx){ | |
1484 | H264Context *h= avctx->priv_data; | |
1485 | int i; | |
64b9d48f | 1486 | for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) { |
285b570f LM |
1487 | if(h->delayed_pic[i]) |
1488 | h->delayed_pic[i]->reference= 0; | |
7c33ad19 | 1489 | h->delayed_pic[i]= NULL; |
285b570f | 1490 | } |
df8a7dff | 1491 | h->outputed_poc= INT_MIN; |
b19d493f | 1492 | h->prev_interlaced_frame = 1; |
7c33ad19 | 1493 | idr(h); |
ca159196 MR |
1494 | if(h->s.current_picture_ptr) |
1495 | h->s.current_picture_ptr->reference= 0; | |
12d96de3 | 1496 | h->s.first_field= 0; |
9c095463 | 1497 | ff_h264_reset_sei(h); |
e240f898 | 1498 | ff_mpeg_flush(avctx); |
7c33ad19 LM |
1499 | } |
1500 | ||
0da71265 MN |
1501 | static int init_poc(H264Context *h){ |
1502 | MpegEncContext * const s = &h->s; | |
1503 | const int max_frame_num= 1<<h->sps.log2_max_frame_num; | |
1504 | int field_poc[2]; | |
357282c6 | 1505 | Picture *cur = s->current_picture_ptr; |
0da71265 | 1506 | |
b78a6baa | 1507 | h->frame_num_offset= h->prev_frame_num_offset; |
5710b371 | 1508 | if(h->frame_num < h->prev_frame_num) |
b78a6baa | 1509 | h->frame_num_offset += max_frame_num; |
0da71265 MN |
1510 | |
1511 | if(h->sps.poc_type==0){ | |
1512 | const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb; | |
1513 | ||
1514 | if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2) | |
1515 | h->poc_msb = h->prev_poc_msb + max_poc_lsb; | |
1516 | else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2) | |
1517 | h->poc_msb = h->prev_poc_msb - max_poc_lsb; | |
1518 | else | |
1519 | h->poc_msb = h->prev_poc_msb; | |
1520 | //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb); | |
115329f1 | 1521 | field_poc[0] = |
0da71265 | 1522 | field_poc[1] = h->poc_msb + h->poc_lsb; |
115329f1 | 1523 | if(s->picture_structure == PICT_FRAME) |
0da71265 MN |
1524 | field_poc[1] += h->delta_poc_bottom; |
1525 | }else if(h->sps.poc_type==1){ | |
1526 | int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; | |
1527 | int i; | |
1528 | ||
1529 | if(h->sps.poc_cycle_length != 0) | |
1530 | abs_frame_num = h->frame_num_offset + h->frame_num; | |
1531 | else | |
1532 | abs_frame_num = 0; | |
1533 | ||
1534 | if(h->nal_ref_idc==0 && abs_frame_num > 0) | |
1535 | abs_frame_num--; | |
115329f1 | 1536 | |
0da71265 MN |
1537 | expected_delta_per_poc_cycle = 0; |
1538 | for(i=0; i < h->sps.poc_cycle_length; i++) | |
1539 | expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse | |
1540 | ||
1541 | if(abs_frame_num > 0){ | |
1542 | int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; | |
1543 | int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; | |
1544 | ||
1545 | expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; | |
1546 | for(i = 0; i <= frame_num_in_poc_cycle; i++) | |
1547 | expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ]; | |
1548 | } else | |
1549 | expectedpoc = 0; | |
1550 | ||
115329f1 | 1551 | if(h->nal_ref_idc == 0) |
0da71265 | 1552 | expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; |
115329f1 | 1553 | |
0da71265 MN |
1554 | field_poc[0] = expectedpoc + h->delta_poc[0]; |
1555 | field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; | |
1556 | ||
1557 | if(s->picture_structure == PICT_FRAME) | |
1558 | field_poc[1] += h->delta_poc[1]; | |
1559 | }else{ | |
b78a6baa | 1560 | int poc= 2*(h->frame_num_offset + h->frame_num); |
5710b371 | 1561 | |
b78a6baa MN |
1562 | if(!h->nal_ref_idc) |
1563 | poc--; | |
5710b371 | 1564 | |
0da71265 MN |
1565 | field_poc[0]= poc; |
1566 | field_poc[1]= poc; | |
1567 | } | |
115329f1 | 1568 | |
357282c6 | 1569 | if(s->picture_structure != PICT_BOTTOM_FIELD) |
0da71265 | 1570 | s->current_picture_ptr->field_poc[0]= field_poc[0]; |
357282c6 | 1571 | if(s->picture_structure != PICT_TOP_FIELD) |
0da71265 | 1572 | s->current_picture_ptr->field_poc[1]= field_poc[1]; |
357282c6 | 1573 | cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]); |
0da71265 MN |
1574 | |
1575 | return 0; | |
1576 | } | |
1577 | ||
b41c1db3 AÖ |
1578 | |
1579 | /** | |
1580 | * initialize scan tables | |
1581 | */ | |
1582 | static void init_scan_tables(H264Context *h){ | |
b41c1db3 | 1583 | int i; |
4693b031 | 1584 | if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly |
b41c1db3 AÖ |
1585 | memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); |
1586 | memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); | |
1587 | }else{ | |
1588 | for(i=0; i<16; i++){ | |
1589 | #define T(x) (x>>2) | ((x<<2) & 0xF) | |
1590 | h->zigzag_scan[i] = T(zigzag_scan[i]); | |
1591 | h-> field_scan[i] = T( field_scan[i]); | |
1592 | #undef T | |
1593 | } | |
1594 | } | |
4693b031 | 1595 | if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){ |
45beb850 | 1596 | memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t)); |
b41c1db3 AÖ |
1597 | memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); |
1598 | memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t)); | |
1599 | memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t)); | |
1600 | }else{ | |
1601 | for(i=0; i<64; i++){ | |
1602 | #define T(x) (x>>3) | ((x&7)<<3) | |
45beb850 | 1603 | h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); |
b41c1db3 AÖ |
1604 | h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); |
1605 | h->field_scan8x8[i] = T(field_scan8x8[i]); | |
1606 | h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); | |
1607 | #undef T | |
1608 | } | |
1609 | } | |
1610 | if(h->sps.transform_bypass){ //FIXME same ugly | |
1611 | h->zigzag_scan_q0 = zigzag_scan; | |
45beb850 | 1612 | h->zigzag_scan8x8_q0 = ff_zigzag_direct; |
b41c1db3 AÖ |
1613 | h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; |
1614 | h->field_scan_q0 = field_scan; | |
1615 | h->field_scan8x8_q0 = field_scan8x8; | |
1616 | h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; | |
1617 | }else{ | |
1618 | h->zigzag_scan_q0 = h->zigzag_scan; | |
1619 | h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; | |
1620 | h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; | |
1621 | h->field_scan_q0 = h->field_scan; | |
1622 | h->field_scan8x8_q0 = h->field_scan8x8; | |
1623 | h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; | |
1624 | } | |
1625 | } | |
afebe2f7 | 1626 | |
256299d3 MN |
1627 | static void field_end(H264Context *h){ |
1628 | MpegEncContext * const s = &h->s; | |
1629 | AVCodecContext * const avctx= s->avctx; | |
1630 | s->mb_y= 0; | |
1631 | ||
1632 | s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; | |
1633 | s->current_picture_ptr->pict_type= s->pict_type; | |
1634 | ||
1635 | if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) | |
1636 | ff_vdpau_h264_set_reference_frames(s); | |
1637 | ||
1638 | if(!s->dropable) { | |
ea6f00c4 | 1639 | ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
256299d3 MN |
1640 | h->prev_poc_msb= h->poc_msb; |
1641 | h->prev_poc_lsb= h->poc_lsb; | |
1642 | } | |
1643 | h->prev_frame_num_offset= h->frame_num_offset; | |
1644 | h->prev_frame_num= h->frame_num; | |
1645 | ||
1646 | if (avctx->hwaccel) { | |
1647 | if (avctx->hwaccel->end_frame(avctx) < 0) | |
1648 | av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n"); | |
1649 | } | |
1650 | ||
1651 | if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) | |
1652 | ff_vdpau_h264_picture_complete(s); | |
1653 | ||
1654 | /* | |
1655 | * FIXME: Error handling code does not seem to support interlaced | |
1656 | * when slices span multiple rows | |
1657 | * The ff_er_add_slice calls don't work right for bottom | |
1658 | * fields; they cause massive erroneous error concealing | |
1659 | * Error marking covers both fields (top and bottom). | |
1660 | * This causes a mismatched s->error_count | |
1661 | * and a bad error table. Further, the error count goes to | |
1662 | * INT_MAX when called for bottom field, because mb_y is | |
1663 | * past end by one (callers fault) and resync_mb_y != 0 | |
1664 | * causes problems for the first MB line, too. | |
1665 | */ | |
1666 | if (!FIELD_PICTURE) | |
1667 | ff_er_frame_end(s); | |
1668 | ||
1669 | MPV_frame_end(s); | |
d225a1e2 MN |
1670 | |
1671 | h->current_slice=0; | |
256299d3 MN |
1672 | } |
1673 | ||
afebe2f7 AÖ |
1674 | /** |
1675 | * Replicates H264 "master" context to thread contexts. | |
1676 | */ | |
1677 | static void clone_slice(H264Context *dst, H264Context *src) | |
1678 | { | |
1679 | memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); | |
1680 | dst->s.current_picture_ptr = src->s.current_picture_ptr; | |
1681 | dst->s.current_picture = src->s.current_picture; | |
1682 | dst->s.linesize = src->s.linesize; | |
1683 | dst->s.uvlinesize = src->s.uvlinesize; | |
12d96de3 | 1684 | dst->s.first_field = src->s.first_field; |
afebe2f7 AÖ |
1685 | |
1686 | dst->prev_poc_msb = src->prev_poc_msb; | |
1687 | dst->prev_poc_lsb = src->prev_poc_lsb; | |
1688 | dst->prev_frame_num_offset = src->prev_frame_num_offset; | |
1689 | dst->prev_frame_num = src->prev_frame_num; | |
1690 | dst->short_ref_count = src->short_ref_count; | |
1691 | ||
1692 | memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); | |
1693 | memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); | |
1694 | memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); | |
1695 | memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list)); | |
50c21814 AÖ |
1696 | |
1697 | memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); | |
1698 | memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); | |
afebe2f7 AÖ |
1699 | } |
1700 | ||
0da71265 MN |
1701 | /** |
1702 | * decodes a slice header. | |
9c852bcf | 1703 | * This will also call MPV_common_init() and frame_start() as needed. |
afebe2f7 AÖ |
1704 | * |
1705 | * @param h h264context | |
1706 | * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding) | |
1707 | * | |
d9526386 | 1708 | * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded |
0da71265 | 1709 | */ |
afebe2f7 | 1710 | static int decode_slice_header(H264Context *h, H264Context *h0){ |
0da71265 | 1711 | MpegEncContext * const s = &h->s; |
12d96de3 | 1712 | MpegEncContext * const s0 = &h0->s; |
88e7a4d1 | 1713 | unsigned int first_mb_in_slice; |
ac658be5 | 1714 | unsigned int pps_id; |
0da71265 | 1715 | int num_ref_idx_active_override_flag; |
41f5c62f | 1716 | unsigned int slice_type, tmp, i, j; |
0bf79634 | 1717 | int default_ref_list_done = 0; |
12d96de3 | 1718 | int last_pic_structure; |
0da71265 | 1719 | |
2f944356 | 1720 | s->dropable= h->nal_ref_idc == 0; |
0da71265 | 1721 | |
cf653d08 JD |
1722 | if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){ |
1723 | s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; | |
1724 | s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; | |
1725 | }else{ | |
1726 | s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; | |
1727 | s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; | |
1728 | } | |
1729 | ||
0da71265 MN |
1730 | first_mb_in_slice= get_ue_golomb(&s->gb); |
1731 | ||
d225a1e2 MN |
1732 | if(first_mb_in_slice == 0){ //FIXME better field boundary detection |
1733 | if(h0->current_slice && FIELD_PICTURE){ | |
1734 | field_end(h); | |
1735 | } | |
1736 | ||
afebe2f7 | 1737 | h0->current_slice = 0; |
12d96de3 | 1738 | if (!s0->first_field) |
f6e3c460 | 1739 | s->current_picture_ptr= NULL; |
66a4b2c1 MN |
1740 | } |
1741 | ||
9963b332 | 1742 | slice_type= get_ue_golomb_31(&s->gb); |
0bf79634 | 1743 | if(slice_type > 9){ |
9b879566 | 1744 | av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); |
5175b937 | 1745 | return -1; |
0da71265 | 1746 | } |
0bf79634 LLL |
1747 | if(slice_type > 4){ |
1748 | slice_type -= 5; | |
0da71265 MN |
1749 | h->slice_type_fixed=1; |
1750 | }else | |
1751 | h->slice_type_fixed=0; | |
115329f1 | 1752 | |
ee2a957f | 1753 | slice_type= golomb_to_pict_type[ slice_type ]; |
9701840b | 1754 | if (slice_type == FF_I_TYPE |
afebe2f7 | 1755 | || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) { |
0bf79634 LLL |
1756 | default_ref_list_done = 1; |
1757 | } | |
1758 | h->slice_type= slice_type; | |
e3e6f18f | 1759 | h->slice_type_nos= slice_type & 3; |
0bf79634 | 1760 | |
1412060e | 1761 | s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though |
115329f1 | 1762 | |
0da71265 | 1763 | pps_id= get_ue_golomb(&s->gb); |
ac658be5 | 1764 | if(pps_id>=MAX_PPS_COUNT){ |
9b879566 | 1765 | av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); |
0da71265 MN |
1766 | return -1; |
1767 | } | |
afebe2f7 | 1768 | if(!h0->pps_buffers[pps_id]) { |
a0f80050 | 1769 | av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id); |
8b92b792 MN |
1770 | return -1; |
1771 | } | |
afebe2f7 | 1772 | h->pps= *h0->pps_buffers[pps_id]; |
8b92b792 | 1773 | |
afebe2f7 | 1774 | if(!h0->sps_buffers[h->pps.sps_id]) { |
a0f80050 | 1775 | av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id); |
8b92b792 MN |
1776 | return -1; |
1777 | } | |
afebe2f7 | 1778 | h->sps = *h0->sps_buffers[h->pps.sps_id]; |
239ea04c | 1779 | |
50c21814 | 1780 | if(h == h0 && h->dequant_coeff_pps != pps_id){ |
50eaa857 | 1781 | h->dequant_coeff_pps = pps_id; |
239ea04c LM |
1782 | init_dequant_tables(h); |
1783 | } | |
115329f1 | 1784 | |
0da71265 | 1785 | s->mb_width= h->sps.mb_width; |
6867a90b | 1786 | s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); |
115329f1 | 1787 | |
bf4665ee | 1788 | h->b_stride= s->mb_width*4; |
0da71265 | 1789 | |
faf3dfb9 | 1790 | s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); |
0da71265 | 1791 | if(h->sps.frame_mbs_only_flag) |
faf3dfb9 | 1792 | s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); |
0da71265 | 1793 | else |
faf3dfb9 | 1794 | s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3); |
115329f1 DB |
1795 | |
1796 | if (s->context_initialized | |
5388f0b4 JK |
1797 | && ( s->width != s->avctx->width || s->height != s->avctx->height |
1798 | || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) { | |
afebe2f7 AÖ |
1799 | if(h != h0) |
1800 | return -1; // width / height changed during parallelized decoding | |
0da71265 | 1801 | free_tables(h); |
ff7f75e1 | 1802 | flush_dpb(s->avctx); |
0da71265 MN |
1803 | MPV_common_end(s); |
1804 | } | |
1805 | if (!s->context_initialized) { | |
afebe2f7 AÖ |
1806 | if(h != h0) |
1807 | return -1; // we cant (re-)initialize context during parallel decoding | |
f3bdc3da RD |
1808 | |
1809 | avcodec_set_dimensions(s->avctx, s->width, s->height); | |
1810 | s->avctx->sample_aspect_ratio= h->sps.sar; | |
1811 | if(!s->avctx->sample_aspect_ratio.den) | |
1812 | s->avctx->sample_aspect_ratio.den = 1; | |
1813 | ||
c4dffe7e DC |
1814 | if(h->sps.video_signal_type_present_flag){ |
1815 | s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; | |
1816 | if(h->sps.colour_description_present_flag){ | |
1817 | s->avctx->color_primaries = h->sps.color_primaries; | |
1818 | s->avctx->color_trc = h->sps.color_trc; | |
1819 | s->avctx->colorspace = h->sps.colorspace; | |
1820 | } | |
1821 | } | |
1822 | ||
f3bdc3da | 1823 | if(h->sps.timing_info_present_flag){ |
3102d180 | 1824 | int64_t den= h->sps.time_scale; |
055a6aa7 | 1825 | if(h->x264_build < 44U) |
3102d180 | 1826 | den *= 2; |
f3bdc3da | 1827 | av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, |
3102d180 | 1828 | h->sps.num_units_in_tick, den, 1<<30); |
f3bdc3da RD |
1829 | } |
1830 | s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts); | |
1831 | s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt); | |
1832 | ||
0da71265 MN |
1833 | if (MPV_common_init(s) < 0) |
1834 | return -1; | |
12d96de3 | 1835 | s->first_field = 0; |
b19d493f | 1836 | h->prev_interlaced_frame = 1; |
115329f1 | 1837 | |
b41c1db3 | 1838 | init_scan_tables(h); |
903d58f6 | 1839 | ff_h264_alloc_tables(h); |
0da71265 | 1840 | |
afebe2f7 AÖ |
1841 | for(i = 1; i < s->avctx->thread_count; i++) { |
1842 | H264Context *c; | |
1843 | c = h->thread_context[i] = av_malloc(sizeof(H264Context)); | |
79db7ac6 | 1844 | memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); |
afebe2f7 | 1845 | memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); |
7a5c850b | 1846 | c->h264dsp = h->h264dsp; |
afebe2f7 AÖ |
1847 | c->sps = h->sps; |
1848 | c->pps = h->pps; | |
1849 | init_scan_tables(c); | |
145061a1 | 1850 | clone_tables(c, h, i); |
afebe2f7 AÖ |
1851 | } |
1852 | ||
1853 | for(i = 0; i < s->avctx->thread_count; i++) | |
1854 | if(context_init(h->thread_context[i]) < 0) | |
1855 | return -1; | |
0da71265 MN |
1856 | } |
1857 | ||
0da71265 MN |
1858 | h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); |
1859 | ||
5d18eaad | 1860 | h->mb_mbaff = 0; |
6ba71fc4 | 1861 | h->mb_aff_frame = 0; |
12d96de3 | 1862 | last_pic_structure = s0->picture_structure; |
0da71265 MN |
1863 | if(h->sps.frame_mbs_only_flag){ |
1864 | s->picture_structure= PICT_FRAME; | |
1865 | }else{ | |
6ba71fc4 | 1866 | if(get_bits1(&s->gb)) { //field_pic_flag |
0da71265 | 1867 | s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag |
6ba71fc4 | 1868 | } else { |
0da71265 | 1869 | s->picture_structure= PICT_FRAME; |
6ba71fc4 | 1870 | h->mb_aff_frame = h->sps.mb_aff; |
6867a90b | 1871 | } |
0da71265 | 1872 | } |
44e9dcf1 | 1873 | h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; |
2ddcf84b JD |
1874 | |
1875 | if(h0->current_slice == 0){ | |
26b86e47 MN |
1876 | while(h->frame_num != h->prev_frame_num && |
1877 | h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){ | |
1878 | av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num); | |
903d58f6 | 1879 | if (ff_h264_frame_start(h) < 0) |
66e6038c | 1880 | return -1; |
26b86e47 MN |
1881 | h->prev_frame_num++; |
1882 | h->prev_frame_num %= 1<<h->sps.log2_max_frame_num; | |
1883 | s->current_picture_ptr->frame_num= h->prev_frame_num; | |
ea6f00c4 | 1884 | ff_h264_execute_ref_pic_marking(h, NULL, 0); |
26b86e47 MN |
1885 | } |
1886 | ||
12d96de3 JD |
1887 | /* See if we have a decoded first field looking for a pair... */ |
1888 | if (s0->first_field) { | |
1889 | assert(s0->current_picture_ptr); | |
1890 | assert(s0->current_picture_ptr->data[0]); | |
1891 | assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF); | |
1892 | ||
1893 | /* figure out if we have a complementary field pair */ | |
1894 | if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { | |
1895 | /* | |
1896 | * Previous field is unmatched. Don't display it, but let it | |
1897 | * remain for reference if marked as such. | |
1898 | */ | |
1899 | s0->current_picture_ptr = NULL; | |
1900 | s0->first_field = FIELD_PICTURE; | |
1901 | ||
1902 | } else { | |
1903 | if (h->nal_ref_idc && | |
1904 | s0->current_picture_ptr->reference && | |
1905 | s0->current_picture_ptr->frame_num != h->frame_num) { | |
1906 | /* | |
1907 | * This and previous field were reference, but had | |
1908 | * different frame_nums. Consider this field first in | |
1909 | * pair. Throw away previous field except for reference | |
1910 | * purposes. | |
1911 | */ | |
1912 | s0->first_field = 1; | |
1913 | s0->current_picture_ptr = NULL; | |
1914 | ||
1915 | } else { | |
1916 | /* Second field in complementary pair */ | |
1917 | s0->first_field = 0; | |
1918 | } | |
1919 | } | |
1920 | ||
1921 | } else { | |
1922 | /* Frame or first field in a potentially complementary pair */ | |
1923 | assert(!s0->current_picture_ptr); | |
1924 | s0->first_field = FIELD_PICTURE; | |
1925 | } | |
1926 | ||
903d58f6 | 1927 | if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) { |
12d96de3 | 1928 | s0->first_field = 0; |
2ddcf84b | 1929 | return -1; |
12d96de3 | 1930 | } |
2ddcf84b JD |
1931 | } |
1932 | if(h != h0) | |
1933 | clone_slice(h, h0); | |
1934 | ||
1935 | s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup | |
1936 | ||
88e7a4d1 | 1937 | assert(s->mb_num == s->mb_width * s->mb_height); |
f3e53d9f | 1938 | if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num || |
88e7a4d1 MN |
1939 | first_mb_in_slice >= s->mb_num){ |
1940 | av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); | |
6b53b87e MN |
1941 | return -1; |
1942 | } | |
88e7a4d1 | 1943 | s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; |
f3e53d9f JD |
1944 | s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE; |
1945 | if (s->picture_structure == PICT_BOTTOM_FIELD) | |
1946 | s->resync_mb_y = s->mb_y = s->mb_y + 1; | |
88e7a4d1 | 1947 | assert(s->mb_y < s->mb_height); |
115329f1 | 1948 | |
0da71265 MN |
1949 | if(s->picture_structure==PICT_FRAME){ |
1950 | h->curr_pic_num= h->frame_num; | |
1951 | h->max_pic_num= 1<< h->sps.log2_max_frame_num; | |
1952 | }else{ | |
f57e2af6 | 1953 | h->curr_pic_num= 2*h->frame_num + 1; |
0da71265 MN |
1954 | h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1); |
1955 | } | |
115329f1 | 1956 | |
0da71265 | 1957 | if(h->nal_unit_type == NAL_IDR_SLICE){ |
1df1df0b | 1958 | get_ue_golomb(&s->gb); /* idr_pic_id */ |
0da71265 | 1959 | } |
115329f1 | 1960 | |
0da71265 MN |
1961 | if(h->sps.poc_type==0){ |
1962 | h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb); | |
115329f1 | 1963 | |
0da71265 MN |
1964 | if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){ |
1965 | h->delta_poc_bottom= get_se_golomb(&s->gb); | |
1966 | } | |
1967 | } | |
115329f1 | 1968 | |
0da71265 MN |
1969 | if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){ |
1970 | h->delta_poc[0]= get_se_golomb(&s->gb); | |
115329f1 | 1971 | |
0da71265 MN |
1972 | if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME) |
1973 | h->delta_poc[1]= get_se_golomb(&s->gb); | |
1974 | } | |
115329f1 | 1975 | |
0da71265 | 1976 | init_poc(h); |
115329f1 | 1977 | |
0da71265 MN |
1978 | if(h->pps.redundant_pic_cnt_present){ |
1979 | h->redundant_pic_count= get_ue_golomb(&s->gb); | |
1980 | } | |
1981 | ||
1412060e | 1982 | //set defaults, might be overridden a few lines later |
0da71265 MN |
1983 | h->ref_count[0]= h->pps.ref_count[0]; |
1984 | h->ref_count[1]= h->pps.ref_count[1]; | |
1985 | ||
e3e6f18f | 1986 | if(h->slice_type_nos != FF_I_TYPE){ |
9f5c1037 | 1987 | if(h->slice_type_nos == FF_B_TYPE){ |
0da71265 MN |
1988 | h->direct_spatial_mv_pred= get_bits1(&s->gb); |
1989 | } | |
1990 | num_ref_idx_active_override_flag= get_bits1(&s->gb); | |
115329f1 | 1991 | |
0da71265 MN |
1992 | if(num_ref_idx_active_override_flag){ |
1993 | h->ref_count[0]= get_ue_golomb(&s->gb) + 1; | |
9f5c1037 | 1994 | if(h->slice_type_nos==FF_B_TYPE) |
0da71265 MN |
1995 | h->ref_count[1]= get_ue_golomb(&s->gb) + 1; |
1996 | ||
187696fa | 1997 | if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){ |
9b879566 | 1998 | av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); |
88e7a4d1 | 1999 | h->ref_count[0]= h->ref_count[1]= 1; |
0da71265 MN |
2000 | return -1; |
2001 | } | |
2002 | } | |
9f5c1037 | 2003 | if(h->slice_type_nos == FF_B_TYPE) |
187696fa MN |
2004 | h->list_count= 2; |
2005 | else | |
2006 | h->list_count= 1; | |
2007 | }else | |
2008 | h->list_count= 0; | |
0da71265 | 2009 | |
0bf79634 | 2010 | if(!default_ref_list_done){ |
ea6f00c4 | 2011 | ff_h264_fill_default_ref_list(h); |
0da71265 MN |
2012 | } |
2013 | ||
ea6f00c4 | 2014 | if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0) |
806bb93f | 2015 | return -1; |
0da71265 | 2016 | |
07dff5c7 MN |
2017 | if(h->slice_type_nos!=FF_I_TYPE){ |
2018 | s->last_picture_ptr= &h->ref_list[0][0]; | |
8d2fc163 | 2019 | ff_copy_picture(&s->last_picture, s->last_picture_ptr); |
07dff5c7 MN |
2020 | } |
2021 | if(h->slice_type_nos==FF_B_TYPE){ | |
2022 | s->next_picture_ptr= &h->ref_list[1][0]; | |
8d2fc163 | 2023 | ff_copy_picture(&s->next_picture, s->next_picture_ptr); |
07dff5c7 MN |
2024 | } |
2025 | ||
932f396f | 2026 | if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE ) |
9f5c1037 | 2027 | || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) ) |
0da71265 | 2028 | pred_weight_table(h); |
1a29c6a0 | 2029 | else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){ |
1052b76f | 2030 | implicit_weight_table(h, -1); |
1a29c6a0 | 2031 | }else { |
9f2d1b4f | 2032 | h->use_weight = 0; |
cb99c652 GB |
2033 | for (i = 0; i < 2; i++) { |
2034 | h->luma_weight_flag[i] = 0; | |
2035 | h->chroma_weight_flag[i] = 0; | |
2036 | } | |
2037 | } | |
115329f1 | 2038 | |
2ddcf84b | 2039 | if(h->nal_ref_idc) |
ea6f00c4 | 2040 | ff_h264_decode_ref_pic_marking(h0, &s->gb); |
0da71265 | 2041 | |
1052b76f | 2042 | if(FRAME_MBAFF){ |
ea6f00c4 | 2043 | ff_h264_fill_mbaff_ref_list(h); |
5d18eaad | 2044 | |
1052b76f MN |
2045 | if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){ |
2046 | implicit_weight_table(h, 0); | |
2047 | implicit_weight_table(h, 1); | |
2048 | } | |
2049 | } | |
2050 | ||
8f56e219 | 2051 | if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred) |
943f69a6 MN |
2052 | ff_h264_direct_dist_scale_factor(h); |
2053 | ff_h264_direct_ref_list_init(h); | |
8f56e219 | 2054 | |
e3e6f18f | 2055 | if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){ |
9963b332 | 2056 | tmp = get_ue_golomb_31(&s->gb); |
88e7a4d1 MN |
2057 | if(tmp > 2){ |
2058 | av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); | |
2059 | return -1; | |
2060 | } | |
2061 | h->cabac_init_idc= tmp; | |
2062 | } | |
e5017ab8 LA |
2063 | |
2064 | h->last_qscale_diff = 0; | |
88e7a4d1 MN |
2065 | tmp = h->pps.init_qp + get_se_golomb(&s->gb); |
2066 | if(tmp>51){ | |
2067 | av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); | |
3ebc7e04 MN |
2068 | return -1; |
2069 | } | |
88e7a4d1 | 2070 | s->qscale= tmp; |
4691a77d AÖ |
2071 | h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); |
2072 | h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); | |
0da71265 | 2073 | //FIXME qscale / qp ... stuff |
9701840b | 2074 | if(h->slice_type == FF_SP_TYPE){ |
1df1df0b | 2075 | get_bits1(&s->gb); /* sp_for_switch_flag */ |
0da71265 | 2076 | } |
9701840b | 2077 | if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){ |
1df1df0b | 2078 | get_se_golomb(&s->gb); /* slice_qs_delta */ |
0da71265 MN |
2079 | } |
2080 | ||
53c05b1e | 2081 | h->deblocking_filter = 1; |
0c32e19d MN |
2082 | h->slice_alpha_c0_offset = 52; |
2083 | h->slice_beta_offset = 52; | |
0da71265 | 2084 | if( h->pps.deblocking_filter_parameters_present ) { |
9963b332 | 2085 | tmp= get_ue_golomb_31(&s->gb); |
88e7a4d1 MN |
2086 | if(tmp > 2){ |
2087 | av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp); | |
2088 | return -1; | |
2089 | } | |
2090 | h->deblocking_filter= tmp; | |
115329f1 | 2091 | if(h->deblocking_filter < 2) |
53c05b1e MN |
2092 | h->deblocking_filter^= 1; // 1<->0 |
2093 | ||
2094 | if( h->deblocking_filter ) { | |
0c32e19d MN |
2095 | h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1; |
2096 | h->slice_beta_offset += get_se_golomb(&s->gb) << 1; | |
2097 | if( h->slice_alpha_c0_offset > 104U | |
2098 | || h->slice_beta_offset > 104U){ | |
2099 | av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset); | |
2100 | return -1; | |
2101 | } | |
0da71265 | 2102 | } |
980a82b7 | 2103 | } |
afebe2f7 | 2104 | |
61858a76 | 2105 | if( s->avctx->skip_loop_filter >= AVDISCARD_ALL |
4b30289e | 2106 | ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE) |
9f5c1037 | 2107 | ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE) |
61858a76 RD |
2108 | ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) |
2109 | h->deblocking_filter= 0; | |
2110 | ||
afebe2f7 | 2111 | if(h->deblocking_filter == 1 && h0->max_contexts > 1) { |
ec970c21 AÖ |
2112 | if(s->avctx->flags2 & CODEC_FLAG2_FAST) { |
2113 | /* Cheat slightly for speed: | |
5d81d641 | 2114 | Do not bother to deblock across slices. */ |
ec970c21 AÖ |
2115 | h->deblocking_filter = 2; |
2116 | } else { | |
7ae94d52 AÖ |
2117 | h0->max_contexts = 1; |
2118 | if(!h0->single_decode_warning) { | |
2119 | av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); | |
2120 | h0->single_decode_warning = 1; | |
2121 | } | |
2122 | if(h != h0) | |
2123 | return 1; // deblocking switched inside frame | |
ec970c21 | 2124 | } |
afebe2f7 | 2125 | } |
0c32e19d | 2126 | h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]); |
afebe2f7 | 2127 | |
0da71265 MN |
2128 | #if 0 //FMO |
2129 | if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) | |
2130 | slice_group_change_cycle= get_bits(&s->gb, ?); | |
2131 | #endif | |
2132 | ||
afebe2f7 AÖ |
2133 | h0->last_slice_type = slice_type; |
2134 | h->slice_num = ++h0->current_slice; | |
b735aeea MN |
2135 | if(h->slice_num >= MAX_SLICES){ |
2136 | av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n"); | |
2137 | } | |
5175b937 | 2138 | |
c32867b5 | 2139 | for(j=0; j<2; j++){ |
6d7e6b26 | 2140 | int id_list[16]; |
b735aeea | 2141 | int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j]; |
6d7e6b26 MN |
2142 | for(i=0; i<16; i++){ |
2143 | id_list[i]= 60; | |
2144 | if(h->ref_list[j][i].data[0]){ | |
2145 | int k; | |
2146 | uint8_t *base= h->ref_list[j][i].base[0]; | |
2147 | for(k=0; k<h->short_ref_count; k++) | |
2148 | if(h->short_ref[k]->base[0] == base){ | |
2149 | id_list[i]= k; | |
2150 | break; | |
2151 | } | |
2152 | for(k=0; k<h->long_ref_count; k++) | |
2153 | if(h->long_ref[k] && h->long_ref[k]->base[0] == base){ | |
2154 | id_list[i]= h->short_ref_count + k; | |
2155 | break; | |
2156 | } | |
2157 | } | |
2158 | } | |
2159 | ||
c32867b5 MN |
2160 | ref2frm[0]= |
2161 | ref2frm[1]= -1; | |
d50cdd82 | 2162 | for(i=0; i<16; i++) |
6d7e6b26 | 2163 | ref2frm[i+2]= 4*id_list[i] |
c32867b5 | 2164 | +(h->ref_list[j][i].reference&3); |
d50cdd82 MN |
2165 | ref2frm[18+0]= |
2166 | ref2frm[18+1]= -1; | |
2167 | for(i=16; i<48; i++) | |
6d7e6b26 | 2168 | ref2frm[i+4]= 4*id_list[(i-16)>>1] |
d50cdd82 | 2169 | +(h->ref_list[j][i].reference&3); |
c32867b5 MN |
2170 | } |
2171 | ||
5d18eaad | 2172 | h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; |
8a11a969 | 2173 | h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; |
5d18eaad | 2174 | |
802e9146 MN |
2175 | s->avctx->refs= h->sps.ref_frame_count; |
2176 | ||
0da71265 | 2177 | if(s->avctx->debug&FF_DEBUG_PICT_INFO){ |
49573a87 | 2178 | av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", |
6867a90b LLL |
2179 | h->slice_num, |
2180 | (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), | |
115329f1 | 2181 | first_mb_in_slice, |
49573a87 | 2182 | av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", |
0da71265 MN |
2183 | pps_id, h->frame_num, |
2184 | s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], | |
2185 | h->ref_count[0], h->ref_count[1], | |
2186 | s->qscale, | |
0c32e19d | 2187 | h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26, |
9f2d1b4f | 2188 | h->use_weight, |
4806b922 MN |
2189 | h->use_weight==1 && h->use_weight_chroma ? "c" : "", |
2190 | h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "" | |
0da71265 MN |
2191 | ); |
2192 | } | |
2193 | ||
2194 | return 0; | |
2195 | } | |
2196 | ||
0dc343d4 | 2197 | int ff_h264_get_slice_type(const H264Context *h) |
75dd6938 LA |
2198 | { |
2199 | switch (h->slice_type) { | |
2200 | case FF_P_TYPE: return 0; | |
2201 | case FF_B_TYPE: return 1; | |
2202 | case FF_I_TYPE: return 2; | |
2203 | case FF_SP_TYPE: return 3; | |
2204 | case FF_SI_TYPE: return 4; | |
2205 | default: return -1; | |
2206 | } | |
2207 | } | |
2208 | ||
d02bb3ec DB |
2209 | /**
2210 | * Prepare the per-macroblock state read by the loop filter for the
 * macroblock at h->mb_xy.
 *
 * The function stores the positions of the top and left neighbours in
 * h->top_mb_xy / h->left_mb_xy[] and their types in h->top_type /
 * h->left_type[]. For non-intra macroblocks it additionally fills
 * h->non_zero_count_cache, h->cbp, h->ref_cache and h->mv_cache from the
 * per-picture tables of the current macroblock and of its neighbours.
 *
 * @param h       decoder context
 * @param mb_type type of the current macroblock
2211 | * @return non zero if the loop filter can be skipped, i.e. the QP of the
 *         macroblock and the averaged QPs of the checked edges are all
 *         at or below h->qp_thresh; 0 otherwise
2212 | */ | |
2213 | static int fill_filter_caches(H264Context *h, int mb_type){ | |
2214 | MpegEncContext * const s = &h->s; | |
2215 | const int mb_xy= h->mb_xy; | |
2216 | int top_xy, left_xy[2]; | |
2217 | int top_type, left_type[2]; | |
2218 | ||
2219 | top_xy = mb_xy - (s->mb_stride << MB_FIELD); /* macroblock above; the stride is shifted left by MB_FIELD */ | |
2220 | ||
2221 | //FIXME deblocking could skip the intra and nnz parts. | |
2222 | ||
2223 | /* Wow, what a mess, why didn't they simplify the interlacing & intra | |
2224 | * stuff, I can't imagine that these complex rules are worth it. */ | |
2225 | ||
2226 | left_xy[1] = left_xy[0] = mb_xy-1; /* default: both left entries are the macroblock immediately to the left */ | |
2227 | if(FRAME_MBAFF){ | |
2228 | const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); | |
2229 | const int curr_mb_field_flag = IS_INTERLACED(mb_type); | |
2230 | if(s->mb_y&1){ | |
2231 | if (left_mb_field_flag != curr_mb_field_flag) { | |
2232 | left_xy[0] -= s->mb_stride; | |
2233 | } | |
2234 | }else{ | |
2235 | if(curr_mb_field_flag){ | |
2236 | top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); /* the mask is all ones when bit 7 of the top macroblock type is clear (presumably the interlaced flag - confirm), so mb_stride is added only then */ | |
2237 | } | |
2238 | if (left_mb_field_flag != curr_mb_field_flag) { | |
2239 | left_xy[1] += s->mb_stride; | |
2240 | } | |
2241 | } | |
2242 | } | |
2243 | ||
2244 | h->top_mb_xy = top_xy; /* publish the neighbour positions in the context */ | |
2245 | h->left_mb_xy[0] = left_xy[0]; | |
2246 | h->left_mb_xy[1] = left_xy[1]; | |
2247 | { | |
2248 | //for sufficiently low qp, filtering wouldn't do anything | |
2249 | //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp | |
2250 | int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice | |
2251 | int qp = s->current_picture.qscale_table[mb_xy]; | |
2252 | if(qp <= qp_thresh | |
2253 | && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) | |
2254 | && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ | |
2255 | if(!FRAME_MBAFF) | |
2256 | return 1; | |
2257 | if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) | |
2258 | && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) | |
2259 | return 1; | |
2260 | } | |
2261 | } | |
2262 | ||
2263 | top_type = s->current_picture.mb_type[top_xy] ; | |
2264 | left_type[0] = s->current_picture.mb_type[left_xy[0]]; | |
2265 | left_type[1] = s->current_picture.mb_type[left_xy[1]]; | |
2266 | if(h->deblocking_filter == 2){ /* mode 2: neighbours whose slice number differs from the current one are treated as unavailable */ | |
2267 | if(h->slice_table[top_xy ] != h->slice_num) top_type= 0; | |
2268 | if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0; | |
2269 | }else{ /* otherwise only a slice_table entry of 0xFFFF makes a neighbour unavailable (presumably "no slice decoded there" - confirm) */ | |
2270 | if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0; | |
2271 | if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0; | |
2272 | } | |
2273 | h->top_type = top_type ; | |
2274 | h->left_type[0]= left_type[0]; | |
2275 | h->left_type[1]= left_type[1]; | |
2276 | ||
2277 | if(IS_INTRA(mb_type)) /* intra macroblocks return before the nnz/cbp/ref/mv caches are filled */ | |
2278 | return 0; | |
2279 | ||
2280 | AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); | |
2281 | AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); | |
2282 | AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); | |
2283 | AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); | |
2284 | AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); | |
2285 | ||
2286 | h->cbp= h->cbp_table[mb_xy]; | |
2287 | ||
2288 | { | |
2289 | int list; | |
2290 | for(list=0; list<h->list_count; list++){ | |
2291 | int8_t *ref; | |
2292 | int y, b_stride; | |
2293 | int16_t (*mv_dst)[2]; | |
2294 | int16_t (*mv_src)[2]; | |
2295 | ||
2296 | if(!USES_LIST(mb_type, list)){ /* list not used by this macroblock: zero motion vectors and LIST_NOT_USED references */ | |
2297 | fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); | |
2298 | AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
2299 | AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
2300 | AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
2301 | AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
2302 | continue; | |
2303 | } | |
2304 | ||
2305 | ref = &s->current_picture.ref_index[list][4*mb_xy]; | |
2306 | { | |
2307 | int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); /* table of the current slice; the element offset is 20 when MB_MBAFF is set, 2 otherwise */ | |
2308 | AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); | |
2309 | AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); | |
2310 | ref += 2; | |
2311 | AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); | |
2312 | AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); | |
2313 | } | |
2314 | ||
2315 | b_stride = h->b_stride; | |
2316 | mv_dst = &h->mv_cache[list][scan8[0]]; | |
2317 | mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; | |
2318 | for(y=0; y<4; y++){ | |
2319 | AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); | |
2320 | } | |
2321 | ||
2322 | } | |
2323 | } | |
2324 | ||
2325 | ||
2326 | /* | |
2327 | 0 . T T. T T T T | |
2328 | 1 L . .L . . . . | |
2329 | 2 L . .L . . . . | |
2330 | 3 . T TL . . . . | |
2331 | 4 L . .L . . . . | |
2332 | 5 L . .. . . . . | |
2333 | */ | |
2334 | //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | |
2335 | if(top_type){ | |
2336 | AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); | |
2337 | } | |
2338 | ||
2339 | if(left_type[0]){ | |
2340 | h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; | |
2341 | h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; | |
2342 | h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; | |
2343 | h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; | |
2344 | } | |
2345 | ||
2346 | // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs | |
2347 | if(!CABAC && h->pps.transform_8x8_mode){ | |
2348 | if(IS_8x8DCT(top_type)){ | |
2349 | h->non_zero_count_cache[4+8*0]= | |
2350 | h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4; | |
2351 | h->non_zero_count_cache[6+8*0]= | |
2352 | h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8; | |
2353 | } | |
2354 | if(IS_8x8DCT(left_type[0])){ | |
2355 | h->non_zero_count_cache[3+8*1]= | |
2356 | h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF | |
2357 | } | |
2358 | if(IS_8x8DCT(left_type[1])){ | |
2359 | h->non_zero_count_cache[3+8*3]= | |
2360 | h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF | |
2361 | } | |
2362 | ||
2363 | if(IS_8x8DCT(mb_type)){ | |
2364 | h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= | |
2365 | h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; | |
2366 | ||
2367 | h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= | |
2368 | h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; | |
2369 | ||
2370 | h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= | |
2371 | h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; | |
2372 | ||
2373 | h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= | |
2374 | h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; | |
2375 | } | |
2376 | } | |
2377 | ||
2378 | if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ /* copy motion vectors and references of the top and left neighbours into the caches */ | |
2379 | int list; | |
2380 | for(list=0; list<h->list_count; list++){ | |
2381 | if(USES_LIST(top_type, list)){ | |
2382 | const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | |
2383 | const int b8_xy= 4*top_xy + 2; | |
2384 | int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
2385 | AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); | |
2386 | h->ref_cache[list][scan8[0] + 0 - 1*8]= | |
2387 | h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; | |
2388 | h->ref_cache[list][scan8[0] + 2 - 1*8]= | |
2389 | h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; | |
2390 | }else{ | |
2391 | AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); | |
2392 | AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
2393 | } | |
2394 | ||
2395 | if(!IS_INTERLACED(mb_type^left_type[0])){ /* the left column is filled only when the current and the left macroblock agree on the interlaced flag */ | |
2396 | if(USES_LIST(left_type[0], list)){ | |
2397 | const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | |
2398 | const int b8_xy= 4*left_xy[0] + 1; | |
2399 | int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
2400 | AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); | |
2401 | AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); | |
2402 | AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); | |
2403 | AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); | |
2404 | h->ref_cache[list][scan8[0] - 1 + 0 ]= | |
2405 | h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]]; | |
2406 | h->ref_cache[list][scan8[0] - 1 +16 ]= | |
2407 | h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]]; | |
2408 | }else{ | |
2409 | AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); | |
2410 | AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); | |
2411 | AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); | |
2412 | AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); | |
2413 | h->ref_cache[list][scan8[0] - 1 + 0 ]= | |
2414 | h->ref_cache[list][scan8[0] - 1 + 8 ]= | |
2415 | h->ref_cache[list][scan8[0] - 1 + 16 ]= | |
2416 | h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; | |
2417 | } | |
2418 | } | |
2419 | } | |
2420 | } | |
2421 | ||
2422 | return 0; | |
2423 | } | |
2424 | ||
c988f975 MN |
2425 | static void loop_filter(H264Context *h){ |
2426 | MpegEncContext * const s = &h->s; | |
2427 | uint8_t *dest_y, *dest_cb, *dest_cr; | |
2428 | int linesize, uvlinesize, mb_x, mb_y; | |
2429 | const int end_mb_y= s->mb_y + FRAME_MBAFF; | |
2430 | const int old_slice_type= h->slice_type; | |
2431 | ||
2432 | if(h->deblocking_filter) { | |
2433 | for(mb_x= 0; mb_x<s->mb_width; mb_x++){ | |
2434 | for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){ | |
78998bf2 | 2435 | int mb_xy, mb_type; |
c988f975 MN |
2436 | mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride; |
2437 | h->slice_num= h->slice_table[mb_xy]; | |
2438 | mb_type= s->current_picture.mb_type[mb_xy]; | |
2439 | h->list_count= h->list_counts[mb_xy]; | |
c988f975 MN |
2440 | |
2441 | if(FRAME_MBAFF) | |
2442 | h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type); | |
2443 | ||
c988f975 MN |
2444 | s->mb_x= mb_x; |
2445 | s->mb_y= mb_y; | |
2446 | dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; | |
2447 | dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; | |
2448 | dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; | |
2449 | //FIXME simplify above | |
2450 | ||
2451 | if (MB_FIELD) { | |
2452 | linesize = h->mb_linesize = s->linesize * 2; | |
2453 | uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | |
2454 | if(mb_y&1){ //FIXME move out of this function? | |
2455 | dest_y -= s->linesize*15; | |
2456 | dest_cb-= s->uvlinesize*7; | |
2457 | dest_cr-= s->uvlinesize*7; | |
2458 | } | |
2459 | } else { | |
2460 | linesize = h->mb_linesize = s->linesize; | |
2461 | uvlinesize = h->mb_uvlinesize = s->uvlinesize; | |
2462 | } | |
77d40dce | 2463 | backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); |
aaa995d7 | 2464 | if(fill_filter_caches(h, mb_type)) |
44a5e7b6 | 2465 | continue; |
c988f975 MN |
2466 | h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]); |
2467 | h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]); | |
2468 | ||
77d40dce | 2469 | if (FRAME_MBAFF) { |
c988f975 MN |
2470 | ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); |
2471 | } else { | |
2472 | ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); | |
2473 | } | |
2474 | } | |
2475 | } | |
2476 | } | |
2477 | h->slice_type= old_slice_type; | |
2478 | s->mb_x= 0; | |
2479 | s->mb_y= end_mb_y - FRAME_MBAFF; | |
f4b8b825 MN |
2480 | h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); |
2481 | h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); | |
c988f975 MN |
2482 | } |
2483 | ||
69a28f3e MN |
2484 | static void predict_field_decoding_flag(H264Context *h){ |
2485 | MpegEncContext * const s = &h->s; | |
2486 | const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; | |
2487 | int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) | |
2488 | ? s->current_picture.mb_type[mb_xy-1] | |
2489 | : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) | |
2490 | ? s->current_picture.mb_type[mb_xy-s->mb_stride] | |
2491 | : 0; | |
2492 | h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; | |
2493 | } | |
2494 | ||
3a84713a RS |
2495 | static int decode_slice(struct AVCodecContext *avctx, void *arg){ |
2496 | H264Context *h = *(void**)arg; | |
0da71265 MN |
2497 | MpegEncContext * const s = &h->s; |
2498 | const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; | |
2499 | ||
2500 | s->mb_skip_run= -1; | |
0da71265 | 2501 | |
89db0bae | 2502 | h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 || |
5317c95b | 2503 | (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); |
89db0bae | 2504 | |
e5017ab8 | 2505 | if( h->pps.cabac ) { |
e5017ab8 LA |
2506 | /* realign */ |
2507 | align_get_bits( &s->gb ); | |
2508 | ||
2509 | /* init cabac */ | |
d61c4e73 | 2510 | ff_init_cabac_states( &h->cabac); |
e5017ab8 LA |
2511 | ff_init_cabac_decoder( &h->cabac, |
2512 | s->gb.buffer + get_bits_count(&s->gb)/8, | |
6e44ba15 | 2513 | (get_bits_left(&s->gb) + 7)/8); |
cc51b282 MN |
2514 | |
2515 | ff_h264_init_cabac_states(h); | |
95c26348 | 2516 | |
e5017ab8 | 2517 | for(;;){ |
851ded89 | 2518 | //START_TIMER |
cc51b282 | 2519 | int ret = ff_h264_decode_mb_cabac(h); |
6867a90b | 2520 | int eos; |
851ded89 | 2521 | //STOP_TIMER("decode_mb_cabac") |
0da71265 | 2522 | |
903d58f6 | 2523 | if(ret>=0) ff_h264_hl_decode_mb(h); |
0da71265 | 2524 | |
5d18eaad | 2525 | if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? |
e5017ab8 LA |
2526 | s->mb_y++; |
2527 | ||
cc51b282 | 2528 | ret = ff_h264_decode_mb_cabac(h); |
e5017ab8 | 2529 | |
903d58f6 | 2530 | if(ret>=0) ff_h264_hl_decode_mb(h); |
e5017ab8 LA |
2531 | s->mb_y--; |
2532 | } | |
6867a90b | 2533 | eos = get_cabac_terminate( &h->cabac ); |
e5017ab8 | 2534 | |
3566042a MN |
2535 | if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){ |
2536 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2537 | return 0; | |
2538 | } | |
5659b509 | 2539 | if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { |
706da4af | 2540 | av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); |
e5017ab8 LA |
2541 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); |
2542 | return -1; | |
2543 | } | |
2544 | ||
2545 | if( ++s->mb_x >= s->mb_width ) { | |
2546 | s->mb_x = 0; | |
c988f975 | 2547 | loop_filter(h); |
e5017ab8 | 2548 | ff_draw_horiz_band(s, 16*s->mb_y, 16); |
5175b937 | 2549 | ++s->mb_y; |
f3e53d9f | 2550 | if(FIELD_OR_MBAFF_PICTURE) { |
6867a90b | 2551 | ++s->mb_y; |
69cc3183 MN |
2552 | if(FRAME_MBAFF && s->mb_y < s->mb_height) |
2553 | predict_field_decoding_flag(h); | |
6867a90b | 2554 | } |
0da71265 | 2555 | } |
0da71265 | 2556 | |
e5017ab8 | 2557 | if( eos || s->mb_y >= s->mb_height ) { |
a9c9a240 | 2558 | tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); |
e5017ab8 | 2559 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); |
0da71265 | 2560 | return 0; |
e5017ab8 | 2561 | } |
e5017ab8 LA |
2562 | } |
2563 | ||
2564 | } else { | |
2565 | for(;;){ | |
e1e94902 | 2566 | int ret = ff_h264_decode_mb_cavlc(h); |
e5017ab8 | 2567 | |
903d58f6 | 2568 | if(ret>=0) ff_h264_hl_decode_mb(h); |
e5017ab8 | 2569 | |
5d18eaad | 2570 | if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? |
e5017ab8 | 2571 | s->mb_y++; |
e1e94902 | 2572 | ret = ff_h264_decode_mb_cavlc(h); |
e5017ab8 | 2573 | |
903d58f6 | 2574 | if(ret>=0) ff_h264_hl_decode_mb(h); |
e5017ab8 LA |
2575 | s->mb_y--; |
2576 | } | |
2577 | ||
2578 | if(ret<0){ | |
2579 | av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); | |
0da71265 MN |
2580 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); |
2581 | ||
2582 | return -1; | |
2583 | } | |
e5017ab8 LA |
2584 | |
2585 | if(++s->mb_x >= s->mb_width){ | |
2586 | s->mb_x=0; | |
c988f975 | 2587 | loop_filter(h); |
e5017ab8 | 2588 | ff_draw_horiz_band(s, 16*s->mb_y, 16); |
6867a90b | 2589 | ++s->mb_y; |
f3e53d9f | 2590 | if(FIELD_OR_MBAFF_PICTURE) { |
6867a90b | 2591 | ++s->mb_y; |
69cc3183 MN |
2592 | if(FRAME_MBAFF && s->mb_y < s->mb_height) |
2593 | predict_field_decoding_flag(h); | |
6867a90b LLL |
2594 | } |
2595 | if(s->mb_y >= s->mb_height){ | |
a9c9a240 | 2596 | tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); |
e5017ab8 LA |
2597 | |
2598 | if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { | |
2599 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2600 | ||
2601 | return 0; | |
2602 | }else{ | |
2603 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2604 | ||
2605 | return -1; | |
2606 | } | |
2607 | } | |
2608 | } | |
2609 | ||
2610 | if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ | |
a9c9a240 | 2611 | tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); |
e5017ab8 LA |
2612 | if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ |
2613 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2614 | ||
2615 | return 0; | |
2616 | }else{ | |
2617 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); | |
2618 | ||
2619 | return -1; | |
2620 | } | |
2621 | } | |
0da71265 MN |
2622 | } |
2623 | } | |
e5017ab8 | 2624 | |
0da71265 MN |
2625 | #if 0 |
2626 | for(;s->mb_y < s->mb_height; s->mb_y++){ | |
2627 | for(;s->mb_x < s->mb_width; s->mb_x++){ | |
2628 | int ret= decode_mb(h); | |
115329f1 | 2629 | |
903d58f6 | 2630 | ff_h264_hl_decode_mb(h); |
0da71265 MN |
2631 | |
2632 | if(ret<0){ | |
267f7edc | 2633 | av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); |
0da71265 MN |
2634 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); |
2635 | ||
2636 | return -1; | |
2637 | } | |
115329f1 | 2638 | |
0da71265 MN |
2639 | if(++s->mb_x >= s->mb_width){ |
2640 | s->mb_x=0; | |
2641 | if(++s->mb_y >= s->mb_height){ | |
2642 | if(get_bits_count(s->gb) == s->gb.size_in_bits){ | |
2643 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2644 | ||
2645 | return 0; | |
2646 | }else{ | |
2647 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2648 | ||
2649 | return -1; | |
2650 | } | |
2651 | } | |
2652 | } | |
115329f1 | 2653 | |
0da71265 MN |
2654 | if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ |
2655 | if(get_bits_count(s->gb) == s->gb.size_in_bits){ | |
2656 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2657 | ||
2658 | return 0; | |
2659 | }else{ | |
2660 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); | |
2661 | ||
2662 | return -1; | |
2663 | } | |
2664 | } | |
2665 | } | |
2666 | s->mb_x=0; | |
2667 | ff_draw_horiz_band(s, 16*s->mb_y, 16); | |
2668 | } | |
2669 | #endif | |
2670 | return -1; //not reached | |
2671 | } | |
2672 | ||
afebe2f7 AÖ |
2673 | /** |
2674 | * Call decode_slice() for each context. | |
2675 | * | |
2676 | * @param h h264 master context | |
2677 | * @param context_count number of contexts to execute | |
2678 | */ | |
2679 | static void execute_decode_slices(H264Context *h, int context_count){ | |
2680 | MpegEncContext * const s = &h->s; | |
2681 | AVCodecContext * const avctx= s->avctx; | |
2682 | H264Context *hx; | |
2683 | int i; | |
2684 | ||
40e5d31b GB |
2685 | if (s->avctx->hwaccel) |
2686 | return; | |
0d3d172f | 2687 | if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) |
369122dd | 2688 | return; |
afebe2f7 | 2689 | if(context_count == 1) { |
74e8b78b | 2690 | decode_slice(avctx, &h); |
afebe2f7 AÖ |
2691 | } else { |
2692 | for(i = 1; i < context_count; i++) { | |
2693 | hx = h->thread_context[i]; | |
047599a4 | 2694 | hx->s.error_recognition = avctx->error_recognition; |
afebe2f7 AÖ |
2695 | hx->s.error_count = 0; |
2696 | } | |
2697 | ||
2698 | avctx->execute(avctx, (void *)decode_slice, | |
01418506 | 2699 | h->thread_context, NULL, context_count, sizeof(void*)); |
afebe2f7 AÖ |
2700 | |
2701 | /* pull back stuff from slices to master context */ | |
2702 | hx = h->thread_context[context_count - 1]; | |
2703 | s->mb_x = hx->s.mb_x; | |
2704 | s->mb_y = hx->s.mb_y; | |
12d96de3 JD |
2705 | s->dropable = hx->s.dropable; |
2706 | s->picture_structure = hx->s.picture_structure; | |
afebe2f7 AÖ |
2707 | for(i = 1; i < context_count; i++) |
2708 | h->s.error_count += h->thread_context[i]->s.error_count; | |
2709 | } | |
2710 | } | |
2711 | ||
2712 | ||
30317501 | 2713 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ |
0da71265 MN |
2714 | MpegEncContext * const s = &h->s; |
2715 | AVCodecContext * const avctx= s->avctx; | |
2716 | int buf_index=0; | |
afebe2f7 AÖ |
2717 | H264Context *hx; ///< thread context |
2718 | int context_count = 0; | |
74b14aac | 2719 | int next_avc= h->is_avc ? 0 : buf_size; |
afebe2f7 AÖ |
2720 | |
2721 | h->max_contexts = avctx->thread_count; | |
377ec888 | 2722 | #if 0 |
eb60dddc | 2723 | int i; |
96b6ace2 MN |
2724 | for(i=0; i<50; i++){ |
2725 | av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); | |
0da71265 MN |
2726 | } |
2727 | #endif | |
66a4b2c1 | 2728 | if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ |
afebe2f7 | 2729 | h->current_slice = 0; |
12d96de3 | 2730 | if (!s->first_field) |
f6e3c460 | 2731 | s->current_picture_ptr= NULL; |
9c095463 | 2732 | ff_h264_reset_sei(h); |
66a4b2c1 MN |
2733 | } |
2734 | ||
0da71265 MN |
2735 | for(;;){ |
2736 | int consumed; | |
2737 | int dst_length; | |
2738 | int bit_length; | |
30317501 | 2739 | const uint8_t *ptr; |
4770b1b4 | 2740 | int i, nalsize = 0; |
afebe2f7 | 2741 | int err; |
115329f1 | 2742 | |
74b14aac | 2743 | if(buf_index >= next_avc) { |
1c48415b AÖ |
2744 | if(buf_index >= buf_size) break; |
2745 | nalsize = 0; | |
2746 | for(i = 0; i < h->nal_length_size; i++) | |
2747 | nalsize = (nalsize << 8) | buf[buf_index++]; | |
8d8409ca | 2748 | if(nalsize <= 1 || nalsize > buf_size - buf_index){ |
1c48415b AÖ |
2749 | if(nalsize == 1){ |
2750 | buf_index++; | |
2751 | continue; | |
2752 | }else{ | |
2753 | av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); | |
2754 | break; | |
2755 | } | |
2756 | } | |
74b14aac | 2757 | next_avc= buf_index + nalsize; |
1c48415b AÖ |
2758 | } else { |
2759 | // start code prefix search | |
52255d17 | 2760 | for(; buf_index + 3 < next_avc; buf_index++){ |
1c48415b AÖ |
2761 | // This should always succeed in the first iteration. |
2762 | if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1) | |
2763 | break; | |
8b031359 | 2764 | } |
115329f1 | 2765 | |
1c48415b | 2766 | if(buf_index+3 >= buf_size) break; |
115329f1 | 2767 | |
1c48415b | 2768 | buf_index+=3; |
52255d17 | 2769 | if(buf_index >= next_avc) continue; |
1c48415b | 2770 | } |
115329f1 | 2771 | |
afebe2f7 AÖ |
2772 | hx = h->thread_context[context_count]; |
2773 | ||
74b14aac | 2774 | ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index); |
ff82e429 | 2775 | if (ptr==NULL || dst_length < 0){ |
ac658be5 FOL |
2776 | return -1; |
2777 | } | |
3566042a MN |
2778 | i= buf_index + consumed; |
2779 | if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc && | |
2780 | buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0) | |
2781 | s->workaround_bugs |= FF_BUG_TRUNCATED; | |
2782 | ||
2783 | if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){ | |
6ac9696e | 2784 | while(ptr[dst_length - 1] == 0 && dst_length > 0) |
c4da83fb | 2785 | dst_length--; |
3566042a | 2786 | } |
1790a5e9 | 2787 | bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1)); |
0da71265 MN |
2788 | |
2789 | if(s->avctx->debug&FF_DEBUG_STARTCODE){ | |
afebe2f7 | 2790 | av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length); |
0da71265 | 2791 | } |
115329f1 | 2792 | |
74b14aac | 2793 | if (h->is_avc && (nalsize != consumed) && nalsize){ |
e262365d | 2794 | av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); |
9d2cc8c1 | 2795 | } |
4770b1b4 | 2796 | |
0da71265 MN |
2797 | buf_index += consumed; |
2798 | ||
755bfeab | 2799 | if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id |
8c3eba7c | 2800 | ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) |
0da71265 | 2801 | continue; |
115329f1 | 2802 | |
afebe2f7 AÖ |
2803 | again: |
2804 | err = 0; | |
2805 | switch(hx->nal_unit_type){ | |
0da71265 | 2806 | case NAL_IDR_SLICE: |
afebe2f7 AÖ |
2807 | if (h->nal_unit_type != NAL_IDR_SLICE) { |
2808 | av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices"); | |
2809 | return -1; | |
2810 | } | |
3b66c4c5 | 2811 | idr(h); //FIXME ensure we don't loose some frames if there is reordering |
0da71265 | 2812 | case NAL_SLICE: |
afebe2f7 AÖ |
2813 | init_get_bits(&hx->s.gb, ptr, bit_length); |
2814 | hx->intra_gb_ptr= | |
2815 | hx->inter_gb_ptr= &hx->s.gb; | |
2816 | hx->s.data_partitioning = 0; | |
2817 | ||
2818 | if((err = decode_slice_header(hx, h))) | |
2819 | break; | |
2820 | ||
dd0cd3d2 RC |
2821 | avctx->profile = hx->sps.profile_idc; |
2822 | avctx->level = hx->sps.level_idc; | |
2823 | ||
3bccd93a SW |
2824 | if (h->current_slice == 1) { |
2825 | if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) | |
6026a096 | 2826 | return -1; |
3bccd93a SW |
2827 | if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) |
2828 | ff_vdpau_h264_picture_start(s); | |
6026a096 GB |
2829 | } |
2830 | ||
37a558fe IS |
2831 | s->current_picture_ptr->key_frame |= |
2832 | (hx->nal_unit_type == NAL_IDR_SLICE) || | |
2833 | (h->sei_recovery_frame_cnt >= 0); | |
afebe2f7 AÖ |
2834 | if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5 |
2835 | && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) | |
9f5c1037 | 2836 | && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) |
4b30289e | 2837 | && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) |
369122dd | 2838 | && avctx->skip_frame < AVDISCARD_ALL){ |
d404b3ed MN |
2839 | if(avctx->hwaccel) { |
2840 | if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0) | |
2841 | return -1; | |
2842 | }else | |
0d3d172f | 2843 | if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){ |
369122dd | 2844 | static const uint8_t start_code[] = {0x00, 0x00, 0x01}; |
c639fc72 CEH |
2845 | ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code)); |
2846 | ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed ); | |
369122dd | 2847 | }else |
f2c214a1 | 2848 | context_count++; |
369122dd | 2849 | } |
0da71265 MN |
2850 | break; |
2851 | case NAL_DPA: | |
afebe2f7 AÖ |
2852 | init_get_bits(&hx->s.gb, ptr, bit_length); |
2853 | hx->intra_gb_ptr= | |
2854 | hx->inter_gb_ptr= NULL; | |
0410ee8f AS |
2855 | |
2856 | if ((err = decode_slice_header(hx, h)) < 0) | |
2857 | break; | |
2858 | ||
dd0cd3d2 RC |
2859 | avctx->profile = hx->sps.profile_idc; |
2860 | avctx->level = hx->sps.level_idc; | |
2861 | ||
afebe2f7 | 2862 | hx->s.data_partitioning = 1; |
115329f1 | 2863 | |
0da71265 MN |
2864 | break; |
2865 | case NAL_DPB: | |
afebe2f7 AÖ |
2866 | init_get_bits(&hx->intra_gb, ptr, bit_length); |
2867 | hx->intra_gb_ptr= &hx->intra_gb; | |
0da71265 MN |
2868 | break; |
2869 | case NAL_DPC: | |
afebe2f7 AÖ |
2870 | init_get_bits(&hx->inter_gb, ptr, bit_length); |
2871 | hx->inter_gb_ptr= &hx->inter_gb; | |
8b92b792 | 2872 | |
afebe2f7 | 2873 | if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning |
b18e5c03 | 2874 | && s->context_initialized |
e0111b32 | 2875 | && s->hurry_up < 5 |
afebe2f7 | 2876 | && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) |
9f5c1037 | 2877 | && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) |
4b30289e | 2878 | && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) |
e0111b32 | 2879 | && avctx->skip_frame < AVDISCARD_ALL) |
afebe2f7 | 2880 | context_count++; |
0da71265 MN |
2881 | break; |
2882 | case NAL_SEI: | |
cdd10689 | 2883 | init_get_bits(&s->gb, ptr, bit_length); |
1790a5e9 | 2884 | ff_h264_decode_sei(h); |
0da71265 MN |
2885 | break; |
2886 | case NAL_SPS: | |
2887 | init_get_bits(&s->gb, ptr, bit_length); | |
1790a5e9 | 2888 | ff_h264_decode_seq_parameter_set(h); |
115329f1 | 2889 | |
0da71265 MN |
2890 | if(s->flags& CODEC_FLAG_LOW_DELAY) |
2891 | s->low_delay=1; | |
115329f1 | 2892 | |
a18030bb LM |
2893 | if(avctx->has_b_frames < 2) |
2894 | avctx->has_b_frames= !s->low_delay; | |
0da71265 MN |
2895 | break; |
2896 | case NAL_PPS: | |
2897 | init_get_bits(&s->gb, ptr, bit_length); | |
115329f1 | 2898 | |
1790a5e9 | 2899 | ff_h264_decode_picture_parameter_set(h, bit_length); |
0da71265 MN |
2900 | |
2901 | break; | |
ab470fa7 LM |
2902 | case NAL_AUD: |
2903 | case NAL_END_SEQUENCE: | |
2904 | case NAL_END_STREAM: | |
2905 | case NAL_FILLER_DATA: | |
2906 | case NAL_SPS_EXT: | |
2907 | case NAL_AUXILIARY_SLICE: | |
0da71265 | 2908 | break; |
bb270c08 | 2909 | default: |
4ad04da2 | 2910 | av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length); |
115329f1 | 2911 | } |
115329f1 | 2912 | |
afebe2f7 AÖ |
2913 | if(context_count == h->max_contexts) { |
2914 | execute_decode_slices(h, context_count); | |
2915 | context_count = 0; | |
2916 | } | |
2917 | ||
2918 | if (err < 0) | |
2919 | av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); | |
2920 | else if(err == 1) { | |
2921 | /* Slice could not be decoded in parallel mode, copy down | |
2922 | * NAL unit stuff to context 0 and restart. Note that | |
1412060e | 2923 | * rbsp_buffer is not transferred, but since we no longer |
afebe2f7 AÖ |
2924 | * run in parallel mode this should not be an issue. */ |
2925 | h->nal_unit_type = hx->nal_unit_type; | |
2926 | h->nal_ref_idc = hx->nal_ref_idc; | |
2927 | hx = h; | |
2928 | goto again; | |
2929 | } | |
2930 | } | |
2931 | if(context_count) | |
2932 | execute_decode_slices(h, context_count); | |
0da71265 MN |
2933 | return buf_index; |
2934 | } | |
2935 | ||
2936 | /** | |
3b66c4c5 | 2937 | * returns the number of bytes consumed for building the current frame |
0da71265 MN |
2938 | */ |
2939 | static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){ | |
755bfeab | 2940 | if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...) |
0da71265 MN |
2941 | if(pos+10>buf_size) pos=buf_size; // oops ;) |
2942 | ||
2943 | return pos; | |
0da71265 MN |
2944 | } |
2945 | ||
115329f1 | 2946 | static int decode_frame(AVCodecContext *avctx, |
0da71265 | 2947 | void *data, int *data_size, |
7a00bbad | 2948 | AVPacket *avpkt) |
0da71265 | 2949 | { |
7a00bbad TB |
2950 | const uint8_t *buf = avpkt->data; |
2951 | int buf_size = avpkt->size; | |
0da71265 MN |
2952 | H264Context *h = avctx->priv_data; |
2953 | MpegEncContext *s = &h->s; | |
115329f1 | 2954 | AVFrame *pict = data; |
0da71265 | 2955 | int buf_index; |
115329f1 | 2956 | |
0da71265 | 2957 | s->flags= avctx->flags; |
303e50e6 | 2958 | s->flags2= avctx->flags2; |
0da71265 | 2959 | |
1412060e | 2960 | /* end of stream, output what is still in the buffers */ |
0da71265 | 2961 | if (buf_size == 0) { |
97bbb885 MN |
2962 | Picture *out; |
2963 | int i, out_idx; | |
2964 | ||
2965 | //FIXME factorize this with the output code below | |
2966 | out = h->delayed_pic[0]; | |
2967 | out_idx = 0; | |
c173a088 | 2968 | for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) |
97bbb885 MN |
2969 | if(h->delayed_pic[i]->poc < out->poc){ |
2970 | out = h->delayed_pic[i]; | |
2971 | out_idx = i; | |
2972 | } | |
2973 | ||
2974 | for(i=out_idx; h->delayed_pic[i]; i++) | |
2975 | h->delayed_pic[i] = h->delayed_pic[i+1]; | |
2976 | ||
2977 | if(out){ | |
2978 | *data_size = sizeof(AVFrame); | |
2979 | *pict= *(AVFrame*)out; | |
2980 | } | |
2981 | ||
0da71265 MN |
2982 | return 0; |
2983 | } | |
115329f1 | 2984 | |
0da71265 | 2985 | buf_index=decode_nal_units(h, buf, buf_size); |
115329f1 | 2986 | if(buf_index < 0) |
0da71265 MN |
2987 | return -1; |
2988 | ||
56c70e1d | 2989 | if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){ |
1c746a49 | 2990 | if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0; |
56c70e1d MN |
2991 | av_log(avctx, AV_LOG_ERROR, "no frame!\n"); |
2992 | return -1; | |
2993 | } | |
2994 | ||
66a4b2c1 MN |
2995 | if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ |
2996 | Picture *out = s->current_picture_ptr; | |
2997 | Picture *cur = s->current_picture_ptr; | |
44be1d64 | 2998 | int i, pics, out_of_order, out_idx; |
115329f1 | 2999 | |
256299d3 | 3000 | field_end(h); |
66a4b2c1 | 3001 | |
357282c6 | 3002 | if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { |
12d96de3 JD |
3003 | /* Wait for second field. */ |
3004 | *data_size = 0; | |
3005 | ||
3006 | } else { | |
b19d493f | 3007 | cur->interlaced_frame = 0; |
b09a7c05 AÖ |
3008 | cur->repeat_pict = 0; |
3009 | ||
3010 | /* Signal interlacing information externally. */ | |
3011 | /* Prioritize picture timing SEI information over used decoding process if it exists. */ | |
70e01da3 | 3012 | |
b09a7c05 AÖ |
3013 | if(h->sps.pic_struct_present_flag){ |
3014 | switch (h->sei_pic_struct) | |
3015 | { | |
b19d493f HY |
3016 | case SEI_PIC_STRUCT_FRAME: |
3017 | break; | |
3018 | case SEI_PIC_STRUCT_TOP_FIELD: | |
3019 | case SEI_PIC_STRUCT_BOTTOM_FIELD: | |
3020 | cur->interlaced_frame = 1; | |
3021 | break; | |
3022 | case SEI_PIC_STRUCT_TOP_BOTTOM: | |
3023 | case SEI_PIC_STRUCT_BOTTOM_TOP: | |
3024 | if (FIELD_OR_MBAFF_PICTURE) | |
3025 | cur->interlaced_frame = 1; | |
3026 | else | |
3027 | // try to flag soft telecine progressive | |
3028 | cur->interlaced_frame = h->prev_interlaced_frame; | |
3029 | break; | |
b09a7c05 AÖ |
3030 | case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: |
3031 | case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: | |
3032 | // Signal the possibility of telecined film externally (pic_struct 5,6) | |
3033 | // From these hints, let the applications decide if they apply deinterlacing. | |
3034 | cur->repeat_pict = 1; | |
b09a7c05 AÖ |
3035 | break; |
3036 | case SEI_PIC_STRUCT_FRAME_DOUBLING: | |
3037 | // Force progressive here, as doubling interlaced frame is a bad idea. | |
b09a7c05 AÖ |
3038 | cur->repeat_pict = 2; |
3039 | break; | |
3040 | case SEI_PIC_STRUCT_FRAME_TRIPLING: | |
b09a7c05 AÖ |
3041 | cur->repeat_pict = 4; |
3042 | break; | |
3043 | } | |
b19d493f HY |
3044 | |
3045 | if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) | |
3046 | cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0; | |
b09a7c05 AÖ |
3047 | }else{ |
3048 | /* Derive interlacing flag from used decoding process. */ | |
3049 | cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; | |
3050 | } | |
b19d493f | 3051 | h->prev_interlaced_frame = cur->interlaced_frame; |
b09a7c05 AÖ |
3052 | |
3053 | if (cur->field_poc[0] != cur->field_poc[1]){ | |
3054 | /* Derive top_field_first from field pocs. */ | |
3055 | cur->top_field_first = cur->field_poc[0] < cur->field_poc[1]; | |
3056 | }else{ | |
3057 | if(cur->interlaced_frame || h->sps.pic_struct_present_flag){ | |
3058 | /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */ | |
3059 | if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM | |
3060 | || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) | |
3061 | cur->top_field_first = 1; | |
3062 | else | |
3063 | cur->top_field_first = 0; | |
3064 | }else{ | |
3065 | /* Most likely progressive */ | |
3066 | cur->top_field_first = 0; | |
3067 | } | |
3068 | } | |
84a8596d | 3069 | |
f6e3c460 | 3070 | //FIXME do something with unavailable reference frames |
8b92b792 | 3071 | |
f6e3c460 | 3072 | /* Sort B-frames into display order */ |
2f944356 | 3073 | |
f6e3c460 AÖ |
3074 | if(h->sps.bitstream_restriction_flag |
3075 | && s->avctx->has_b_frames < h->sps.num_reorder_frames){ | |
3076 | s->avctx->has_b_frames = h->sps.num_reorder_frames; | |
3077 | s->low_delay = 0; | |
3078 | } | |
9170e345 | 3079 | |
fb19e144 MN |
3080 | if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT |
3081 | && !h->sps.bitstream_restriction_flag){ | |
3082 | s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT; | |
3083 | s->low_delay= 0; | |
3084 | } | |
3085 | ||
f6e3c460 AÖ |
3086 | pics = 0; |
3087 | while(h->delayed_pic[pics]) pics++; | |
9170e345 | 3088 | |
64b9d48f | 3089 | assert(pics <= MAX_DELAYED_PIC_COUNT); |
4e4d983e | 3090 | |
f6e3c460 AÖ |
3091 | h->delayed_pic[pics++] = cur; |
3092 | if(cur->reference == 0) | |
3093 | cur->reference = DELAYED_PIC_REF; | |
2f944356 | 3094 | |
f6e3c460 AÖ |
3095 | out = h->delayed_pic[0]; |
3096 | out_idx = 0; | |
c173a088 | 3097 | for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) |
f6e3c460 AÖ |
3098 | if(h->delayed_pic[i]->poc < out->poc){ |
3099 | out = h->delayed_pic[i]; | |
3100 | out_idx = i; | |
3101 | } | |
44be1d64 MN |
3102 | if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) |
3103 | h->outputed_poc= INT_MIN; | |
3104 | out_of_order = out->poc < h->outputed_poc; | |
1b547aba | 3105 | |
f6e3c460 AÖ |
3106 | if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) |
3107 | { } | |
2a811db2 | 3108 | else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) |
f6e3c460 | 3109 | || (s->low_delay && |
44be1d64 | 3110 | ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2) |
9701840b | 3111 | || cur->pict_type == FF_B_TYPE))) |
f6e3c460 AÖ |
3112 | { |
3113 | s->low_delay = 0; | |
3114 | s->avctx->has_b_frames++; | |
f6e3c460 | 3115 | } |
f6e3c460 AÖ |
3116 | |
3117 | if(out_of_order || pics > s->avctx->has_b_frames){ | |
3eaa6d0e | 3118 | out->reference &= ~DELAYED_PIC_REF; |
f6e3c460 AÖ |
3119 | for(i=out_idx; h->delayed_pic[i]; i++) |
3120 | h->delayed_pic[i] = h->delayed_pic[i+1]; | |
3121 | } | |
3eaa6d0e | 3122 | if(!out_of_order && pics > s->avctx->has_b_frames){ |
f6e3c460 | 3123 | *data_size = sizeof(AVFrame); |
df8a7dff | 3124 | |
44be1d64 MN |
3125 | if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) { |
3126 | h->outputed_poc = INT_MIN; | |
3127 | } else | |
67e362ca | 3128 | h->outputed_poc = out->poc; |
f6e3c460 | 3129 | *pict= *(AVFrame*)out; |
3eaa6d0e | 3130 | }else{ |
f6e3c460 | 3131 | av_log(avctx, AV_LOG_DEBUG, "no picture\n"); |
3eaa6d0e | 3132 | } |
12d96de3 | 3133 | } |
a4dae92b LM |
3134 | } |
3135 | ||
3165e258 | 3136 | assert(pict->data[0] || !*data_size); |
4e4d983e | 3137 | ff_print_debug_info(s, pict); |
0da71265 | 3138 | //printf("out %d\n", (int)pict->data[0]); |
0da71265 | 3139 | |
0da71265 MN |
3140 | return get_consumed_bytes(s, buf_index, buf_size); |
3141 | } | |
#if 0
/**
 * Fills h->mb_avail[0..5] for the current macroblock (disabled code).
 *
 * Entries 0-2 cover the macroblocks at offsets -1, 0 and +1 in the row above,
 * entry 3 the macroblock to the left; each is set when that macroblock exists
 * and its slice_table entry equals h->slice_num. Entries 4 and 5 are the
 * constants 1 and 0.
 */
static inline void fill_mb_avail(H264Context *h)
{
    MpegEncContext * const s = &h->s;
    const int cur_xy = s->mb_x + s->mb_y * s->mb_stride;

    if (!s->mb_y) {
        /* first row: nothing above */
        h->mb_avail[0] = 0;
        h->mb_avail[1] = 0;
        h->mb_avail[2] = 0;
    } else {
        const int above_xy = cur_xy - s->mb_stride;

        h->mb_avail[0] = s->mb_x && h->slice_table[above_xy - 1] == h->slice_num;
        h->mb_avail[1] = h->slice_table[above_xy] == h->slice_num;
        h->mb_avail[2] = s->mb_x + 1 < s->mb_width && h->slice_table[above_xy + 1] == h->slice_num;
    }

    h->mb_avail[3] = s->mb_x && h->slice_table[cur_xy - 1] == h->slice_num;
    h->mb_avail[4] = 1; //FIXME move out
    h->mb_avail[5] = 0; //FIXME move out
}
#endif
3161 | ||
07e4e3ea | 3162 | #ifdef TEST |
6bf398a0 | 3163 | #undef printf |
d04d5bcd | 3164 | #undef random |
0da71265 MN |