Commit | Line | Data |
---|---|---|
0da71265 MN |
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 | * | |
b78e7197 DB |
5 | * This file is part of FFmpeg. |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
0da71265 MN |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either | |
b78e7197 | 10 | * version 2.1 of the License, or (at your option) any later version. |
0da71265 | 11 | * |
b78e7197 | 12 | * FFmpeg is distributed in the hope that it will be useful, |
0da71265 MN |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
b78e7197 | 18 | * License along with FFmpeg; if not, write to the Free Software |
5509bffa | 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0da71265 | 20 | */ |
115329f1 | 21 | |
0da71265 | 22 | /** |
bad5537e | 23 | * @file libavcodec/h264.c |
0da71265 MN |
24 | * H.264 / AVC / MPEG4 part10 codec. |
25 | * @author Michael Niedermayer <michaelni@gmx.at> | |
26 | */ | |
27 | ||
40e5d31b | 28 | #include "internal.h" |
0da71265 MN |
29 | #include "dsputil.h" |
30 | #include "avcodec.h" | |
31 | #include "mpegvideo.h" | |
26b4fe82 | 32 | #include "h264.h" |
0da71265 | 33 | #include "h264data.h" |
188d3c51 | 34 | #include "h264_mvpred.h" |
26b4fe82 | 35 | #include "h264_parser.h" |
0da71265 | 36 | #include "golomb.h" |
199436b9 | 37 | #include "mathops.h" |
626464fb | 38 | #include "rectangle.h" |
369122dd | 39 | #include "vdpau_internal.h" |
0da71265 | 40 | |
e5017ab8 LA |
41 | #include "cabac.h" |
42 | ||
2848ce84 | 43 | //#undef NDEBUG |
0da71265 MN |
44 | #include <assert.h> |
45 | ||
/* rem6[q] == q % 6 for every q in 0..51; one row per full group of six. */
static const uint8_t rem6[52] = {
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
49 | ||
/* div6[q] == q / 6 for every q in 0..51; one row per quotient value. */
static const uint8_t div6[52] = {
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
53 | ||
903d58f6 | 54 | void ff_h264_write_back_intra_pred_mode(H264Context *h){ |
5b0fb524 | 55 | int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; |
0da71265 | 56 | |
662a5b23 MN |
57 | AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4); |
58 | mode[4]= h->intra4x4_pred_mode_cache[7+8*3]; | |
59 | mode[5]= h->intra4x4_pred_mode_cache[7+8*2]; | |
60 | mode[6]= h->intra4x4_pred_mode_cache[7+8*1]; | |
0da71265 MN |
61 | } |
62 | ||
63 | /** | |
64 | * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. | |
65 | */ | |
2bedc0e8 MN |
66 | int ff_h264_check_intra4x4_pred_mode(H264Context *h){ |
67 | MpegEncContext * const s = &h->s; | |
68 | static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; | |
69 | static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; | |
70 | int i; | |
71 | ||
72 | if(!(h->top_samples_available&0x8000)){ | |
73 | for(i=0; i<4; i++){ | |
74 | int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; | |
75 | if(status<0){ | |
76 | av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); | |
77 | return -1; | |
78 | } else if(status){ | |
79 | h->intra4x4_pred_mode_cache[scan8[0] + i]= status; | |
80 | } | |
81 | } | |
82 | } | |
83 | ||
84 | if((h->left_samples_available&0x8888)!=0x8888){ | |
85 | static const int mask[4]={0x8000,0x2000,0x80,0x20}; | |
86 | for(i=0; i<4; i++){ | |
87 | if(!(h->left_samples_available&mask[i])){ | |
88 | int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; | |
89 | if(status<0){ | |
90 | av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); | |
91 | return -1; | |
92 | } else if(status){ | |
93 | h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; | |
94 | } | |
95 | } | |
96 | } | |
97 | } | |
98 | ||
99 | return 0; | |
100 | } //FIXME cleanup like ff_h264_check_intra_pred_mode | |
101 | ||
102 | /** | |
103 | * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. | |
104 | */ | |
903d58f6 | 105 | int ff_h264_check_intra_pred_mode(H264Context *h, int mode){ |
0da71265 MN |
106 | MpegEncContext * const s = &h->s; |
107 | static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; | |
108 | static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; | |
115329f1 | 109 | |
43ff0714 | 110 | if(mode > 6U) { |
5175b937 | 111 | av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y); |
7440fe83 | 112 | return -1; |
5175b937 | 113 | } |
115329f1 | 114 | |
0da71265 MN |
115 | if(!(h->top_samples_available&0x8000)){ |
116 | mode= top[ mode ]; | |
117 | if(mode<0){ | |
9b879566 | 118 | av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); |
0da71265 MN |
119 | return -1; |
120 | } | |
121 | } | |
115329f1 | 122 | |
d1d10e91 | 123 | if((h->left_samples_available&0x8080) != 0x8080){ |
0da71265 | 124 | mode= left[ mode ]; |
d1d10e91 MN |
125 | if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred |
126 | mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8); | |
127 | } | |
0da71265 | 128 | if(mode<0){ |
9b879566 | 129 | av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); |
0da71265 | 130 | return -1; |
115329f1 | 131 | } |
0da71265 MN |
132 | } |
133 | ||
134 | return mode; | |
135 | } | |
136 | ||
/**
 * Reads the one-byte NAL header at src[0] into h->nal_ref_idc and
 * h->nal_unit_type, finds where the NAL payload ends and removes the
 * 00 00 03 escape sequences from it.
 *
 * If no 00 00 0x pattern is found the payload is returned in place
 * (pointer to src+1, nothing is copied). Otherwise the payload is unescaped
 * into h->rbsp_buffer[bufidx], which is grown with av_fast_malloc() and
 * gets FF_INPUT_BUFFER_PADDING_SIZE zero bytes written after the data.
 *
 * @param src        start of the NAL unit, header byte included
 * @param dst_length receives the number of payload bytes returned
 * @param consumed   receives the number of bytes of src used, header included
 * @param length     number of bytes available at src
 * @return pointer to the payload, or NULL if the escape buffer is NULL after
 *         the allocation attempt
 */
1790a5e9 | 137 | const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ |
0da71265 MN |
138 | int i, si, di; |
139 | uint8_t *dst; | |
24456882 | 140 | int bufidx; |
0da71265 | 141 | |
bb270c08 | 142 | // src[0]&0x80; //forbidden bit |
0da71265 MN |
/* header byte: upper three bits go to nal_ref_idc, lower five to nal_unit_type */
143 | h->nal_ref_idc= src[0]>>5; |
144 | h->nal_unit_type= src[0]&0x1F; | |
145 | ||
/* from here on src and length describe the payload only */
146 | src++; length--; | |
115329f1 | 147 | #if 0 |
0da71265 MN |
148 | for(i=0; i<length; i++) |
149 | printf("%2X ", src[i]); | |
150 | #endif | |
e08715d3 | 151 | |
b250f9c6 AJ |
/* Search for the first 00 00 0x sequence with x<=3. Three loop heads are
 * selected by the preprocessor and share the loop body further down. The
 * word-wise variants skip a whole word when the mask test fails; the test
 * looks like a zero-byte detector on selected byte positions (TODO confirm). */
152 | #if HAVE_FAST_UNALIGNED |
153 | # if HAVE_FAST_64BIT | |
e08715d3 MN |
154 | # define RS 7 |
155 | for(i=0; i+1<length; i+=9){ | |
19769ece | 156 | if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) |
e08715d3 MN |
157 | # else |
158 | # define RS 3 | |
159 | for(i=0; i+1<length; i+=5){ | |
19769ece | 160 | if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U)) |
e08715d3 MN |
161 | # endif |
162 | continue; | |
163 | if(i>0 && !src[i]) i--; | |
164 | while(src[i]) i++; | |
165 | #else | |
166 | # define RS 0 | |
0da71265 MN |
167 | for(i=0; i+1<length; i+=2){ |
168 | if(src[i]) continue; | |
169 | if(i>0 && src[i-1]==0) i--; | |
e08715d3 | 170 | #endif |
0da71265 MN |
/* src[i] is a zero byte here: x==3 is an escape, x<3 starts the next NAL unit */
171 | if(i+2<length && src[i+1]==0 && src[i+2]<=3){ |
172 | if(src[i+2]!=3){ | |
173 | /* startcode, so we must be past the end */ | |
174 | length=i; | |
175 | } | |
176 | break; | |
177 | } | |
/* false candidate: together with the loop increment this advances i by 2
 * in every variant (9-7, 5-3 or 2-0) */
abb27cfb | 178 | i-= RS; |
0da71265 MN |
179 | } |
180 | ||
181 | if(i>=length-1){ //no escaped 0 | |
182 | *dst_length= length; | |
183 | *consumed= length+1; //+1 for the header | |
115329f1 | 184 | return src; |
0da71265 MN |
185 | } |
186 | ||
24456882 | 187 | bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data |
238ef6da | 188 | av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE); |
24456882 | 189 | dst= h->rbsp_buffer[bufidx]; |
0da71265 | 190 | |
ac658be5 FOL |
191 | if (dst == NULL){ |
192 | return NULL; | |
193 | } | |
194 | ||
3b66c4c5 | 195 | //printf("decoding esc\n"); |
593af7cd MN |
/* the first i bytes were already scanned and contain no escape */
196 | memcpy(dst, src, i); |
197 | si=di=i; | |
198 | while(si+2<length){ | |
0da71265 | 199 | //remove escapes (very rare 1:2^22) |
593af7cd MN |
/* a third byte above 3 cannot end a 00 00 0x pattern, so two bytes are copied at once */
200 | if(src[si+2]>3){ |
201 | dst[di++]= src[si++]; | |
202 | dst[di++]= src[si++]; | |
203 | }else if(src[si]==0 && src[si+1]==0){ | |
0da71265 MN |
204 | if(src[si+2]==3){ //escape |
205 | dst[di++]= 0; | |
206 | dst[di++]= 0; | |
207 | si+=3; | |
c8470cc1 | 208 | continue; |
0da71265 | 209 | }else //next start code |
593af7cd | 210 | goto nsc; |
0da71265 MN |
211 | } |
212 | ||
213 | dst[di++]= src[si++]; | |
214 | } | |
593af7cd MN |
/* fewer than three bytes are left, copy them unchanged */
215 | while(si<length) |
216 | dst[di++]= src[si++]; | |
217 | nsc: | |
0da71265 | 218 | |
d4369630 AS |
219 | memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE); |
220 | ||
0da71265 MN |
221 | *dst_length= di; |
222 | *consumed= si + 1;//+1 for the header | |
90b5b51e | 223 | //FIXME store exact number of bits in the getbitcontext (it is needed for decoding) |
0da71265 MN |
224 | return dst; |
225 | } | |
226 | ||
1790a5e9 | 227 | int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){ |
0da71265 MN |
228 | int v= *src; |
229 | int r; | |
230 | ||
a9c9a240 | 231 | tprintf(h->s.avctx, "rbsp trailing %X\n", v); |
0da71265 MN |
232 | |
233 | for(r=1; r<9; r++){ | |
234 | if(v&1) return r; | |
235 | v>>=1; | |
236 | } | |
237 | return 0; | |
238 | } | |
239 | ||
240 | /** | |
1412060e | 241 | * IDCT transforms the 16 dc values and dequantizes them. |
0da71265 MN |
242 | * @param qp quantization parameter |
243 | */ | |
239ea04c | 244 | static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
0da71265 MN |
245 | #define stride 16 |
246 | int i; | |
247 | int temp[16]; //FIXME check if this is a good idea | |
248 | static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; | |
249 | static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; | |
250 | ||
251 | //memset(block, 64, 2*256); | |
252 | //return; | |
253 | for(i=0; i<4; i++){ | |
254 | const int offset= y_offset[i]; | |
255 | const int z0= block[offset+stride*0] + block[offset+stride*4]; | |
256 | const int z1= block[offset+stride*0] - block[offset+stride*4]; | |
257 | const int z2= block[offset+stride*1] - block[offset+stride*5]; | |
258 | const int z3= block[offset+stride*1] + block[offset+stride*5]; | |
259 | ||
260 | temp[4*i+0]= z0+z3; | |
261 | temp[4*i+1]= z1+z2; | |
262 | temp[4*i+2]= z1-z2; | |
263 | temp[4*i+3]= z0-z3; | |
264 | } | |
265 | ||
266 | for(i=0; i<4; i++){ | |
267 | const int offset= x_offset[i]; | |
268 | const int z0= temp[4*0+i] + temp[4*2+i]; | |
269 | const int z1= temp[4*0+i] - temp[4*2+i]; | |
270 | const int z2= temp[4*1+i] - temp[4*3+i]; | |
271 | const int z3= temp[4*1+i] + temp[4*3+i]; | |
272 | ||
1412060e | 273 | block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual |
239ea04c LM |
274 | block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); |
275 | block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); | |
276 | block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); | |
0da71265 MN |
277 | } |
278 | } | |
279 | ||
/* NOTE: everything up to the matching #endif is disabled by the #if 0 below
 * and is not compiled. It uses the same two butterfly passes as
 * h264_luma_dc_dequant_idct_c() but writes the results shifted right by 1
 * instead of scaling them by qmul. It refers to the stride macro, so that
 * macro must still be defined if this code is ever re-enabled. */
e5017ab8 | 280 | #if 0 |
0da71265 | 281 | /** |
1412060e | 282 | * DCT transforms the 16 dc values. |
0da71265 MN |
283 | * @param qp quantization parameter ??? FIXME |
284 | */ | |
285 | static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){ | |
286 | // const int qmul= dequant_coeff[qp][0]; | |
287 | int i; | |
288 | int temp[16]; //FIXME check if this is a good idea | |
289 | static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; | |
290 | static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; | |
291 | ||
292 | for(i=0; i<4; i++){ | |
293 | const int offset= y_offset[i]; | |
294 | const int z0= block[offset+stride*0] + block[offset+stride*4]; | |
295 | const int z1= block[offset+stride*0] - block[offset+stride*4]; | |
296 | const int z2= block[offset+stride*1] - block[offset+stride*5]; | |
297 | const int z3= block[offset+stride*1] + block[offset+stride*5]; | |
298 | ||
299 | temp[4*i+0]= z0+z3; | |
300 | temp[4*i+1]= z1+z2; | |
301 | temp[4*i+2]= z1-z2; | |
302 | temp[4*i+3]= z0-z3; | |
303 | } | |
304 | ||
305 | for(i=0; i<4; i++){ | |
306 | const int offset= x_offset[i]; | |
307 | const int z0= temp[4*0+i] + temp[4*2+i]; | |
308 | const int z1= temp[4*0+i] - temp[4*2+i]; | |
309 | const int z2= temp[4*1+i] - temp[4*3+i]; | |
310 | const int z3= temp[4*1+i] + temp[4*3+i]; | |
311 | ||
312 | block[stride*0 +offset]= (z0 + z3)>>1; | |
313 | block[stride*2 +offset]= (z1 + z2)>>1; | |
314 | block[stride*8 +offset]= (z1 - z2)>>1; | |
315 | block[stride*10+offset]= (z0 - z3)>>1; | |
316 | } | |
317 | } | |
e5017ab8 LA |
318 | #endif |
319 | ||
0da71265 MN |
320 | #undef xStride |
321 | #undef stride | |
322 | ||
239ea04c | 323 | static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
0da71265 MN |
324 | const int stride= 16*2; |
325 | const int xStride= 16; | |
326 | int a,b,c,d,e; | |
327 | ||
328 | a= block[stride*0 + xStride*0]; | |
329 | b= block[stride*0 + xStride*1]; | |
330 | c= block[stride*1 + xStride*0]; | |
331 | d= block[stride*1 + xStride*1]; | |
332 | ||
333 | e= a-b; | |
334 | a= a+b; | |
335 | b= c-d; | |
336 | c= c+d; | |
337 | ||
239ea04c LM |
338 | block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7; |
339 | block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7; | |
340 | block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7; | |
341 | block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7; | |
0da71265 MN |
342 | } |
343 | ||
/* NOTE: everything up to the matching #endif is disabled by the #if 0 below
 * and is not compiled. It applies the same 2x2 butterfly as
 * chroma_dc_dequant_idct_c() to block[0], block[16], block[32] and block[48]
 * but stores the raw sums and differences without any scaling. */
e5017ab8 | 344 | #if 0 |
0da71265 MN |
345 | static void chroma_dc_dct_c(DCTELEM *block){ |
346 | const int stride= 16*2; | |
347 | const int xStride= 16; | |
348 | int a,b,c,d,e; | |
349 | ||
350 | a= block[stride*0 + xStride*0]; | |
351 | b= block[stride*0 + xStride*1]; | |
352 | c= block[stride*1 + xStride*0]; | |
353 | d= block[stride*1 + xStride*1]; | |
354 | ||
355 | e= a-b; | |
356 | a= a+b; | |
357 | b= c-d; | |
358 | c= c+d; | |
359 | ||
360 | block[stride*0 + xStride*0]= (a+c); | |
361 | block[stride*0 + xStride*1]= (e+b); | |
362 | block[stride*1 + xStride*0]= (a-c); | |
363 | block[stride*1 + xStride*1]= (e-b); | |
364 | } | |
e5017ab8 | 365 | #endif |
0da71265 | 366 | |
0da71265 MN |
/**
 * Motion-compensates one partition from a single reference picture.
 *
 * Luma is produced by qpix_op[luma_xy], where luma_xy is built from the two
 * low bits of each motion vector component; Cb and Cr are produced by
 * chroma_op with the three low bits of each component. When the referenced
 * area leaves the picture by more than the emu_edge margin, the source
 * samples are first assembled in s->edge_emu_buffer by ff_emulated_edge_mc().
 *
 * @param pic           reference picture to read from
 * @param n             block index, used through scan8[] to fetch the vector
 * @param square        if zero, qpix_op is called a second time at +delta
 * @param chroma_height height passed to chroma_op
 * @param delta         offset of the second luma call (dest and source)
 * @param list          reference list whose mv_cache entry is used
 * @param src_x_offset  added to the vector after multiplication by 8
 * @param src_y_offset  added to the vector after multiplication by 8
 */
367 | static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, |
368 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
369 | int src_x_offset, int src_y_offset, | |
370 | qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ | |
371 | MpegEncContext * const s = &h->s; | |
372 | const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; | |
5d18eaad | 373 | int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; |
0da71265 | 374 | const int luma_xy= (mx&3) + ((my&3)<<2); |
5d18eaad LM |
375 | uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; |
376 | uint8_t * src_cb, * src_cr; | |
377 | int extra_width= h->emu_edge_width; | |
378 | int extra_height= h->emu_edge_height; | |
0da71265 MN |
379 | int emu=0; |
380 | const int full_mx= mx>>2; | |
381 | const int full_my= my>>2; | |
fbd312fd | 382 | const int pic_width = 16*s->mb_width; |
0d43dd8c | 383 | const int pic_height = 16*s->mb_height >> MB_FIELD; |
115329f1 | 384 | |
0da71265 MN |
/* a fractional vector component reduces the usable margin by 3 samples
 * (presumably the reach of the interpolation filter, TODO confirm) */
385 | if(mx&7) extra_width -= 3; |
386 | if(my&7) extra_height -= 3; | |
115329f1 DB |
387 | |
/* block plus margin leaves the picture: build a (16+5)x(16+5) luma source
 * area in edge_emu_buffer and read from there instead */
388 | if( full_mx < 0-extra_width | |
389 | || full_my < 0-extra_height | |
390 | || full_mx + 16/*FIXME*/ > pic_width + extra_width | |
fbd312fd | 391 | || full_my + 16/*FIXME*/ > pic_height + extra_height){ |
5d18eaad LM |
392 | ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); |
393 | src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; | |
0da71265 MN |
394 | emu=1; |
395 | } | |
115329f1 | 396 | |
5d18eaad | 397 | qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps? |
0da71265 | 398 | if(!square){ |
5d18eaad | 399 | qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); |
0da71265 | 400 | } |
115329f1 | 401 | |
/* chroma is skipped entirely when gray-only decoding is requested */
49fb20cb | 402 | if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; |
115329f1 | 403 | |
0d43dd8c | 404 | if(MB_FIELD){ |
5d18eaad | 405 | // chroma offset when predicting from a field of opposite parity |
2143b118 | 406 | my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); |
5d18eaad LM |
407 | emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); |
408 | } | |
409 | src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; | |
410 | src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize; | |
411 | ||
/* Cb and Cr both go through edge_emu_buffer, so each plane is emulated
 * right before its own chroma_op call */
0da71265 | 412 | if(emu){ |
5d18eaad | 413 | ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); |
0da71265 MN |
414 | src_cb= s->edge_emu_buffer; |
415 | } | |
5d18eaad | 416 | chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); |
0da71265 MN |
417 | |
418 | if(emu){ | |
5d18eaad | 419 | ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); |
0da71265 MN |
420 | src_cr= s->edge_emu_buffer; |
421 | } | |
5d18eaad | 422 | chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); |
0da71265 MN |
423 | } |
424 | ||
9f2d1b4f | 425 | static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, |
0da71265 MN |
426 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, |
427 | int x_offset, int y_offset, | |
428 | qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
429 | qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |
430 | int list0, int list1){ | |
431 | MpegEncContext * const s = &h->s; | |
432 | qpel_mc_func *qpix_op= qpix_put; | |
433 | h264_chroma_mc_func chroma_op= chroma_put; | |
115329f1 | 434 | |
5d18eaad LM |
435 | dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; |
436 | dest_cb += x_offset + y_offset*h->mb_uvlinesize; | |
437 | dest_cr += x_offset + y_offset*h->mb_uvlinesize; | |
0da71265 | 438 | x_offset += 8*s->mb_x; |
0d43dd8c | 439 | y_offset += 8*(s->mb_y >> MB_FIELD); |
115329f1 | 440 | |
0da71265 | 441 | if(list0){ |
1924f3ce | 442 | Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; |
0da71265 MN |
443 | mc_dir_part(h, ref, n, square, chroma_height, delta, 0, |
444 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
445 | qpix_op, chroma_op); | |
446 | ||
447 | qpix_op= qpix_avg; | |
448 | chroma_op= chroma_avg; | |
449 | } | |
450 | ||
451 | if(list1){ | |
1924f3ce | 452 | Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; |
0da71265 MN |
453 | mc_dir_part(h, ref, n, square, chroma_height, delta, 1, |
454 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
455 | qpix_op, chroma_op); | |
456 | } | |
457 | } | |
458 | ||
9f2d1b4f LM |
459 | static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, |
460 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
461 | int x_offset, int y_offset, | |
462 | qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
463 | h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, | |
464 | h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, | |
465 | int list0, int list1){ | |
466 | MpegEncContext * const s = &h->s; | |
467 | ||
5d18eaad LM |
468 | dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; |
469 | dest_cb += x_offset + y_offset*h->mb_uvlinesize; | |
470 | dest_cr += x_offset + y_offset*h->mb_uvlinesize; | |
9f2d1b4f | 471 | x_offset += 8*s->mb_x; |
0d43dd8c | 472 | y_offset += 8*(s->mb_y >> MB_FIELD); |
115329f1 | 473 | |
9f2d1b4f LM |
474 | if(list0 && list1){ |
475 | /* don't optimize for luma-only case, since B-frames usually | |
476 | * use implicit weights => chroma too. */ | |
477 | uint8_t *tmp_cb = s->obmc_scratchpad; | |
5d18eaad LM |
478 | uint8_t *tmp_cr = s->obmc_scratchpad + 8; |
479 | uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; | |
9f2d1b4f LM |
480 | int refn0 = h->ref_cache[0][ scan8[n] ]; |
481 | int refn1 = h->ref_cache[1][ scan8[n] ]; | |
482 | ||
483 | mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, | |
484 | dest_y, dest_cb, dest_cr, | |
485 | x_offset, y_offset, qpix_put, chroma_put); | |
486 | mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, | |
487 | tmp_y, tmp_cb, tmp_cr, | |
488 | x_offset, y_offset, qpix_put, chroma_put); | |
489 | ||
490 | if(h->use_weight == 2){ | |
491 | int weight0 = h->implicit_weight[refn0][refn1]; | |
492 | int weight1 = 64 - weight0; | |
5d18eaad LM |
493 | luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); |
494 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); | |
495 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); | |
9f2d1b4f | 496 | }else{ |
5d18eaad | 497 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, |
3d9137c8 MN |
498 | h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], |
499 | h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); | |
5d18eaad | 500 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 MN |
501 | h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], |
502 | h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); | |
5d18eaad | 503 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 MN |
504 | h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], |
505 | h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); | |
9f2d1b4f LM |
506 | } |
507 | }else{ | |
508 | int list = list1 ? 1 : 0; | |
509 | int refn = h->ref_cache[list][ scan8[n] ]; | |
510 | Picture *ref= &h->ref_list[list][refn]; | |
511 | mc_dir_part(h, ref, n, square, chroma_height, delta, list, | |
512 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
513 | qpix_put, chroma_put); | |
514 | ||
5d18eaad | 515 | luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, |
3d9137c8 | 516 | h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); |
9f2d1b4f | 517 | if(h->use_weight_chroma){ |
5d18eaad | 518 | chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 | 519 | h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); |
5d18eaad | 520 | chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, |
3d9137c8 | 521 | h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); |
9f2d1b4f LM |
522 | } |
523 | } | |
524 | } | |
525 | ||
526 | static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, | |
527 | uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
528 | int x_offset, int y_offset, | |
529 | qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
530 | qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |
115329f1 | 531 | h264_weight_func *weight_op, h264_biweight_func *weight_avg, |
9f2d1b4f LM |
532 | int list0, int list1){ |
533 | if((h->use_weight==2 && list0 && list1 | |
534 | && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32)) | |
535 | || h->use_weight==1) | |
536 | mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |
537 | x_offset, y_offset, qpix_put, chroma_put, | |
538 | weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); | |
539 | else | |
540 | mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |
541 | x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); | |
542 | } | |
543 | ||
513fbd8e LM |
544 | static inline void prefetch_motion(H264Context *h, int list){ |
545 | /* fetch pixels for estimated mv 4 macroblocks ahead | |
546 | * optimized for 64byte cache lines */ | |
547 | MpegEncContext * const s = &h->s; | |
548 | const int refn = h->ref_cache[list][scan8[0]]; | |
549 | if(refn >= 0){ | |
550 | const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; | |
551 | const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; | |
552 | uint8_t **src= h->ref_list[list][refn].data; | |
5d18eaad | 553 | int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; |
513fbd8e LM |
554 | s->dsp.prefetch(src[0]+off, s->linesize, 4); |
555 | off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; | |
556 | s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |
557 | } | |
558 | } | |
559 | ||
0da71265 MN |
/**
 * Runs motion compensation for the current inter macroblock.
 *
 * The macroblock type stored in s->current_picture.mb_type[h->mb_xy] selects
 * the partitioning (16x16, 16x8, 8x16, or 8x8 with per-quadrant
 * sub-partitions 8x8, 8x4, 4x8, 4x4) and mc_part() is called once per
 * partition. prefetch_motion() is called for list 0 before and for list 1
 * after the compensation.
 *
 * @param dest_y     luma destination of the macroblock
 * @param dest_cb    Cb destination of the macroblock
 * @param dest_cr    Cr destination of the macroblock
 * @param qpix_put   luma put function tables, indexed by block size class
 * @param chroma_put chroma put functions, indexed by block size class
 * @param qpix_avg   luma avg function tables, indexed like qpix_put
 * @param chroma_avg chroma avg functions, indexed like chroma_put
 * @param weight_op  weight functions; the slice passed on depends on the shape
 * @param weight_avg biweight functions, indexed like weight_op
 */
560 | static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, |
561 | qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | |
9f2d1b4f LM |
562 | qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), |
563 | h264_weight_func *weight_op, h264_biweight_func *weight_avg){ | |
0da71265 | 564 | MpegEncContext * const s = &h->s; |
64514ee8 | 565 | const int mb_xy= h->mb_xy; |
0da71265 | 566 | const int mb_type= s->current_picture.mb_type[mb_xy]; |
115329f1 | 567 | |
0da71265 | 568 | assert(IS_INTER(mb_type)); |
115329f1 | 569 | |
513fbd8e LM |
570 | prefetch_motion(h, 0); |
571 | ||
0da71265 MN |
/* one mc_part() call per partition; the IS_DIR() pair tells which of the
 * two reference lists that partition uses */
572 | if(IS_16X16(mb_type)){ |
573 | mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, | |
574 | qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], | |
7231ccf4 | 575 | weight_op, weight_avg, |
0da71265 MN |
576 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
577 | }else if(IS_16X8(mb_type)){ | |
578 | mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, | |
579 | qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | |
9f2d1b4f | 580 | &weight_op[1], &weight_avg[1], |
0da71265 MN |
581 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
582 | mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, | |
583 | qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | |
9f2d1b4f | 584 | &weight_op[1], &weight_avg[1], |
0da71265 MN |
585 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); |
586 | }else if(IS_8X16(mb_type)){ | |
5d18eaad | 587 | mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, |
0da71265 | 588 | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], |
9f2d1b4f | 589 | &weight_op[2], &weight_avg[2], |
0da71265 | 590 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
5d18eaad | 591 | mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, |
0da71265 | 592 | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], |
9f2d1b4f | 593 | &weight_op[2], &weight_avg[2], |
0da71265 MN |
594 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); |
595 | }else{ | |
596 | int i; | |
115329f1 | 597 | |
0da71265 MN |
598 | assert(IS_8X8(mb_type)); |
599 | ||
/* four quadrants, each with its own sub-partitioning taken from
 * h->sub_mb_type[i]; the quadrant starts at block index 4*i */
600 | for(i=0; i<4; i++){ | |
601 | const int sub_mb_type= h->sub_mb_type[i]; | |
602 | const int n= 4*i; | |
603 | int x_offset= (i&1)<<2; | |
604 | int y_offset= (i&2)<<1; | |
605 | ||
606 | if(IS_SUB_8X8(sub_mb_type)){ | |
607 | mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
608 | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | |
9f2d1b4f | 609 | &weight_op[3], &weight_avg[3], |
0da71265 MN |
610 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
611 | }else if(IS_SUB_8X4(sub_mb_type)){ | |
612 | mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
613 | qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | |
9f2d1b4f | 614 | &weight_op[4], &weight_avg[4], |
0da71265 MN |
615 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
616 | mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, | |
617 | qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | |
9f2d1b4f | 618 | &weight_op[4], &weight_avg[4], |
0da71265 MN |
619 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
620 | }else if(IS_SUB_4X8(sub_mb_type)){ | |
5d18eaad | 621 | mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, |
0da71265 | 622 | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], |
9f2d1b4f | 623 | &weight_op[5], &weight_avg[5], |
0da71265 | 624 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
5d18eaad | 625 | mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, |
0da71265 | 626 | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], |
9f2d1b4f | 627 | &weight_op[5], &weight_avg[5], |
0da71265 MN |
628 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
629 | }else{ | |
630 | int j; | |
631 | assert(IS_SUB_4X4(sub_mb_type)); | |
632 | for(j=0; j<4; j++){ | |
633 | int sub_x_offset= x_offset + 2*(j&1); | |
634 | int sub_y_offset= y_offset + (j&2); | |
635 | mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, | |
636 | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | |
9f2d1b4f | 637 | &weight_op[6], &weight_avg[6], |
0da71265 MN |
638 | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
639 | } | |
640 | } | |
641 | } | |
642 | } | |
513fbd8e LM |
643 | |
644 | prefetch_motion(h, 1); | |
0da71265 MN |
645 | } |
646 | ||
0da71265 | 647 | |
0da71265 | 648 | static void free_tables(H264Context *h){ |
7978debd | 649 | int i; |
afebe2f7 | 650 | H264Context *hx; |
0da71265 | 651 | av_freep(&h->intra4x4_pred_mode); |
e5017ab8 LA |
652 | av_freep(&h->chroma_pred_mode_table); |
653 | av_freep(&h->cbp_table); | |
9e528114 LA |
654 | av_freep(&h->mvd_table[0]); |
655 | av_freep(&h->mvd_table[1]); | |
5ad984c9 | 656 | av_freep(&h->direct_table); |
0da71265 MN |
657 | av_freep(&h->non_zero_count); |
658 | av_freep(&h->slice_table_base); | |
659 | h->slice_table= NULL; | |
c988f975 | 660 | av_freep(&h->list_counts); |
e5017ab8 | 661 | |
0da71265 | 662 | av_freep(&h->mb2b_xy); |
d43c1922 | 663 | av_freep(&h->mb2br_xy); |
9f2d1b4f | 664 | |
6752dd5a | 665 | for(i = 0; i < MAX_THREADS; i++) { |
afebe2f7 AÖ |
666 | hx = h->thread_context[i]; |
667 | if(!hx) continue; | |
668 | av_freep(&hx->top_borders[1]); | |
669 | av_freep(&hx->top_borders[0]); | |
670 | av_freep(&hx->s.obmc_scratchpad); | |
d2d5e067 AS |
671 | av_freep(&hx->rbsp_buffer[1]); |
672 | av_freep(&hx->rbsp_buffer[0]); | |
eda4ea4e MS |
673 | hx->rbsp_buffer_size[0] = 0; |
674 | hx->rbsp_buffer_size[1] = 0; | |
d2d5e067 | 675 | if (i) av_freep(&h->thread_context[i]); |
afebe2f7 | 676 | } |
0da71265 MN |
677 | } |
678 | ||
239ea04c LM |
679 | static void init_dequant8_coeff_table(H264Context *h){ |
680 | int i,q,x; | |
4693b031 | 681 | const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly |
239ea04c LM |
682 | h->dequant8_coeff[0] = h->dequant8_buffer[0]; |
683 | h->dequant8_coeff[1] = h->dequant8_buffer[1]; | |
684 | ||
685 | for(i=0; i<2; i++ ){ | |
686 | if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ | |
687 | h->dequant8_coeff[1] = h->dequant8_buffer[0]; | |
688 | break; | |
689 | } | |
690 | ||
691 | for(q=0; q<52; q++){ | |
d9ec210b DP |
692 | int shift = div6[q]; |
693 | int idx = rem6[q]; | |
239ea04c | 694 | for(x=0; x<64; x++) |
548a1c8a LM |
695 | h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = |
696 | ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * | |
697 | h->pps.scaling_matrix8[i][x]) << shift; | |
239ea04c LM |
698 | } |
699 | } | |
700 | } | |
701 | ||
702 | static void init_dequant4_coeff_table(H264Context *h){ | |
703 | int i,j,q,x; | |
4693b031 | 704 | const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly |
239ea04c LM |
705 | for(i=0; i<6; i++ ){ |
706 | h->dequant4_coeff[i] = h->dequant4_buffer[i]; | |
707 | for(j=0; j<i; j++){ | |
708 | if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ | |
709 | h->dequant4_coeff[i] = h->dequant4_buffer[j]; | |
710 | break; | |
711 | } | |
712 | } | |
713 | if(j<i) | |
714 | continue; | |
715 | ||
716 | for(q=0; q<52; q++){ | |
d9ec210b DP |
717 | int shift = div6[q] + 2; |
718 | int idx = rem6[q]; | |
239ea04c | 719 | for(x=0; x<16; x++) |
ab2e3e2c LM |
720 | h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = |
721 | ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * | |
239ea04c LM |
722 | h->pps.scaling_matrix4[i][x]) << shift; |
723 | } | |
724 | } | |
725 | } | |
726 | ||
727 | static void init_dequant_tables(H264Context *h){ | |
728 | int i,x; | |
729 | init_dequant4_coeff_table(h); | |
730 | if(h->pps.transform_8x8_mode) | |
731 | init_dequant8_coeff_table(h); | |
732 | if(h->sps.transform_bypass){ | |
733 | for(i=0; i<6; i++) | |
734 | for(x=0; x<16; x++) | |
735 | h->dequant4_coeff[i][0][x] = 1<<6; | |
736 | if(h->pps.transform_8x8_mode) | |
737 | for(i=0; i<2; i++) | |
738 | for(x=0; x<64; x++) | |
739 | h->dequant8_coeff[i][0][x] = 1<<6; | |
740 | } | |
741 | } | |
742 | ||
743 | ||
903d58f6 | 744 | int ff_h264_alloc_tables(H264Context *h){ |
0da71265 | 745 | MpegEncContext * const s = &h->s; |
7bc9090a | 746 | const int big_mb_num= s->mb_stride * (s->mb_height+1); |
145061a1 | 747 | const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count; |
239ea04c | 748 | int x,y; |
0da71265 | 749 | |
145061a1 | 750 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail) |
e5017ab8 | 751 | |
c988f975 | 752 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail) |
d31dbec3 RP |
753 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail) |
754 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail) | |
0da71265 | 755 | |
d31dbec3 | 756 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail) |
145061a1 MN |
757 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail); |
758 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail); | |
36b54927 | 759 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail); |
c988f975 | 760 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail) |
e5017ab8 | 761 | |
b735aeea | 762 | memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); |
5d18eaad | 763 | h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; |
0da71265 | 764 | |
d31dbec3 | 765 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail); |
d43c1922 | 766 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail); |
0da71265 MN |
767 | for(y=0; y<s->mb_height; y++){ |
768 | for(x=0; x<s->mb_width; x++){ | |
7bc9090a | 769 | const int mb_xy= x + y*s->mb_stride; |
0da71265 | 770 | const int b_xy = 4*x + 4*y*h->b_stride; |
115329f1 | 771 | |
0da71265 | 772 | h->mb2b_xy [mb_xy]= b_xy; |
e1c88a21 | 773 | h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride))); |
0da71265 MN |
774 | } |
775 | } | |
9f2d1b4f | 776 | |
9c6221ae GV |
777 | s->obmc_scratchpad = NULL; |
778 | ||
56edbd81 LM |
779 | if(!h->dequant4_coeff[0]) |
780 | init_dequant_tables(h); | |
781 | ||
0da71265 MN |
782 | return 0; |
783 | fail: | |
784 | free_tables(h); | |
785 | return -1; | |
786 | } | |
787 | ||
afebe2f7 AÖ |
788 | /** |
789 | * Mimic alloc_tables(), but for every context thread. | |
790 | */ | |
145061a1 MN |
791 | static void clone_tables(H264Context *dst, H264Context *src, int i){ |
792 | MpegEncContext * const s = &src->s; | |
793 | dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride; | |
afebe2f7 AÖ |
794 | dst->non_zero_count = src->non_zero_count; |
795 | dst->slice_table = src->slice_table; | |
796 | dst->cbp_table = src->cbp_table; | |
797 | dst->mb2b_xy = src->mb2b_xy; | |
d43c1922 | 798 | dst->mb2br_xy = src->mb2br_xy; |
afebe2f7 | 799 | dst->chroma_pred_mode_table = src->chroma_pred_mode_table; |
145061a1 MN |
800 | dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride; |
801 | dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride; | |
afebe2f7 | 802 | dst->direct_table = src->direct_table; |
fb823b77 | 803 | dst->list_counts = src->list_counts; |
afebe2f7 | 804 | |
afebe2f7 AÖ |
805 | dst->s.obmc_scratchpad = NULL; |
806 | ff_h264_pred_init(&dst->hpc, src->s.codec_id); | |
afebe2f7 AÖ |
807 | } |
808 | ||
809 | /** | |
810 | * Init context | |
811 | * Allocate buffers which are not shared amongst multiple threads. | |
812 | */ | |
813 | static int context_init(H264Context *h){ | |
d31dbec3 RP |
814 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) |
815 | FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) | |
afebe2f7 | 816 | |
145061a1 MN |
817 | h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = |
818 | h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; | |
819 | ||
afebe2f7 AÖ |
820 | return 0; |
821 | fail: | |
822 | return -1; // free_tables will clean up for us | |
823 | } | |
824 | ||
9855b2e3 MN |
825 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size); |
826 | ||
98a6fff9 | 827 | static av_cold void common_init(H264Context *h){ |
0da71265 | 828 | MpegEncContext * const s = &h->s; |
0da71265 MN |
829 | |
830 | s->width = s->avctx->width; | |
831 | s->height = s->avctx->height; | |
832 | s->codec_id= s->avctx->codec->id; | |
115329f1 | 833 | |
4693b031 | 834 | ff_h264dsp_init(&h->h264dsp); |
c92a30bb | 835 | ff_h264_pred_init(&h->hpc, s->codec_id); |
0da71265 | 836 | |
239ea04c | 837 | h->dequant_coeff_pps= -1; |
9a41c2c7 | 838 | s->unrestricted_mv=1; |
0da71265 | 839 | s->decode=1; //FIXME |
56edbd81 | 840 | |
a5805aa9 MN |
841 | dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early |
842 | ||
56edbd81 LM |
843 | memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t)); |
844 | memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t)); | |
0da71265 MN |
845 | } |
846 | ||
903d58f6 | 847 | av_cold int ff_h264_decode_init(AVCodecContext *avctx){ |
0da71265 MN |
848 | H264Context *h= avctx->priv_data; |
849 | MpegEncContext * const s = &h->s; | |
850 | ||
3edcacde | 851 | MPV_decode_defaults(s); |
115329f1 | 852 | |
0da71265 MN |
853 | s->avctx = avctx; |
854 | common_init(h); | |
855 | ||
856 | s->out_format = FMT_H264; | |
857 | s->workaround_bugs= avctx->workaround_bugs; | |
858 | ||
859 | // set defaults | |
0da71265 | 860 | // s->decode_mb= ff_h263_decode_mb; |
9a5a05d0 | 861 | s->quarter_sample = 1; |
47cd974a | 862 | if(!avctx->has_b_frames) |
0da71265 | 863 | s->low_delay= 1; |
7a9dba3c | 864 | |
580a7465 | 865 | avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; |
0da71265 | 866 | |
e1e94902 | 867 | ff_h264_decode_init_vlc(); |
115329f1 | 868 | |
afebe2f7 | 869 | h->thread_context[0] = h; |
18c7be65 | 870 | h->outputed_poc = INT_MIN; |
e4b8f1fa | 871 | h->prev_poc_msb= 1<<16; |
055a6aa7 | 872 | h->x264_build = -1; |
9c095463 | 873 | ff_h264_reset_sei(h); |
efd8c1f6 MN |
874 | if(avctx->codec_id == CODEC_ID_H264){ |
875 | if(avctx->ticks_per_frame == 1){ | |
876 | s->avctx->time_base.den *=2; | |
877 | } | |
19df37a8 | 878 | avctx->ticks_per_frame = 2; |
efd8c1f6 | 879 | } |
9855b2e3 MN |
880 | |
881 | if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){ | |
882 | int i, cnt, nalsize; | |
883 | unsigned char *p = avctx->extradata; | |
884 | ||
885 | h->is_avc = 1; | |
886 | ||
887 | if(avctx->extradata_size < 7) { | |
888 | av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); | |
889 | return -1; | |
890 | } | |
891 | /* sps and pps in the avcC always have length coded with 2 bytes, | |
892 | so put a fake nal_length_size = 2 while parsing them */ | |
893 | h->nal_length_size = 2; | |
894 | // Decode sps from avcC | |
895 | cnt = *(p+5) & 0x1f; // Number of sps | |
896 | p += 6; | |
897 | for (i = 0; i < cnt; i++) { | |
898 | nalsize = AV_RB16(p) + 2; | |
899 | if(decode_nal_units(h, p, nalsize) < 0) { | |
900 | av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i); | |
901 | return -1; | |
902 | } | |
903 | p += nalsize; | |
904 | } | |
905 | // Decode pps from avcC | |
906 | cnt = *(p++); // Number of pps | |
907 | for (i = 0; i < cnt; i++) { | |
908 | nalsize = AV_RB16(p) + 2; | |
909 | if(decode_nal_units(h, p, nalsize) != nalsize) { | |
910 | av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); | |
911 | return -1; | |
912 | } | |
913 | p += nalsize; | |
914 | } | |
915 | // Now store right nal length size, that will be use to parse all other nals | |
916 | h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1; | |
917 | } else { | |
918 | h->is_avc = 0; | |
919 | if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0) | |
920 | return -1; | |
921 | } | |
db8cb47d MN |
922 | if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){ |
923 | s->avctx->has_b_frames = h->sps.num_reorder_frames; | |
924 | s->low_delay = 0; | |
925 | } | |
9855b2e3 | 926 | |
0da71265 MN |
927 | return 0; |
928 | } | |
929 | ||
903d58f6 | 930 | int ff_h264_frame_start(H264Context *h){ |
0da71265 MN |
931 | MpegEncContext * const s = &h->s; |
932 | int i; | |
933 | ||
af8aa846 MN |
934 | if(MPV_frame_start(s, s->avctx) < 0) |
935 | return -1; | |
0da71265 | 936 | ff_er_frame_start(s); |
3a22d7fa JD |
937 | /* |
938 | * MPV_frame_start uses pict_type to derive key_frame. | |
939 | * This is incorrect for H.264; IDR markings must be used. | |
1412060e | 940 | * Zero here; IDR markings per slice in frame or fields are ORed in later. |
3a22d7fa JD |
941 | * See decode_nal_units(). |
942 | */ | |
943 | s->current_picture_ptr->key_frame= 0; | |
c173a088 | 944 | s->current_picture_ptr->mmco_reset= 0; |
0da71265 MN |
945 | |
946 | assert(s->linesize && s->uvlinesize); | |
947 | ||
948 | for(i=0; i<16; i++){ | |
949 | h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); | |
6867a90b | 950 | h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); |
0da71265 MN |
951 | } |
952 | for(i=0; i<4; i++){ | |
953 | h->block_offset[16+i]= | |
954 | h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); | |
6867a90b LLL |
955 | h->block_offset[24+16+i]= |
956 | h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); | |
0da71265 MN |
957 | } |
958 | ||
934b0821 LM |
959 | /* can't be in alloc_tables because linesize isn't known there. |
960 | * FIXME: redo bipred weight to not require extra buffer? */ | |
afebe2f7 AÖ |
961 | for(i = 0; i < s->avctx->thread_count; i++) |
962 | if(!h->thread_context[i]->s.obmc_scratchpad) | |
963 | h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); | |
5d18eaad | 964 | |
2ce1c2e0 | 965 | /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/ |
5820b90d | 966 | memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table)); |
934b0821 | 967 | |
0da71265 | 968 | // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; |
28bb9eb2 | 969 | |
1412060e | 970 | // We mark the current picture as non-reference after allocating it, so |
28bb9eb2 MN |
971 | // that if we break out due to an error it can be released automatically |
972 | // in the next MPV_frame_start(). | |
973 | // SVQ3 as well as most other codecs have only last/next/current and thus | |
974 | // get released even with set reference, besides SVQ3 and others do not | |
975 | // mark frames as reference later "naturally". | |
976 | if(s->codec_id != CODEC_ID_SVQ3) | |
977 | s->current_picture_ptr->reference= 0; | |
357282c6 MN |
978 | |
979 | s->current_picture_ptr->field_poc[0]= | |
980 | s->current_picture_ptr->field_poc[1]= INT_MAX; | |
5118c6c7 | 981 | assert(s->current_picture_ptr->long_ref==0); |
357282c6 | 982 | |
af8aa846 | 983 | return 0; |
0da71265 MN |
984 | } |
985 | ||
93cc10fa | 986 | static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ |
53c05b1e | 987 | MpegEncContext * const s = &h->s; |
0b69d625 | 988 | uint8_t *top_border; |
5f7f9719 | 989 | int top_idx = 1; |
115329f1 | 990 | |
53c05b1e MN |
991 | src_y -= linesize; |
992 | src_cb -= uvlinesize; | |
993 | src_cr -= uvlinesize; | |
994 | ||
5f7f9719 MN |
995 | if(!simple && FRAME_MBAFF){ |
996 | if(s->mb_y&1){ | |
5f7f9719 | 997 | if(!MB_MBAFF){ |
0b69d625 AS |
998 | top_border = h->top_borders[0][s->mb_x]; |
999 | AV_COPY128(top_border, src_y + 15*linesize); | |
49fb20cb | 1000 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
0b69d625 AS |
1001 | AV_COPY64(top_border+16, src_cb+7*uvlinesize); |
1002 | AV_COPY64(top_border+24, src_cr+7*uvlinesize); | |
5f7f9719 MN |
1003 | } |
1004 | } | |
c988f975 MN |
1005 | }else if(MB_MBAFF){ |
1006 | top_idx = 0; | |
1007 | }else | |
1008 | return; | |
5f7f9719 MN |
1009 | } |
1010 | ||
0b69d625 | 1011 | top_border = h->top_borders[top_idx][s->mb_x]; |
3b66c4c5 | 1012 | // There are two lines saved, the line above the the top macroblock of a pair, |
6867a90b | 1013 | // and the line above the bottom macroblock |
0b69d625 | 1014 | AV_COPY128(top_border, src_y + 16*linesize); |
53c05b1e | 1015 | |
49fb20cb | 1016 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
0b69d625 AS |
1017 | AV_COPY64(top_border+16, src_cb+8*uvlinesize); |
1018 | AV_COPY64(top_border+24, src_cr+8*uvlinesize); | |
53c05b1e MN |
1019 | } |
1020 | } | |
1021 | ||
93cc10fa | 1022 | static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ |
53c05b1e | 1023 | MpegEncContext * const s = &h->s; |
b69378e2 AÖ |
1024 | int deblock_left; |
1025 | int deblock_top; | |
5f7f9719 | 1026 | int top_idx = 1; |
1e4f1c56 AS |
1027 | uint8_t *top_border_m1; |
1028 | uint8_t *top_border; | |
5f7f9719 MN |
1029 | |
1030 | if(!simple && FRAME_MBAFF){ | |
1031 | if(s->mb_y&1){ | |
c988f975 MN |
1032 | if(!MB_MBAFF) |
1033 | return; | |
5f7f9719 | 1034 | }else{ |
5f7f9719 MN |
1035 | top_idx = MB_MBAFF ? 0 : 1; |
1036 | } | |
5f7f9719 | 1037 | } |
b69378e2 AÖ |
1038 | |
1039 | if(h->deblocking_filter == 2) { | |
024bf79f MN |
1040 | deblock_left = h->left_type[0]; |
1041 | deblock_top = h->top_type; | |
b69378e2 AÖ |
1042 | } else { |
1043 | deblock_left = (s->mb_x > 0); | |
6c805007 | 1044 | deblock_top = (s->mb_y > !!MB_FIELD); |
b69378e2 | 1045 | } |
53c05b1e MN |
1046 | |
1047 | src_y -= linesize + 1; | |
1048 | src_cb -= uvlinesize + 1; | |
1049 | src_cr -= uvlinesize + 1; | |
1050 | ||
1e4f1c56 AS |
1051 | top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; |
1052 | top_border = h->top_borders[top_idx][s->mb_x]; | |
1053 | ||
0b69d625 AS |
1054 | #define XCHG(a,b,xchg)\ |
1055 | if (xchg) AV_SWAP64(b,a);\ | |
1056 | else AV_COPY64(b,a); | |
d89dc06a | 1057 | |
d89dc06a | 1058 | if(deblock_top){ |
c988f975 | 1059 | if(deblock_left){ |
0b69d625 | 1060 | XCHG(top_border_m1+8, src_y -7, 1); |
c988f975 | 1061 | } |
0b69d625 AS |
1062 | XCHG(top_border+0, src_y +1, xchg); |
1063 | XCHG(top_border+8, src_y +9, 1); | |
cad4368a | 1064 | if(s->mb_x+1 < s->mb_width){ |
0b69d625 | 1065 | XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1); |
43efd19a | 1066 | } |
53c05b1e | 1067 | } |
53c05b1e | 1068 | |
49fb20cb | 1069 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
d89dc06a | 1070 | if(deblock_top){ |
c988f975 | 1071 | if(deblock_left){ |
0b69d625 AS |
1072 | XCHG(top_border_m1+16, src_cb -7, 1); |
1073 | XCHG(top_border_m1+24, src_cr -7, 1); | |
c988f975 | 1074 | } |
0b69d625 AS |
1075 | XCHG(top_border+16, src_cb+1, 1); |
1076 | XCHG(top_border+24, src_cr+1, 1); | |
53c05b1e | 1077 | } |
53c05b1e MN |
1078 | } |
1079 | } | |
1080 | ||
/**
 * Reconstruct the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture, using the coefficients held in h->mb.
 *
 * Intra PCM macroblocks are copied straight from h->mb. Other intra
 * macroblocks are predicted through h->hpc and then get their residual added;
 * inter macroblocks go through hl_motion(). Chroma residual is added when
 * bits 0x30 of h->cbp are set. h->mb is cleared at the end when h->cbp is
 * nonzero or the macroblock is intra.
 *
 * @param simple when nonzero, the field-macroblock, intra PCM, transform
 *               bypass, SVQ3 and CODEC_FLAG_GRAY checks are short-circuited
 */
5a6a6cc7 | 1081 | static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ |
0da71265 MN |
1082 | MpegEncContext * const s = &h->s; |
1083 | const int mb_x= s->mb_x; | |
1084 | const int mb_y= s->mb_y; | |
64514ee8 | 1085 | const int mb_xy= h->mb_xy; |
0da71265 MN |
1086 | const int mb_type= s->current_picture.mb_type[mb_xy]; |
1087 | uint8_t *dest_y, *dest_cb, *dest_cr; | |
1088 | int linesize, uvlinesize /*dct_offset*/; | |
1089 | int i; | |
6867a90b | 1090 | int *block_offset = &h->block_offset[0]; |
41e4055b | 1091 | const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); |
8b6871ed | 1092 | /* is_h264 should always be true if SVQ3 is disabled. */ |
49fb20cb | 1093 | const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; |
36940eca | 1094 | void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
ef9d1d15 | 1095 | void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); |
0da71265 | 1096 | |
6120a343 MN |
1097 | dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; |
1098 | dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; | |
1099 | dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; | |
0da71265 | 1100 | |
a957c27b LM |
1101 | s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); |
1102 | s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); | |
1103 | ||
c988f975 MN |
1104 | h->list_counts[mb_xy]= h->list_count; |
1105 | ||
// field macroblock: use doubled line strides and the block offsets stored from index 24 on
bd91fee3 | 1106 | if (!simple && MB_FIELD) { |
5d18eaad LM |
1107 | linesize = h->mb_linesize = s->linesize * 2; |
1108 | uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | |
6867a90b | 1109 | block_offset = &h->block_offset[24]; |
1412060e | 1110 | if(mb_y&1){ //FIXME move out of this function? |
0da71265 | 1111 | dest_y -= s->linesize*15; |
6867a90b LLL |
1112 | dest_cb-= s->uvlinesize*7; |
1113 | dest_cr-= s->uvlinesize*7; | |
0da71265 | 1114 | } |
5d18eaad LM |
1115 | if(FRAME_MBAFF) { |
1116 | int list; | |
3425501d | 1117 | for(list=0; list<h->list_count; list++){ |
5d18eaad LM |
1118 | if(!USES_LIST(mb_type, list)) |
1119 | continue; | |
1120 | if(IS_16X16(mb_type)){ | |
1121 | int8_t *ref = &h->ref_cache[list][scan8[0]]; | |
1710856c | 1122 | fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); |
5d18eaad LM |
1123 | }else{ |
1124 | for(i=0; i<16; i+=4){ | |
5d18eaad LM |
1125 | int ref = h->ref_cache[list][scan8[i]]; |
1126 | if(ref >= 0) | |
1710856c | 1127 | fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1); |
5d18eaad LM |
1128 | } |
1129 | } | |
1130 | } | |
1131 | } | |
0da71265 | 1132 | } else { |
5d18eaad LM |
1133 | linesize = h->mb_linesize = s->linesize; |
1134 | uvlinesize = h->mb_uvlinesize = s->uvlinesize; | |
0da71265 MN |
1135 | // dct_offset = s->linesize * 16; |
1136 | } | |
115329f1 | 1137 | |
// intra PCM: the samples are copied directly from h->mb into the picture
bd91fee3 | 1138 | if (!simple && IS_INTRA_PCM(mb_type)) { |
c1708e8d MN |
1139 | for (i=0; i<16; i++) { |
1140 | memcpy(dest_y + i* linesize, h->mb + i*8, 16); | |
6fbcaaa0 | 1141 | } |
c1708e8d MN |
1142 | for (i=0; i<8; i++) { |
1143 | memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); | |
1144 | memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); | |
6fbcaaa0 | 1145 | } |
e7e09b49 LLL |
1146 | } else { |
1147 | if(IS_INTRA(mb_type)){ | |
5f7f9719 | 1148 | if(h->deblocking_filter) |
93cc10fa | 1149 | xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); |
53c05b1e | 1150 | |
49fb20cb | 1151 | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
c92a30bb KS |
1152 | h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); |
1153 | h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); | |
e7e09b49 | 1154 | } |
0da71265 | 1155 | |
e7e09b49 | 1156 | if(IS_INTRA4x4(mb_type)){ |
bd91fee3 | 1157 | if(simple || !s->encoding){ |
43efd19a | 1158 | if(IS_8x8DCT(mb_type)){ |
1eb96035 MN |
1159 | if(transform_bypass){ |
1160 | idct_dc_add = | |
1161 | idct_add = s->dsp.add_pixels8; | |
dae006d7 | 1162 | }else{ |
4693b031 MR |
1163 | idct_dc_add = h->h264dsp.h264_idct8_dc_add; |
1164 | idct_add = h->h264dsp.h264_idct8_add; | |
1eb96035 | 1165 | } |
43efd19a LM |
1166 | for(i=0; i<16; i+=4){ |
1167 | uint8_t * const ptr= dest_y + block_offset[i]; | |
1168 | const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; | |
41e4055b MN |
1169 | if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1170 | h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); | |
1171 | }else{ | |
ac0623b2 MN |
1172 | const int nnz = h->non_zero_count_cache[ scan8[i] ]; |
1173 | h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, | |
1174 | (h->topright_samples_available<<i)&0x4000, linesize); | |
1175 | if(nnz){ | |
1176 | if(nnz == 1 && h->mb[i*16]) | |
1177 | idct_dc_add(ptr, h->mb + i*16, linesize); | |
1178 | else | |
1179 | idct_add (ptr, h->mb + i*16, linesize); | |
1180 | } | |
41e4055b | 1181 | } |
43efd19a | 1182 | } |
1eb96035 MN |
1183 | }else{ |
1184 | if(transform_bypass){ | |
1185 | idct_dc_add = | |
1186 | idct_add = s->dsp.add_pixels4; | |
1187 | }else{ | |
4693b031 MR |
1188 | idct_dc_add = h->h264dsp.h264_idct_dc_add; |
1189 | idct_add = h->h264dsp.h264_idct_add; | |
1eb96035 | 1190 | } |
aebb5d6d MN |
1191 | for(i=0; i<16; i++){ |
1192 | uint8_t * const ptr= dest_y + block_offset[i]; | |
1193 | const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; | |
e7e09b49 | 1194 | |
aebb5d6d MN |
1195 | if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1196 | h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize); | |
1197 | }else{ | |
1198 | uint8_t *topright; | |
1199 | int nnz, tr; | |
1200 | if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ | |
1201 | const int topright_avail= (h->topright_samples_available<<i)&0x8000; | |
1202 | assert(mb_y || linesize <= block_offset[i]); | |
1203 | if(!topright_avail){ | |
1204 | tr= ptr[3 - linesize]*0x01010101; | |
1205 | topright= (uint8_t*) &tr; | |
1206 | }else | |
1207 | topright= ptr + 4 - linesize; | |
ac0623b2 | 1208 | }else |
aebb5d6d MN |
1209 | topright= NULL; |
1210 | ||
1211 | h->hpc.pred4x4[ dir ](ptr, topright, linesize); | |
1212 | nnz = h->non_zero_count_cache[ scan8[i] ]; | |
1213 | if(nnz){ | |
1214 | if(is_h264){ | |
1215 | if(nnz == 1 && h->mb[i*16]) | |
1216 | idct_dc_add(ptr, h->mb + i*16, linesize); | |
1217 | else | |
1218 | idct_add (ptr, h->mb + i*16, linesize); | |
1219 | }else | |
881b5b80 | 1220 | ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); |
aebb5d6d | 1221 | } |
ac0623b2 | 1222 | } |
41e4055b | 1223 | } |
8b82a956 | 1224 | } |
0da71265 | 1225 | } |
e7e09b49 | 1226 | }else{ |
c92a30bb | 1227 | h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); |
bd91fee3 | 1228 | if(is_h264){ |
36940eca | 1229 | if(!transform_bypass) |
93f0c0a4 | 1230 | h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]); |
36940eca | 1231 | }else |
881b5b80 | 1232 | ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); |
0da71265 | 1233 | } |
5f7f9719 | 1234 | if(h->deblocking_filter) |
93cc10fa | 1235 | xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); |
bd91fee3 | 1236 | }else if(is_h264){ |
e7e09b49 | 1237 | hl_motion(h, dest_y, dest_cb, dest_cr, |
2833fc46 LM |
1238 | s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, |
1239 | s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, | |
4693b031 | 1240 | h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); |
0da71265 | 1241 | } |
e7e09b49 LLL |
1242 | |
1243 | ||
1244 | if(!IS_INTRA4x4(mb_type)){ | |
bd91fee3 | 1245 | if(is_h264){ |
ef9d1d15 | 1246 | if(IS_INTRA16x16(mb_type)){ |
2fd1f0e0 MN |
1247 | if(transform_bypass){ |
1248 | if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ | |
0a8ca22f MN |
1249 | h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); |
1250 | }else{ | |
1251 | for(i=0; i<16; i++){ | |
1252 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) | |
1eb96035 | 1253 | s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize); |
0a8ca22f | 1254 | } |
2fd1f0e0 MN |
1255 | } |
1256 | }else{ | |
4693b031 | 1257 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); |
41e4055b | 1258 | } |
49c084a7 | 1259 | }else if(h->cbp&15){ |
2fd1f0e0 | 1260 | if(transform_bypass){ |
0a8ca22f | 1261 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
1eb96035 | 1262 | idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; |
0a8ca22f | 1263 | for(i=0; i<16; i+=di){ |
62bc966f | 1264 | if(h->non_zero_count_cache[ scan8[i] ]){ |
ef9d1d15 | 1265 | idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); |
0a8ca22f | 1266 | } |
ef9d1d15 | 1267 | } |
2fd1f0e0 MN |
1268 | }else{ |
1269 | if(IS_8x8DCT(mb_type)){ | |
4693b031 | 1270 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); |
2fd1f0e0 | 1271 | }else{ |
4693b031 | 1272 | h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); |
2fd1f0e0 MN |
1273 | } |
1274 | } | |
4704097a | 1275 | } |
e7e09b49 LLL |
1276 | }else{ |
1277 | for(i=0; i<16; i++){ | |
1278 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below | |
6867a90b | 1279 | uint8_t * const ptr= dest_y + block_offset[i]; |
881b5b80 | 1280 | ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0); |
e7e09b49 | 1281 | } |
4704097a | 1282 | } |
0da71265 MN |
1283 | } |
1284 | } | |
0da71265 | 1285 | |
49fb20cb | 1286 | if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){ |
ef9d1d15 LM |
1287 | uint8_t *dest[2] = {dest_cb, dest_cr}; |
1288 | if(transform_bypass){ | |
96465b90 MN |
1289 | if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ |
1290 | h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize); | |
1291 | h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize); | |
1292 | }else{ | |
c25ac15a | 1293 | idct_add = s->dsp.add_pixels4; |
96465b90 MN |
1294 | for(i=16; i<16+8; i++){ |
1295 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) | |
1296 | idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); | |
1297 | } | |
1298 | } | |
ef9d1d15 | 1299 | }else{ |
4691a77d AÖ |
1300 | chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); |
1301 | chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); | |
aebb5d6d | 1302 | if(is_h264){ |
4693b031 MR |
1303 | idct_add = h->h264dsp.h264_idct_add; |
1304 | idct_dc_add = h->h264dsp.h264_idct_dc_add; | |
ac0623b2 MN |
1305 | for(i=16; i<16+8; i++){ |
1306 | if(h->non_zero_count_cache[ scan8[i] ]) | |
1307 | idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); | |
1308 | else if(h->mb[i*16]) | |
1309 | idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); | |
1310 | } | |
aebb5d6d MN |
1311 | }else{ |
1312 | for(i=16; i<16+8; i++){ | |
1313 | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | |
1314 | uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; | |
881b5b80 | 1315 | ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2); |
aebb5d6d | 1316 | } |
e7e09b49 | 1317 | } |
4704097a | 1318 | } |
0da71265 MN |
1319 | } |
1320 | } | |
1321 | } | |
// clear the coefficient buffer h->mb whenever it may hold data (coded block pattern set, or intra)
c212fb0c MN |
1322 | if(h->cbp || IS_INTRA(mb_type)) |
1323 | s->dsp.clear_blocks(h->mb); | |
0da71265 MN |
1324 | } |
1325 | ||
0da71265 | 1326 | /** |
bd91fee3 AS |
1327 | * Process a macroblock; this case avoids checks for expensive uncommon cases. |
1328 | */ | |
1329 | static void hl_decode_mb_simple(H264Context *h){ | |
1330 | hl_decode_mb_internal(h, 1); | |
1331 | } | |
1332 | ||
1333 | /** | |
1334 | * Process a macroblock; this handles edge cases, such as interlacing. | |
1335 | */ | |
1336 | static void av_noinline hl_decode_mb_complex(H264Context *h){ | |
1337 | hl_decode_mb_internal(h, 0); | |
1338 | } | |
1339 | ||
903d58f6 | 1340 | void ff_h264_hl_decode_mb(H264Context *h){ |
bd91fee3 | 1341 | MpegEncContext * const s = &h->s; |
64514ee8 | 1342 | const int mb_xy= h->mb_xy; |
bd91fee3 | 1343 | const int mb_type= s->current_picture.mb_type[mb_xy]; |
49fb20cb | 1344 | int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; |
bd91fee3 | 1345 | |
bd91fee3 AS |
1346 | if (is_complex) |
1347 | hl_decode_mb_complex(h); | |
1348 | else hl_decode_mb_simple(h); | |
1349 | } | |
1350 | ||
0da71265 MN |
1351 | static int pred_weight_table(H264Context *h){ |
1352 | MpegEncContext * const s = &h->s; | |
1353 | int list, i; | |
9f2d1b4f | 1354 | int luma_def, chroma_def; |
115329f1 | 1355 | |
9f2d1b4f LM |
1356 | h->use_weight= 0; |
1357 | h->use_weight_chroma= 0; | |
0da71265 MN |
1358 | h->luma_log2_weight_denom= get_ue_golomb(&s->gb); |
1359 | h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); | |
9f2d1b4f LM |
1360 | luma_def = 1<<h->luma_log2_weight_denom; |
1361 | chroma_def = 1<<h->chroma_log2_weight_denom; | |
0da71265 MN |
1362 | |
1363 | for(list=0; list<2; list++){ | |
cb99c652 GB |
1364 | h->luma_weight_flag[list] = 0; |
1365 | h->chroma_weight_flag[list] = 0; | |
0da71265 MN |
1366 | for(i=0; i<h->ref_count[list]; i++){ |
1367 | int luma_weight_flag, chroma_weight_flag; | |
115329f1 | 1368 | |
0da71265 MN |
1369 | luma_weight_flag= get_bits1(&s->gb); |
1370 | if(luma_weight_flag){ | |
3d9137c8 MN |
1371 | h->luma_weight[i][list][0]= get_se_golomb(&s->gb); |
1372 | h->luma_weight[i][list][1]= get_se_golomb(&s->gb); | |
1373 | if( h->luma_weight[i][list][0] != luma_def | |
1374 | || h->luma_weight[i][list][1] != 0) { | |
9f2d1b4f | 1375 | h->use_weight= 1; |
cb99c652 GB |
1376 | h->luma_weight_flag[list]= 1; |
1377 | } | |
9f2d1b4f | 1378 | }else{ |
3d9137c8 MN |
1379 | h->luma_weight[i][list][0]= luma_def; |
1380 | h->luma_weight[i][list][1]= 0; | |
0da71265 MN |
1381 | } |
1382 | ||
0af6967e | 1383 | if(CHROMA){ |
fef744d4 MN |
1384 | chroma_weight_flag= get_bits1(&s->gb); |
1385 | if(chroma_weight_flag){ | |
1386 | int j; | |
1387 | for(j=0; j<2; j++){ | |
3d9137c8 MN |
1388 | h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb); |
1389 | h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb); | |
1390 | if( h->chroma_weight[i][list][j][0] != chroma_def | |
1391 | || h->chroma_weight[i][list][j][1] != 0) { | |
fef744d4 | 1392 | h->use_weight_chroma= 1; |
cb99c652 GB |
1393 | h->chroma_weight_flag[list]= 1; |
1394 | } | |
fef744d4 MN |
1395 | } |
1396 | }else{ | |
1397 | int j; | |
1398 | for(j=0; j<2; j++){ | |
3d9137c8 MN |
1399 | h->chroma_weight[i][list][j][0]= chroma_def; |
1400 | h->chroma_weight[i][list][j][1]= 0; | |
fef744d4 | 1401 | } |
0da71265 MN |
1402 | } |
1403 | } | |
1404 | } | |
9f5c1037 | 1405 | if(h->slice_type_nos != FF_B_TYPE) break; |
0da71265 | 1406 | } |
9f2d1b4f | 1407 | h->use_weight= h->use_weight || h->use_weight_chroma; |
0da71265 MN |
1408 | return 0; |
1409 | } | |
1410 | ||
9f2d1b4f LM |
1411 | static void implicit_weight_table(H264Context *h){ |
1412 | MpegEncContext * const s = &h->s; | |
cb99c652 | 1413 | int ref0, ref1, i; |
9f2d1b4f LM |
1414 | int cur_poc = s->current_picture_ptr->poc; |
1415 | ||
ce09f927 GB |
1416 | for (i = 0; i < 2; i++) { |
1417 | h->luma_weight_flag[i] = 0; | |
1418 | h->chroma_weight_flag[i] = 0; | |
1419 | } | |
1420 | ||
9f2d1b4f LM |
1421 | if( h->ref_count[0] == 1 && h->ref_count[1] == 1 |
1422 | && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){ | |
1423 | h->use_weight= 0; | |
1424 | h->use_weight_chroma= 0; | |
1425 | return; | |
1426 | } | |
1427 | ||
1428 | h->use_weight= 2; | |
1429 | h->use_weight_chroma= 2; | |
1430 | h->luma_log2_weight_denom= 5; | |
1431 | h->chroma_log2_weight_denom= 5; | |
1432 | ||
9f2d1b4f LM |
1433 | for(ref0=0; ref0 < h->ref_count[0]; ref0++){ |
1434 | int poc0 = h->ref_list[0][ref0].poc; | |
1435 | for(ref1=0; ref1 < h->ref_count[1]; ref1++){ | |
738386a5 | 1436 | int poc1 = h->ref_list[1][ref1].poc; |
f66e4f5f | 1437 | int td = av_clip(poc1 - poc0, -128, 127); |
72f86ec0 | 1438 | h->implicit_weight[ref0][ref1] = 32; |
9f2d1b4f | 1439 | if(td){ |
f66e4f5f | 1440 | int tb = av_clip(cur_poc - poc0, -128, 127); |
c26abfa5 | 1441 | int tx = (16384 + (FFABS(td) >> 1)) / td; |
72f86ec0 MN |
1442 | int dist_scale_factor = (tb*tx + 32) >> 8; |
1443 | if(dist_scale_factor >= -64 && dist_scale_factor <= 128) | |
9f2d1b4f | 1444 | h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor; |
72f86ec0 | 1445 | } |
9f2d1b4f LM |
1446 | } |
1447 | } | |
1448 | } | |
1449 | ||
8fd57a66 | 1450 | /** |
5175b937 | 1451 | * instantaneous decoder refresh. |
0da71265 MN |
1452 | */ |
1453 | static void idr(H264Context *h){ | |
ea6f00c4 | 1454 | ff_h264_remove_all_refs(h); |
a149c1a5 | 1455 | h->prev_frame_num= 0; |
80f8e035 MN |
1456 | h->prev_frame_num_offset= 0; |
1457 | h->prev_poc_msb= | |
1458 | h->prev_poc_lsb= 0; | |
0da71265 MN |
1459 | } |
1460 | ||
7c33ad19 LM |
1461 | /* forget old pics after a seek */ |
1462 | static void flush_dpb(AVCodecContext *avctx){ | |
1463 | H264Context *h= avctx->priv_data; | |
1464 | int i; | |
64b9d48f | 1465 | for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) { |
285b570f LM |
1466 | if(h->delayed_pic[i]) |
1467 | h->delayed_pic[i]->reference= 0; | |
7c33ad19 | 1468 | h->delayed_pic[i]= NULL; |
285b570f | 1469 | } |
df8a7dff | 1470 | h->outputed_poc= INT_MIN; |
b19d493f | 1471 | h->prev_interlaced_frame = 1; |
7c33ad19 | 1472 | idr(h); |
ca159196 MR |
1473 | if(h->s.current_picture_ptr) |
1474 | h->s.current_picture_ptr->reference= 0; | |
12d96de3 | 1475 | h->s.first_field= 0; |
9c095463 | 1476 | ff_h264_reset_sei(h); |
e240f898 | 1477 | ff_mpeg_flush(avctx); |
7c33ad19 LM |
1478 | } |
1479 | ||
0da71265 MN |
1480 | static int init_poc(H264Context *h){ |
1481 | MpegEncContext * const s = &h->s; | |
1482 | const int max_frame_num= 1<<h->sps.log2_max_frame_num; | |
1483 | int field_poc[2]; | |
357282c6 | 1484 | Picture *cur = s->current_picture_ptr; |
0da71265 | 1485 | |
b78a6baa | 1486 | h->frame_num_offset= h->prev_frame_num_offset; |
5710b371 | 1487 | if(h->frame_num < h->prev_frame_num) |
b78a6baa | 1488 | h->frame_num_offset += max_frame_num; |
0da71265 MN |
1489 | |
1490 | if(h->sps.poc_type==0){ | |
1491 | const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb; | |
1492 | ||
1493 | if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2) | |
1494 | h->poc_msb = h->prev_poc_msb + max_poc_lsb; | |
1495 | else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2) | |
1496 | h->poc_msb = h->prev_poc_msb - max_poc_lsb; | |
1497 | else | |
1498 | h->poc_msb = h->prev_poc_msb; | |
1499 | //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb); | |
115329f1 | 1500 | field_poc[0] = |
0da71265 | 1501 | field_poc[1] = h->poc_msb + h->poc_lsb; |
115329f1 | 1502 | if(s->picture_structure == PICT_FRAME) |
0da71265 MN |
1503 | field_poc[1] += h->delta_poc_bottom; |
1504 | }else if(h->sps.poc_type==1){ | |
1505 | int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; | |
1506 | int i; | |
1507 | ||
1508 | if(h->sps.poc_cycle_length != 0) | |
1509 | abs_frame_num = h->frame_num_offset + h->frame_num; | |
1510 | else | |
1511 | abs_frame_num = 0; | |
1512 | ||
1513 | if(h->nal_ref_idc==0 && abs_frame_num > 0) | |
1514 | abs_frame_num--; | |
115329f1 | 1515 | |
0da71265 MN |
1516 | expected_delta_per_poc_cycle = 0; |
1517 | for(i=0; i < h->sps.poc_cycle_length; i++) | |
1518 | expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse | |
1519 | ||
1520 | if(abs_frame_num > 0){ | |
1521 | int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; | |
1522 | int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; | |
1523 | ||
1524 | expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; | |
1525 | for(i = 0; i <= frame_num_in_poc_cycle; i++) | |
1526 | expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ]; | |
1527 | } else | |
1528 | expectedpoc = 0; | |
1529 | ||
115329f1 | 1530 | if(h->nal_ref_idc == 0) |
0da71265 | 1531 | expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; |
115329f1 | 1532 | |
0da71265 MN |
1533 | field_poc[0] = expectedpoc + h->delta_poc[0]; |
1534 | field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; | |
1535 | ||
1536 | if(s->picture_structure == PICT_FRAME) | |
1537 | field_poc[1] += h->delta_poc[1]; | |
1538 | }else{ | |
b78a6baa | 1539 | int poc= 2*(h->frame_num_offset + h->frame_num); |
5710b371 | 1540 | |
b78a6baa MN |
1541 | if(!h->nal_ref_idc) |
1542 | poc--; | |
5710b371 | 1543 | |
0da71265 MN |
1544 | field_poc[0]= poc; |
1545 | field_poc[1]= poc; | |
1546 | } | |
115329f1 | 1547 | |
357282c6 | 1548 | if(s->picture_structure != PICT_BOTTOM_FIELD) |
0da71265 | 1549 | s->current_picture_ptr->field_poc[0]= field_poc[0]; |
357282c6 | 1550 | if(s->picture_structure != PICT_TOP_FIELD) |
0da71265 | 1551 | s->current_picture_ptr->field_poc[1]= field_poc[1]; |
357282c6 | 1552 | cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]); |
0da71265 MN |
1553 | |
1554 | return 0; | |
1555 | } | |
1556 | ||
b41c1db3 AÖ |
1557 | |
1558 | /** | |
1559 | * initialize scan tables | |
1560 | */ | |
1561 | static void init_scan_tables(H264Context *h){ | |
b41c1db3 | 1562 | int i; |
4693b031 | 1563 | if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly |
b41c1db3 AÖ |
1564 | memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); |
1565 | memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); | |
1566 | }else{ | |
1567 | for(i=0; i<16; i++){ | |
1568 | #define T(x) (x>>2) | ((x<<2) & 0xF) | |
1569 | h->zigzag_scan[i] = T(zigzag_scan[i]); | |
1570 | h-> field_scan[i] = T( field_scan[i]); | |
1571 | #undef T | |
1572 | } | |
1573 | } | |
4693b031 | 1574 | if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){ |
45beb850 | 1575 | memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t)); |
b41c1db3 AÖ |
1576 | memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); |
1577 | memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t)); | |
1578 | memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t)); | |
1579 | }else{ | |
1580 | for(i=0; i<64; i++){ | |
1581 | #define T(x) (x>>3) | ((x&7)<<3) | |
45beb850 | 1582 | h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); |
b41c1db3 AÖ |
1583 | h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); |
1584 | h->field_scan8x8[i] = T(field_scan8x8[i]); | |
1585 | h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); | |
1586 | #undef T | |
1587 | } | |
1588 | } | |
1589 | if(h->sps.transform_bypass){ //FIXME same ugly | |
1590 | h->zigzag_scan_q0 = zigzag_scan; | |
45beb850 | 1591 | h->zigzag_scan8x8_q0 = ff_zigzag_direct; |
b41c1db3 AÖ |
1592 | h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; |
1593 | h->field_scan_q0 = field_scan; | |
1594 | h->field_scan8x8_q0 = field_scan8x8; | |
1595 | h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; | |
1596 | }else{ | |
1597 | h->zigzag_scan_q0 = h->zigzag_scan; | |
1598 | h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; | |
1599 | h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; | |
1600 | h->field_scan_q0 = h->field_scan; | |
1601 | h->field_scan8x8_q0 = h->field_scan8x8; | |
1602 | h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; | |
1603 | } | |
1604 | } | |
afebe2f7 | 1605 | |
256299d3 MN |
1606 | static void field_end(H264Context *h){ |
1607 | MpegEncContext * const s = &h->s; | |
1608 | AVCodecContext * const avctx= s->avctx; | |
1609 | s->mb_y= 0; | |
1610 | ||
1611 | s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; | |
1612 | s->current_picture_ptr->pict_type= s->pict_type; | |
1613 | ||
1614 | if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) | |
1615 | ff_vdpau_h264_set_reference_frames(s); | |
1616 | ||
1617 | if(!s->dropable) { | |
ea6f00c4 | 1618 | ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
256299d3 MN |
1619 | h->prev_poc_msb= h->poc_msb; |
1620 | h->prev_poc_lsb= h->poc_lsb; | |
1621 | } | |
1622 | h->prev_frame_num_offset= h->frame_num_offset; | |
1623 | h->prev_frame_num= h->frame_num; | |
1624 | ||
1625 | if (avctx->hwaccel) { | |
1626 | if (avctx->hwaccel->end_frame(avctx) < 0) | |
1627 | av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n"); | |
1628 | } | |
1629 | ||
1630 | if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) | |
1631 | ff_vdpau_h264_picture_complete(s); | |
1632 | ||
1633 | /* | |
1634 | * FIXME: Error handling code does not seem to support interlaced | |
1635 | * when slices span multiple rows | |
1636 | * The ff_er_add_slice calls don't work right for bottom | |
1637 | * fields; they cause massive erroneous error concealing | |
1638 | * Error marking covers both fields (top and bottom). | |
1639 | * This causes a mismatched s->error_count | |
1640 | * and a bad error table. Further, the error count goes to | |
1641 | * INT_MAX when called for bottom field, because mb_y is | |
1642 | * past end by one (callers fault) and resync_mb_y != 0 | |
1643 | * causes problems for the first MB line, too. | |
1644 | */ | |
1645 | if (!FIELD_PICTURE) | |
1646 | ff_er_frame_end(s); | |
1647 | ||
1648 | MPV_frame_end(s); | |
d225a1e2 MN |
1649 | |
1650 | h->current_slice=0; | |
256299d3 MN |
1651 | } |
1652 | ||
afebe2f7 AÖ |
1653 | /** |
1654 | * Replicates H264 "master" context to thread contexts. | |
1655 | */ | |
1656 | static void clone_slice(H264Context *dst, H264Context *src) | |
1657 | { | |
1658 | memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); | |
1659 | dst->s.current_picture_ptr = src->s.current_picture_ptr; | |
1660 | dst->s.current_picture = src->s.current_picture; | |
1661 | dst->s.linesize = src->s.linesize; | |
1662 | dst->s.uvlinesize = src->s.uvlinesize; | |
12d96de3 | 1663 | dst->s.first_field = src->s.first_field; |
afebe2f7 AÖ |
1664 | |
1665 | dst->prev_poc_msb = src->prev_poc_msb; | |
1666 | dst->prev_poc_lsb = src->prev_poc_lsb; | |
1667 | dst->prev_frame_num_offset = src->prev_frame_num_offset; | |
1668 | dst->prev_frame_num = src->prev_frame_num; | |
1669 | dst->short_ref_count = src->short_ref_count; | |
1670 | ||
1671 | memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); | |
1672 | memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); | |
1673 | memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); | |
1674 | memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list)); | |
50c21814 AÖ |
1675 | |
1676 | memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); | |
1677 | memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); | |
afebe2f7 AÖ |
1678 | } |
1679 | ||
0da71265 MN |
1680 | /** |
1681 | * decodes a slice header. | |
9c852bcf | 1682 | * This will also call MPV_common_init() and frame_start() as needed. |
afebe2f7 AÖ |
1683 | * |
1684 | * @param h h264context | |
1685 | * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding) |
1686 | * | |
d9526386 | 1687 | * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded |
0da71265 | 1688 | */ |
afebe2f7 | 1689 | static int decode_slice_header(H264Context *h, H264Context *h0){ |
0da71265 | 1690 | MpegEncContext * const s = &h->s; |
12d96de3 | 1691 | MpegEncContext * const s0 = &h0->s; |
88e7a4d1 | 1692 | unsigned int first_mb_in_slice; |
ac658be5 | 1693 | unsigned int pps_id; |
0da71265 | 1694 | int num_ref_idx_active_override_flag; |
41f5c62f | 1695 | unsigned int slice_type, tmp, i, j; |
0bf79634 | 1696 | int default_ref_list_done = 0; |
12d96de3 | 1697 | int last_pic_structure; |
0da71265 | 1698 | |
2f944356 | 1699 | s->dropable= h->nal_ref_idc == 0; |
0da71265 | 1700 | |
cf653d08 JD |
1701 | if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){ |
1702 | s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; | |
1703 | s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; | |
1704 | }else{ | |
1705 | s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; | |
1706 | s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; | |
1707 | } | |
1708 | ||
0da71265 MN |
1709 | first_mb_in_slice= get_ue_golomb(&s->gb); |
1710 | ||
d225a1e2 MN |
1711 | if(first_mb_in_slice == 0){ //FIXME better field boundary detection |
1712 | if(h0->current_slice && FIELD_PICTURE){ | |
1713 | field_end(h); | |
1714 | } | |
1715 | ||
afebe2f7 | 1716 | h0->current_slice = 0; |
12d96de3 | 1717 | if (!s0->first_field) |
f6e3c460 | 1718 | s->current_picture_ptr= NULL; |
66a4b2c1 MN |
1719 | } |
1720 | ||
9963b332 | 1721 | slice_type= get_ue_golomb_31(&s->gb); |
0bf79634 | 1722 | if(slice_type > 9){ |
9b879566 | 1723 | av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); |
5175b937 | 1724 | return -1; |
0da71265 | 1725 | } |
0bf79634 LLL |
1726 | if(slice_type > 4){ |
1727 | slice_type -= 5; | |
0da71265 MN |
1728 | h->slice_type_fixed=1; |
1729 | }else | |
1730 | h->slice_type_fixed=0; | |
115329f1 | 1731 | |
ee2a957f | 1732 | slice_type= golomb_to_pict_type[ slice_type ]; |
9701840b | 1733 | if (slice_type == FF_I_TYPE |
afebe2f7 | 1734 | || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) { |
0bf79634 LLL |
1735 | default_ref_list_done = 1; |
1736 | } | |
1737 | h->slice_type= slice_type; | |
e3e6f18f | 1738 | h->slice_type_nos= slice_type & 3; |
0bf79634 | 1739 | |
1412060e | 1740 | s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though |
115329f1 | 1741 | |
0da71265 | 1742 | pps_id= get_ue_golomb(&s->gb); |
ac658be5 | 1743 | if(pps_id>=MAX_PPS_COUNT){ |
9b879566 | 1744 | av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); |
0da71265 MN |
1745 | return -1; |
1746 | } | |
afebe2f7 | 1747 | if(!h0->pps_buffers[pps_id]) { |
a0f80050 | 1748 | av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id); |
8b92b792 MN |
1749 | return -1; |
1750 | } | |
afebe2f7 | 1751 | h->pps= *h0->pps_buffers[pps_id]; |
8b92b792 | 1752 | |
afebe2f7 | 1753 | if(!h0->sps_buffers[h->pps.sps_id]) { |
a0f80050 | 1754 | av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id); |
8b92b792 MN |
1755 | return -1; |
1756 | } | |
afebe2f7 | 1757 | h->sps = *h0->sps_buffers[h->pps.sps_id]; |
239ea04c | 1758 | |
50c21814 | 1759 | if(h == h0 && h->dequant_coeff_pps != pps_id){ |
50eaa857 | 1760 | h->dequant_coeff_pps = pps_id; |
239ea04c LM |
1761 | init_dequant_tables(h); |
1762 | } | |
115329f1 | 1763 | |
0da71265 | 1764 | s->mb_width= h->sps.mb_width; |
6867a90b | 1765 | s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); |
115329f1 | 1766 | |
bf4665ee | 1767 | h->b_stride= s->mb_width*4; |
0da71265 | 1768 | |
faf3dfb9 | 1769 | s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); |
0da71265 | 1770 | if(h->sps.frame_mbs_only_flag) |
faf3dfb9 | 1771 | s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); |
0da71265 | 1772 | else |
faf3dfb9 | 1773 | s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3); |
115329f1 DB |
1774 | |
1775 | if (s->context_initialized | |
5388f0b4 JK |
1776 | && ( s->width != s->avctx->width || s->height != s->avctx->height |
1777 | || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) { | |
afebe2f7 AÖ |
1778 | if(h != h0) |
1779 | return -1; // width / height changed during parallelized decoding | |
0da71265 | 1780 | free_tables(h); |
ff7f75e1 | 1781 | flush_dpb(s->avctx); |
0da71265 MN |
1782 | MPV_common_end(s); |
1783 | } | |
1784 | if (!s->context_initialized) { | |
afebe2f7 AÖ |
1785 | if(h != h0) |
1786 | return -1; // we cant (re-)initialize context during parallel decoding | |
f3bdc3da RD |
1787 | |
1788 | avcodec_set_dimensions(s->avctx, s->width, s->height); | |
1789 | s->avctx->sample_aspect_ratio= h->sps.sar; | |
1790 | if(!s->avctx->sample_aspect_ratio.den) | |
1791 | s->avctx->sample_aspect_ratio.den = 1; | |
1792 | ||
c4dffe7e DC |
1793 | if(h->sps.video_signal_type_present_flag){ |
1794 | s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; | |
1795 | if(h->sps.colour_description_present_flag){ | |
1796 | s->avctx->color_primaries = h->sps.color_primaries; | |
1797 | s->avctx->color_trc = h->sps.color_trc; | |
1798 | s->avctx->colorspace = h->sps.colorspace; | |
1799 | } | |
1800 | } | |
1801 | ||
f3bdc3da | 1802 | if(h->sps.timing_info_present_flag){ |
3102d180 | 1803 | int64_t den= h->sps.time_scale; |
055a6aa7 | 1804 | if(h->x264_build < 44U) |
3102d180 | 1805 | den *= 2; |
f3bdc3da | 1806 | av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, |
3102d180 | 1807 | h->sps.num_units_in_tick, den, 1<<30); |
f3bdc3da RD |
1808 | } |
1809 | s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts); | |
1810 | s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt); | |
1811 | ||
0da71265 MN |
1812 | if (MPV_common_init(s) < 0) |
1813 | return -1; | |
12d96de3 | 1814 | s->first_field = 0; |
b19d493f | 1815 | h->prev_interlaced_frame = 1; |
115329f1 | 1816 | |
b41c1db3 | 1817 | init_scan_tables(h); |
903d58f6 | 1818 | ff_h264_alloc_tables(h); |
0da71265 | 1819 | |
afebe2f7 AÖ |
1820 | for(i = 1; i < s->avctx->thread_count; i++) { |
1821 | H264Context *c; | |
1822 | c = h->thread_context[i] = av_malloc(sizeof(H264Context)); | |
79db7ac6 | 1823 | memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); |
afebe2f7 AÖ |
1824 | memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); |
1825 | c->sps = h->sps; | |
1826 | c->pps = h->pps; | |
1827 | init_scan_tables(c); | |
145061a1 | 1828 | clone_tables(c, h, i); |
afebe2f7 AÖ |
1829 | } |
1830 | ||
1831 | for(i = 0; i < s->avctx->thread_count; i++) | |
1832 | if(context_init(h->thread_context[i]) < 0) | |
1833 | return -1; | |
0da71265 MN |
1834 | } |
1835 | ||
0da71265 MN |
1836 | h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); |
1837 | ||
5d18eaad | 1838 | h->mb_mbaff = 0; |
6ba71fc4 | 1839 | h->mb_aff_frame = 0; |
12d96de3 | 1840 | last_pic_structure = s0->picture_structure; |
0da71265 MN |
1841 | if(h->sps.frame_mbs_only_flag){ |
1842 | s->picture_structure= PICT_FRAME; | |
1843 | }else{ | |
6ba71fc4 | 1844 | if(get_bits1(&s->gb)) { //field_pic_flag |
0da71265 | 1845 | s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag |
6ba71fc4 | 1846 | } else { |
0da71265 | 1847 | s->picture_structure= PICT_FRAME; |
6ba71fc4 | 1848 | h->mb_aff_frame = h->sps.mb_aff; |
6867a90b | 1849 | } |
0da71265 | 1850 | } |
44e9dcf1 | 1851 | h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; |
2ddcf84b JD |
1852 | |
1853 | if(h0->current_slice == 0){ | |
26b86e47 MN |
1854 | while(h->frame_num != h->prev_frame_num && |
1855 | h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){ | |
1856 | av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num); | |
903d58f6 | 1857 | if (ff_h264_frame_start(h) < 0) |
66e6038c | 1858 | return -1; |
26b86e47 MN |
1859 | h->prev_frame_num++; |
1860 | h->prev_frame_num %= 1<<h->sps.log2_max_frame_num; | |
1861 | s->current_picture_ptr->frame_num= h->prev_frame_num; | |
ea6f00c4 | 1862 | ff_h264_execute_ref_pic_marking(h, NULL, 0); |
26b86e47 MN |
1863 | } |
1864 | ||
12d96de3 JD |
1865 | /* See if we have a decoded first field looking for a pair... */ |
1866 | if (s0->first_field) { | |
1867 | assert(s0->current_picture_ptr); | |
1868 | assert(s0->current_picture_ptr->data[0]); | |
1869 | assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF); | |
1870 | ||
1871 | /* figure out if we have a complementary field pair */ | |
1872 | if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { | |
1873 | /* | |
1874 | * Previous field is unmatched. Don't display it, but let it | |
1875 | * remain for reference if marked as such. | |
1876 | */ | |
1877 | s0->current_picture_ptr = NULL; | |
1878 | s0->first_field = FIELD_PICTURE; | |
1879 | ||
1880 | } else { | |
1881 | if (h->nal_ref_idc && | |
1882 | s0->current_picture_ptr->reference && | |
1883 | s0->current_picture_ptr->frame_num != h->frame_num) { | |
1884 | /* | |
1885 | * This and previous field were reference, but had | |
1886 | * different frame_nums. Consider this field first in | |
1887 | * pair. Throw away previous field except for reference | |
1888 | * purposes. | |
1889 | */ | |
1890 | s0->first_field = 1; | |
1891 | s0->current_picture_ptr = NULL; | |
1892 | ||
1893 | } else { | |
1894 | /* Second field in complementary pair */ | |
1895 | s0->first_field = 0; | |
1896 | } | |
1897 | } | |
1898 | ||
1899 | } else { | |
1900 | /* Frame or first field in a potentially complementary pair */ | |
1901 | assert(!s0->current_picture_ptr); | |
1902 | s0->first_field = FIELD_PICTURE; | |
1903 | } | |
1904 | ||
903d58f6 | 1905 | if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) { |
12d96de3 | 1906 | s0->first_field = 0; |
2ddcf84b | 1907 | return -1; |
12d96de3 | 1908 | } |
2ddcf84b JD |
1909 | } |
1910 | if(h != h0) | |
1911 | clone_slice(h, h0); | |
1912 | ||
1913 | s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup | |
1914 | ||
88e7a4d1 | 1915 | assert(s->mb_num == s->mb_width * s->mb_height); |
f3e53d9f | 1916 | if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num || |
88e7a4d1 MN |
1917 | first_mb_in_slice >= s->mb_num){ |
1918 | av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); | |
6b53b87e MN |
1919 | return -1; |
1920 | } | |
88e7a4d1 | 1921 | s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; |
f3e53d9f JD |
1922 | s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE; |
1923 | if (s->picture_structure == PICT_BOTTOM_FIELD) | |
1924 | s->resync_mb_y = s->mb_y = s->mb_y + 1; | |
88e7a4d1 | 1925 | assert(s->mb_y < s->mb_height); |
115329f1 | 1926 | |
0da71265 MN |
1927 | if(s->picture_structure==PICT_FRAME){ |
1928 | h->curr_pic_num= h->frame_num; | |
1929 | h->max_pic_num= 1<< h->sps.log2_max_frame_num; | |
1930 | }else{ | |
f57e2af6 | 1931 | h->curr_pic_num= 2*h->frame_num + 1; |
0da71265 MN |
1932 | h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1); |
1933 | } | |
115329f1 | 1934 | |
0da71265 | 1935 | if(h->nal_unit_type == NAL_IDR_SLICE){ |
1df1df0b | 1936 | get_ue_golomb(&s->gb); /* idr_pic_id */ |
0da71265 | 1937 | } |
115329f1 | 1938 | |
0da71265 MN |
1939 | if(h->sps.poc_type==0){ |
1940 | h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb); | |
115329f1 | 1941 | |
0da71265 MN |
1942 | if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){ |
1943 | h->delta_poc_bottom= get_se_golomb(&s->gb); | |
1944 | } | |
1945 | } | |
115329f1 | 1946 | |
0da71265 MN |
1947 | if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){ |
1948 | h->delta_poc[0]= get_se_golomb(&s->gb); | |
115329f1 | 1949 | |
0da71265 MN |
1950 | if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME) |
1951 | h->delta_poc[1]= get_se_golomb(&s->gb); | |
1952 | } | |
115329f1 | 1953 | |
0da71265 | 1954 | init_poc(h); |
115329f1 | 1955 | |
0da71265 MN |
1956 | if(h->pps.redundant_pic_cnt_present){ |
1957 | h->redundant_pic_count= get_ue_golomb(&s->gb); | |
1958 | } | |
1959 | ||
1412060e | 1960 | //set defaults, might be overridden a few lines later |
0da71265 MN |
1961 | h->ref_count[0]= h->pps.ref_count[0]; |
1962 | h->ref_count[1]= h->pps.ref_count[1]; | |
1963 | ||
e3e6f18f | 1964 | if(h->slice_type_nos != FF_I_TYPE){ |
9f5c1037 | 1965 | if(h->slice_type_nos == FF_B_TYPE){ |
0da71265 MN |
1966 | h->direct_spatial_mv_pred= get_bits1(&s->gb); |
1967 | } | |
1968 | num_ref_idx_active_override_flag= get_bits1(&s->gb); | |
115329f1 | 1969 | |
0da71265 MN |
1970 | if(num_ref_idx_active_override_flag){ |
1971 | h->ref_count[0]= get_ue_golomb(&s->gb) + 1; | |
9f5c1037 | 1972 | if(h->slice_type_nos==FF_B_TYPE) |
0da71265 MN |
1973 | h->ref_count[1]= get_ue_golomb(&s->gb) + 1; |
1974 | ||
187696fa | 1975 | if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){ |
9b879566 | 1976 | av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); |
88e7a4d1 | 1977 | h->ref_count[0]= h->ref_count[1]= 1; |
0da71265 MN |
1978 | return -1; |
1979 | } | |
1980 | } | |
9f5c1037 | 1981 | if(h->slice_type_nos == FF_B_TYPE) |
187696fa MN |
1982 | h->list_count= 2; |
1983 | else | |
1984 | h->list_count= 1; | |
1985 | }else | |
1986 | h->list_count= 0; | |
0da71265 | 1987 | |
0bf79634 | 1988 | if(!default_ref_list_done){ |
ea6f00c4 | 1989 | ff_h264_fill_default_ref_list(h); |
0da71265 MN |
1990 | } |
1991 | ||
ea6f00c4 | 1992 | if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0) |
806bb93f | 1993 | return -1; |
0da71265 | 1994 | |
07dff5c7 MN |
1995 | if(h->slice_type_nos!=FF_I_TYPE){ |
1996 | s->last_picture_ptr= &h->ref_list[0][0]; | |
8d2fc163 | 1997 | ff_copy_picture(&s->last_picture, s->last_picture_ptr); |
07dff5c7 MN |
1998 | } |
1999 | if(h->slice_type_nos==FF_B_TYPE){ | |
2000 | s->next_picture_ptr= &h->ref_list[1][0]; | |
8d2fc163 | 2001 | ff_copy_picture(&s->next_picture, s->next_picture_ptr); |
07dff5c7 MN |
2002 | } |
2003 | ||
932f396f | 2004 | if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE ) |
9f5c1037 | 2005 | || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) ) |
0da71265 | 2006 | pred_weight_table(h); |
1a29c6a0 | 2007 | else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){ |
9f2d1b4f | 2008 | implicit_weight_table(h); |
1a29c6a0 | 2009 | }else { |
9f2d1b4f | 2010 | h->use_weight = 0; |
cb99c652 GB |
2011 | for (i = 0; i < 2; i++) { |
2012 | h->luma_weight_flag[i] = 0; | |
2013 | h->chroma_weight_flag[i] = 0; | |
2014 | } | |
2015 | } | |
115329f1 | 2016 | |
2ddcf84b | 2017 | if(h->nal_ref_idc) |
ea6f00c4 | 2018 | ff_h264_decode_ref_pic_marking(h0, &s->gb); |
0da71265 | 2019 | |
5d18eaad | 2020 | if(FRAME_MBAFF) |
ea6f00c4 | 2021 | ff_h264_fill_mbaff_ref_list(h); |
5d18eaad | 2022 | |
8f56e219 | 2023 | if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred) |
943f69a6 MN |
2024 | ff_h264_direct_dist_scale_factor(h); |
2025 | ff_h264_direct_ref_list_init(h); | |
8f56e219 | 2026 | |
e3e6f18f | 2027 | if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){ |
9963b332 | 2028 | tmp = get_ue_golomb_31(&s->gb); |
88e7a4d1 MN |
2029 | if(tmp > 2){ |
2030 | av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); | |
2031 | return -1; | |
2032 | } | |
2033 | h->cabac_init_idc= tmp; | |
2034 | } | |
e5017ab8 LA |
2035 | |
2036 | h->last_qscale_diff = 0; | |
88e7a4d1 MN |
2037 | tmp = h->pps.init_qp + get_se_golomb(&s->gb); |
2038 | if(tmp>51){ | |
2039 | av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); | |
3ebc7e04 MN |
2040 | return -1; |
2041 | } | |
88e7a4d1 | 2042 | s->qscale= tmp; |
4691a77d AÖ |
2043 | h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); |
2044 | h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); | |
0da71265 | 2045 | //FIXME qscale / qp ... stuff |
9701840b | 2046 | if(h->slice_type == FF_SP_TYPE){ |
1df1df0b | 2047 | get_bits1(&s->gb); /* sp_for_switch_flag */ |
0da71265 | 2048 | } |
9701840b | 2049 | if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){ |
1df1df0b | 2050 | get_se_golomb(&s->gb); /* slice_qs_delta */ |
0da71265 MN |
2051 | } |
2052 | ||
53c05b1e | 2053 | h->deblocking_filter = 1; |
0c32e19d MN |
2054 | h->slice_alpha_c0_offset = 52; |
2055 | h->slice_beta_offset = 52; | |
0da71265 | 2056 | if( h->pps.deblocking_filter_parameters_present ) { |
9963b332 | 2057 | tmp= get_ue_golomb_31(&s->gb); |
88e7a4d1 MN |
2058 | if(tmp > 2){ |
2059 | av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp); | |
2060 | return -1; | |
2061 | } | |
2062 | h->deblocking_filter= tmp; | |
115329f1 | 2063 | if(h->deblocking_filter < 2) |
53c05b1e MN |
2064 | h->deblocking_filter^= 1; // 1<->0 |
2065 | ||
2066 | if( h->deblocking_filter ) { | |
0c32e19d MN |
2067 | h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1; |
2068 | h->slice_beta_offset += get_se_golomb(&s->gb) << 1; | |
2069 | if( h->slice_alpha_c0_offset > 104U | |
2070 | || h->slice_beta_offset > 104U){ | |
2071 | av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset); | |
2072 | return -1; | |
2073 | } | |
0da71265 | 2074 | } |
980a82b7 | 2075 | } |
afebe2f7 | 2076 | |
61858a76 | 2077 | if( s->avctx->skip_loop_filter >= AVDISCARD_ALL |
4b30289e | 2078 | ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE) |
9f5c1037 | 2079 | ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE) |
61858a76 RD |
2080 | ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) |
2081 | h->deblocking_filter= 0; | |
2082 | ||
afebe2f7 | 2083 | if(h->deblocking_filter == 1 && h0->max_contexts > 1) { |
ec970c21 AÖ |
2084 | if(s->avctx->flags2 & CODEC_FLAG2_FAST) { |
2085 | /* Cheat slightly for speed: | |
5d81d641 | 2086 | Do not bother to deblock across slices. */ |
ec970c21 AÖ |
2087 | h->deblocking_filter = 2; |
2088 | } else { | |
7ae94d52 AÖ |
2089 | h0->max_contexts = 1; |
2090 | if(!h0->single_decode_warning) { | |
2091 | av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); | |
2092 | h0->single_decode_warning = 1; | |
2093 | } | |
2094 | if(h != h0) | |
2095 | return 1; // deblocking switched inside frame | |
ec970c21 | 2096 | } |
afebe2f7 | 2097 | } |
0c32e19d | 2098 | h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]); |
afebe2f7 | 2099 | |
0da71265 MN |
2100 | #if 0 //FMO |
2101 | if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) | |
2102 | slice_group_change_cycle= get_bits(&s->gb, ?); | |
2103 | #endif | |
2104 | ||
afebe2f7 AÖ |
2105 | h0->last_slice_type = slice_type; |
2106 | h->slice_num = ++h0->current_slice; | |
b735aeea MN |
2107 | if(h->slice_num >= MAX_SLICES){ |
2108 | av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n"); | |
2109 | } | |
5175b937 | 2110 | |
c32867b5 | 2111 | for(j=0; j<2; j++){ |
6d7e6b26 | 2112 | int id_list[16]; |
b735aeea | 2113 | int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j]; |
6d7e6b26 MN |
2114 | for(i=0; i<16; i++){ |
2115 | id_list[i]= 60; | |
2116 | if(h->ref_list[j][i].data[0]){ | |
2117 | int k; | |
2118 | uint8_t *base= h->ref_list[j][i].base[0]; | |
2119 | for(k=0; k<h->short_ref_count; k++) | |
2120 | if(h->short_ref[k]->base[0] == base){ | |
2121 | id_list[i]= k; | |
2122 | break; | |
2123 | } | |
2124 | for(k=0; k<h->long_ref_count; k++) | |
2125 | if(h->long_ref[k] && h->long_ref[k]->base[0] == base){ | |
2126 | id_list[i]= h->short_ref_count + k; | |
2127 | break; | |
2128 | } | |
2129 | } | |
2130 | } | |
2131 | ||
c32867b5 MN |
2132 | ref2frm[0]= |
2133 | ref2frm[1]= -1; | |
d50cdd82 | 2134 | for(i=0; i<16; i++) |
6d7e6b26 | 2135 | ref2frm[i+2]= 4*id_list[i] |
c32867b5 | 2136 | +(h->ref_list[j][i].reference&3); |
d50cdd82 MN |
2137 | ref2frm[18+0]= |
2138 | ref2frm[18+1]= -1; | |
2139 | for(i=16; i<48; i++) | |
6d7e6b26 | 2140 | ref2frm[i+4]= 4*id_list[(i-16)>>1] |
d50cdd82 | 2141 | +(h->ref_list[j][i].reference&3); |
c32867b5 MN |
2142 | } |
2143 | ||
5d18eaad | 2144 | h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; |
8a11a969 | 2145 | h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; |
5d18eaad | 2146 | |
802e9146 MN |
2147 | s->avctx->refs= h->sps.ref_frame_count; |
2148 | ||
0da71265 | 2149 | if(s->avctx->debug&FF_DEBUG_PICT_INFO){ |
49573a87 | 2150 | av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", |
6867a90b LLL |
2151 | h->slice_num, |
2152 | (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), | |
115329f1 | 2153 | first_mb_in_slice, |
49573a87 | 2154 | av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", |
0da71265 MN |
2155 | pps_id, h->frame_num, |
2156 | s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], | |
2157 | h->ref_count[0], h->ref_count[1], | |
2158 | s->qscale, | |
0c32e19d | 2159 | h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26, |
9f2d1b4f | 2160 | h->use_weight, |
4806b922 MN |
2161 | h->use_weight==1 && h->use_weight_chroma ? "c" : "", |
2162 | h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "" | |
0da71265 MN |
2163 | ); |
2164 | } | |
2165 | ||
2166 | return 0; | |
2167 | } | |
2168 | ||
0dc343d4 | 2169 | int ff_h264_get_slice_type(const H264Context *h) |
75dd6938 LA |
2170 | { |
2171 | switch (h->slice_type) { | |
2172 | case FF_P_TYPE: return 0; | |
2173 | case FF_B_TYPE: return 1; | |
2174 | case FF_I_TYPE: return 2; | |
2175 | case FF_SP_TYPE: return 3; | |
2176 | case FF_SI_TYPE: return 4; | |
2177 | default: return -1; | |
2178 | } | |
2179 | } | |
2180 | ||
c988f975 MN |
2181 | static void loop_filter(H264Context *h){ |
2182 | MpegEncContext * const s = &h->s; | |
2183 | uint8_t *dest_y, *dest_cb, *dest_cr; | |
2184 | int linesize, uvlinesize, mb_x, mb_y; | |
2185 | const int end_mb_y= s->mb_y + FRAME_MBAFF; | |
2186 | const int old_slice_type= h->slice_type; | |
2187 | ||
2188 | if(h->deblocking_filter) { | |
2189 | for(mb_x= 0; mb_x<s->mb_width; mb_x++){ | |
2190 | for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){ | |
78998bf2 | 2191 | int mb_xy, mb_type; |
c988f975 MN |
2192 | mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride; |
2193 | h->slice_num= h->slice_table[mb_xy]; | |
2194 | mb_type= s->current_picture.mb_type[mb_xy]; | |
2195 | h->list_count= h->list_counts[mb_xy]; | |
c988f975 MN |
2196 | |
2197 | if(FRAME_MBAFF) | |
2198 | h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type); | |
2199 | ||
c988f975 MN |
2200 | s->mb_x= mb_x; |
2201 | s->mb_y= mb_y; | |
2202 | dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; | |
2203 | dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; | |
2204 | dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; | |
2205 | //FIXME simplify above | |
2206 | ||
2207 | if (MB_FIELD) { | |
2208 | linesize = h->mb_linesize = s->linesize * 2; | |
2209 | uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | |
2210 | if(mb_y&1){ //FIXME move out of this function? | |
2211 | dest_y -= s->linesize*15; | |
2212 | dest_cb-= s->uvlinesize*7; | |
2213 | dest_cr-= s->uvlinesize*7; | |
2214 | } | |
2215 | } else { | |
2216 | linesize = h->mb_linesize = s->linesize; | |
2217 | uvlinesize = h->mb_uvlinesize = s->uvlinesize; | |
2218 | } | |
77d40dce | 2219 | backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); |
aaa995d7 | 2220 | if(fill_filter_caches(h, mb_type)) |
44a5e7b6 | 2221 | continue; |
c988f975 MN |
2222 | h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]); |
2223 | h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]); | |
2224 | ||
77d40dce | 2225 | if (FRAME_MBAFF) { |
c988f975 MN |
2226 | ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); |
2227 | } else { | |
2228 | ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); | |
2229 | } | |
2230 | } | |
2231 | } | |
2232 | } | |
2233 | h->slice_type= old_slice_type; | |
2234 | s->mb_x= 0; | |
2235 | s->mb_y= end_mb_y - FRAME_MBAFF; | |
f4b8b825 MN |
2236 | h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); |
2237 | h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); | |
c988f975 MN |
2238 | } |
2239 | ||
69a28f3e MN |
2240 | static void predict_field_decoding_flag(H264Context *h){ |
2241 | MpegEncContext * const s = &h->s; | |
2242 | const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; | |
2243 | int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) | |
2244 | ? s->current_picture.mb_type[mb_xy-1] | |
2245 | : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) | |
2246 | ? s->current_picture.mb_type[mb_xy-s->mb_stride] | |
2247 | : 0; | |
2248 | h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; | |
2249 | } | |
2250 | ||
3a84713a RS |
2251 | static int decode_slice(struct AVCodecContext *avctx, void *arg){ |
2252 | H264Context *h = *(void**)arg; | |
0da71265 MN |
2253 | MpegEncContext * const s = &h->s; |
2254 | const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; | |
2255 | ||
2256 | s->mb_skip_run= -1; | |
0da71265 | 2257 | |
89db0bae | 2258 | h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 || |
5317c95b | 2259 | (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); |
89db0bae | 2260 | |
e5017ab8 | 2261 | if( h->pps.cabac ) { |
e5017ab8 LA |
2262 | /* realign */ |
2263 | align_get_bits( &s->gb ); | |
2264 | ||
2265 | /* init cabac */ | |
d61c4e73 | 2266 | ff_init_cabac_states( &h->cabac); |
e5017ab8 LA |
2267 | ff_init_cabac_decoder( &h->cabac, |
2268 | s->gb.buffer + get_bits_count(&s->gb)/8, | |
6e44ba15 | 2269 | (get_bits_left(&s->gb) + 7)/8); |
cc51b282 MN |
2270 | |
2271 | ff_h264_init_cabac_states(h); | |
95c26348 | 2272 | |
e5017ab8 | 2273 | for(;;){ |
851ded89 | 2274 | //START_TIMER |
cc51b282 | 2275 | int ret = ff_h264_decode_mb_cabac(h); |
6867a90b | 2276 | int eos; |
851ded89 | 2277 | //STOP_TIMER("decode_mb_cabac") |
0da71265 | 2278 | |
903d58f6 | 2279 | if(ret>=0) ff_h264_hl_decode_mb(h); |
0da71265 | 2280 | |
5d18eaad | 2281 | if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? |
e5017ab8 LA |
2282 | s->mb_y++; |
2283 | ||
cc51b282 | 2284 | ret = ff_h264_decode_mb_cabac(h); |
e5017ab8 | 2285 | |
903d58f6 | 2286 | if(ret>=0) ff_h264_hl_decode_mb(h); |
e5017ab8 LA |
2287 | s->mb_y--; |
2288 | } | |
6867a90b | 2289 | eos = get_cabac_terminate( &h->cabac ); |
e5017ab8 | 2290 | |
3566042a MN |
2291 | if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){ |
2292 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2293 | return 0; | |
2294 | } | |
5659b509 | 2295 | if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { |
706da4af | 2296 | av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); |
e5017ab8 LA |
2297 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); |
2298 | return -1; | |
2299 | } | |
2300 | ||
2301 | if( ++s->mb_x >= s->mb_width ) { | |
2302 | s->mb_x = 0; | |
c988f975 | 2303 | loop_filter(h); |
e5017ab8 | 2304 | ff_draw_horiz_band(s, 16*s->mb_y, 16); |
5175b937 | 2305 | ++s->mb_y; |
f3e53d9f | 2306 | if(FIELD_OR_MBAFF_PICTURE) { |
6867a90b | 2307 | ++s->mb_y; |
69cc3183 MN |
2308 | if(FRAME_MBAFF && s->mb_y < s->mb_height) |
2309 | predict_field_decoding_flag(h); | |
6867a90b | 2310 | } |
0da71265 | 2311 | } |
0da71265 | 2312 | |
e5017ab8 | 2313 | if( eos || s->mb_y >= s->mb_height ) { |
a9c9a240 | 2314 | tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); |
e5017ab8 | 2315 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); |
0da71265 | 2316 | return 0; |
e5017ab8 | 2317 | } |
e5017ab8 LA |
2318 | } |
2319 | ||
2320 | } else { | |
2321 | for(;;){ | |
e1e94902 | 2322 | int ret = ff_h264_decode_mb_cavlc(h); |
e5017ab8 | 2323 | |
903d58f6 | 2324 | if(ret>=0) ff_h264_hl_decode_mb(h); |
e5017ab8 | 2325 | |
5d18eaad | 2326 | if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? |
e5017ab8 | 2327 | s->mb_y++; |
e1e94902 | 2328 | ret = ff_h264_decode_mb_cavlc(h); |
e5017ab8 | 2329 | |
903d58f6 | 2330 | if(ret>=0) ff_h264_hl_decode_mb(h); |
e5017ab8 LA |
2331 | s->mb_y--; |
2332 | } | |
2333 | ||
2334 | if(ret<0){ | |
2335 | av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); | |
0da71265 MN |
2336 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); |
2337 | ||
2338 | return -1; | |
2339 | } | |
e5017ab8 LA |
2340 | |
2341 | if(++s->mb_x >= s->mb_width){ | |
2342 | s->mb_x=0; | |
c988f975 | 2343 | loop_filter(h); |
e5017ab8 | 2344 | ff_draw_horiz_band(s, 16*s->mb_y, 16); |
6867a90b | 2345 | ++s->mb_y; |
f3e53d9f | 2346 | if(FIELD_OR_MBAFF_PICTURE) { |
6867a90b | 2347 | ++s->mb_y; |
69cc3183 MN |
2348 | if(FRAME_MBAFF && s->mb_y < s->mb_height) |
2349 | predict_field_decoding_flag(h); | |
6867a90b LLL |
2350 | } |
2351 | if(s->mb_y >= s->mb_height){ | |
a9c9a240 | 2352 | tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); |
e5017ab8 LA |
2353 | |
2354 | if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { | |
2355 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2356 | ||
2357 | return 0; | |
2358 | }else{ | |
2359 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2360 | ||
2361 | return -1; | |
2362 | } | |
2363 | } | |
2364 | } | |
2365 | ||
2366 | if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ | |
a9c9a240 | 2367 | tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); |
e5017ab8 LA |
2368 | if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ |
2369 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2370 | ||
2371 | return 0; | |
2372 | }else{ | |
2373 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); | |
2374 | ||
2375 | return -1; | |
2376 | } | |
2377 | } | |
0da71265 MN |
2378 | } |
2379 | } | |
e5017ab8 | 2380 | |
0da71265 MN |
2381 | #if 0 |
2382 | for(;s->mb_y < s->mb_height; s->mb_y++){ | |
2383 | for(;s->mb_x < s->mb_width; s->mb_x++){ | |
2384 | int ret= decode_mb(h); | |
115329f1 | 2385 | |
903d58f6 | 2386 | ff_h264_hl_decode_mb(h); |
0da71265 MN |
2387 | |
2388 | if(ret<0){ | |
267f7edc | 2389 | av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); |
0da71265 MN |
2390 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); |
2391 | ||
2392 | return -1; | |
2393 | } | |
115329f1 | 2394 | |
0da71265 MN |
2395 | if(++s->mb_x >= s->mb_width){ |
2396 | s->mb_x=0; | |
2397 | if(++s->mb_y >= s->mb_height){ | |
2398 | if(get_bits_count(s->gb) == s->gb.size_in_bits){ | |
2399 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2400 | ||
2401 | return 0; | |
2402 | }else{ | |
2403 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2404 | ||
2405 | return -1; | |
2406 | } | |
2407 | } | |
2408 | } | |
115329f1 | 2409 | |
0da71265 MN |
2410 | if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ |
2411 | if(get_bits_count(s->gb) == s->gb.size_in_bits){ | |
2412 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); | |
2413 | ||
2414 | return 0; | |
2415 | }else{ | |
2416 | ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); | |
2417 | ||
2418 | return -1; | |
2419 | } | |
2420 | } | |
2421 | } | |
2422 | s->mb_x=0; | |
2423 | ff_draw_horiz_band(s, 16*s->mb_y, 16); | |
2424 | } | |
2425 | #endif | |
2426 | return -1; //not reached | |
2427 | } | |
2428 | ||
afebe2f7 AÖ |
2429 | /** |
2430 | * Call decode_slice() for each context. | |
2431 | * | |
2432 | * @param h h264 master context | |
2433 | * @param context_count number of contexts to execute | |
2434 | */ | |
2435 | static void execute_decode_slices(H264Context *h, int context_count){ | |
2436 | MpegEncContext * const s = &h->s; | |
2437 | AVCodecContext * const avctx= s->avctx; | |
2438 | H264Context *hx; | |
2439 | int i; | |
2440 | ||
40e5d31b GB |
2441 | if (s->avctx->hwaccel) |
2442 | return; | |
0d3d172f | 2443 | if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) |
369122dd | 2444 | return; |
afebe2f7 | 2445 | if(context_count == 1) { |
74e8b78b | 2446 | decode_slice(avctx, &h); |
afebe2f7 AÖ |
2447 | } else { |
2448 | for(i = 1; i < context_count; i++) { | |
2449 | hx = h->thread_context[i]; | |
047599a4 | 2450 | hx->s.error_recognition = avctx->error_recognition; |
afebe2f7 AÖ |
2451 | hx->s.error_count = 0; |
2452 | } | |
2453 | ||
2454 | avctx->execute(avctx, (void *)decode_slice, | |
01418506 | 2455 | h->thread_context, NULL, context_count, sizeof(void*)); |
afebe2f7 AÖ |
2456 | |
2457 | /* pull back stuff from slices to master context */ | |
2458 | hx = h->thread_context[context_count - 1]; | |
2459 | s->mb_x = hx->s.mb_x; | |
2460 | s->mb_y = hx->s.mb_y; | |
12d96de3 JD |
2461 | s->dropable = hx->s.dropable; |
2462 | s->picture_structure = hx->s.picture_structure; | |
afebe2f7 AÖ |
2463 | for(i = 1; i < context_count; i++) |
2464 | h->s.error_count += h->thread_context[i]->s.error_count; | |
2465 | } | |
2466 | } | |
2467 | ||
2468 | ||
30317501 | 2469 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ |
0da71265 MN |
2470 | MpegEncContext * const s = &h->s; |
2471 | AVCodecContext * const avctx= s->avctx; | |
2472 | int buf_index=0; | |
afebe2f7 AÖ |
2473 | H264Context *hx; ///< thread context |
2474 | int context_count = 0; | |
74b14aac | 2475 | int next_avc= h->is_avc ? 0 : buf_size; |
afebe2f7 AÖ |
2476 | |
2477 | h->max_contexts = avctx->thread_count; | |
377ec888 | 2478 | #if 0 |
eb60dddc | 2479 | int i; |
96b6ace2 MN |
2480 | for(i=0; i<50; i++){ |
2481 | av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); | |
0da71265 MN |
2482 | } |
2483 | #endif | |
66a4b2c1 | 2484 | if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ |
afebe2f7 | 2485 | h->current_slice = 0; |
12d96de3 | 2486 | if (!s->first_field) |
f6e3c460 | 2487 | s->current_picture_ptr= NULL; |
9c095463 | 2488 | ff_h264_reset_sei(h); |
66a4b2c1 MN |
2489 | } |
2490 | ||
0da71265 MN |
2491 | for(;;){ |
2492 | int consumed; | |
2493 | int dst_length; | |
2494 | int bit_length; | |
30317501 | 2495 | const uint8_t *ptr; |
4770b1b4 | 2496 | int i, nalsize = 0; |
afebe2f7 | 2497 | int err; |
115329f1 | 2498 | |
74b14aac | 2499 | if(buf_index >= next_avc) { |
1c48415b AÖ |
2500 | if(buf_index >= buf_size) break; |
2501 | nalsize = 0; | |
2502 | for(i = 0; i < h->nal_length_size; i++) | |
2503 | nalsize = (nalsize << 8) | buf[buf_index++]; | |
8d8409ca | 2504 | if(nalsize <= 1 || nalsize > buf_size - buf_index){ |
1c48415b AÖ |
2505 | if(nalsize == 1){ |
2506 | buf_index++; | |
2507 | continue; | |
2508 | }else{ | |
2509 | av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); | |
2510 | break; | |
2511 | } | |
2512 | } | |
74b14aac | 2513 | next_avc= buf_index + nalsize; |
1c48415b AÖ |
2514 | } else { |
2515 | // start code prefix search | |
52255d17 | 2516 | for(; buf_index + 3 < next_avc; buf_index++){ |
1c48415b AÖ |
2517 | // This should always succeed in the first iteration. |
2518 | if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1) | |
2519 | break; | |
8b031359 | 2520 | } |
115329f1 | 2521 | |
1c48415b | 2522 | if(buf_index+3 >= buf_size) break; |
115329f1 | 2523 | |
1c48415b | 2524 | buf_index+=3; |
52255d17 | 2525 | if(buf_index >= next_avc) continue; |
1c48415b | 2526 | } |
115329f1 | 2527 | |
afebe2f7 AÖ |
2528 | hx = h->thread_context[context_count]; |
2529 | ||
74b14aac | 2530 | ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index); |
ff82e429 | 2531 | if (ptr==NULL || dst_length < 0){ |
ac658be5 FOL |
2532 | return -1; |
2533 | } | |
3566042a MN |
2534 | i= buf_index + consumed; |
2535 | if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc && | |
2536 | buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0) | |
2537 | s->workaround_bugs |= FF_BUG_TRUNCATED; | |
2538 | ||
2539 | if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){ | |
6ac9696e | 2540 | while(ptr[dst_length - 1] == 0 && dst_length > 0) |
c4da83fb | 2541 | dst_length--; |
3566042a | 2542 | } |
1790a5e9 | 2543 | bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1)); |
0da71265 MN |
2544 | |
2545 | if(s->avctx->debug&FF_DEBUG_STARTCODE){ | |
afebe2f7 | 2546 | av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length); |
0da71265 | 2547 | } |
115329f1 | 2548 | |
74b14aac | 2549 | if (h->is_avc && (nalsize != consumed) && nalsize){ |
e262365d | 2550 | av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); |
9d2cc8c1 | 2551 | } |
4770b1b4 | 2552 | |
0da71265 MN |
2553 | buf_index += consumed; |
2554 | ||
755bfeab | 2555 | if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id |
8c3eba7c | 2556 | ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) |
0da71265 | 2557 | continue; |
115329f1 | 2558 | |
afebe2f7 AÖ |
2559 | again: |
2560 | err = 0; | |
2561 | switch(hx->nal_unit_type){ | |
0da71265 | 2562 | case NAL_IDR_SLICE: |
afebe2f7 AÖ |
2563 | if (h->nal_unit_type != NAL_IDR_SLICE) { |
2564 | av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices"); | |
2565 | return -1; | |
2566 | } | |
3b66c4c5 | 2567 | idr(h); //FIXME ensure we don't loose some frames if there is reordering |
0da71265 | 2568 | case NAL_SLICE: |
afebe2f7 AÖ |
2569 | init_get_bits(&hx->s.gb, ptr, bit_length); |
2570 | hx->intra_gb_ptr= | |
2571 | hx->inter_gb_ptr= &hx->s.gb; | |
2572 | hx->s.data_partitioning = 0; | |
2573 | ||
2574 | if((err = decode_slice_header(hx, h))) | |
2575 | break; | |
2576 | ||
dd0cd3d2 RC |
2577 | avctx->profile = hx->sps.profile_idc; |
2578 | avctx->level = hx->sps.level_idc; | |
2579 | ||
3bccd93a SW |
2580 | if (h->current_slice == 1) { |
2581 | if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) | |
6026a096 | 2582 | return -1; |
3bccd93a SW |
2583 | if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) |
2584 | ff_vdpau_h264_picture_start(s); | |
6026a096 GB |
2585 | } |
2586 | ||
37a558fe IS |
2587 | s->current_picture_ptr->key_frame |= |
2588 | (hx->nal_unit_type == NAL_IDR_SLICE) || | |
2589 | (h->sei_recovery_frame_cnt >= 0); | |
afebe2f7 AÖ |
2590 | if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5 |
2591 | && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) | |
9f5c1037 | 2592 | && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) |
4b30289e | 2593 | && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) |
369122dd | 2594 | && avctx->skip_frame < AVDISCARD_ALL){ |
d404b3ed MN |
2595 | if(avctx->hwaccel) { |
2596 | if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0) | |
2597 | return -1; | |
2598 | }else | |
0d3d172f | 2599 | if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){ |
369122dd | 2600 | static const uint8_t start_code[] = {0x00, 0x00, 0x01}; |
c639fc72 CEH |
2601 | ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code)); |
2602 | ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed ); | |
369122dd | 2603 | }else |
f2c214a1 | 2604 | context_count++; |
369122dd | 2605 | } |
0da71265 MN |
2606 | break; |
2607 | case NAL_DPA: | |
afebe2f7 AÖ |
2608 | init_get_bits(&hx->s.gb, ptr, bit_length); |
2609 | hx->intra_gb_ptr= | |
2610 | hx->inter_gb_ptr= NULL; | |
0410ee8f AS |
2611 | |
2612 | if ((err = decode_slice_header(hx, h)) < 0) | |
2613 | break; | |
2614 | ||
dd0cd3d2 RC |
2615 | avctx->profile = hx->sps.profile_idc; |
2616 | avctx->level = hx->sps.level_idc; | |
2617 | ||
afebe2f7 | 2618 | hx->s.data_partitioning = 1; |
115329f1 | 2619 | |
0da71265 MN |
2620 | break; |
2621 | case NAL_DPB: | |
afebe2f7 AÖ |
2622 | init_get_bits(&hx->intra_gb, ptr, bit_length); |
2623 | hx->intra_gb_ptr= &hx->intra_gb; | |
0da71265 MN |
2624 | break; |
2625 | case NAL_DPC: | |
afebe2f7 AÖ |
2626 | init_get_bits(&hx->inter_gb, ptr, bit_length); |
2627 | hx->inter_gb_ptr= &hx->inter_gb; | |
8b92b792 | 2628 | |
afebe2f7 | 2629 | if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning |
b18e5c03 | 2630 | && s->context_initialized |
e0111b32 | 2631 | && s->hurry_up < 5 |
afebe2f7 | 2632 | && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) |
9f5c1037 | 2633 | && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) |
4b30289e | 2634 | && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) |
e0111b32 | 2635 | && avctx->skip_frame < AVDISCARD_ALL) |
afebe2f7 | 2636 | context_count++; |
0da71265 MN |
2637 | break; |
2638 | case NAL_SEI: | |
cdd10689 | 2639 | init_get_bits(&s->gb, ptr, bit_length); |
1790a5e9 | 2640 | ff_h264_decode_sei(h); |
0da71265 MN |
2641 | break; |
2642 | case NAL_SPS: | |
2643 | init_get_bits(&s->gb, ptr, bit_length); | |
1790a5e9 | 2644 | ff_h264_decode_seq_parameter_set(h); |
115329f1 | 2645 | |
0da71265 MN |
2646 | if(s->flags& CODEC_FLAG_LOW_DELAY) |
2647 | s->low_delay=1; | |
115329f1 | 2648 | |
a18030bb LM |
2649 | if(avctx->has_b_frames < 2) |
2650 | avctx->has_b_frames= !s->low_delay; | |
0da71265 MN |
2651 | break; |
2652 | case NAL_PPS: | |
2653 | init_get_bits(&s->gb, ptr, bit_length); | |
115329f1 | 2654 | |
1790a5e9 | 2655 | ff_h264_decode_picture_parameter_set(h, bit_length); |
0da71265 MN |
2656 | |
2657 | break; | |
ab470fa7 LM |
2658 | case NAL_AUD: |
2659 | case NAL_END_SEQUENCE: | |
2660 | case NAL_END_STREAM: | |
2661 | case NAL_FILLER_DATA: | |
2662 | case NAL_SPS_EXT: | |
2663 | case NAL_AUXILIARY_SLICE: | |
0da71265 | 2664 | break; |
bb270c08 | 2665 | default: |
4ad04da2 | 2666 | av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length); |
115329f1 | 2667 | } |
115329f1 | 2668 | |
afebe2f7 AÖ |
2669 | if(context_count == h->max_contexts) { |
2670 | execute_decode_slices(h, context_count); | |
2671 | context_count = 0; | |
2672 | } | |
2673 | ||
2674 | if (err < 0) | |
2675 | av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); | |
2676 | else if(err == 1) { | |
2677 | /* Slice could not be decoded in parallel mode, copy down | |
2678 | * NAL unit stuff to context 0 and restart. Note that | |
1412060e | 2679 | * rbsp_buffer is not transferred, but since we no longer |
afebe2f7 AÖ |
2680 | * run in parallel mode this should not be an issue. */ |
2681 | h->nal_unit_type = hx->nal_unit_type; | |
2682 | h->nal_ref_idc = hx->nal_ref_idc; | |
2683 | hx = h; | |
2684 | goto again; | |
2685 | } | |
2686 | } | |
2687 | if(context_count) | |
2688 | execute_decode_slices(h, context_count); | |
0da71265 MN |
2689 | return buf_index; |
2690 | } | |
2691 | ||
2692 | /** | |
3b66c4c5 | 2693 | * returns the number of bytes consumed for building the current frame |
0da71265 MN |
2694 | */ |
2695 | static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){ | |
755bfeab | 2696 | if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...) |
0da71265 MN |
2697 | if(pos+10>buf_size) pos=buf_size; // oops ;) |
2698 | ||
2699 | return pos; | |
0da71265 MN |
2700 | } |
2701 | ||
115329f1 | 2702 | static int decode_frame(AVCodecContext *avctx, |
0da71265 | 2703 | void *data, int *data_size, |
7a00bbad | 2704 | AVPacket *avpkt) |
0da71265 | 2705 | { |
7a00bbad TB |
2706 | const uint8_t *buf = avpkt->data; |
2707 | int buf_size = avpkt->size; | |
0da71265 MN |
2708 | H264Context *h = avctx->priv_data; |
2709 | MpegEncContext *s = &h->s; | |
115329f1 | 2710 | AVFrame *pict = data; |
0da71265 | 2711 | int buf_index; |
115329f1 | 2712 | |
0da71265 | 2713 | s->flags= avctx->flags; |
303e50e6 | 2714 | s->flags2= avctx->flags2; |
0da71265 | 2715 | |
1412060e | 2716 | /* end of stream, output what is still in the buffers */ |
0da71265 | 2717 | if (buf_size == 0) { |
97bbb885 MN |
2718 | Picture *out; |
2719 | int i, out_idx; | |
2720 | ||
2721 | //FIXME factorize this with the output code below | |
2722 | out = h->delayed_pic[0]; | |
2723 | out_idx = 0; | |
c173a088 | 2724 | for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) |
97bbb885 MN |
2725 | if(h->delayed_pic[i]->poc < out->poc){ |
2726 | out = h->delayed_pic[i]; | |
2727 | out_idx = i; | |
2728 | } | |
2729 | ||
2730 | for(i=out_idx; h->delayed_pic[i]; i++) | |
2731 | h->delayed_pic[i] = h->delayed_pic[i+1]; | |
2732 | ||
2733 | if(out){ | |
2734 | *data_size = sizeof(AVFrame); | |
2735 | *pict= *(AVFrame*)out; | |
2736 | } | |
2737 | ||
0da71265 MN |
2738 | return 0; |
2739 | } | |
115329f1 | 2740 | |
0da71265 | 2741 | buf_index=decode_nal_units(h, buf, buf_size); |
115329f1 | 2742 | if(buf_index < 0) |
0da71265 MN |
2743 | return -1; |
2744 | ||
56c70e1d | 2745 | if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){ |
1c746a49 | 2746 | if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0; |
56c70e1d MN |
2747 | av_log(avctx, AV_LOG_ERROR, "no frame!\n"); |
2748 | return -1; | |
2749 | } | |
2750 | ||
66a4b2c1 MN |
2751 | if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ |
2752 | Picture *out = s->current_picture_ptr; | |
2753 | Picture *cur = s->current_picture_ptr; | |
44be1d64 | 2754 | int i, pics, out_of_order, out_idx; |
115329f1 | 2755 | |
256299d3 | 2756 | field_end(h); |
66a4b2c1 | 2757 | |
357282c6 | 2758 | if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { |
12d96de3 JD |
2759 | /* Wait for second field. */ |
2760 | *data_size = 0; | |
2761 | ||
2762 | } else { | |
b19d493f | 2763 | cur->interlaced_frame = 0; |
b09a7c05 AÖ |
2764 | cur->repeat_pict = 0; |
2765 | ||
2766 | /* Signal interlacing information externally. */ | |
2767 | /* Prioritize picture timing SEI information over used decoding process if it exists. */ | |
70e01da3 | 2768 | |
b09a7c05 AÖ |
2769 | if(h->sps.pic_struct_present_flag){ |
2770 | switch (h->sei_pic_struct) | |
2771 | { | |
b19d493f HY |
2772 | case SEI_PIC_STRUCT_FRAME: |
2773 | break; | |
2774 | case SEI_PIC_STRUCT_TOP_FIELD: | |
2775 | case SEI_PIC_STRUCT_BOTTOM_FIELD: | |
2776 | cur->interlaced_frame = 1; | |
2777 | break; | |
2778 | case SEI_PIC_STRUCT_TOP_BOTTOM: | |
2779 | case SEI_PIC_STRUCT_BOTTOM_TOP: | |
2780 | if (FIELD_OR_MBAFF_PICTURE) | |
2781 | cur->interlaced_frame = 1; | |
2782 | else | |
2783 | // try to flag soft telecine progressive | |
2784 | cur->interlaced_frame = h->prev_interlaced_frame; | |
2785 | break; | |
b09a7c05 AÖ |
2786 | case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: |
2787 | case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: | |
2788 | // Signal the possibility of telecined film externally (pic_struct 5,6) | |
2789 | // From these hints, let the applications decide if they apply deinterlacing. | |
2790 | cur->repeat_pict = 1; | |
b09a7c05 AÖ |
2791 | break; |
2792 | case SEI_PIC_STRUCT_FRAME_DOUBLING: | |
2793 | // Force progressive here, as doubling interlaced frame is a bad idea. | |
b09a7c05 AÖ |
2794 | cur->repeat_pict = 2; |
2795 | break; | |
2796 | case SEI_PIC_STRUCT_FRAME_TRIPLING: | |
b09a7c05 AÖ |
2797 | cur->repeat_pict = 4; |
2798 | break; | |
2799 | } | |
b19d493f HY |
2800 | |
2801 | if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) | |
2802 | cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0; | |
b09a7c05 AÖ |
2803 | }else{ |
2804 | /* Derive interlacing flag from used decoding process. */ | |
2805 | cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; | |
2806 | } | |
b19d493f | 2807 | h->prev_interlaced_frame = cur->interlaced_frame; |
b09a7c05 AÖ |
2808 | |
2809 | if (cur->field_poc[0] != cur->field_poc[1]){ | |
2810 | /* Derive top_field_first from field pocs. */ | |
2811 | cur->top_field_first = cur->field_poc[0] < cur->field_poc[1]; | |
2812 | }else{ | |
2813 | if(cur->interlaced_frame || h->sps.pic_struct_present_flag){ | |
2814 | /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */ | |
2815 | if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM | |
2816 | || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) | |
2817 | cur->top_field_first = 1; | |
2818 | else | |
2819 | cur->top_field_first = 0; | |
2820 | }else{ | |
2821 | /* Most likely progressive */ | |
2822 | cur->top_field_first = 0; | |
2823 | } | |
2824 | } | |
84a8596d | 2825 | |
f6e3c460 | 2826 | //FIXME do something with unavailable reference frames |
8b92b792 | 2827 | |
f6e3c460 | 2828 | /* Sort B-frames into display order */ |
2f944356 | 2829 | |
f6e3c460 AÖ |
2830 | if(h->sps.bitstream_restriction_flag |
2831 | && s->avctx->has_b_frames < h->sps.num_reorder_frames){ | |
2832 | s->avctx->has_b_frames = h->sps.num_reorder_frames; | |
2833 | s->low_delay = 0; | |
2834 | } | |
9170e345 | 2835 | |
fb19e144 MN |
2836 | if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT |
2837 | && !h->sps.bitstream_restriction_flag){ | |
2838 | s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT; | |
2839 | s->low_delay= 0; | |
2840 | } | |
2841 | ||
f6e3c460 AÖ |
2842 | pics = 0; |
2843 | while(h->delayed_pic[pics]) pics++; | |
9170e345 | 2844 | |
64b9d48f | 2845 | assert(pics <= MAX_DELAYED_PIC_COUNT); |
4e4d983e | 2846 | |
f6e3c460 AÖ |
2847 | h->delayed_pic[pics++] = cur; |
2848 | if(cur->reference == 0) | |
2849 | cur->reference = DELAYED_PIC_REF; | |
2f944356 | 2850 | |
f6e3c460 AÖ |
2851 | out = h->delayed_pic[0]; |
2852 | out_idx = 0; | |
c173a088 | 2853 | for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) |
f6e3c460 AÖ |
2854 | if(h->delayed_pic[i]->poc < out->poc){ |
2855 | out = h->delayed_pic[i]; | |
2856 | out_idx = i; | |
2857 | } | |
44be1d64 MN |
2858 | if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) |
2859 | h->outputed_poc= INT_MIN; | |
2860 | out_of_order = out->poc < h->outputed_poc; | |
1b547aba | 2861 | |
f6e3c460 AÖ |
2862 | if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) |
2863 | { } | |
2a811db2 | 2864 | else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) |
f6e3c460 | 2865 | || (s->low_delay && |
44be1d64 | 2866 | ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2) |
9701840b | 2867 | || cur->pict_type == FF_B_TYPE))) |
f6e3c460 AÖ |
2868 | { |
2869 | s->low_delay = 0; | |
2870 | s->avctx->has_b_frames++; | |
f6e3c460 | 2871 | } |
f6e3c460 AÖ |
2872 | |
2873 | if(out_of_order || pics > s->avctx->has_b_frames){ | |
3eaa6d0e | 2874 | out->reference &= ~DELAYED_PIC_REF; |
f6e3c460 AÖ |
2875 | for(i=out_idx; h->delayed_pic[i]; i++) |
2876 | h->delayed_pic[i] = h->delayed_pic[i+1]; | |
2877 | } | |
3eaa6d0e | 2878 | if(!out_of_order && pics > s->avctx->has_b_frames){ |
f6e3c460 | 2879 | *data_size = sizeof(AVFrame); |
df8a7dff | 2880 | |
44be1d64 MN |
2881 | if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) { |
2882 | h->outputed_poc = INT_MIN; | |
2883 | } else | |
67e362ca | 2884 | h->outputed_poc = out->poc; |
f6e3c460 | 2885 | *pict= *(AVFrame*)out; |
3eaa6d0e | 2886 | }else{ |
f6e3c460 | 2887 | av_log(avctx, AV_LOG_DEBUG, "no picture\n"); |
3eaa6d0e | 2888 | } |
12d96de3 | 2889 | } |
a4dae92b LM |
2890 | } |
2891 | ||
3165e258 | 2892 | assert(pict->data[0] || !*data_size); |
4e4d983e | 2893 | ff_print_debug_info(s, pict); |
0da71265 | 2894 | //printf("out %d\n", (int)pict->data[0]); |
0da71265 | 2895 | |
0da71265 MN |
2896 | return get_consumed_bytes(s, buf_index, buf_size); |
2897 | } | |
#if 0
/**
 * Fill h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 *
 * A neighbour counts as available when it lies inside the picture and its
 * h->slice_table entry equals h->slice_num. Index layout as used below:
 *   0 = top-left, 1 = top, 2 = top-right, 3 = left.
 * Entries 4 and 5 are set to the constants 1 and 0.
 *
 * This helper is compiled out (#if 0).
 */
static inline void fill_mb_avail(H264Context *h)
{
    MpegEncContext *const mpv = &h->s;
    const int cur_xy = mpv->mb_x + mpv->mb_y * mpv->mb_stride;
    const int top_xy = cur_xy - mpv->mb_stride;

    /* The row above only exists when mb_y is non-zero; the short-circuit
     * keeps slice_table from being read for the first row. */
    h->mb_avail[0] = mpv->mb_y && mpv->mb_x
                     && h->slice_table[top_xy - 1] == h->slice_num;
    h->mb_avail[1] = mpv->mb_y
                     && h->slice_table[top_xy] == h->slice_num;
    h->mb_avail[2] = mpv->mb_y && mpv->mb_x + 1 < mpv->mb_width
                     && h->slice_table[top_xy + 1] == h->slice_num;

    /* Left neighbour is on the current row. */
    h->mb_avail[3] = mpv->mb_x && h->slice_table[cur_xy - 1] == h->slice_num;

    h->mb_avail[4] = 1; //FIXME move out
    h->mb_avail[5] = 0; //FIXME move out
}
#endif
2917 | ||
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self-test entry point, built only when TEST is defined.
 *
 * Writes COUNT unsigned Exp-Golomb codes (values 0..COUNT-1) into a buffer,
 * reads them back and reports every value that does not round-trip; then
 * does the same for signed codes (values -COUNT/2..COUNT/2-1). Each call is
 * wrapped in START_TIMER/STOP_TIMER. Mismatches are only printed; they do
 * not change the return value.
 *
 * The 4x4 (I)DCT, quantizer and NAL layer tests further down are disabled
 * with #if 0.
 *
 * @return 0 (the only enabled return statement).
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    /* Zero-initialised so dsputil_init() does not read indeterminate fields. */
    AVCodecContext avctx = {0};

    dsputil_init(&dsp, &avctx);

    /* ---- unsigned Exp-Golomb: write 0..COUNT-1 ---- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    /* ---- unsigned Exp-Golomb: read back and compare ---- */
    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        /* Peek at the upcoming bits so a mismatch can be reported with them. */
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    /* ---- signed Exp-Golomb: write -COUNT/2..COUNT/2-1 ---- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    /* ---- signed Exp-Golomb: read back and compare ---- */
    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* The value written at index i was i - COUNT/2, so that is the
             * expected value to report (not i itself). */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    /* Disabled tests. NOTE(review): this section uses qp, src1_block and
     * src2_block, which are not declared in this function, and dereferences
     * h as a pointer before the later "H264Context h;" declaration, so it
     * looks like it needs work before it can be re-enabled. */
#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h->h264dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    /* Only the banner is printed; no RBSP test follows in this function. */
    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
0da71265 MN |
3092 | |
3093 | ||
cbf1eae9 | 3094 | av_cold void ff_h264_free_context(H264Context *h) |
0da71265 | 3095 | { |
5f129a05 | 3096 | int i; |
115329f1 | 3097 | |
0da71265 | 3098 | free_tables(h); //FIXME cleanup init stuff perhaps |
5f129a05 MN |
3099 | |
3100 | for(i = 0; i < MAX_SPS_COUNT; i++) | |
3101 | av_freep(h->sps_buffers + i); | |
3102 | ||
3103 | for(i = 0; i < MAX_PPS_COUNT; i++) | |
3104 | av_freep(h->pps_buffers + i); | |
15861962 RD |
3105 | } |
3106 | ||
903d58f6 | 3107 | av_cold int ff_h264_decode_end(AVCodecContext *avctx) |
15861962 RD |
3108 | { |
3109 | H264Context *h = avctx->priv_data; | |
3110 | MpegEncContext *s = &h->s; | |
3111 | ||
3112 | ff_h264_free_context(h); | |
5f129a05 | 3113 | |
0da71265 MN |
3114 | MPV_common_end(s); |
3115 | ||
3116 | // memset(h, 0, sizeof(H264Context)); | |
115329f1 | 3117 | |
0da71265 MN |
3118 | return 0; |
3119 | } | |
3120 | ||
3121 | ||
3122 | AVCodec h264_decoder = { | |
3123 | "h264", | |
312 |