757400b3372243c3e0eeb23dae5d8317ca42c235
[libav.git] / libavcodec / h264.c
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 */
20
21 /**
22 * @file h264.c
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
25 */
26
27 #include "common.h"
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264data.h"
32 #include "golomb.h"
33
34 #undef NDEBUG
35 #include <assert.h>
36
/* Rename these two identifiers so that any use of them inside this file is
 * redirected to a deliberately odd name (presumably they are MpegEncContext
 * members that must not be relied upon here -- TODO confirm). */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* Block indices beyond the 16 luma + 8 chroma blocks
 * (presumably used for the separately coded DC blocks -- TODO confirm). */
#define LUMA_DC_BLOCK_INDEX 25
#define CHROMA_DC_BLOCK_INDEX 26

/* Bit widths associated with the static VLC tables declared further down
 * (presumably the first-level lookup size; the table setup is outside this view). */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6

/* Capacity of H264Context.sps_buffer[] and H264Context.pps_buffer[]. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

/* Capacity of H264Context.mmco[]. */
#define MAX_MMCO_COUNT 66
54
/**
 * Sequence parameter set.
 * The trailing comments name the bitstream syntax element a member mirrors.
 */
typedef struct SPS{

    int profile_idc;
    int level_idc;
    int multiple_slice_groups;         ///< more_than_one_slice_group_allowed_flag
    int arbitrary_slice_order;         ///< arbitrary_slice_order_allowed_flag
    int redundant_slices;              ///< redundant_slices_allowed_flag
    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
    int poc_type;                      ///< pic_order_cnt_type
    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4 (NOTE(review): unclear from here whether the +4 is already applied -- confirm against the parser)
    int delta_pic_order_always_zero_flag;
    int offset_for_non_ref_pic;
    int offset_for_top_to_bottom_field;
    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
    int ref_frame_count;               ///< num_ref_frames
    int required_frame_num_update_behaviour_flag;
    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
    int frame_mbs_only_flag;
    int mb_aff;                        ///< mb_adaptive_frame_field_flag
    int direct_8x8_inference_flag;
    int vui_parameters_present_flag;
    int sar_width;                     ///< presumably the sample aspect ratio numerator -- TODO confirm
    int sar_height;                    ///< presumably the sample aspect ratio denominator -- TODO confirm
    short offset_for_ref_frame[256];   ///< fixed capacity of 256 entries; FIXME allocate dynamically?
}SPS;
84
/**
 * Picture parameter set.
 * The trailing comments name the bitstream syntax element a member mirrors.
 */
typedef struct PPS{
    int sps_id;                  ///< id of the sequence parameter set this PPS refers to (presumably an index into sps_buffer -- TODO confirm)
    int cabac;                   ///< entropy_coding_mode_flag
    int pic_order_present;       ///< pic_order_present_flag
    int slice_group_count;       ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];            ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;           ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                 ///< pic_init_qp_minus26 + 26
    int init_qs;                 ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred;  ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
    // NOTE(review): the frame_cropping_* elements are sequence level syntax in the
    // final H.264 specification -- confirm whether these belong in SPS.
    int crop;                    ///< frame_cropping_flag
    int crop_left;               ///< frame_cropping_rect_left_offset
    int crop_right;              ///< frame_cropping_rect_right_offset
    int crop_top;                ///< frame_cropping_rect_top_offset
    int crop_bottom;             ///< frame_cropping_rect_bottom_offset
}PPS;
109
/**
 * Memory management control operation opcode.
 * The enumerators take the consecutive values 0..6; the descriptions below
 * are derived from the enumerator names (presumably they match
 * memory_management_control_operation in the bitstream -- TODO confirm).
 */
typedef enum MMCOOpcode{
    MMCO_END=0,          ///< 0: end of the operation list
    MMCO_SHORT2UNUSED,   ///< 1: short term reference -> unused
    MMCO_LONG2UNUSED,    ///< 2: long term reference -> unused
    MMCO_SHORT2LONG,     ///< 3: short term reference -> long term reference
    MMCO_SET_MAX_LONG,   ///< 4: set the maximum long term index
    MMCO_RESET,          ///< 5: reset
    MMCO_LONG,           ///< 6: long term related operation (NOTE(review): exact meaning not visible here)
} MMCOOpcode;
122
/**
 * Memory management control operation.
 * One entry of H264Context.mmco[]: an opcode plus its operands.
 */
typedef struct MMCO{
    MMCOOpcode opcode;   ///< which operation to perform
    int short_frame_num; ///< operand naming a short term picture (presumably only meaningful for the SHORT2* opcodes -- TODO confirm)
    int long_index;      ///< operand naming a long term index (presumably only meaningful for the *LONG* opcodes -- TODO confirm)
} MMCO;
131
/**
 * H264Context
 * Per-stream codec state. Embeds the generic MpegEncContext as its first
 * member and adds the H.264 specific parameter sets, per-macroblock caches,
 * reference picture bookkeeping and slice level state.
 */
typedef struct H264Context{
    MpegEncContext s;
    int nal_ref_idc;     ///< set by decode_nal() from the bits above the low 5 bits of the first NAL byte
    int nal_unit_type;   ///< set by decode_nal() from the low 5 bits of the first NAL byte, one of NAL_*
#define NAL_SLICE 1
#define NAL_DPA 2
#define NAL_DPB 3
#define NAL_DPC 4
#define NAL_IDR_SLICE 5
#define NAL_SEI 6
#define NAL_SPS 7
#define NAL_PPS 8
#define NAL_PICTURE_DELIMITER 9
#define NAL_FILTER_DATA 10
    uint8_t *rbsp_buffer;      ///< presumably scratch space for the unescaped NAL payload -- TODO confirm
    int rbsp_buffer_size;

    int mb_stride; ///< stride of some mb tables

    int chroma_qp; //QPc

    int prev_mb_skiped; //FIXME remove (IMHO not used)

    //prediction stuff
    int chroma_pred_mode;
    int intra16x16_pred_mode;

    int8_t intra4x4_pred_mode_cache[5*8]; ///< addressed as x + 8*y; filled by fill_caches()
    int8_t (*intra4x4_pred_mode)[8];      ///< per macroblock: entries 0..6 are written by write_back_intra_pred_mode()
    void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
    void (*pred8x8 [4+3])(uint8_t *src, int stride);
    void (*pred16x16[4+3])(uint8_t *src, int stride);
    // Bit masks set up by fill_caches() for intra macroblocks; a cleared bit
    // marks the neighbouring samples of one block as unavailable.
    unsigned int topleft_samples_available;
    unsigned int top_samples_available;
    unsigned int topright_samples_available;
    unsigned int left_samples_available;

    /**
     * non zero coeff count cache.
     * is 64 if not available.
     */
    uint8_t non_zero_count_cache[6*8];
    uint8_t (*non_zero_count)[16];        ///< per macroblock: entries 0..12 are written by write_back_non_zero_count()

    /**
     * Motion vector cache.
     * Indexed as [list][x + 8*y][component].
     */
    int16_t mv_cache[2][5*8][2];
    int8_t ref_cache[2][5*8];             ///< reference index cache, or one of the two markers below
#define LIST_NOT_USED -1 //FIXME rename?
#define PART_NOT_AVAILABLE -2

    /**
     * is 1 if the specific list MV&references are set to 0,0,-2.
     */
    int mv_cache_clean[2];

    int block_offset[16+8];
    int chroma_subblock_offset[16]; //FIXME remove

    uint16_t *mb2b_xy; //FIXME are these 4 a good idea?
    uint16_t *mb2b8_xy;
    int b_stride;   ///< row stride used when indexing motion_val
    int b8_stride;  ///< row stride used when indexing ref_index

    SPS sps_buffer[MAX_SPS_COUNT];
    SPS sps; ///< current sps

    PPS pps_buffer[MAX_PPS_COUNT];
    /**
     * current pps
     */
    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?

    int slice_num;             ///< compared against slice_table[] entries to decide whether a neighbour belongs to the current slice
    uint8_t *slice_table_base;
    uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
    int slice_type;
    int slice_type_fixed;

    //interlacing specific flags
    int mb_field_decoding_flag;

    int sub_mb_type[4];

    //POC stuff
    int poc_lsb;
    int poc_msb;
    int delta_poc_bottom;
    int delta_poc[2];
    int frame_num;
    int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
    int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
    int frame_num_offset; ///< for POC type 2
    int prev_frame_num_offset; ///< for POC type 2
    int prev_frame_num; ///< frame_num of the last pic for POC type 1/2

    /**
     * frame_num for frames or 2*frame_num for field pics.
     */
    int curr_pic_num;

    /**
     * max_frame_num or 2*max_frame_num for field pics.
     */
    int max_pic_num;

    //Weighted pred stuff
    int luma_log2_weight_denom;
    int chroma_log2_weight_denom;
    int luma_weight[2][16];
    int luma_offset[2][16];
    int chroma_weight[2][16][2];
    int chroma_offset[2][16][2];

    //deblock
    int disable_deblocking_filter_idc;
    int slice_alpha_c0_offset_div2;
    int slice_beta_offset_div2;

    int redundant_pic_count;

    int direct_spatial_mv_pred;

    /**
     * num_ref_idx_l0/1_active_minus1 + 1
     */
    int ref_count[2];// FIXME split for AFF
    Picture *short_ref[16];
    Picture *long_ref[16];
    Picture default_ref_list[2][32];
    Picture ref_list[2][32]; //FIXME size?
    Picture field_ref_list[2][32]; //FIXME size?

    /**
     * memory management control operations buffer.
     */
    MMCO mmco[MAX_MMCO_COUNT];
    int mmco_index;

    int long_ref_count; ///< number of actual long term references
    int short_ref_count; ///< number of actual short term references

    //data partitioning
    GetBitContext intra_gb;
    GetBitContext inter_gb;
    GetBitContext *intra_gb_ptr;
    GetBitContext *inter_gb_ptr;

    DCTELEM mb[16*24] __align8; ///< coefficient storage sized for 24 blocks of 16 coefficients
}H264Context;
286
/* Static VLC tables shared by all decoder instances. Their setup code is not
 * part of this view; the names suggest coefficient token, total zeros and
 * run-before tables (presumably for CAVLC residual decoding -- TODO confirm). */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];
static VLC run7_vlc;
295
/**
 * Fills a small rectangle with one value.
 * Only these (row width in bytes, height) combinations are handled:
 * 2x2, 2x4, 4x2, 4x4, 8x1, 8x2, 8x4, 16x2 and 16x4; anything else hits
 * assert(0). Rows are written with 16, 32 or 64 bit stores, so p has to be
 * aligned accordingly.
 * @param p top left element of the rectangle
 * @param w width of the rectangle in elements, should be a constant
 * @param h height of the rectangle in rows, should be a constant
 * @param stride distance between two rows in elements
 * @param val value stored into every element
 * @param size the size of val (1 or 4), should be a constant
 */
static inline void fill_rectangle(void *p, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t * const base= p;
    int row_bytes, pitch, y;

    assert(size==1 || size==4);

    row_bytes= w*size;
    pitch    = stride*size;

    //FIXME check what gcc generates for 64 bit on x86 and possible write a 32 bit ver of it
    if(row_bytes==2 && (h==2 || h==4)){
        const uint16_t v16= size==4 ? val : val*0x0101;
        for(y=0; y<h; y++)
            *(uint16_t*)(base + y*pitch)= v16;
    }else if(row_bytes==4 && (h==2 || h==4)){
        const uint32_t v32= size==4 ? val : val*0x01010101;
        for(y=0; y<h; y++)
            *(uint32_t*)(base + y*pitch)= v32;
    }else if(row_bytes==8 && (h==1 || h==2)){
        // these two shapes use a pair of 32 bit stores per row
        const uint32_t v32= size==4 ? val : val*0x01010101;
        for(y=0; y<h; y++){
            *(uint32_t*)(base + 0 + y*pitch)= v32;
            *(uint32_t*)(base + 4 + y*pitch)= v32;
        }
    }else if(row_bytes==8 && h==4){
        const uint64_t v64= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
        for(y=0; y<h; y++)
            *(uint64_t*)(base + y*pitch)= v64;
    }else if(row_bytes==16 && (h==2 || h==4)){
        const uint64_t v64= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
        for(y=0; y<h; y++){
            *(uint64_t*)(base + 0 + y*pitch)= v64;
            *(uint64_t*)(base + 8 + y*pitch)= v64;
        }
    }else
        assert(0);
}
355
356 static inline void fill_caches(H264Context *h, int mb_type){
357 MpegEncContext * const s = &h->s;
358 const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
359 int topleft_xy, top_xy, topright_xy, left_xy[2];
360 int topleft_type, top_type, topright_type, left_type[2];
361 int left_block[4];
362 int i;
363
364 //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it
365
366 if(h->sps.mb_aff){
367 //FIXME
368 }else{
369 topleft_xy = mb_xy-1 - h->mb_stride;
370 top_xy = mb_xy - h->mb_stride;
371 topright_xy= mb_xy+1 - h->mb_stride;
372 left_xy[0] = mb_xy-1;
373 left_xy[1] = mb_xy-1;
374 left_block[0]= 0;
375 left_block[1]= 1;
376 left_block[2]= 2;
377 left_block[3]= 3;
378 }
379
380 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
381 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
382 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
383 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
384 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
385
386 if(IS_INTRA(mb_type)){
387 h->topleft_samples_available=
388 h->top_samples_available=
389 h->left_samples_available= 0xFFFF;
390 h->topright_samples_available= 0xEEEA;
391
392 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
393 h->topleft_samples_available= 0xB3FF;
394 h->top_samples_available= 0x33FF;
395 h->topright_samples_available= 0x26EA;
396 }
397 for(i=0; i<2; i++){
398 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
399 h->topleft_samples_available&= 0xDF5F;
400 h->left_samples_available&= 0x5F5F;
401 }
402 }
403
404 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
405 h->topleft_samples_available&= 0x7FFF;
406
407 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
408 h->topright_samples_available&= 0xFBFF;
409
410 if(IS_INTRA4x4(mb_type)){
411 if(IS_INTRA4x4(top_type)){
412 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
413 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
414 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
415 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
416 }else{
417 int pred;
418 if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
419 pred= 2;
420 else{
421 pred= -1;
422 }
423 h->intra4x4_pred_mode_cache[4+8*0]=
424 h->intra4x4_pred_mode_cache[5+8*0]=
425 h->intra4x4_pred_mode_cache[6+8*0]=
426 h->intra4x4_pred_mode_cache[7+8*0]= pred;
427 }
428 for(i=0; i<2; i++){
429 if(IS_INTRA4x4(left_type[i])){
430 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
431 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
432 }else{
433 int pred;
434 if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
435 pred= 2;
436 else{
437 pred= -1;
438 }
439 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
440 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
441 }
442 }
443 }
444 }
445
446
447 /*
448 0 . T T. T T T T
449 1 L . .L . . . .
450 2 L . .L . . . .
451 3 . T TL . . . .
452 4 L . .L . . . .
453 5 L . .. . . . .
454 */
455 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
456 if(top_type){
457 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
458 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
459 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
460 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
461
462 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
463 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
464
465 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
466 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
467 }else{
468 h->non_zero_count_cache[4+8*0]=
469 h->non_zero_count_cache[5+8*0]=
470 h->non_zero_count_cache[6+8*0]=
471 h->non_zero_count_cache[7+8*0]=
472
473 h->non_zero_count_cache[1+8*0]=
474 h->non_zero_count_cache[2+8*0]=
475
476 h->non_zero_count_cache[1+8*3]=
477 h->non_zero_count_cache[2+8*3]= 64;
478 }
479
480 if(left_type[0]){
481 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6];
482 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5];
483 h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block
484 h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12];
485 }else{
486 h->non_zero_count_cache[3+8*1]=
487 h->non_zero_count_cache[3+8*2]=
488 h->non_zero_count_cache[0+8*1]=
489 h->non_zero_count_cache[0+8*4]= 64;
490 }
491
492 if(left_type[1]){
493 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4];
494 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3];
495 h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8];
496 h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11];
497 }else{
498 h->non_zero_count_cache[3+8*3]=
499 h->non_zero_count_cache[3+8*4]=
500 h->non_zero_count_cache[0+8*2]=
501 h->non_zero_count_cache[0+8*5]= 64;
502 }
503
504 #if 1
505 if(IS_INTER(mb_type)){
506 int list;
507 for(list=0; list<2; list++){
508 if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
509 /*if(!h->mv_cache_clean[list]){
510 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
511 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
512 h->mv_cache_clean[list]= 1;
513 }*/
514 continue; //FIXME direct mode ...
515 }
516 h->mv_cache_clean[list]= 0;
517
518 if(IS_INTER(topleft_type)){
519 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
520 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
521 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
522 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
523 }else{
524 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
525 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
526 }
527
528 if(IS_INTER(top_type)){
529 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
530 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
531 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
532 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
533 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
534 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
535 h->ref_cache[list][scan8[0] + 0 - 1*8]=
536 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
537 h->ref_cache[list][scan8[0] + 2 - 1*8]=
538 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
539 }else{
540 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
541 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
542 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
543 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
544 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
545 }
546
547 if(IS_INTER(topright_type)){
548 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
549 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
550 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
551 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
552 }else{
553 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
554 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
555 }
556
557 //FIXME unify cleanup or sth
558 if(IS_INTER(left_type[0])){
559 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
560 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
561 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
562 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
563 h->ref_cache[list][scan8[0] - 1 + 0*8]=
564 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
565 }else{
566 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
567 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
568 h->ref_cache[list][scan8[0] - 1 + 0*8]=
569 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
570 }
571
572 if(IS_INTER(left_type[1])){
573 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
574 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
575 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
576 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
577 h->ref_cache[list][scan8[0] - 1 + 2*8]=
578 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
579 }else{
580 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
581 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
582 h->ref_cache[list][scan8[0] - 1 + 2*8]=
583 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
584 }
585
586 h->ref_cache[list][scan8[5 ]+1] =
587 h->ref_cache[list][scan8[7 ]+1] =
588 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewher else)
589 h->ref_cache[list][scan8[4 ]] =
590 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
591 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
592 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
593 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewher else)
594 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
595 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
596 }
597 //FIXME
598
599 }
600 #endif
601 }
602
603 static inline void write_back_intra_pred_mode(H264Context *h){
604 MpegEncContext * const s = &h->s;
605 const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
606
607 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
608 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
609 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
610 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
611 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
612 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
613 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
614 }
615
616 /**
617 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
618 */
619 static inline int check_intra4x4_pred_mode(H264Context *h){
620 MpegEncContext * const s = &h->s;
621 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
622 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
623 int i;
624
625 if(!(h->top_samples_available&0x8000)){
626 for(i=0; i<4; i++){
627 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
628 if(status<0){
629 fprintf(stderr, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
630 return -1;
631 } else if(status){
632 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
633 }
634 }
635 }
636
637 if(!(h->left_samples_available&0x8000)){
638 for(i=0; i<4; i++){
639 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
640 if(status<0){
641 fprintf(stderr, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
642 return -1;
643 } else if(status){
644 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
645 }
646 }
647 }
648
649 return 0;
650 } //FIXME cleanup like next
651
652 /**
653 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
654 */
655 static inline int check_intra_pred_mode(H264Context *h, int mode){
656 MpegEncContext * const s = &h->s;
657 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
658 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
659
660 if(!(h->top_samples_available&0x8000)){
661 mode= top[ mode ];
662 if(mode<0){
663 fprintf(stderr, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
664 return -1;
665 }
666 }
667
668 if(!(h->left_samples_available&0x8000)){
669 mode= left[ mode ];
670 if(mode<0){
671 fprintf(stderr, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
672 return -1;
673 }
674 }
675
676 return mode;
677 }
678
679 /**
680 * gets the predicted intra4x4 prediction mode.
681 */
682 static inline int pred_intra_mode(H264Context *h, int n){
683 const int index8= scan8[n];
684 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
685 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
686 const int min= FFMIN(left, top);
687
688 #ifdef TRACE
689 printf("mode:%d %d min:%d\n", left ,top, min);
690 #endif
691
692 if(min<0) return DC_PRED;
693 else return min;
694 }
695
696 static inline void write_back_non_zero_count(H264Context *h){
697 MpegEncContext * const s = &h->s;
698 const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
699
700 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[4+8*4];
701 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[5+8*4];
702 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[6+8*4];
703 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
704 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[7+8*3];
705 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[7+8*2];
706 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[7+8*1];
707
708 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[1+8*2];
709 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
710 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[2+8*1];
711
712 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[1+8*5];
713 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
714 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[2+8*4];
715 }
716
717 /**
718 * gets the predicted number of non zero coefficients.
719 * @param n block index
720 */
721 static inline int pred_non_zero_count(H264Context *h, int n){
722 const int index8= scan8[n];
723 const int left= h->non_zero_count_cache[index8 - 1];
724 const int top = h->non_zero_count_cache[index8 - 8];
725 int i= left + top;
726
727 if(i<64) i= (i+1)>>1;
728
729 #ifdef TRACE
730 printf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
731 #endif
732
733 return i&31;
734 }
735
736 /**
737 * gets the predicted MV.
738 * @param n the block index
739 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
740 * @param mx the x component of the predicted motion vector
741 * @param my the y component of the predicted motion vector
742 */
743 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
744 MpegEncContext * const s = &h->s;
745 const int index8= scan8[n];
746 const int top_ref= h->ref_cache[list][ index8 - 8 ];
747 const int topright_ref= h->ref_cache[list][ index8 - 8 + part_width ];
748 const int left_ref= h->ref_cache[list][ index8 - 1 ];
749 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
750 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
751 const int16_t * const C= h->mv_cache[list][ index8 - 8 + part_width ];
752
753 assert(part_width==1 || part_width==2 || part_width==4);
754
755 /* mv_cache
756 B . . A T T T T
757 U . . L . . , .
758 U . . L . . . .
759 U . . L . . , .
760 . . . L . . . .
761 */
762 if(topright_ref != PART_NOT_AVAILABLE){
763 if((topright_ref==ref) + (top_ref==ref) + (left_ref==ref) == 1){
764 *mx= A[0] + B[0] + C[0];
765 *my= A[1] + B[1] + C[1];
766 }else{
767 *mx= mid_pred(A[0], B[0], C[0]);
768 *my= mid_pred(A[1], B[1], C[1]);
769 }
770 }else{
771 const int topleft_ref= h->ref_cache[list][ index8 - 9 ];
772 const int16_t * const D= h->mv_cache[list][ index8 - 9 ];
773 if(top_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ //FIXME check rare FMO case where std isnt clear
774 *mx= A[0];
775 *my= A[1];
776 }else{
777 if((topleft_ref==ref) + (top_ref==ref) + (left_ref==ref) == 1){
778 *mx= A[0] + B[0] + D[0];
779 *my= A[1] + B[1] + D[1];
780 }else{
781 *mx= mid_pred(A[0], B[0], D[0]);
782 *my= mid_pred(A[1], B[1], D[1]);
783 }
784 }
785
786 #ifdef TRACE
787 printf("topleft: %2d %2d %2d ", topleft_ref, D[0], D[1]);
788 #endif
789 }
790 #ifdef TRACE
791 printf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], topright_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, s->mb_x, s->mb_y, n, list);
792 #endif
793 }
794
795 /**
796 * gets the directionally predicted 16x8 MV.
797 * @param n the block index
798 * @param mx the x component of the predicted motion vector
799 * @param my the y component of the predicted motion vector
800 */
801 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
802 MpegEncContext * const s = &h->s;
803 if(n==0){
804 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
805 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
806
807 #ifdef TRACE
808 printf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], s->mb_x, s->mb_y, n, list);
809 #endif
810
811 if(top_ref == ref){
812 *mx= B[0];
813 *my= B[1];
814 return;
815 }
816 }else{
817 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
818 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
819
820 #ifdef TRACE
821 printf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], s->mb_x, s->mb_y, n, list);
822 #endif
823
824 if(left_ref == ref){
825 *mx= A[0];
826 *my= A[1];
827 return;
828 }
829 }
830
831 //RARE
832 pred_motion(h, n, 4, list, ref, mx, my);
833 }
834
835 /**
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
840 */
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 MpegEncContext * const s = &h->s;
843 if(n==0){
844 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
845 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846
847 #ifdef TRACE
848 printf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], s->mb_x, s->mb_y, n, list);
849 #endif
850
851 if(left_ref == ref){
852 *mx= A[0];
853 *my= A[1];
854 return;
855 }
856 }else{
857 const int topright_ref= h->ref_cache[list][ scan8[4] - 8 + 2 ];
858 const int16_t * const C= h->mv_cache[list][ scan8[4] - 8 + 2 ];
859
860 #ifdef TRACE
861 printf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", topright_ref, C[0], C[1], s->mb_x, s->mb_y, n, list);
862 #endif
863
864 if(topright_ref == ref){
865 *mx= C[0];
866 *my= C[1];
867 return;
868 }
869
870 if(topright_ref == PART_NOT_AVAILABLE){ //insanity ...
871 const int topleft_ref= h->ref_cache[list][ scan8[4] - 9 ];
872 const int16_t * const D= h->mv_cache[list][ scan8[4] - 9 ];
873
874 #ifdef TRACE
875 printf("pred_8x16: insanity (%2d %2d %2d) at %2d %2d %d list %d", topleft_ref, D[0], D[1], s->mb_x, s->mb_y, n, list);
876 #endif
877 if(topleft_ref == ref){
878 *mx= D[0];
879 *my= D[1];
880 return;
881 }
882 }
883 }
884
885 //RARE
886 pred_motion(h, n, 2, list, ref, mx, my);
887 }
888
889 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
890 MpegEncContext * const s = &h->s;
891 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
892 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
893
894 #ifdef TRACE
895 printf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, s->mb_x, s->mb_y);
896 #endif
897
898 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
899 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
900 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
901
902 *mx = *my = 0;
903 return;
904 }
905
906 pred_motion(h, 0, 4, 0, 0, mx, my);
907
908 return;
909 }
910
911 static inline void write_back_motion(H264Context *h, int mb_type){
912 MpegEncContext * const s = &h->s;
913 const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
914 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
915 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
916 int list;
917
918 for(list=0; list<2; list++){
919 int y;
920 if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
921 if(1){ //FIXME skip or never read if mb_type doesnt use it
922 for(y=0; y<4; y++){
923 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
924 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
925 }
926 for(y=0; y<2; y++){
927 *(uint16_t*)s->current_picture.motion_val[list][b8_xy + y*h->b8_stride]= (LIST_NOT_USED&0xFF)*0x0101;
928 }
929 }
930 continue; //FIXME direct mode ...
931 }
932
933 for(y=0; y<4; y++){
934 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
935 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
936 }
937 for(y=0; y<2; y++){
938 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
939 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
940 }
941 }
942 }
943
944 /**
945 * Decodes a network abstraction layer unit.
946 * @param consumed is the number of bytes used as input
947 * @param length is the length of the array
948 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp ttailing?
949 * @returns decoded bytes, might be src+1 if no escapes
950 */
951 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
952 int i, si, di;
953 uint8_t *dst;
954
955 // src[0]&0x80; //forbidden bit
956 h->nal_ref_idc= src[0]>>5;
957 h->nal_unit_type= src[0]&0x1F;
958
959 src++; length--;
960 #if 0
961 for(i=0; i<length; i++)
962 printf("%2X ", src[i]);
963 #endif
964 for(i=0; i+1<length; i+=2){
965 if(src[i]) continue;
966 if(i>0 && src[i-1]==0) i--;
967 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
968 if(src[i+2]!=3){
969 /* startcode, so we must be past the end */
970 length=i;
971 }
972 break;
973 }
974 }
975
976 if(i>=length-1){ //no escaped 0
977 *dst_length= length;
978 *consumed= length+1; //+1 for the header
979 return src;
980 }
981
982 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
983 dst= h->rbsp_buffer;
984
985 //printf("deoding esc\n");
986 si=di=0;
987 while(si<length){
988 //remove escapes (very rare 1:2^22)
989 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
990 if(src[si+2]==3){ //escape
991 dst[di++]= 0;
992 dst[di++]= 0;
993 si+=3;
994 }else //next start code
995 break;
996 }
997
998 dst[di++]= src[si++];
999 }
1000
1001 *dst_length= di;
1002 *consumed= si + 1;//+1 for the header
1003 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1004 return dst;
1005 }
1006
1007 /**
1008 * @param src the data which should be escaped
1009 * @param dst the target buffer, dst+1 == src is allowed as a special case
1010 * @param length the length of the src data
1011 * @param dst_length the length of the dst array
1012 * @returns length of escaped data in bytes or -1 if an error occured
1013 */
1014 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1015 int i, escape_count, si, di;
1016 uint8_t *temp;
1017
1018 assert(length>=0);
1019 assert(dst_length>0);
1020
1021 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1022
1023 if(length==0) return 1;
1024
1025 escape_count= 0;
1026 for(i=0; i<length; i+=2){
1027 if(src[i]) continue;
1028 if(i>0 && src[i-1]==0)
1029 i--;
1030 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1031 escape_count++;
1032 i+=2;
1033 }
1034 }
1035
1036 if(escape_count==0){
1037 if(dst+1 != src)
1038 memcpy(dst+1, src, length);
1039 return length + 1;
1040 }
1041
1042 if(length + escape_count + 1> dst_length)
1043 return -1;
1044
1045 //this should be damn rare (hopefully)
1046
1047 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1048 temp= h->rbsp_buffer;
1049 //printf("encoding esc\n");
1050
1051 si= 0;
1052 di= 0;
1053 while(si < length){
1054 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1055 temp[di++]= 0; si++;
1056 temp[di++]= 0; si++;
1057 temp[di++]= 3;
1058 temp[di++]= src[si++];
1059 }
1060 else
1061 temp[di++]= src[si++];
1062 }
1063 memcpy(dst+1, temp, length+escape_count);
1064
1065 assert(di == length+escape_count);
1066
1067 return di + 1;
1068 }
1069
1070 /**
1071 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1072 */
1073 static void encode_rbsp_trailing(PutBitContext *pb){
1074 int length;
1075 put_bits(pb, 1, 1);
1076 length= (-get_bit_count(pb))&7;
1077 if(length) put_bits(pb, length, 0);
1078 }
1079
/**
 * identifies the exact end of the bitstream
 * Only the byte at src is examined: the position of its lowest set bit is
 * searched, starting from the least significant bit.
 * @return the length of the trailing (1..8), or 0 if damaged (no bit is set)
 */
static int decode_rbsp_trailing(uint8_t *src){
    const int last_byte= src[0];
    int bit;

#ifdef TRACE
    printf("rbsp trailing %X\n", last_byte);
#endif

    for(bit=0; bit<8; bit++){
        if((last_byte>>bit) & 1)
            return bit + 1;
    }
    return 0;
}
1098
1099 /**
1100 * idct tranforms the 16 dc values and dequantize them.
1101 * @param qp quantization parameter
1102 */
1103 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1104 const int qmul= dequant_coeff[qp][0];
1105 #define stride 16
1106 int i;
1107 int temp[16]; //FIXME check if this is a good idea
1108 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1109 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1110
1111 //memset(block, 64, 2*256);
1112 //return;
1113 for(i=0; i<4; i++){
1114 const int offset= y_offset[i];
1115 const int z0= block[offset+stride*0] + block[offset+stride*4];
1116 const int z1= block[offset+stride*0] - block[offset+stride*4];
1117 const int z2= block[offset+stride*1] - block[offset+stride*5];
1118 const int z3= block[offset+stride*1] + block[offset+stride*5];
1119
1120 temp[4*i+0]= z0+z3;
1121 temp[4*i+1]= z1+z2;
1122 temp[4*i+2]= z1-z2;
1123 temp[4*i+3]= z0-z3;
1124 }
1125
1126 for(i=0; i<4; i++){
1127 const int offset= x_offset[i];
1128 const int z0= temp[4*0+i] + temp[4*2+i];
1129 const int z1= temp[4*0+i] - temp[4*2+i];
1130 const int z2= temp[4*1+i] - temp[4*3+i];
1131 const int z3= temp[4*1+i] + temp[4*3+i];
1132
1133 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1134 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1135 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1136 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1137 }
1138 }
1139
1140 /**
1141 * dct tranforms the 16 dc values.
1142 * @param qp quantization parameter ??? FIXME
1143 */
1144 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1145 // const int qmul= dequant_coeff[qp][0];
1146 int i;
1147 int temp[16]; //FIXME check if this is a good idea
1148 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1149 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1150
1151 for(i=0; i<4; i++){
1152 const int offset= y_offset[i];
1153 const int z0= block[offset+stride*0] + block[offset+stride*4];
1154 const int z1= block[offset+stride*0] - block[offset+stride*4];
1155 const int z2= block[offset+stride*1] - block[offset+stride*5];
1156 const int z3= block[offset+stride*1] + block[offset+stride*5];
1157
1158 temp[4*i+0]= z0+z3;
1159 temp[4*i+1]= z1+z2;
1160 temp[4*i+2]= z1-z2;
1161 temp[4*i+3]= z0-z3;
1162 }
1163
1164 for(i=0; i<4; i++){
1165 const int offset= x_offset[i];
1166 const int z0= temp[4*0+i] + temp[4*2+i];
1167 const int z1= temp[4*0+i] - temp[4*2+i];
1168 const int z2= temp[4*1+i] - temp[4*3+i];
1169 const int z3= temp[4*1+i] + temp[4*3+i];
1170
1171 block[stride*0 +offset]= (z0 + z3)>>1;
1172 block[stride*2 +offset]= (z1 + z2)>>1;
1173 block[stride*8 +offset]= (z1 - z2)>>1;
1174 block[stride*10+offset]= (z0 - z3)>>1;
1175 }
1176 }
1177 #undef xStride
1178 #undef stride
1179
1180 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
1181 const int qmul= dequant_coeff[qp][0];
1182 const int stride= 16*2;
1183 const int xStride= 16;
1184 int a,b,c,d,e;
1185
1186 a= block[stride*0 + xStride*0];
1187 b= block[stride*0 + xStride*1];
1188 c= block[stride*1 + xStride*0];
1189 d= block[stride*1 + xStride*1];
1190
1191 e= a-b;
1192 a= a+b;
1193 b= c-d;
1194 c= c+d;
1195
1196 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
1197 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
1198 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
1199 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
1200 }
1201
1202 static void chroma_dc_dct_c(DCTELEM *block){
1203 const int stride= 16*2;
1204 const int xStride= 16;
1205 int a,b,c,d,e;
1206
1207 a= block[stride*0 + xStride*0];
1208 b= block[stride*0 + xStride*1];
1209 c= block[stride*1 + xStride*0];
1210 d= block[stride*1 + xStride*1];
1211
1212 e= a-b;
1213 a= a+b;
1214 b= c-d;
1215 c= c+d;
1216
1217 block[stride*0 + xStride*0]= (a+c);
1218 block[stride*0 + xStride*1]= (e+b);
1219 block[stride*1 + xStride*0]= (a-c);
1220 block[stride*1 + xStride*1]= (e-b);
1221 }
1222
1223 /**
1224 * gets the chroma qp.
1225 */
1226 static inline int get_chroma_qp(H264Context *h, int qscale){
1227
1228 return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)];
1229 }
1230
1231
1232 /**
1233 *
1234 */
1235 static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){
1236 int i;
1237 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1238
1239 block[0] += 32;
1240 #if 1
1241 for(i=0; i<4; i++){
1242 const int z0= block[i + 4*0] + block[i + 4*2];
1243 const int z1= block[i + 4*0] - block[i + 4*2];
1244 const int z2= (block[i + 4*1]>>1) - block[i + 4*3];
1245 const int z3= block[i + 4*1] + (block[i + 4*3]>>1);
1246
1247 block[i + 4*0]= z0 + z3;
1248 block[i + 4*1]= z1 + z2;
1249 block[i + 4*2]= z1 - z2;
1250 block[i + 4*3]= z0 - z3;
1251 }
1252
1253 for(i=0; i<4; i++){
1254 const int z0= block[0 + 4*i] + block[2 + 4*i];
1255 const int z1= block[0 + 4*i] - block[2 + 4*i];
1256 const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i];
1257 const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1);
1258
1259 dst[0 + i*stride]= cm[ dst[0 + i*stride] + ((z0 + z3) >> 6) ];
1260 dst[1 + i*stride]= cm[ dst[1 + i*stride] + ((z1 + z2) >> 6) ];
1261 dst[2 + i*stride]= cm[ dst[2 + i*stride] + ((z1 - z2) >> 6) ];
1262 dst[3 + i*stride]= cm[ dst[3 + i*stride] + ((z0 - z3) >> 6) ];
1263 }
1264 #else
1265 for(i=0; i<4; i++){
1266 const int z0= block[0 + 4*i] + block[2 + 4*i];
1267 const int z1= block[0 + 4*i] - block[2 + 4*i];
1268 const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i];
1269 const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1);
1270
1271 block[0 + 4*i]= z0 + z3;
1272 block[1 + 4*i]= z1 + z2;
1273 block[2 + 4*i]= z1 - z2;
1274 block[3 + 4*i]= z0 - z3;
1275 }
1276
1277 for(i=0; i<4; i++){
1278 const int z0= block[i + 4*0] + block[i + 4*2];
1279 const int z1= block[i + 4*0] - block[i + 4*2];
1280 const int z2= (block[i + 4*1]>>1) - block[i + 4*3];
1281 const int z3= block[i + 4*1] + (block[i + 4*3]>>1);
1282
1283 dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ];
1284 dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ];
1285 dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ];
1286 dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ];
1287 }
1288 #endif
1289 }
1290
1291 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1292 int i;
1293 //FIXME try int temp instead of block
1294
1295 for(i=0; i<4; i++){
1296 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1297 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1298 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1299 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1300 const int z0= d0 + d3;
1301 const int z3= d0 - d3;
1302 const int z1= d1 + d2;
1303 const int z2= d1 - d2;
1304
1305 block[0 + 4*i]= z0 + z1;
1306 block[1 + 4*i]= 2*z3 + z2;
1307 block[2 + 4*i]= z0 - z1;
1308 block[3 + 4*i]= z3 - 2*z2;
1309 }
1310
1311 for(i=0; i<4; i++){
1312 const int z0= block[0*4 + i] + block[3*4 + i];
1313 const int z3= block[0*4 + i] - block[3*4 + i];
1314 const int z1= block[1*4 + i] + block[2*4 + i];
1315 const int z2= block[1*4 + i] - block[2*4 + i];
1316
1317 block[0*4 + i]= z0 + z1;
1318 block[1*4 + i]= 2*z3 + z2;
1319 block[2*4 + i]= z0 - z1;
1320 block[3*4 + i]= z3 - 2*z2;
1321 }
1322 }
1323
1324 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, iam not sure, its very close
1325 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
1326 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1327 int i;
1328 const int * const quant_table= quant_coeff[qscale];
1329 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1330 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1331 const unsigned int threshold2= (threshold1<<1);
1332 int last_non_zero;
1333
1334 if(seperate_dc){
1335 if(qscale<=18){
1336 //avoid overflows
1337 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1338 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1339 const unsigned int dc_threshold2= (dc_threshold1<<1);
1340
1341 int level= block[0]*quant_coeff[qscale+18][0];
1342 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1343 if(level>0){
1344 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1345 block[0]= level;
1346 }else{
1347 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1348 block[0]= -level;
1349 }
1350 // last_non_zero = i;
1351 }else{
1352 block[0]=0;
1353 }
1354 }else{
1355 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1356 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1357 const unsigned int dc_threshold2= (dc_threshold1<<1);
1358
1359 int level= block[0]*quant_table[0];
1360 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1361 if(level>0){
1362 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1363 block[0]= level;
1364 }else{
1365 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1366 block[0]= -level;
1367 }
1368 // last_non_zero = i;
1369 }else{
1370 block[0]=0;
1371 }
1372 }
1373 last_non_zero= 0;
1374 i=1;
1375 }else{
1376 last_non_zero= -1;
1377 i=0;
1378 }
1379
1380 for(; i<16; i++){
1381 const int j= scantable[i];
1382 int level= block[j]*quant_table[j];
1383
1384 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1385 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1386 if(((unsigned)(level+threshold1))>threshold2){
1387 if(level>0){
1388 level= (bias + level)>>QUANT_SHIFT;
1389 block[j]= level;
1390 }else{
1391 level= (bias - level)>>QUANT_SHIFT;
1392 block[j]= -level;
1393 }
1394 last_non_zero = i;
1395 }else{
1396 block[j]=0;
1397 }
1398 }
1399
1400 return last_non_zero;
1401 }
1402
/**
 * Vertical 4x4 prediction: the 4 pixels above the block are copied into
 * each of its 4 rows.
 * @param topright not used
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t above= *(uint32_t*)(src - stride);
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= above;
}
1410
/**
 * Horizontal 4x4 prediction: every row is filled with the pixel left of it.
 * @param topright not used
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    //unsigned constant: pixel*0x01010101 overflows a signed int for pixels >= 128
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= src[-1+y*stride]*0x01010101U;
}
1417
/**
 * DC 4x4 prediction: the block is filled with the rounded mean of the
 * 4 pixels above and the 4 pixels left of it.
 * @param topright not used
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    //unsigned constant: dc*0x01010101 overflows a signed int for dc >= 128
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
1427
/**
 * DC 4x4 prediction from the left edge only: the block is filled with the
 * rounded mean of the 4 pixels left of it.
 * @param topright not used
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    //unsigned constant: dc*0x01010101 overflows a signed int for dc >= 128
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
1436
/**
 * DC 4x4 prediction from the top edge only: the block is filled with the
 * rounded mean of the 4 pixels above it.
 * @param topright not used
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    //unsigned constant: dc*0x01010101 overflows a signed int for dc >= 128
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
1445
/**
 * DC 4x4 prediction without any neighbours: the block is filled with 128.
 * @param topright not used
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t fill= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1452
1453
/* Declares t4..t7: the 4 pixels read through the topright pointer.
 * The expansion ends with a ';', so the macro is used without one. */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];\

/* Declares l0..l3: the pixel left of src in each of the 4 rows.
 * Needs src and stride in scope. */
#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];\

/* Declares t0..t3: the 4 pixels in the row above src.
 * Needs src and stride in scope. */
#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];\
1471
/**
 * Diagonal down-right 4x4 prediction.
 * The left, top-left and top neighbours are treated as one continuous edge
 * (bottom-left to top-right) which is smoothed with a (1,2,1) filter; the
 * pixel at (x,y) then takes the filtered value number 3 - y + x.
 * @param topright not used
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    int edge[9]; //l3 l2 l1 l0 lt t0 t1 t2 t3
    int filt[7];
    int k, x, y;

    for(k=0; k<4; k++){
        edge[3-k]= src[-1+k*stride];
        edge[5+k]= src[ k-1*stride];
    }
    edge[4]= src[-1-1*stride];

    for(k=0; k<7; k++)
        filt[k]= (edge[k] + 2*edge[k+1] + edge[k+2] + 2)>>2;

    for(y=0; y<4; y++){
        for(x=0; x<4; x++)
            src[x + y*stride]= filt[3 - y + x];
    }
}
1494
/**
 * Diagonal down-left 4x4 prediction.
 * The 4 pixels above the block and the 4 pixels at topright form one edge
 * which is smoothed with a (1,2,1) filter, the last pixel being repeated for
 * the final tap; the pixel at (x,y) takes the filtered value number x + y.
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    int edge[9]; //t0 .. t7 and t7 once more
    int filt[7];
    int k, x, y;

    for(k=0; k<4; k++){
        edge[k  ]= src[k-1*stride];
        edge[4+k]= topright[k];
    }
    edge[8]= edge[7]; //turns the last tap into (t6 + 3*t7 + 2)>>2

    for(k=0; k<7; k++)
        filt[k]= (edge[k] + 2*edge[k+1] + edge[k+2] + 2)>>2;

    for(y=0; y<4; y++){
        for(x=0; x<4; x++)
            src[x + y*stride]= filt[x + y];
    }
}
1517
/**
 * Vertical-right 4x4 prediction from the top-left pixel, the 4 pixels above
 * and the first 3 pixels left of the block.
 * @param topright not used
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    //2 tap averages along the top edge
    const int avg_lt_t0= (lt + t0 + 1)>>1;
    const int avg_t0_t1= (t0 + t1 + 1)>>1;
    const int avg_t1_t2= (t1 + t2 + 1)>>1;
    const int avg_t2_t3= (t2 + t3 + 1)>>1;
    //3 tap filtered values, named after their centre pixel
    const int flt_lt= (l0 + 2*lt + t0 + 2)>>2;
    const int flt_t0= (lt + 2*t0 + t1 + 2)>>2;
    const int flt_t1= (t0 + 2*t1 + t2 + 2)>>2;
    const int flt_t2= (t1 + 2*t2 + t3 + 2)>>2;
    const int flt_l0= (lt + 2*l0 + l1 + 2)>>2;
    const int flt_l1= (l0 + 2*l1 + l2 + 2)>>2;
    uint8_t * const row0= src;
    uint8_t * const row1= src +   stride;
    uint8_t * const row2= src + 2*stride;
    uint8_t * const row3= src + 3*stride;

    row0[0]= avg_lt_t0; row0[1]= avg_t0_t1; row0[2]= avg_t1_t2; row0[3]= avg_t2_t3;
    row1[0]= flt_lt;    row1[1]= flt_t0;    row1[2]= flt_t1;    row1[3]= flt_t2;
    row2[0]= flt_l0;    row2[1]= avg_lt_t0; row2[2]= avg_t0_t1; row2[3]= avg_t1_t2;
    row3[0]= flt_l1;    row3[1]= flt_lt;    row3[2]= flt_t0;    row3[3]= flt_t1;
}
1541
/**
 * Vertical-left 4x4 prediction from the 4 pixels above the block and the
 * first 3 pixels at topright.
 * Even rows use 2 tap averages of the edge, odd rows use (1,2,1) filtered
 * values; every pair of rows is shifted by one edge position.
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    int edge[7]; //t0 .. t6
    int avg[5];
    int filt[5];
    int k, x, y;

    for(k=0; k<4; k++)
        edge[k]= src[k-1*stride];
    for(k=0; k<3; k++)
        edge[4+k]= topright[k];

    for(k=0; k<5; k++){
        avg [k]= (edge[k] + edge[k+1] + 1)>>1;
        filt[k]= (edge[k] + 2*edge[k+1] + edge[k+2] + 2)>>2;
    }

    for(y=0; y<4; y++){
        const int * const line= (y&1) ? filt : avg;
        for(x=0; x<4; x++)
            src[x + y*stride]= line[x + (y>>1)];
    }
}
1564
/**
 * Horizontal-up 4x4 prediction from the 4 pixels left of the block.
 * A sequence of alternating 2 tap averages and 3 tap filtered values is built
 * along the left edge and continued with the last left pixel; the pixel at
 * (x,y) takes the sequence element number x + 2*y.
 * @param topright not used
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    int left[4];
    int seq[10];
    int k, x, y;

    for(k=0; k<4; k++)
        left[k]= src[-1+k*stride];

    seq[0]= (left[0] + left[1] + 1)>>1;
    seq[1]= (left[0] + 2*left[1] + left[2] + 2)>>2;
    seq[2]= (left[1] + left[2] + 1)>>1;
    seq[3]= (left[1] + 2*left[2] + left[3] + 2)>>2;
    seq[4]= (left[2] + left[3] + 1)>>1;
    seq[5]= (left[2] + 2*left[3] + left[3] + 2)>>2;
    for(k=6; k<10; k++)
        seq[k]= left[3];

    for(y=0; y<4; y++){
        for(x=0; x<4; x++)
            src[x + y*stride]= seq[x + 2*y];
    }
}
1585
/**
 * Horizontal-down 4x4 prediction from the top-left pixel, the 4 pixels left
 * and the first 3 pixels above the block.
 * @param topright not used
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];
    //2 tap averages along the left edge
    const int avg_lt_l0= (lt + l0 + 1)>>1;
    const int avg_l0_l1= (l0 + l1 + 1)>>1;
    const int avg_l1_l2= (l1 + l2 + 1)>>1;
    const int avg_l2_l3= (l2 + l3 + 1)>>1;
    //3 tap filtered values, named after their centre pixel
    const int flt_lt= (l0 + 2*lt + t0 + 2)>>2;
    const int flt_t0= (lt + 2*t0 + t1 + 2)>>2;
    const int flt_t1= (t0 + 2*t1 + t2 + 2)>>2;
    const int flt_l0= (lt + 2*l0 + l1 + 2)>>2;
    const int flt_l1= (l0 + 2*l1 + l2 + 2)>>2;
    const int flt_l2= (l1 + 2*l2 + l3 + 2)>>2;
    uint8_t * const row0= src;
    uint8_t * const row1= src +   stride;
    uint8_t * const row2= src + 2*stride;
    uint8_t * const row3= src + 3*stride;

    row0[0]= avg_lt_l0; row0[1]= flt_lt; row0[2]= flt_t0;    row0[3]= flt_t1;
    row1[0]= avg_l0_l1; row1[1]= flt_l0; row1[2]= avg_lt_l0; row1[3]= flt_lt;
    row2[0]= avg_l1_l2; row2[1]= flt_l1; row2[2]= avg_l0_l1; row2[3]= flt_l0;
    row3[0]= avg_l2_l3; row3[1]= flt_l2; row3[2]= avg_l1_l2; row3[3]= flt_l1;
}
1609
/**
 * Vertical 16x16 prediction: the 16 pixels above the block are copied into
 * each of its 16 rows.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t * const above= (uint32_t*)(src - stride);
    const uint32_t w0= above[0];
    const uint32_t w1= above[1];
    const uint32_t w2= above[2];
    const uint32_t w3= above[3];
    int y;

    for(y=0; y<16; y++){
        uint32_t * const row= (uint32_t*)(src + y*stride);
        row[0]= w0;
        row[1]= w1;
        row[2]= w2;
        row[3]= w3;
    }
}
1624
/**
 * Horizontal 16x16 prediction: every row is filled with the pixel left of it.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        //unsigned constant: pixel*0x01010101 overflows a signed int for pixels >= 128
        const uint32_t fill= src[-1+i*stride]*0x01010101U;

        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= fill;
    }
}
1635
/**
 * DC 16x16 prediction: the block is filled with the rounded mean of the
 * 16 pixels left of and the 16 pixels above it.
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t fill;

    for(i=0;i<16; i++){
        sum+= src[-1+i*stride];
    }

    for(i=0;i<16; i++){
        sum+= src[i-stride];
    }

    //unsigned constant: 0x01010101*dc overflows a signed int for dc >= 128
    fill= 0x01010101U*((sum + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= fill;
    }
}
1656
/**
 * DC 16x16 prediction from the left edge only: the block is filled with the
 * rounded mean of the 16 pixels left of it.
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t fill;

    for(i=0;i<16; i++){
        sum+= src[-1+i*stride];
    }

    //unsigned constant: 0x01010101*dc overflows a signed int for dc >= 128
    fill= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= fill;
    }
}
1673
/**
 * DC 16x16 prediction from the top edge only: the block is filled with the
 * rounded mean of the 16 pixels above it.
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t fill;

    for(i=0;i<16; i++){
        sum+= src[i-stride];
    }

    //unsigned constant: 0x01010101*dc overflows a signed int for dc >= 128
    fill= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= fill;
    }
}
1689
/**
 * DC 16x16 prediction without any neighbours: the block is filled with 128.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t fill= 0x01010101U*128U;
    int y, w;

    for(y=0; y<16; y++){
        uint32_t * const row= (uint32_t*)(src + y*stride);
        for(w=0; w<4; w++)
            row[w]= fill;
    }
}
1700
1701 static void pred16x16_plane_c(uint8_t *src, int stride){
1702 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1703 int i, dx, dy, dc;
1704 int temp[16];
1705
1706 dc= 16*(src[15-stride] + src[-1+15*stride]);
1707
1708 dx=dy=0;
1709 for(i=1; i<9; i++){
1710 dx += i*(src[7+i-stride] - src[7-i-stride]);
1711 dy += i*(src[-1+(7+i)*stride] - src[-1+(7-i)*stride]);
1712 }
1713 dx= (5*dx+32)>>6;
1714 dy= (5*dy+32)>>6;
1715
1716 dc += 16;
1717
1718 //FIXME modifiy dc,dx,dy to avoid -7
1719
1720 for(i=0; i<16; i++)
1721 temp[i]= dx*(i-7) + dc;
1722
1723 if( (dc - ABS(dx)*8 - ABS(dy)*8)>>5 < 0
1724 || (dc + ABS(dx)*8 + ABS(dy)*8)>>5 > 255){
1725
1726 for(i=0; i<16; i++){
1727 int j;
1728 for(j=0; j<16; j++)
1729 src[j + i*stride]= cm[ (temp[j] + dy*(i-7))>>5 ];
1730 }
1731 }else{
1732 for(i=0; i<16; i++){
1733 int j;
1734 for(j=0; j<16; j++)
1735 src[j + i*stride]= (temp[j] + dy*(i-7))>>5;
1736 }
1737 }
1738 }
1739
/**
 * Vertical 8x8 prediction: the 8 pixels above the block are copied into
 * each of its 8 rows.
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t * const above= (uint32_t*)(src - stride);
    const uint32_t w0= above[0];
    const uint32_t w1= above[1];
    int y;

    for(y=0; y<8; y++){
        uint32_t * const row= (uint32_t*)(src + y*stride);
        row[0]= w0;
        row[1]= w1;
    }
}
1750
/**
 * Horizontal 8x8 prediction: every row is filled with the pixel left of it.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        //unsigned constant: pixel*0x01010101 overflows a signed int for pixels >= 128
        const uint32_t fill= src[-1+i*stride]*0x01010101U;

        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= fill;
    }
}
1759
/**
 * DC 8x8 prediction without any neighbours: the block is filled with 128.
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t fill= 0x01010101U*128U;
    int y;

    for(y=0; y<8; y++){
        uint32_t * const row= (uint32_t*)(src + y*stride);
        row[0]= fill;
        row[1]= fill;
    }
}
1772
/**
 * DC 8x8 prediction from the left edge only.
 * Rows 0..3 are filled with the rounded mean of the 4 left pixels of these
 * rows, rows 4..7 with the rounded mean of the 4 left pixels of rows 4..7.
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int sum_upper=0, sum_lower=0;
    uint32_t dc0, dc2;

    for(i=0;i<4; i++){
        sum_upper+= src[-1+i*stride];
        sum_lower+= src[-1+(i+4)*stride];
    }
    //unsigned constant: 0x01010101*dc overflows a signed int for dc >= 128
    dc0= 0x01010101U*((sum_upper + 2)>>2);
    dc2= 0x01010101U*((sum_lower + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
1794
/**
 * DC 8x8 prediction from the top edge only.
 * Columns 0..3 are filled with the rounded mean of the 4 pixels above them,
 * columns 4..7 with the rounded mean of the 4 pixels above columns 4..7.
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int sum_left_half=0, sum_right_half=0;
    uint32_t dc0, dc1;

    for(i=0;i<4; i++){
        sum_left_half += src[i-stride];
        sum_right_half+= src[4+i-stride];
    }
    //unsigned constant: 0x01010101*dc overflows a signed int for dc >= 128
    dc0= 0x01010101U*((sum_left_half  + 2)>>2);
    dc1= 0x01010101U*((sum_right_half + 2)>>2);

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
1816
1817
/**
 * DC 8x8 prediction, done separately for the four 4x4 quarters.
 * top-left:     mean of the 4 left pixels of rows 0..3 and the 4 top pixels of columns 0..3
 * top-right:    mean of the 4 top pixels of columns 4..7
 * bottom-left:  mean of the 4 left pixels of rows 4..7
 * bottom-right: mean of the 4 top pixels of columns 4..7 and the 4 left pixels of rows 4..7
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum0=0, sum_top_hi=0, sum_left_lo=0;
    uint32_t dc0, dc1, dc2, dc3;

    for(i=0;i<4; i++){
        sum0       += src[-1+i*stride] + src[i-stride];
        sum_top_hi += src[4+i-stride];
        sum_left_lo+= src[-1+(i+4)*stride];
    }
    //unsigned constant: 0x01010101*dc overflows a signed int for dc >= 128
    dc0= 0x01010101U*((sum0 + 4)>>3);
    dc1= 0x01010101U*((sum_top_hi + 2)>>2);
    dc2= 0x01010101U*((sum_left_lo + 2)>>2);
    dc3= 0x01010101U*((sum_top_hi + sum_left_lo + 4)>>3);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
1842
1843 static void pred8x8_plane_c(uint8_t *src, int stride){
1844 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1845 int i, dx, dy, dc;
1846 int temp[8];
1847
1848 dc= 16*(src[7-stride] + src[-1+7*stride]);
1849
1850 dx=dy=0;
1851 for(i=1; i<5; i++){
1852 dx += i*(src[3+i-stride] - src[3-i-stride]);
1853 dy += i*(src[-1+(3+i)*stride] - src[-1+(3-i)*stride]);
1854 }
1855 dx= (17*dx+16)>>5;
1856 dy= (17*dy+16)>>5;
1857
1858 dc += 16;
1859
1860 //FIXME modifiy dc,dx,dy to avoid -3
1861
1862 for(i=0; i<8; i++)
1863 temp[i]= dx*(i-3) + dc;
1864
1865 if( (dc - ABS(dx)*4 - ABS(dy)*4)>>5 < 0
1866 || (dc + ABS(dx)*4 + ABS(dy)*4)>>5 > 255){
1867
1868 for(i=0; i<8; i++){
1869 int j;
1870 for(j=0; j<8; j++)
1871 src[j + i*stride]= cm[ (temp[j] + dy*(i-3))>>5 ];
1872 }
1873 }else{
1874 for(i=0; i<8; i++){
1875 int j;
1876 for(j=0; j<8; j++)
1877 src[j + i*stride]= (temp[j] + dy*(i-3))>>5;
1878 }
1879 }
1880 }
1881
1882 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1883 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1884 int src_x_offset, int src_y_offset,
1885 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1886 MpegEncContext * const s = &h->s;
1887 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1888 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1889 const int luma_xy= (mx&3) + ((my&3)<<2);
1890 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
1891 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
1892 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
1893 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
1894 int extra_height= extra_width;
1895 int emu=0;
1896 const int full_mx= mx>>2;
1897 const int full_my= my>>2;
1898
1899 assert(pic->data[0]);
1900
1901 if(mx&7) extra_width -= 3;
1902 if(my&7) extra_height -= 3;
1903
1904 if( full_mx < 0-extra_width
1905 || full_my < 0-extra_height
1906 || full_mx + 16/*FIXME*/ > s->width + extra_width
1907 || full_my + 16/*FIXME*/ > s->height + extra_height){
1908 ff_emulated_edge_mc(s, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
1909 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
1910 emu=1;
1911 }
1912
1913 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
1914 if(!square){
1915 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
1916 }
1917
1918 if(s->flags&CODEC_FLAG_GRAY) return;
1919
1920 if(emu){
1921 ff_emulated_edge_mc(s, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1922 src_cb= s->edge_emu_buffer;
1923 }
1924 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
1925
1926 if(emu){
1927 ff_emulated_edge_mc(s, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1928 src_cr= s->edge_emu_buffer;
1929 }
1930 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
1931 }
1932
1933 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1934 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1935 int x_offset, int y_offset,
1936 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1937 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1938 int list0, int list1){
1939 MpegEncContext * const s = &h->s;
1940 qpel_mc_func *qpix_op= qpix_put;
1941 h264_chroma_mc_func chroma_op= chroma_put;
1942
1943 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
1944 dest_cb += x_offset + y_offset*s->uvlinesize;
1945 dest_cr += x_offset + y_offset*s->uvlinesize;
1946 x_offset += 8*s->mb_x;
1947 y_offset += 8*s->mb_y;
1948
1949 if(list0){
1950 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[0] ] ];
1951 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1952 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1953 qpix_op, chroma_op);
1954
1955 qpix_op= qpix_avg;
1956 chroma_op= chroma_avg;
1957 }
1958
1959 if(list1){
1960 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[0] ] ];
1961 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1962 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1963 qpix_op, chroma_op);
1964 }
1965 }
1966
1967 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1968 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1969 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){
1970 MpegEncContext * const s = &h->s;
1971 const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
1972 const int mb_type= s->current_picture.mb_type[mb_xy];
1973
1974 assert(IS_INTER(mb_type));
1975
1976 if(IS_16X16(mb_type)){
1977 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1978 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1979 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1980 }else if(IS_16X8(mb_type)){
1981 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1982 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1983 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1984 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1985 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1986 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1987 }else if(IS_8X16(mb_type)){
1988 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
1989 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1990 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1991 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
1992 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1993 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1994 }else{
1995 int i;
1996
1997 assert(IS_8X8(mb_type));
1998
1999 for(i=0; i<4; i++){
2000 const int sub_mb_type= h->sub_mb_type[i];
2001 const int n= 4*i;
2002 int x_offset= (i&1)<<2;
2003 int y_offset= (i&2)<<1;
2004
2005 if(IS_SUB_8X8(sub_mb_type)){
2006 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2007 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2008 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2009 }else if(IS_SUB_8X4(sub_mb_type)){
2010 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2011 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2012 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2013 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2014 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2015 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2016 }else if(IS_SUB_4X8(sub_mb_type)){
2017 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2018 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2019 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2020 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2021 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2022 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2023 }else{
2024 int j;
2025 assert(IS_SUB_4X4(sub_mb_type));
2026 for(j=0; j<4; j++){
2027 int sub_x_offset= x_offset + 2*(j&1);
2028 int sub_y_offset= y_offset + (j&2);
2029 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2030 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2031 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2032 }
2033 }
2034 }
2035 }
2036 }
2037
2038 static void decode_init_vlc(H264Context *h){
2039 static int done = 0;
2040
2041 if (!done) {
2042 int i;
2043 done = 1;
2044
2045 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2046 &chroma_dc_coeff_token_len [0], 1, 1,
2047 &chroma_dc_coeff_token_bits[0], 1, 1);
2048
2049 for(i=0; i<4; i++){
2050 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2051 &coeff_token_len [i][0], 1, 1,
2052 &coeff_token_bits[i][0], 1, 1);
2053 }
2054
2055 for(i=0; i<3; i++){
2056 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2057 &chroma_dc_total_zeros_len [i][0], 1, 1,
2058 &chroma_dc_total_zeros_bits[i][0], 1, 1);
2059 }
2060 for(i=0; i<15; i++){
2061 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2062 &total_zeros_len [i][0], 1, 1,
2063 &total_zeros_bits[i][0], 1, 1);
2064 }
2065
2066 for(i=0; i<6; i++){
2067 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2068 &run_len [i][0], 1, 1,
2069 &run_bits[i][0], 1, 1);
2070 }
2071 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2072 &run_len [6][0], 1, 1,
2073 &run_bits[6][0], 1, 1);
2074 }
2075 }
2076
2077 /**
2078 * Sets the intra prediction function pointers.
2079 */
2080 static void init_pred_ptrs(H264Context *h){
2081 // MpegEncContext * const s = &h->s;
2082
2083 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2084 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2085 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2086 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2087 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2088 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2089 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2090 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2091 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2092 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2093 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2094 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2095
2096 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2097 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2098 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2099 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2100 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2101 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2102 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2103
2104 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2105 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2106 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2107 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2108 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2109 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2110 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2111 }
2112
//FIXME factorize
/* Zero-allocates size bytes into p. On failure it reports through perror()
 * and jumps to a label named "fail" in the calling function.
 * Expands to a plain brace block, so it works with or without a trailing ';'. */
#define CHECKED_ALLOCZ(p, size)\
{\
    if(((p)= av_mallocz(size)) == NULL){\
        perror("malloc");\
        goto fail;\
    }\
}
2122
2123 static void free_tables(H264Context *h){
2124 MpegEncContext * const s = &h->s;
2125
2126 av_freep(&h->intra4x4_pred_mode);
2127 av_freep(&h->non_zero_count);
2128 av_freep(&h->slice_table_base);
2129 h->slice_table= NULL;
2130
2131 av_freep(&h->mb2b_xy);
2132 av_freep(&h->mb2b8_xy);
2133 }
2134
2135 /**
2136 * allocates tables.
2137 * needs widzh/height
2138 */
2139 static int alloc_tables(H264Context *h){
2140 MpegEncContext * const s = &h->s;
2141 const int big_mb_num= h->mb_stride * (s->mb_height+1);
2142 int x,y;
2143
2144 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2145 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2146 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
2147
2148 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
2149 h->slice_table= h->slice_table_base + h->mb_stride + 1;
2150
2151 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint16_t));
2152 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint16_t));
2153 for(y=0; y<s->mb_height; y++){
2154 for(x=0; x<s->mb_width; x++){
2155 const int mb_xy= x + y*h->mb_stride;
2156 const int b_xy = 4*x + 4*y*h->b_stride;
2157 const int b8_xy= 2*x + 2*y*h->b8_stride;
2158
2159 h->mb2b_xy [mb_xy]= b_xy;
2160 h->mb2b8_xy[mb_xy]= b8_xy;
2161 }
2162 }
2163
2164 return 0;
2165 fail:
2166 free_tables(h);
2167 return -1;
2168 }
2169
2170 static void common_init(H264Context *h){
2171 MpegEncContext * const s = &h->s;
2172 int i;
2173
2174 s->width = s->avctx->width;
2175 s->height = s->avctx->height;
2176 s->codec_id= s->avctx->codec->id;
2177
2178 init_pred_ptrs(h);
2179
2180 s->decode=1; //FIXME
2181 }
2182
2183 static int decode_init(AVCodecContext *avctx){
2184 H264Context *h= avctx->priv_data;
2185 MpegEncContext * const s = &h->s;
2186
2187 s->avctx = avctx;
2188 common_init(h);
2189
2190 s->out_format = FMT_H264;
2191 s->workaround_bugs= avctx->workaround_bugs;
2192
2193 // set defaults
2194 s->progressive_sequence=1;
2195 // s->decode_mb= ff_h263_decode_mb;
2196 s->low_delay= 1;
2197 avctx->pix_fmt= PIX_FMT_YUV420P;
2198
2199 decode_init_vlc(h);
2200
2201 return 0;
2202 }
2203
2204 static void frame_start(H264Context *h){
2205 MpegEncContext * const s = &h->s;
2206 int i;
2207
2208 MPV_frame_start(s, s->avctx);
2209 ff_er_frame_start(s);
2210 h->mmco_index=0;
2211
2212 assert(s->linesize && s->uvlinesize);
2213
2214 for(i=0; i<16; i++){
2215 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2216 h->chroma_subblock_offset[i]= 2*((scan8[i] - scan8[0])&7) + 2*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2217 }
2218 for(i=0; i<4; i++){
2219 h->block_offset[16+i]=
2220 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2221 }
2222
2223 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2224 }
2225
2226 static void hl_decode_mb(H264Context *h){
2227 MpegEncContext * const s = &h->s;
2228 const int mb_x= s->mb_x;
2229 const int mb_y= s->mb_y;
2230 const int mb_xy= mb_x + mb_y*h->mb_stride;
2231 const int mb_type= s->current_picture.mb_type[mb_xy];
2232 uint8_t *dest_y, *dest_cb, *dest_cr;
2233 int linesize, uvlinesize /*dct_offset*/;
2234 int i;
2235
2236 if(!s->decode)
2237 return;
2238
2239 if(s->mb_skiped){
2240 }
2241
2242 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2243 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2244 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2245
2246 if (h->mb_field_decoding_flag) {
2247 linesize = s->linesize * 2;
2248 uvlinesize = s->uvlinesize * 2;
2249 if(mb_y&1){ //FIXME move out of this func?
2250 dest_y -= s->linesize*15;
2251 dest_cb-= s->linesize*7;
2252 dest_cr-= s->linesize*7;
2253 }
2254 } else {
2255 linesize = s->linesize;
2256 uvlinesize = s->uvlinesize;
2257 // dct_offset = s->linesize * 16;
2258 }
2259
2260 if(IS_INTRA(mb_type)){
2261 if(!(s->flags&CODEC_FLAG_GRAY)){
2262 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2263 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2264 }
2265
2266 if(IS_INTRA4x4(mb_type)){
2267 if(!s->encoding){
2268 for(i=0; i<16; i++){
2269 uint8_t * const ptr= dest_y + h->block_offset[i];
2270 uint8_t *topright= ptr + 4 - linesize;
2271 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2272 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2273 int tr;
2274
2275 if(!topright_avail){
2276 tr= ptr[3 - linesize]*0x01010101;
2277 topright= (uint8_t*) &tr;
2278 }
2279
2280 h->pred4x4[ dir ](ptr, topright, linesize);
2281 if(h->non_zero_count_cache[ scan8[i] ])
2282 h264_add_idct_c(ptr, h->mb + i*16, linesize);
2283 }
2284 }
2285 }else{
2286 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2287 h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
2288 }
2289 }else{
2290 hl_motion(h, dest_y, dest_cb, dest_cr,
2291 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
2292 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab);
2293 }
2294
2295
2296 if(!IS_INTRA4x4(mb_type)){
2297 for(i=0; i<16; i++){
2298 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2299 uint8_t * const ptr= dest_y + h->block_offset[i];
2300 h264_add_idct_c(ptr, h->mb + i*16, linesize);
2301 }
2302 }
2303 }
2304
2305 if(!(s->flags&CODEC_FLAG_GRAY)){
2306 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
2307 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
2308 for(i=16; i<16+4; i++){
2309 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2310 uint8_t * const ptr= dest_cb + h->block_offset[i];
2311 h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
2312 }
2313 }
2314 for(i=20; i<20+4; i++){
2315 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2316 uint8_t * const ptr= dest_cr + h->block_offset[i];
2317 h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
2318 }
2319 }
2320 }
2321 }
2322
2323 static void decode_mb_cabac(H264Context *h){
2324 // MpegEncContext * const s = &h->s;
2325 }
2326
2327 /**
2328 * fills the default_ref_list.
2329 */
2330 static int fill_default_ref_list(H264Context *h){
2331 MpegEncContext * const s = &h->s;
2332 int i;
2333 Picture sorted_short_ref[16];
2334
2335 if(h->slice_type==B_TYPE){
2336 int out_i;
2337 int limit= -1;
2338
2339 for(out_i=0; out_i<h->short_ref_count; out_i++){
2340 int best_i=-1;
2341 int best_poc=-1;
2342
2343 for(i=0; i<h->short_ref_count; i++){
2344 const int poc= h->short_ref[i]->poc;
2345 if(poc > limit && poc < best_poc){
2346 best_poc= poc;
2347 best_i= i;
2348 }
2349 }
2350
2351 assert(best_i != -1);
2352
2353 limit= best_poc;
2354 sorted_short_ref[out_i]= *h->short_ref[best_i];
2355 }
2356 }
2357
2358 if(s->picture_structure == PICT_FRAME){
2359 if(h->slice_type==B_TYPE){
2360 const int current_poc= s->current_picture_ptr->poc;
2361 int list;
2362
2363 for(list=0; list<2; list++){
2364 int index=0;
2365
2366 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++){
2367 const int i2= list ? h->short_ref_count - i - 1 : i;
2368 const int poc= sorted_short_ref[i2].poc;
2369
2370 if(sorted_short_ref[i2].reference != 3) continue; //FIXME refernce field shit
2371
2372 if((list==1 && poc > current_poc) || (list==0 && poc < current_poc)){
2373 h->default_ref_list[list][index ]= sorted_short_ref[i2];
2374 h->default_ref_list[list][index++].pic_id= sorted_short_ref[i2].frame_num;
2375 }
2376 }
2377
2378 for(i=0; i<h->long_ref_count && index < h->ref_count[ list ]; i++){
2379 if(h->long_ref[i]->reference != 3) continue;
2380
2381 h->default_ref_list[ list ][index ]= *h->long_ref[i];
2382 h->default_ref_list[ list ][index++].pic_id= i;;
2383 }
2384
2385 if(h->long_ref_count > 1 && h->short_ref_count==0){
2386 Picture temp= h->default_ref_list[1][0];
2387 h->default_ref_list[1][0] = h->default_ref_list[1][1];
2388 h->default_ref_list[1][0] = temp;
2389 }
2390
2391 if(index < h->ref_count[ list ])
2392 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
2393 }
2394 }else{
2395 int index=0;
2396 for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){
2397 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
2398 h->default_ref_list[0][index ]= *h->short_ref[i];
2399 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2400 }
2401 for(i=0; i<h->long_ref_count && index < h->ref_count[0]; i++){
2402 if(h->long_ref[i]->reference != 3) continue;
2403 h->default_ref_list[0][index ]= *h->long_ref[i];
2404 h->default_ref_list[0][index++].pic_id= i;;
2405 }
2406 if(index < h->ref_count[0])
2407 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2408 }
2409 }else{ //FIELD
2410 if(h->slice_type==B_TYPE){
2411 }else{
2412 //FIXME second field balh
2413 }
2414 }
2415 return 0;
2416 }
2417
2418 static int decode_ref_pic_list_reordering(H264Context *h){
2419 MpegEncContext * const s = &h->s;
2420 int list;
2421
2422 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move beofre func
2423
2424 for(list=0; list<2; list++){
2425 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2426
2427 if(get_bits1(&s->gb)){
2428 int pred= h->curr_pic_num;
2429 int index;
2430
2431 for(index=0; ; index++){
2432 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2433 int pic_id;
2434 int i;
2435
2436
2437 if(index >= h->ref_count[list]){
2438 fprintf(stderr, "reference count overflow\n");
2439 return -1;
2440 }
2441
2442 if(reordering_of_pic_nums_idc<3){
2443 if(reordering_of_pic_nums_idc<2){
2444 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2445
2446 if(abs_diff_pic_num >= h->max_pic_num){
2447 fprintf(stderr, "abs_diff_pic_num overflow\n");
2448 return -1;
2449 }
2450
2451 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2452 else pred+= abs_diff_pic_num;
2453 pred &= h->max_pic_num - 1;
2454
2455 for(i= h->ref_count[list]-1; i>=index; i--){
2456 if(h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0)
2457 break;
2458 }
2459 }else{
2460 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2461
2462 for(i= h->ref_count[list]-1; i>=index; i--){
2463 if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1)
2464 break;
2465 }
2466 }
2467
2468 if(i < index){
2469 fprintf(stderr, "reference picture missing during reorder\n");
2470 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2471 }else if(i > index){
2472 Picture tmp= h->ref_list[list][i];
2473 for(; i>index; i--){
2474 h->ref_list[list][i]= h->ref_list[list][i-1];
2475 }
2476 h->ref_list[list][index]= tmp;
2477 }
2478 }else if(reordering_of_pic_nums_idc==3)
2479 break;
2480 else{
2481 fprintf(stderr, "illegal reordering_of_pic_nums_idc\n");
2482 return -1;
2483 }
2484 }
2485 }
2486
2487 if(h->slice_type!=B_TYPE) break;
2488 }
2489 return 0;
2490 }
2491
2492 static int pred_weight_table(H264Context *h){
2493 MpegEncContext * const s = &h->s;
2494 int list, i;
2495
2496 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2497 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2498
2499 for(list=0; list<2; list++){
2500 for(i=0; i<h->ref_count[list]; i++){
2501 int luma_weight_flag, chroma_weight_flag;
2502
2503 luma_weight_flag= get_bits1(&s->gb);
2504 if(luma_weight_flag){
2505 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2506 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2507 }
2508
2509 chroma_weight_flag= get_bits1(&s->gb);
2510 if(chroma_weight_flag){
2511 int j;
2512 for(j=0; j<2; j++){
2513 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2514 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2515 }
2516 }
2517 }
2518 if(h->slice_type != B_TYPE) break;
2519 }
2520 return 0;
2521 }
2522
2523 /**
2524 * instantaneos decoder refresh.
2525 */
2526 static void idr(H264Context *h){
2527 int i;
2528
2529 for(i=0; i<h->long_ref_count; i++){
2530 h->long_ref[i]->reference=0;
2531 h->long_ref[i]= NULL;
2532 }
2533 h->long_ref_count=0;
2534
2535 for(i=0; i<h->short_ref_count; i++){
2536 h->short_ref[i]->reference=0;
2537 h->short_ref[i]= NULL;
2538 }
2539 h->short_ref_count=0;
2540 }
2541
2542 //static void
2543 /**
2544 *
2545 * @return the removed picture or NULL if an error occures
2546 */
2547 static Picture * remove_short(H264Context *h, int frame_num){
2548 int i;
2549
2550 for(i=0; i<h->short_ref_count; i++){
2551 Picture *pic= h->short_ref[i];
2552 if(pic->frame_num == frame_num){
2553 h->short_ref[i]= NULL;
2554 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
2555 h->short_ref_count--;
2556 return pic;
2557 }
2558 }
2559 return NULL;
2560 }
2561
2562 /**
2563 *
2564 * @return the removed picture or NULL if an error occures
2565 */
2566 static Picture * remove_long(H264Context *h, int i){
2567 Picture *pic;
2568
2569 if(i >= h->long_ref_count) return NULL;
2570 pic= h->long_ref[i];
2571 if(pic==NULL) return NULL;
2572
2573 h->long_ref[i]= NULL;
2574 memmove(&h->long_ref[i], &h->long_ref[i+1], (h->long_ref_count - i - 1)*sizeof(Picture*));
2575 h->long_ref_count--;
2576
2577 return pic;
2578 }
2579
2580 /**
2581 * Executes the reference picture marking (memory management control operations).
2582 */
2583 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
2584 MpegEncContext * const s = &h->s;
2585 int i;
2586 int current_is_long=0;
2587 Picture *pic;
2588
2589 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
2590 printf("no mmco here\n");
2591
2592 for(i=0; i<mmco_count; i++){
2593 if(s->avctx->debug&FF_DEBUG_MMCO)
2594 printf("mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
2595
2596 switch(mmco[i].opcode){
2597 case MMCO_SHORT2UNUSED:
2598 pic= remove_short(h, mmco[i].short_frame_num);
2599 if(pic==NULL) return -1;
2600 pic->reference= 0;
2601 break;
2602 case MMCO_SHORT2LONG:
2603 pic= remove_long(h, mmco[i].long_index);
2604 if(pic) pic->reference=0;
2605
2606 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
2607 h->long_ref[ mmco[i].long_index ]->long_ref=1;
2608 break;
2609 case MMCO_LONG2UNUSED:
2610 pic= remove_long(h, mmco[i].long_index);
2611 if(pic==NULL) return -1;
2612 pic->reference= 0;
2613 break;
2614 case MMCO_LONG:
2615 pic= remove_long(h, mmco[i].long_index);
2616 if(pic) pic->reference=0;
2617
2618 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
2619 h->long_ref[ mmco[i].long_index ]->long_ref=1;
2620 h->long_ref_count++;
2621
2622 current_is_long=1;
2623 break;
2624 case MMCO_SET_MAX_LONG:
2625 assert(mmco[i].long_index <= 16);
2626 while(mmco[i].long_index < h->long_ref_count){
2627 pic= remove_long(h, mmco[i].long_index);
2628 pic->reference=0;
2629 }
2630 while(mmco[i].long_index > h->long_ref_count){
2631 h->long_ref[ h->long_ref_count++ ]= NULL;
2632 }
2633 break;
2634 case MMCO_RESET:
2635 while(h->short_ref_count){
2636 pic= remove_short(h, h->short_ref[0]->frame_num);
2637 pic->reference=0;
2638 }
2639 while(h->long_ref_count){
2640 pic= remove_long(h, h->long_ref_count-1);
2641 pic->reference=0;
2642 }
2643 break;
2644 default: assert(0);
2645 }
2646 }
2647
2648 if(!current_is_long){
2649 pic= remove_short(h, s->current_picture_ptr->frame_num);
2650 if(pic){
2651 pic->reference=0;
2652 fprintf(stderr, "illegal short term buffer state detected\n");
2653 }
2654
2655 if(h->short_ref_count)
2656 memmove(&h->short_ref[1], &h->short_ref[0], (h->short_ref_count - 1)*sizeof(Picture*));
2657 h->short_ref[0]= s->current_picture_ptr;
2658 h->short_ref[0]->long_ref=0;
2659 h->short_ref_count++;
2660 }
2661
2662 return 0;
2663 }
2664
2665 static int decode_ref_pic_marking(H264Context *h){
2666 MpegEncContext * const s = &h->s;
2667 int i;
2668
2669 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
2670 s->broken_link= get_bits1(&s->gb) -1;
2671 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
2672 if(h->mmco[0].long_index == -1)
2673 h->mmco_index= 0;
2674 else{
2675 h->mmco[0].opcode= MMCO_LONG;
2676 h->mmco_index= 1;
2677 }
2678 }else{
2679 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
2680 for(i= h->mmco_index; i<MAX_MMCO_COUNT; i++) {
2681 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
2682
2683 h->mmco[i].opcode= opcode;
2684 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
2685 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
2686 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
2687 fprintf(stderr, "illegal short ref in memory management control operation %d\n", mmco);
2688 return -1;
2689 }*/
2690 }
2691 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
2692 h->mmco[i].long_index= get_ue_golomb(&s->gb);
2693 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
2694 fprintf(stderr, "illegal long ref in memory management control operation %d\n", opcode);
2695 return -1;
2696 }
2697 }
2698
2699 if(opcode > MMCO_LONG){
2700 fprintf(stderr, "illegal memory management control operation %d\n", opcode);
2701 return -1;
2702 }
2703 }
2704 h->mmco_index= i;
2705 }else{
2706 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
2707
2708 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
2709 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
2710 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
2711 h->mmco_index= 1;
2712 }else
2713 h->mmco_index= 0;
2714 }
2715 }
2716
2717 return 0;
2718 }
2719
2720 static int init_poc(H264Context *h){
2721 MpegEncContext * const s = &h->s;
2722 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
2723 int field_poc[2];
2724
2725 if(h->nal_unit_type == NAL_IDR_SLICE){
2726 h->frame_num_offset= 0;
2727 }else{
2728 if(h->frame_num < h->prev_frame_num)
2729 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
2730 else
2731 h->frame_num_offset= h->prev_frame_num_offset;
2732 }
2733
2734 if(h->sps.poc_type==0){
2735 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
2736
2737 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
2738 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
2739 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
2740 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
2741 else
2742 h->poc_msb = h->prev_poc_msb;
2743 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
2744 field_poc[0] =
2745 field_poc[1] = h->poc_msb + h->poc_lsb;
2746 if(s->picture_structure == PICT_FRAME)
2747 field_poc[1] += h->delta_poc_bottom;
2748 }else if(h->sps.poc_type==1){
2749 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
2750 int i;
2751
2752 if(h->sps.poc_cycle_length != 0)
2753 abs_frame_num = h->frame_num_offset + h->frame_num;
2754 else
2755 abs_frame_num = 0;
2756
2757 if(h->nal_ref_idc==0 && abs_frame_num > 0)
2758 abs_frame_num--;
2759
2760 expected_delta_per_poc_cycle = 0;
2761 for(i=0; i < h->sps.poc_cycle_length; i++)
2762 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
2763
2764 if(abs_frame_num > 0){
2765 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
2766 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
2767
2768 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
2769 for(i = 0; i <= frame_num_in_poc_cycle; i++)
2770 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
2771 } else
2772 expectedpoc = 0;
2773
2774 if(h->nal_ref_idc == 0)
2775 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
2776
2777 field_poc[0] = expectedpoc + h->delta_poc[0];
2778 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
2779
2780 if(s->picture_structure == PICT_FRAME)
2781 field_poc[1] += h->delta_poc[1];
2782 }else{
2783 int poc;
2784 if(h->nal_unit_type == NAL_IDR_SLICE){
2785 poc= 0;
2786 }else{
2787 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
2788 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
2789 }
2790 field_poc[0]= poc;
2791 field_poc[1]= poc;
2792 }
2793
2794 if(s->picture_structure != PICT_BOTTOM_FIELD)
2795 s->current_picture_ptr->field_poc[0]= field_poc[0];
2796 if(s->picture_structure != PICT_TOP_FIELD)
2797 s->current_picture_ptr->field_poc[1]= field_poc[1];
2798 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
2799 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
2800
2801 return 0;
2802 }
2803
2804 /**
2805 * decodes a slice header.
2806 * this will allso call MPV_common_init() and frame_start() as needed
2807 */
2808 static int decode_slice_header(H264Context *h){
2809 MpegEncContext * const s = &h->s;
2810 int first_mb_in_slice, pps_id;
2811 int num_ref_idx_active_override_flag;
2812 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
2813 float new_aspect;
2814
2815 s->current_picture.reference= h->nal_ref_idc != 0;
2816
2817 first_mb_in_slice= get_ue_golomb(&s->gb);
2818
2819 h->slice_type= get_ue_golomb(&s->gb);
2820 if(h->slice_type > 9){
2821 fprintf(stderr, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
2822 }
2823 if(h->slice_type > 4){
2824 h->slice_type -= 5;
2825 h->slice_type_fixed=1;
2826 }else
2827 h->slice_type_fixed=0;
2828
2829 h->slice_type= slice_type_map[ h->slice_type ];
2830
2831 s->pict_type= h->slice_type; // to make a few old func happy, its wrong though
2832
2833 pps_id= get_ue_golomb(&s->gb);
2834 if(pps_id>255){
2835 fprintf(stderr, "pps_id out of range\n");
2836 return -1;
2837 }
2838 h->pps= h->pps_buffer[pps_id];
2839 h->sps= h->sps_buffer[ h->pps.sps_id ];
2840
2841 s->mb_width= h->sps.mb_width;
2842 s->mb_height= h->sps.mb_height;
2843 h->mb_stride= s->mb_width + 1;
2844
2845 h->b_stride= s->mb_width*4;
2846 h->b8_stride= s->mb_width*2;
2847
2848 s->mb_x = first_mb_in_slice % s->mb_width;
2849 s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
2850
2851 s->width = 16*s->mb_width - 2*(h->pps.crop_left + h->pps.crop_right );
2852 if(h->sps.frame_mbs_only_flag)
2853 s->height= 16*s->mb_height - 2*(h->pps.crop_top + h->pps.crop_bottom);
2854 else
2855 s->height= 16*s->mb_height - 4*(h->pps.crop_top + h->pps.crop_bottom); //FIXME recheck
2856
2857 if(h->pps.crop_left || h->pps.crop_top){
2858 fprintf(stderr, "insane croping not completly supported, this could look slightly wrong ...\n");
2859 }
2860
2861 if(s->aspected_height) //FIXME emms at end of slice ?
2862 new_aspect= h->sps.sar_width*s->width / (float)(s->height*h->sps.sar_height);
2863 else
2864 new_aspect=0;
2865
2866 if (s->context_initialized
2867 && ( s->width != s->avctx->width || s->height != s->avctx->height
2868 || ABS(new_aspect - s->avctx->aspect_ratio) > 0.001)) {
2869 free_tables(h);
2870 MPV_common_end(s);
2871 }
2872 if (!s->context_initialized) {
2873 if (MPV_common_init(s) < 0)
2874 return -1;
2875
2876 alloc_tables(h);
2877
2878 s->avctx->width = s->width;
2879 s->avctx->height = s->height;
2880 s->avctx->aspect_ratio= new_aspect;
2881 }
2882
2883 if(first_mb_in_slice == 0){
2884 frame_start(h);
2885 }
2886
2887 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
2888
2889 if(h->sps.frame_mbs_only_flag){
2890 s->picture_structure= PICT_FRAME;
2891 }else{
2892 if(get_bits1(&s->gb)) //field_pic_flag
2893 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
2894 else
2895 s->picture_structure= PICT_FRAME;
2896 }
2897
2898 if(s->picture_structure==PICT_FRAME){
2899 h->curr_pic_num= h->frame_num;
2900 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
2901 }else{
2902 h->curr_pic_num= 2*h->frame_num;
2903 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
2904 }
2905
2906 if(h->nal_unit_type == NAL_IDR_SLICE){
2907 int idr_pic_id= get_ue_golomb(&s->gb);
2908 }
2909
2910 if(h->sps.poc_type==0){
2911 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
2912
2913 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
2914 h->delta_poc_bottom= get_se_golomb(&s->gb);
2915 }
2916 }
2917
2918 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
2919 h->delta_poc[0]= get_se_golomb(&s->gb);
2920
2921 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
2922 h->delta_poc[1]= get_se_golomb(&s->gb);
2923 }
2924
2925 init_poc(h);
2926
2927 if(h->pps.redundant_pic_cnt_present){
2928 h->redundant_pic_count= get_ue_golomb(&s->gb);
2929 }
2930
2931 //set defaults, might be overriden a few line later
2932 h->ref_count[0]= h->pps.ref_count[0];
2933 h->ref_count[1]= h->pps.ref_count[1];
2934
2935 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
2936 if(h->slice_type == B_TYPE){
2937 h->direct_spatial_mv_pred= get_bits1(&s->gb);
2938 }
2939 num_ref_idx_active_override_flag= get_bits1(&s->gb);
2940
2941 if(num_ref_idx_active_override_flag){
2942 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
2943 if(h->slice_type==B_TYPE)
2944 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
2945
2946 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
2947 fprintf(stderr, "reference overflow\n");
2948 return -1;
2949 }
2950 }
2951 }
2952
2953 if(first_mb_in_slice == 0){
2954 fill_default_ref_list(h);
2955 }
2956
2957 decode_ref_pic_list_reordering(h);
2958
2959 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
2960 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
2961 pred_weight_table(h);
2962
2963 if(s->current_picture.reference)
2964 decode_ref_pic_marking(h);
2965 //FIXME CABAC stuff
2966
2967 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb); //slice_qp_delta
2968 //FIXME qscale / qp ... stuff
2969 if(h->slice_type == SP_TYPE){
2970 int sp_for_switch_flag= get_bits1(&s->gb);
2971 }
2972 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
2973 int slice_qs_delta= get_se_golomb(&s->gb);
2974 }
2975
2976 if( h->pps.deblocking_filter_parameters_present ) {
2977 h->disable_deblocking_filter_idc= get_ue_golomb(&s->gb);
2978 if( h->disable_deblocking_filter_idc != 1 ) {
2979 h->slice_alpha_c0_offset_div2= get_se_golomb(&s->gb);
2980 h->slice_beta_offset_div2= get_se_golomb(&s->gb);
2981 }
2982 }else
2983 h->disable_deblocking_filter_idc= 0;
2984
2985 #if 0 //FMO
2986 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2987 slice_group_change_cycle= get_bits(&s->gb, ?);
2988 #endif
2989
2990 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
2991 printf("mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n",
2992 first_mb_in_slice,
2993 ff_get_pict_type_char(h->slice_type),
2994 pps_id, h->frame_num,
2995 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2996 h->ref_count[0], h->ref_count[1],
2997 s->qscale,
2998 h->disable_deblocking_filter_idc
2999 );
3000 }
3001
3002 return 0;
3003 }
3004
3005 /**
3006 *
3007 */
3008 static inline int get_level_prefix(GetBitContext *gb){
3009 unsigned int buf;
3010 int log;
3011
3012 OPEN_READER(re, gb);
3013 UPDATE_CACHE(re, gb);
3014 buf=GET_CACHE(re, gb);
3015
3016 log= 32 - av_log2(buf);
3017 #ifdef TRACE
3018 print_bin(buf>>(32-log), log);
3019 printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
3020 #endif
3021
3022 LAST_SKIP_BITS(re, gb, log);
3023 CLOSE_READER(re, gb);
3024
3025 return log-1;
3026 }
3027
3028 /**
3029 * decodes a residual block.
3030 * @param n block index
3031 * @param scantable scantable
3032 * @param max_coeff number of coefficients in the block
3033 * @return <0 if an error occurred
3034 */
3035 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, int qp, int max_coeff){
3036 MpegEncContext * const s = &h->s;
3037 const uint16_t *qmul= dequant_coeff[qp];
3038 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
3039 int level[16], run[16];
3040 int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
3041
3042 //FIXME put trailing_onex into the context
3043
3044 if(n == CHROMA_DC_BLOCK_INDEX){
3045 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
3046 total_coeff= coeff_token>>2;
3047 }else{
3048 if(n == LUMA_DC_BLOCK_INDEX){
3049 total_coeff= pred_non_zero_count(h, 0);
3050 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3051 total_coeff= coeff_token>>2;
3052 }else{
3053 total_coeff= pred_non_zero_count(h, n);
3054 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3055 total_coeff= coeff_token>>2;
3056 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
3057 }
3058 }
3059
3060 //FIXME set last_non_zero?
3061
3062 if(total_coeff==0)
3063 return 0;
3064
3065 trailing_ones= coeff_token&3;
3066 #ifdef TRACE
3067 printf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
3068 #endif
3069 assert(total_coeff<=16);
3070
3071 for(i=0; i<trailing_ones; i++){
3072 level[i]= 1 - 2*get_bits1(gb);
3073 }
3074
3075 suffix_length= total_coeff > 10 && trailing_ones < 3;
3076
3077 for(; i<total_coeff; i++){
3078 const int prefix= get_level_prefix(gb);
3079 int level_code, mask;
3080
3081 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
3082 if(suffix_length)
3083 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3084 else
3085 level_code= (prefix<<suffix_length); //part
3086 }else if(prefix==14){
3087 if(suffix_length)
3088 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3089 else
3090 level_code= prefix + get_bits(gb, 4); //part
3091 }else if(prefix==15){
3092 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
3093 if(suffix_length==0) level_code+=15; //FIXME doesnt make (much)sense
3094 }else{
3095 fprintf(stderr, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
3096 return -1;
3097 }
3098
3099 if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
3100
3101 mask= -(level_code&1);
3102 level[i]= (((2+level_code)>>1) ^ mask) - mask;
3103
3104 if(suffix_length==0) suffix_length=1; //FIXME split first iteration
3105
3106 #if 1
3107 if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3108 #else
3109 if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3110 ? == prefix > 2 or sth
3111 #endif
3112 #ifdef TRACE
3113 printf("level: %d suffix_length:%d\n", level[i], suffix_length);
3114 #endif
3115 }
3116
3117 if(total_coeff == max_coeff)
3118 zeros_left=0;
3119 else{
3120 if(n == CHROMA_DC_BLOCK_INDEX)
3121 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
3122 else
3123 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);