Assert that the first list1 entry is a reference frame.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
26b4fe82 34#include "h264_parser.h"
0da71265 35#include "golomb.h"
199436b9 36#include "mathops.h"
626464fb 37#include "rectangle.h"
369122dd 38#include "vdpau_internal.h"
0da71265 39
e5017ab8 40#include "cabac.h"
b250f9c6 41#if ARCH_X86
a6493a8f 42#include "x86/h264_i386.h"
52cb7981 43#endif
e5017ab8 44
2848ce84 45//#undef NDEBUG
0da71265
MN
46#include <assert.h>
47
2ddcf84b
JD
48/**
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
51 */
52#define DELAYED_PIC_REF 4
53
/*
 * VLC decoder objects and their statically allocated backing tables.
 * Each *_size constant repeats the first dimension(s) of the table declared
 * next to it; coeff_token_vlc_tables is one array whose first dimension is
 * the sum of the four per-VLC sizes listed in coeff_token_vlc_tables_size.
 * NOTE(review): the code that initialises and reads these tables is outside
 * this excerpt.
 */
0da71265 54static VLC coeff_token_vlc[4];
910e3668
AC
55static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
57
0da71265 58static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
59static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
61
62static VLC total_zeros_vlc[15];
910e3668
AC
63static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64static const int total_zeros_vlc_tables_size = 512;
65
0da71265 66static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
67static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
69
70static VLC run_vlc[6];
910e3668
AC
71static VLC_TYPE run_vlc_tables[6][8][2];
72static const int run_vlc_tables_size = 8;
73
0da71265 74static VLC run7_vlc;
910e3668
AC
75static VLC_TYPE run7_vlc_table[96][2];
76static const int run7_vlc_table_size = 96;
0da71265 77
8b82a956
MN
/* Forward declarations; the definitions are not part of this excerpt. */
78static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 80static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 81static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 82static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 83
849f1035 84static av_always_inline uint32_t pack16to32(int a, int b){
377ec888
MN
85#ifdef WORDS_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
87#else
88 return (a&0xFFFF) + (b<<16);
89#endif
90}
91
/* rem6[i] == i % 6 for i = 0..51. */
d9ec210b 92static const uint8_t rem6[52]={
acd8d10f
PI
930, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
94};
95
/* div6[i] == i / 6 for i = 0..51. */
d9ec210b 96static const uint8_t div6[52]={
acd8d10f
PI
970, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
98};
99
/*
 * Alternative index rows for reading data of the left neighbour(s).
 * fill_caches() points left_block at one row: row 0 by default, row 3 or
 * row 2/1 (2 - bottom) when the field/frame coding of the current and left
 * macroblock pair differ under MBAFF. Entries 0..3 index
 * intra4x4_pred_mode / non_zero_count / motion data, entries 4..7 index
 * further non_zero_count entries.
 */
89818988 100static const uint8_t left_block_options[4][8]={
143d7f14
PK
101 {0,1,2,3,7,10,8,11},
102 {2,2,3,3,8,11,8,11},
103 {0,0,1,1,7,10,7,10},
104 {0,2,0,2,7,10,7,10}
105};
acd8d10f 106
8140955d
MN
/* Table with 1<<LEVEL_TAB_BITS entries per row; NOTE(review): it is filled and read outside this excerpt. */
107#define LEVEL_TAB_BITS 8
108static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
109
/**
 * Fills the per-macroblock neighbour caches of the context.
 *
 * Work done, in order:
 *  - locates the top, left, top-left and top-right neighbours of the current
 *    macroblock (h->mb_xy), with extra adjustments when FRAME_MBAFF is set,
 *    and stores the result in h->top_mb_xy / h->left_mb_xy[];
 *  - determines the type of each neighbour (0 when it is not usable);
 *  - for intra macroblocks (non-deblock call only) derives the
 *    *_samples_available masks and the border entries of
 *    intra4x4_pred_mode_cache;
 *  - fills the border entries of non_zero_count_cache and, with CABAC,
 *    top_cbp / left_cbp;
 *  - for inter or direct macroblocks loads the neighbours' motion vectors
 *    and reference indices into mv_cache / ref_cache, plus mvd_cache and
 *    direct_cache when CABAC is used;
 *  - finally sets h->neighbor_transform_size.
 *
 * @param h           decoder context whose caches are written
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when the caches are prepared for deblocking:
 *                    neighbour usability is then judged by
 *                    slice_table < 0xFFFF instead of equality with
 *                    slice_num, the corner neighbours are ignored, and the
 *                    function may return early without touching any cache
 */
70abb407 110static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 111 MpegEncContext * const s = &h->s;
64514ee8 112 const int mb_xy= h->mb_xy;
0da71265
MN
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
89818988 115 const uint8_t * left_block;
02f7695b 116 int topleft_partition= -1;
0da71265
MN
117 int i;
118
36e097bc
JD
/* Macroblock above: one stride back, the stride being shifted left by FIELD_PICTURE. */
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
120
717b1733 121 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
123 return;
124
2cab6401
DB
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 127
6867a90b
LLL
/* Neighbour positions for the non-MBAFF case; corrected below when FRAME_MBAFF is set. */
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 131 left_block = left_block_options[0];
5d18eaad 132 if(FRAME_MBAFF){
6867a90b
LLL
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
6f3c50f2
MN
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
6867a90b 142 const int bottom = (s->mb_y & 1);
6f3c50f2 143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
60c6ba7a 144
6f3c50f2 145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
6867a90b
LLL
146 top_xy -= s->mb_stride;
147 }
6f3c50f2 148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
6867a90b 149 topleft_xy -= s->mb_stride;
6f3c50f2 150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
02f7695b 151 topleft_xy += s->mb_stride;
1412060e 152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 153 topleft_partition = 0;
6867a90b 154 }
6f3c50f2 155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
6867a90b
LLL
156 topright_xy -= s->mb_stride;
157 }
6f3c50f2 158 if (left_mb_field_flag != curr_mb_field_flag) {
6867a90b 159 left_xy[1] = left_xy[0] = pair_xy - 1;
6f3c50f2
MN
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
163 } else {
03a035e0 164 left_block= left_block_options[2 - bottom];
6867a90b
LLL
165 }
166 }
0da71265
MN
167 }
168
826de46e
LLL
/* Publish the resolved neighbour positions in the context. */
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 172 if(for_deblock){
717b1733
LM
/* Deblocking: a neighbour is usable when its slice_table entry is below 0xFFFF; corner neighbours are not used. */
173 topleft_type = 0;
174 topright_type = 0;
b735aeea
MN
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 178
e248cb60 179 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 180 int list;
3425501d 181 for(list=0; list<h->list_count; list++){
e248cb60
MN
182 //These values were changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 185 if(USES_LIST(mb_type,list)){
191e8ca7 186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
189 ref += h->b8_stride;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
192 }
193 }
194 }
46f2f05f
MN
195 }else{
/* Decoding: only neighbours whose slice_table entry equals the current slice_num are usable. */
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
201
202 if(IS_INTRA(mb_type)){
/* With constrained_intra_pred only intra neighbours pass the mask; otherwise any nonzero type does. */
faa7e394 203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
204 h->topleft_samples_available=
205 h->top_samples_available=
0da71265
MN
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
208
faa7e394 209 if(!(top_type & type_mask)){
0da71265
MN
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
213 }
d1d10e91
MN
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
faa7e394 216 if(!(left_type[0] & type_mask)){
d1d10e91
MN
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
219 }
faa7e394 220 if(!(left_type[1] & type_mask)){
d1d10e91
MN
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
223 }
224 }else{
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
faa7e394 228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
231 }
232 }
233 }else{
faa7e394 234 if(!(left_type[0] & type_mask)){
0da71265
MN
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
237 }
238 }
115329f1 239
faa7e394 240 if(!(topleft_type & type_mask))
0da71265 241 h->topleft_samples_available&= 0x7FFF;
115329f1 242
faa7e394 243 if(!(topright_type & type_mask))
0da71265 244 h->topright_samples_available&= 0xFBFF;
115329f1 245
0da71265
MN
246 if(IS_INTRA4x4(mb_type)){
/* Border prediction modes: copied from an intra4x4 neighbour, else 2 for a usable neighbour and -1 for a missing one. */
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 }else{
253 int pred;
faa7e394 254 if(!(top_type & type_mask))
0da71265 255 pred= -1;
6fbcaaa0
LLL
256 else{
257 pred= 2;
0da71265
MN
258 }
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
263 }
264 for(i=0; i<2; i++){
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 }else{
269 int pred;
faa7e394 270 if(!(left_type[i] & type_mask))
0da71265 271 pred= -1;
6fbcaaa0
LLL
272 else{
273 pred= 2;
0da71265
MN
274 }
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
277 }
278 }
279 }
280 }
29671011 281 }
115329f1
DB
282
283
0da71265 284/*
115329f1
DB
2850 . T T. T T T T
2861 L . .L . . . .
2872 L . .L . . . .
2883 . T TL . . . .
2894 L . .L . . . .
2905 L . .. . . . .
0da71265 291*/
1412060e 292//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Border entries of non_zero_count_cache: copied from the top neighbour, or a default (0 for CABAC non-intra, else 64) when it is missing. */
0da71265 293 if(top_type){
6867a90b
LLL
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 298
6867a90b 299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 301
6867a90b 302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 304
0da71265 305 }else{
115329f1 306 h->non_zero_count_cache[4+8*0]=
0da71265
MN
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
115329f1 310
0da71265
MN
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
115329f1 313
0da71265 314 h->non_zero_count_cache[1+8*3]=
3981c385 315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 316
0da71265 317 }
826de46e 318
6867a90b
LLL
319 for (i=0; i<2; i++) {
320 if(left_type[i]){
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 325 }else{
115329f1
DB
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
330 }
331 }
332
/* CABAC only: coded-block-pattern values of the top and left neighbours. */
333 if( h->pps.cabac ) {
334 // top_cbp
335 if(top_type) {
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
338 h->top_cbp = 0x1C0;
339 } else {
340 h->top_cbp = 0;
341 }
342 // left_cbp
343 if (left_type[0]) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
346 h->left_cbp = 0x1C0;
347 } else {
348 h->left_cbp = 0;
349 }
350 if (left_type[0]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
352 }
353 if (left_type[1]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 355 }
0da71265 356 }
6867a90b 357
0da71265 358#if 1
/* Inter or direct macroblocks: import the neighbours' motion vectors and reference indices, list by list. */
e2e5894a 359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 360 int list;
3425501d 361 for(list=0; list<h->list_count; list++){
e2e5894a 362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
367 }*/
5ad984c9 368 continue;
0da71265
MN
369 }
370 h->mv_cache_clean[list]= 0;
115329f1 371
53b19144 372 if(USES_LIST(top_type, list)){
0da71265
MN
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 }else{
115329f1
DB
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
389 }
390
4672503d
LM
391 for(i=0; i<2; i++){
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 }else{
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
405 }
0da71265
MN
406 }
407
/* The corner neighbours and everything below are skipped for deblocking and for temporal-direct macroblocks outside MBAFF. */
0281d325 408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
409 continue;
410
53b19144 411 if(USES_LIST(topleft_type, list)){
02f7695b
LM
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 }else{
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
419 }
115329f1 420
53b19144 421 if(USES_LIST(topright_type, list)){
e2e5894a
LM
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 }else{
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
429 }
e2e5894a 430
ae08a563 431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 432 continue;
115329f1
DB
433
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 437 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
444
445 if( h->pps.cabac ) {
446 /* XXX beurk, Load mvd */
53b19144 447 if(USES_LIST(top_type, list)){
9e528114
LA
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 }else{
115329f1
DB
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
458 }
53b19144 459 if(USES_LIST(left_type[0], list)){
9e528114
LA
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 }else{
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
466 }
53b19144 467 if(USES_LIST(left_type[1], list)){
9e528114
LA
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 }else{
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
474 }
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 480
/* B slices: record for each neighbour border entry whether it is direct-coded. */
9f5c1037 481 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
483
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 }else{
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
492 }
115329f1 493
5d18eaad
LM
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 else
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
500
501 if(IS_DIRECT(left_type[1]))
5ad984c9 502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 else
5ad984c9 506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
507 }
508 }
509
/* MBAFF: rescale neighbour entries so they match the field/frame mode of the current macroblock.
 * A field macroblock doubles the ref index and halves the vertical mv/mvd of frame neighbours;
 * a frame macroblock does the inverse for field neighbours. */
510 if(FRAME_MBAFF){
511#define MAP_MVS\
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 if(MB_FIELD){
523#define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
528 }
529 MAP_MVS
530#undef MAP_F2F
531 }else{
532#define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 537 }
5d18eaad
LM
538 MAP_MVS
539#undef MAP_F2F
5ad984c9 540 }
9e528114 541 }
0da71265 542 }
0da71265
MN
543 }
544#endif
43efd19a
LM
545
/* Number (0..2) of the top and first-left neighbours flagged IS_8x8DCT. */
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
547}
548
549static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 550 const int mb_xy= h->mb_xy;
0da71265
MN
551
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
559}
560
561/**
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
563 */
564static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
568 int i;
115329f1 569
0da71265
MN
570 if(!(h->top_samples_available&0x8000)){
571 for(i=0; i<4; i++){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 if(status<0){
9b879566 574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
575 return -1;
576 } else if(status){
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
578 }
579 }
580 }
115329f1 581
d1d10e91
MN
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 584 for(i=0; i<4; i++){
d1d10e91 585 if(!(h->left_samples_available&mask[i])){
26695973
MN
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 if(status<0){
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 return -1;
590 } else if(status){
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
592 }
d1d10e91 593 }
0da71265
MN
594 }
595 }
596
597 return 0;
598} //FIXME cleanup like next
599
600/**
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
602 */
603static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 607
43ff0714 608 if(mode > 6U) {
5175b937 609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 610 return -1;
5175b937 611 }
115329f1 612
0da71265
MN
613 if(!(h->top_samples_available&0x8000)){
614 mode= top[ mode ];
615 if(mode<0){
9b879566 616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
617 return -1;
618 }
619 }
115329f1 620
d1d10e91 621 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 622 mode= left[ mode ];
d1d10e91
MN
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
625 }
0da71265 626 if(mode<0){
9b879566 627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 628 return -1;
115329f1 629 }
0da71265
MN
630 }
631
632 return mode;
633}
634
635/**
636 * gets the predicted intra4x4 prediction mode.
637 */
638static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
643
a9c9a240 644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
645
646 if(min<0) return DC_PRED;
647 else return min;
648}
649
650static inline void write_back_non_zero_count(H264Context *h){
64514ee8 651 const int mb_xy= h->mb_xy;
0da71265 652
6867a90b
LLL
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 660
6867a90b 661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 664
6867a90b 665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
668}
669
670/**
1412060e 671 * gets the predicted number of non-zero coefficients.
0da71265
MN
672 * @param n block index
673 */
674static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
678 int i= left + top;
115329f1 679
0da71265
MN
680 if(i<64) i= (i+1)>>1;
681
a9c9a240 682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
683
684 return i&31;
685}
686
1924f3ce
MN
/**
 * Selects the diagonal neighbour for motion vector prediction.
 *
 * Normally this is the cache entry above and to the right of the partition
 * (i - 8 + part_width); when that entry is PART_NOT_AVAILABLE the entry
 * above and to the left (i - 8 - 1) is used instead. With FRAME_MBAFF some
 * neighbours are read directly from the picture arrays, rescaled between
 * field and frame units, and stored in the scratch cache slot scan8[0]-2.
 *
 * @param h          decoder context
 * @param C          receives a pointer to the selected motion vector
 * @param i          cache index of the partition's first block
 * @param list       reference list
 * @param part_width partition width in cache columns
 * @return the reference index stored for the selected neighbour (may be
 *         LIST_NOT_USED or PART_NOT_AVAILABLE)
 */
687static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 689 MpegEncContext *s = &h->s;
1924f3ce 690
5d18eaad
LM
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
693 if(FRAME_MBAFF){
191e8ca7 694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
695 const int16_t *mv;
/* Scratch slot: zeroed here, filled by SET_DIAG_MV when one of the special cases below applies. */
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
698
699 if(!MB_FIELD
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV always returns from the function: LIST_NOT_USED if the addressed macroblock does not use this list, otherwise the rescaled reference index. */
703#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 706 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
712
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
714 }
715 }
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 if(!MB_FIELD
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
722 }
723 if(MB_FIELD
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 && i >= scan8[0]+8){
1412060e 726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
728 }
729 }
730#undef SET_DIAG_MV
731 }
732
1924f3ce
MN
/* Common case: top-right entry if available, otherwise the top-left entry. */
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
735 return topright_ref;
736 }else{
a9c9a240 737 tprintf(s->avctx, "topright MV not available\n");
95c26348 738
1924f3ce
MN
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
741 }
742}
743
0da71265
MN
744/**
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
750 */
751static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
757 const int16_t * C;
758 int diagonal_ref, match_count;
759
0da71265 760 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 761
0da71265 762/* mv_cache
115329f1 763 B . . A T T T T
0da71265
MN
764 U . . L . . , .
765 U . . L . . . .
766 U . . L . . , .
767 . . . L . . . .
768*/
1924f3ce
MN
769
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
777 if(left_ref==ref){
778 *mx= A[0];
115329f1 779 *my= A[1];
1924f3ce
MN
780 }else if(top_ref==ref){
781 *mx= B[0];
115329f1 782 *my= B[1];
0da71265 783 }else{
1924f3ce 784 *mx= C[0];
115329f1 785 *my= C[1];
0da71265
MN
786 }
787 }else{
1924f3ce 788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 789 *mx= A[0];
115329f1 790 *my= A[1];
0da71265 791 }else{
1924f3ce
MN
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
0da71265 794 }
0da71265 795 }
115329f1 796
a9c9a240 797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
798}
799
800/**
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
805 */
806static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
807 if(n==0){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810
a9c9a240 811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 812
0da71265
MN
813 if(top_ref == ref){
814 *mx= B[0];
815 *my= B[1];
816 return;
817 }
818 }else{
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 821
a9c9a240 822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
823
824 if(left_ref == ref){
825 *mx= A[0];
826 *my= A[1];
827 return;
828 }
829 }
830
831 //RARE
832 pred_motion(h, n, 4, list, ref, mx, my);
833}
834
835/**
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
840 */
841static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
842 if(n==0){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 845
a9c9a240 846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
847
848 if(left_ref == ref){
849 *mx= A[0];
850 *my= A[1];
851 return;
852 }
853 }else{
1924f3ce
MN
854 const int16_t * C;
855 int diagonal_ref;
856
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 858
a9c9a240 859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 860
115329f1 861 if(diagonal_ref == ref){
0da71265
MN
862 *mx= C[0];
863 *my= C[1];
864 return;
865 }
0da71265
MN
866 }
867
868 //RARE
869 pred_motion(h, n, 2, list, ref, mx, my);
870}
871
872static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875
a9c9a240 876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
877
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
62ea19c0
MN
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
115329f1 881
0da71265
MN
882 *mx = *my = 0;
883 return;
884 }
115329f1 885
0da71265
MN
886 pred_motion(h, 0, 4, 0, 0, mx, my);
887
888 return;
889}
890
8b1fd554
MN
891static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
895 return 256;
896 }else{
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
900 }
901}
902
5ad984c9 903static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 906 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
907 int i, field;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 913 }
8b1fd554
MN
914
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 917 }
5ad984c9 918}
f4d3382d
MN
919
920static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
927
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
930
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
934
935 if (!interl)
936 poc |= 3;
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
939
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 if(rfield == field)
945 map[list][old_ref] = cur_ref;
946 break;
947 }
948 }
949 }
950 }
951}
952
2f944356
LM
953static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
bbc78fb4 957 int list, j, field;
f4d3382d
MN
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
aa617518 960
2f944356 961 for(list=0; list<2; list++){
2879c75f 962 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 963 for(j=0; j<h->ref_count[list]; j++)
42de393d 964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 965 }
aa617518 966
7762cc3d 967 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 970 }
aa617518 971
48e025e5 972 cur->mbaff= FRAME_MBAFF;
aa617518 973
9701840b 974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 975 return;
aa617518 976
2f944356 977 for(list=0; list<2; list++){
f4d3382d
MN
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
981 }
982}
5ad984c9
LM
983
984static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
d00eac6c
MN
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
989 int mb_type_col[2];
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
5ad984c9 992 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 993 unsigned int sub_mb_type;
5ad984c9
LM
994 int i8, i4;
995
7824b129
MN
996 assert(h->ref_list[1][0].reference&3);
997
5d18eaad 998#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
999
1000 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 1001 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
1002 int cur_poc = s->current_picture_ptr->poc;
1003 int *col_poc = h->ref_list[1]->field_poc;
1004 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1005 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1006 b8_stride = 0;
60c9b24d 1007 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1008 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1009 mb_xy += s->mb_stride*fieldoff;
1010 }
1011 goto single_col;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1014 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1016 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1017 b8_stride *= 3;
1018 b4_stride *= 6;
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1021 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1022 && !is_b8x8){
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1025 }else{
1026 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1027 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1028 }
1029 }else{ // AFR/FR -> AFR/FR
1030single_col:
1031 mb_type_col[0] =
1032 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1033 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1037 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1038 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1041 }else{
1042 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1043 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1044 }
d00eac6c 1045 }
5ad984c9 1046 }
5ad984c9 1047
7d54ecc9
MN
1048 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1049 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1050 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1051 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1052 if(!b8_stride){
1053 if(s->mb_y&1){
1054 l1ref0 += h->b8_stride;
1055 l1ref1 += h->b8_stride;
1056 l1mv0 += 2*b4_stride;
1057 l1mv1 += 2*b4_stride;
1058 }
d00eac6c 1059 }
115329f1 1060
5ad984c9
LM
1061 if(h->direct_spatial_mv_pred){
1062 int ref[2];
1063 int mv[2][2];
1064 int list;
1065
5d18eaad
LM
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1067
5ad984c9
LM
1068 /* ref = min(neighbors) */
1069 for(list=0; list<2; list++){
1070 int refa = h->ref_cache[list][scan8[0] - 1];
1071 int refb = h->ref_cache[list][scan8[0] - 8];
1072 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1073 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1074 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1075 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1076 if(ref[list] < 0)
1077 ref[list] = -1;
1078 }
1079
1080 if(ref[0] < 0 && ref[1] < 0){
1081 ref[0] = ref[1] = 0;
1082 mv[0][0] = mv[0][1] =
1083 mv[1][0] = mv[1][1] = 0;
1084 }else{
1085 for(list=0; list<2; list++){
1086 if(ref[list] >= 0)
1087 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1088 else
1089 mv[list][0] = mv[list][1] = 0;
1090 }
1091 }
1092
1093 if(ref[1] < 0){
50b3ab0f
LM
1094 if(!is_b8x8)
1095 *mb_type &= ~MB_TYPE_L1;
1096 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1097 }else if(ref[0] < 0){
50b3ab0f
LM
1098 if(!is_b8x8)
1099 *mb_type &= ~MB_TYPE_L0;
1100 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1101 }
1102
d00eac6c 1103 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1104 for(i8=0; i8<4; i8++){
1105 int x8 = i8&1;
1106 int y8 = i8>>1;
1107 int xy8 = x8+y8*b8_stride;
1108 int xy4 = 3*x8+y8*b4_stride;
1109 int a=0, b=0;
1110
1111 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1112 continue;
1113 h->sub_mb_type[i8] = sub_mb_type;
1114
1115 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1116 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1117 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1118 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1119 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1120 if(ref[0] > 0)
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 if(ref[1] > 0)
1123 b= pack16to32(mv[1][0],mv[1][1]);
1124 }else{
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1127 }
1128 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1129 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1130 }
1131 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1132 int a=0, b=0;
1133
cec93959
LM
1134 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1135 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1136 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1137 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1138 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1139 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1140 if(ref[0] > 0)
d19f5acb 1141 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1142 if(ref[1] > 0)
d19f5acb 1143 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1144 }else{
d19f5acb
MN
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1147 }
d19f5acb
MN
1148 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1150 }else{
1151 for(i8=0; i8<4; i8++){
1152 const int x8 = i8&1;
1153 const int y8 = i8>>1;
115329f1 1154
5ad984c9
LM
1155 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1156 continue;
1157 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1158
5ad984c9
LM
1159 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1160 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1161 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1162 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1163
5ad984c9 1164 /* col_zero_flag */
2ccd25d0
MN
1165 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1166 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1167 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1168 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1169 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1170 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1171 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1172 if(ref[0] == 0)
1173 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 if(ref[1] == 0)
1175 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1176 }
1177 }else
5ad984c9 1178 for(i4=0; i4<4; i4++){
2ccd25d0 1179 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1180 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1181 if(ref[0] == 0)
1182 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1183 if(ref[1] == 0)
1184 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1185 }
1186 }
1187 }
1188 }
1189 }
1190 }else{ /* direct temporal mv pred */
5d18eaad
LM
1191 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1192 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1193 int ref_offset= 0;
5d18eaad 1194
cc615d2c 1195 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1196 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1197 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1198 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1199 }
48e025e5 1200 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1201 ref_offset += 16;
48e025e5 1202
cc615d2c
MN
1203 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1205 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1206
cc615d2c
MN
1207 for(i8=0; i8<4; i8++){
1208 const int x8 = i8&1;
1209 const int y8 = i8>>1;
1210 int ref0, scale;
1211 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1212
cc615d2c
MN
1213 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1214 continue;
1215 h->sub_mb_type[i8] = sub_mb_type;
1216
1217 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col[y8])){
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1222 continue;
1223 }
1224
1225 ref0 = l1ref0[x8 + y8*b8_stride];
1226 if(ref0 >= 0)
f4d3382d 1227 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1228 else{
f4d3382d 1229 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1230 l1mv= l1mv1;
1231 }
1232 scale = dist_scale_factor[ref0];
1233 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1234
1235 {
1236 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1237 int my_col = (mv_col[1]<<y_shift)/2;
1238 int mx = (scale * mv_col[0] + 128) >> 8;
1239 int my = (scale * my_col + 128) >> 8;
1240 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1241 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1242 }
5d18eaad 1243 }
cc615d2c
MN
1244 return;
1245 }
5d18eaad
LM
1246
1247 /* one-to-one mv scaling */
1248
5ad984c9 1249 if(IS_16X16(*mb_type)){
fda51641
MN
1250 int ref, mv0, mv1;
1251
5ad984c9 1252 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1253 if(IS_INTRA(mb_type_col[0])){
fda51641 1254 ref=mv0=mv1=0;
5ad984c9 1255 }else{
f4d3382d
MN
1256 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1257 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1258 const int scale = dist_scale_factor[ref0];
8583bef8 1259 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1260 int mv_l0[2];
5d18eaad
LM
1261 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1262 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1263 ref= ref0;
1264 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1265 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1266 }
fda51641
MN
1267 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1270 }else{
1271 for(i8=0; i8<4; i8++){
1272 const int x8 = i8&1;
1273 const int y8 = i8>>1;
5d18eaad 1274 int ref0, scale;
bf4e3bd2 1275 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1276
5ad984c9
LM
1277 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1278 continue;
1279 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1280 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1281 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1283 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 continue;
1286 }
115329f1 1287
f4d3382d 1288 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1289 if(ref0 >= 0)
5d18eaad 1290 ref0 = map_col_to_list0[0][ref0];
8583bef8 1291 else{
f4d3382d 1292 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1293 l1mv= l1mv1;
1294 }
5d18eaad 1295 scale = dist_scale_factor[ref0];
115329f1 1296
5ad984c9 1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1298 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1299 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1300 int mx = (scale * mv_col[0] + 128) >> 8;
1301 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1302 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1303 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1304 }else
5ad984c9 1305 for(i4=0; i4<4; i4++){
2ccd25d0 1306 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1307 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1308 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1309 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1310 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1311 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1312 }
1313 }
1314 }
1315 }
1316}
1317
0da71265
MN
1318static inline void write_back_motion(H264Context *h, int mb_type){
1319 MpegEncContext * const s = &h->s;
0da71265
MN
1320 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1321 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1322 int list;
1323
2ea39252
LM
1324 if(!USES_LIST(mb_type, 0))
1325 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1326
3425501d 1327 for(list=0; list<h->list_count; list++){
0da71265 1328 int y;
53b19144 1329 if(!USES_LIST(mb_type, list))
5ad984c9 1330 continue;
115329f1 1331
0da71265
MN
1332 for(y=0; y<4; y++){
1333 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1335 }
9e528114 1336 if( h->pps.cabac ) {
e6e77eb6
LM
1337 if(IS_SKIP(mb_type))
1338 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1339 else
9e528114
LA
1340 for(y=0; y<4; y++){
1341 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1343 }
1344 }
53b19144
LM
1345
1346 {
191e8ca7 1347 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1348 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1349 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1350 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1351 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1352 }
1353 }
115329f1 1354
9f5c1037 1355 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1356 if(IS_8X8(mb_type)){
53b19144
LM
1357 uint8_t *direct_table = &h->direct_table[b8_xy];
1358 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1359 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1360 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1361 }
1362 }
0da71265
MN
1363}
1364
1790a5e9 1365const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1366 int i, si, di;
1367 uint8_t *dst;
24456882 1368 int bufidx;
0da71265 1369
bb270c08 1370// src[0]&0x80; //forbidden bit
0da71265
MN
1371 h->nal_ref_idc= src[0]>>5;
1372 h->nal_unit_type= src[0]&0x1F;
1373
1374 src++; length--;
115329f1 1375#if 0
0da71265
MN
1376 for(i=0; i<length; i++)
1377 printf("%2X ", src[i]);
1378#endif
e08715d3 1379
b250f9c6
AJ
1380#if HAVE_FAST_UNALIGNED
1381# if HAVE_FAST_64BIT
e08715d3
MN
1382# define RS 7
1383 for(i=0; i+1<length; i+=9){
3878be31 1384 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
1385# else
1386# define RS 3
1387 for(i=0; i+1<length; i+=5){
3878be31 1388 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
1389# endif
1390 continue;
1391 if(i>0 && !src[i]) i--;
1392 while(src[i]) i++;
1393#else
1394# define RS 0
0da71265
MN
1395 for(i=0; i+1<length; i+=2){
1396 if(src[i]) continue;
1397 if(i>0 && src[i-1]==0) i--;
e08715d3 1398#endif
0da71265
MN
1399 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1400 if(src[i+2]!=3){
1401 /* startcode, so we must be past the end */
1402 length=i;
1403 }
1404 break;
1405 }
abb27cfb 1406 i-= RS;
0da71265
MN
1407 }
1408
1409 if(i>=length-1){ //no escaped 0
1410 *dst_length= length;
1411 *consumed= length+1; //+1 for the header
115329f1 1412 return src;
0da71265
MN
1413 }
1414
24456882 1415 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 1416 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 1417 dst= h->rbsp_buffer[bufidx];
0da71265 1418
ac658be5
FOL
1419 if (dst == NULL){
1420 return NULL;
1421 }
1422
3b66c4c5 1423//printf("decoding esc\n");
593af7cd
MN
1424 memcpy(dst, src, i);
1425 si=di=i;
1426 while(si+2<length){
0da71265 1427 //remove escapes (very rare 1:2^22)
593af7cd
MN
1428 if(src[si+2]>3){
1429 dst[di++]= src[si++];
1430 dst[di++]= src[si++];
1431 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
1432 if(src[si+2]==3){ //escape
1433 dst[di++]= 0;
1434 dst[di++]= 0;
1435 si+=3;
c8470cc1 1436 continue;
0da71265 1437 }else //next start code
593af7cd 1438 goto nsc;
0da71265
MN
1439 }
1440
1441 dst[di++]= src[si++];
1442 }
593af7cd
MN
1443 while(si<length)
1444 dst[di++]= src[si++];
1445nsc:
0da71265 1446
d4369630
AS
1447 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1448
0da71265
MN
1449 *dst_length= di;
1450 *consumed= si + 1;//+1 for the header
90b5b51e 1451//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1452 return dst;
1453}
1454
1790a5e9 1455int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1456 int v= *src;
1457 int r;
1458
a9c9a240 1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1460
1461 for(r=1; r<9; r++){
1462 if(v&1) return r;
1463 v>>=1;
1464 }
1465 return 0;
1466}
1467
1468/**
1412060e 1469 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1470 * @param qp quantization parameter
1471 */
239ea04c 1472static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1473#define stride 16
1474 int i;
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1478
1479//memset(block, 64, 2*256);
1480//return;
1481 for(i=0; i<4; i++){
1482 const int offset= y_offset[i];
1483 const int z0= block[offset+stride*0] + block[offset+stride*4];
1484 const int z1= block[offset+stride*0] - block[offset+stride*4];
1485 const int z2= block[offset+stride*1] - block[offset+stride*5];
1486 const int z3= block[offset+stride*1] + block[offset+stride*5];
1487
1488 temp[4*i+0]= z0+z3;
1489 temp[4*i+1]= z1+z2;
1490 temp[4*i+2]= z1-z2;
1491 temp[4*i+3]= z0-z3;
1492 }
1493
1494 for(i=0; i<4; i++){
1495 const int offset= x_offset[i];
1496 const int z0= temp[4*0+i] + temp[4*2+i];
1497 const int z1= temp[4*0+i] - temp[4*2+i];
1498 const int z2= temp[4*1+i] - temp[4*3+i];
1499 const int z3= temp[4*1+i] + temp[4*3+i];
1500
1412060e 1501 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1502 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1503 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1504 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1505 }
1506}
1507
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled: this code is compiled out.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    /* first pass: butterflies along one direction, results kept in temp[] */
    for(i=0; i<4; i++){
        const DCTELEM *in = block + y_offset[i];
        const int sum04  = in[stride*0] + in[stride*4];
        const int diff04 = in[stride*0] - in[stride*4];
        const int diff15 = in[stride*1] - in[stride*5];
        const int sum15  = in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04 +sum15;
        temp[4*i+1]= diff04+diff15;
        temp[4*i+2]= diff04-diff15;
        temp[4*i+3]= sum04 -sum15;
    }

    /* second pass: butterflies along the other direction, halve and store */
    for(i=0; i<4; i++){
        DCTELEM *out = block + x_offset[i];
        const int sum02  = temp[4*0+i] + temp[4*2+i];
        const int diff02 = temp[4*0+i] - temp[4*2+i];
        const int diff13 = temp[4*1+i] - temp[4*3+i];
        const int sum13  = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02  + sum13 )>>1;
        out[stride*2 ]= (diff02 + diff13)>>1;
        out[stride*8 ]= (diff02 - diff13)>>1;
        out[stride*10]= (sum02  - sum13 )>>1;
    }
}
#endif
1547
0da71265
MN
1548#undef xStride
1549#undef stride
1550
239ea04c 1551static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1552 const int stride= 16*2;
1553 const int xStride= 16;
1554 int a,b,c,d,e;
1555
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1560
1561 e= a-b;
1562 a= a+b;
1563 b= c-d;
1564 c= c+d;
1565
239ea04c
LM
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1570}
1571
#if 0
/**
 * In-place 2x2 butterfly of the four chroma DC values located at
 * offsets 0, 16, 32 and 48 of block (currently compiled out).
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    DCTELEM * const row0= block;
    DCTELEM * const row1= block + stride;
    const int top_sum = row0[0] + row0[xStride];
    const int top_diff= row0[0] - row0[xStride];
    const int bot_sum = row1[0] + row1[xStride];
    const int bot_diff= row1[0] - row1[xStride];

    row0[0      ]= (top_sum  + bot_sum );
    row0[xStride]= (top_diff + bot_diff);
    row1[0      ]= (top_sum  - bot_sum );
    row1[xStride]= (top_diff - bot_diff);
}
#endif
0da71265
MN
1594
1595/**
1596 * gets the chroma qp.
1597 */
4691a77d 1598static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1599 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1600}
1601
0da71265
MN
1602static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1603 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1604 int src_x_offset, int src_y_offset,
1605 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1606 MpegEncContext * const s = &h->s;
1607 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1608 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1609 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1610 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1611 uint8_t * src_cb, * src_cr;
1612 int extra_width= h->emu_edge_width;
1613 int extra_height= h->emu_edge_height;
0da71265
MN
1614 int emu=0;
1615 const int full_mx= mx>>2;
1616 const int full_my= my>>2;
fbd312fd 1617 const int pic_width = 16*s->mb_width;
0d43dd8c 1618 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1619
0da71265
MN
1620 if(mx&7) extra_width -= 3;
1621 if(my&7) extra_height -= 3;
115329f1
DB
1622
1623 if( full_mx < 0-extra_width
1624 || full_my < 0-extra_height
1625 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1626 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1627 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1628 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1629 emu=1;
1630 }
115329f1 1631
5d18eaad 1632 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1633 if(!square){
5d18eaad 1634 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1635 }
115329f1 1636
49fb20cb 1637 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1638
0d43dd8c 1639 if(MB_FIELD){
5d18eaad 1640 // chroma offset when predicting from a field of opposite parity
2143b118 1641 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1642 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1643 }
1644 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1646
0da71265 1647 if(emu){
5d18eaad 1648 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1649 src_cb= s->edge_emu_buffer;
1650 }
5d18eaad 1651 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1652
1653 if(emu){
5d18eaad 1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1655 src_cr= s->edge_emu_buffer;
1656 }
5d18eaad 1657 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1658}
1659
9f2d1b4f 1660static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1661 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1662 int x_offset, int y_offset,
1663 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1664 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1665 int list0, int list1){
1666 MpegEncContext * const s = &h->s;
1667 qpel_mc_func *qpix_op= qpix_put;
1668 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1669
5d18eaad
LM
1670 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1671 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1672 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1673 x_offset += 8*s->mb_x;
0d43dd8c 1674 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1675
0da71265 1676 if(list0){
1924f3ce 1677 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
1681
1682 qpix_op= qpix_avg;
1683 chroma_op= chroma_avg;
1684 }
1685
1686 if(list1){
1924f3ce 1687 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1688 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1689 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1690 qpix_op, chroma_op);
1691 }
1692}
1693
9f2d1b4f
LM
1694static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1699 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1700 int list0, int list1){
1701 MpegEncContext * const s = &h->s;
1702
5d18eaad
LM
1703 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1704 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1705 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1706 x_offset += 8*s->mb_x;
0d43dd8c 1707 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1708
9f2d1b4f
LM
1709 if(list0 && list1){
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1713 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1714 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1715 int refn0 = h->ref_cache[0][ scan8[n] ];
1716 int refn1 = h->ref_cache[1][ scan8[n] ];
1717
1718 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1719 dest_y, dest_cb, dest_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1721 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1722 tmp_y, tmp_cb, tmp_cr,
1723 x_offset, y_offset, qpix_put, chroma_put);
1724
1725 if(h->use_weight == 2){
1726 int weight0 = h->implicit_weight[refn0][refn1];
1727 int weight1 = 64 - weight0;
5d18eaad
LM
1728 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1731 }else{
5d18eaad 1732 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1733 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1734 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1736 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1737 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1739 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1740 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1741 }
1742 }else{
1743 int list = list1 ? 1 : 0;
1744 int refn = h->ref_cache[list][ scan8[n] ];
1745 Picture *ref= &h->ref_list[list][refn];
1746 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1747 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1748 qpix_put, chroma_put);
1749
5d18eaad 1750 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1751 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1752 if(h->use_weight_chroma){
5d18eaad 1753 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1754 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1755 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1756 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1757 }
1758 }
1759}
1760
1761static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1762 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763 int x_offset, int y_offset,
1764 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1765 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1766 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1767 int list0, int list1){
1768 if((h->use_weight==2 && list0 && list1
1769 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1770 || h->use_weight==1)
1771 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put,
1773 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1774 else
1775 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1776 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1777}
1778
513fbd8e
LM
1779static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1784 if(refn >= 0){
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1792 }
1793}
1794
0da71265
MN
1795static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1799 MpegEncContext * const s = &h->s;
64514ee8 1800 const int mb_xy= h->mb_xy;
0da71265 1801 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1802
0da71265 1803 assert(IS_INTER(mb_type));
115329f1 1804
513fbd8e
LM
1805 prefetch_motion(h, 0);
1806
0da71265
MN
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1810 &weight_op[0], &weight_avg[0],
0da71265
MN
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1815 &weight_op[1], &weight_avg[1],
0da71265
MN
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1819 &weight_op[1], &weight_avg[1],
0da71265
MN
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
5d18eaad 1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1824 &weight_op[2], &weight_avg[2],
0da71265 1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1828 &weight_op[2], &weight_avg[2],
0da71265
MN
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1830 }else{
1831 int i;
115329f1 1832
0da71265
MN
1833 assert(IS_8X8(mb_type));
1834
1835 for(i=0; i<4; i++){
1836 const int sub_mb_type= h->sub_mb_type[i];
1837 const int n= 4*i;
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1840
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1844 &weight_op[3], &weight_avg[3],
0da71265
MN
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1849 &weight_op[4], &weight_avg[4],
0da71265
MN
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1853 &weight_op[4], &weight_avg[4],
0da71265
MN
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1858 &weight_op[5], &weight_avg[5],
0da71265 1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1862 &weight_op[5], &weight_avg[5],
0da71265
MN
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864 }else{
1865 int j;
1866 assert(IS_SUB_4X4(sub_mb_type));
1867 for(j=0; j<4; j++){
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1872 &weight_op[6], &weight_avg[6],
0da71265
MN
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1874 }
1875 }
1876 }
1877 }
513fbd8e
LM
1878
1879 prefetch_motion(h, 1);
0da71265
MN
1880}
1881
8140955d
MN
1882static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1884 unsigned int i;
1885
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1890
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1899 }else{
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1902 }
1903 }
1904 }
1905}
1906
98a6fff9 1907static av_cold void decode_init_vlc(void){
0da71265
MN
1908 static int done = 0;
1909
1910 if (!done) {
1911 int i;
910e3668 1912 int offset;
0da71265
MN
1913 done = 1;
1914
910e3668
AC
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1918 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
0da71265 1921
910e3668 1922 offset = 0;
0da71265 1923 for(i=0; i<4; i++){
910e3668
AC
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1927 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
0da71265 1931 }
910e3668
AC
1932 /*
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1936 */
37d3e066 1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1938
1939 for(i=0; i<3; i++){
910e3668
AC
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1947 }
1948 for(i=0; i<15; i++){
910e3668
AC
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1953 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1956 }
1957
1958 for(i=0; i<6; i++){
910e3668
AC
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1962 RUN_VLC_BITS, 7,
0da71265 1963 &run_len [i][0], 1, 1,
910e3668
AC
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
0da71265 1966 }
910e3668
AC
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1970 &run_len [6][0], 1, 1,
910e3668
AC
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
8140955d
MN
1973
1974 init_cavlc_level_tab();
0da71265
MN
1975 }
1976}
1977
0da71265 1978static void free_tables(H264Context *h){
7978debd 1979 int i;
afebe2f7 1980 H264Context *hx;
0da71265 1981 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
9e528114
LA
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
5ad984c9 1986 av_freep(&h->direct_table);
0da71265
MN
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
1989 h->slice_table= NULL;
e5017ab8 1990
0da71265
MN
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
9f2d1b4f 1993
6752dd5a 1994 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
1995 hx = h->thread_context[i];
1996 if(!hx) continue;
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 2000 }
0da71265
MN
2001}
2002
239ea04c
LM
2003static void init_dequant8_coeff_table(H264Context *h){
2004 int i,q,x;
548a1c8a 2005 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2006 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2007 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2008
2009 for(i=0; i<2; i++ ){
2010 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2011 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2012 break;
2013 }
2014
2015 for(q=0; q<52; q++){
d9ec210b
DP
2016 int shift = div6[q];
2017 int idx = rem6[q];
239ea04c 2018 for(x=0; x<64; x++)
548a1c8a
LM
2019 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2020 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2021 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2022 }
2023 }
2024}
2025
2026static void init_dequant4_coeff_table(H264Context *h){
2027 int i,j,q,x;
ab2e3e2c 2028 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2029 for(i=0; i<6; i++ ){
2030 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2031 for(j=0; j<i; j++){
2032 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2033 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2034 break;
2035 }
2036 }
2037 if(j<i)
2038 continue;
2039
2040 for(q=0; q<52; q++){
d9ec210b
DP
2041 int shift = div6[q] + 2;
2042 int idx = rem6[q];
239ea04c 2043 for(x=0; x<16; x++)
ab2e3e2c
LM
2044 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2045 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2046 h->pps.scaling_matrix4[i][x]) << shift;
2047 }
2048 }
2049}
2050
2051static void init_dequant_tables(H264Context *h){
2052 int i,x;
2053 init_dequant4_coeff_table(h);
2054 if(h->pps.transform_8x8_mode)
2055 init_dequant8_coeff_table(h);
2056 if(h->sps.transform_bypass){
2057 for(i=0; i<6; i++)
2058 for(x=0; x<16; x++)
2059 h->dequant4_coeff[i][0][x] = 1<<6;
2060 if(h->pps.transform_8x8_mode)
2061 for(i=0; i<2; i++)
2062 for(x=0; x<64; x++)
2063 h->dequant8_coeff[i][0][x] = 1<<6;
2064 }
2065}
2066
2067
0da71265
MN
2068/**
2069 * allocates tables.
3b66c4c5 2070 * needs width/height
0da71265
MN
2071 */
2072static int alloc_tables(H264Context *h){
2073 MpegEncContext * const s = &h->s;
7bc9090a 2074 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2075 int x,y;
0da71265
MN
2076
2077 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2078
53c05b1e 2079 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2080 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2081 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2082
7526ade2
MN
2083 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2084 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2085 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2086 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2087
b735aeea 2088 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2089 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2090
a55f20bd
LM
2091 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2092 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2093 for(y=0; y<s->mb_height; y++){
2094 for(x=0; x<s->mb_width; x++){
7bc9090a 2095 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2096 const int b_xy = 4*x + 4*y*h->b_stride;
2097 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2098
0da71265
MN
2099 h->mb2b_xy [mb_xy]= b_xy;
2100 h->mb2b8_xy[mb_xy]= b8_xy;
2101 }
2102 }
9f2d1b4f 2103
9c6221ae
GV
2104 s->obmc_scratchpad = NULL;
2105
56edbd81
LM
2106 if(!h->dequant4_coeff[0])
2107 init_dequant_tables(h);
2108
0da71265
MN
2109 return 0;
2110fail:
2111 free_tables(h);
2112 return -1;
2113}
2114
afebe2f7
2115/**
2116 * Mimic alloc_tables(), but for every context thread.
2117 */
2118static void clone_tables(H264Context *dst, H264Context *src){
2119 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2120 dst->non_zero_count = src->non_zero_count;
2121 dst->slice_table = src->slice_table;
2122 dst->cbp_table = src->cbp_table;
2123 dst->mb2b_xy = src->mb2b_xy;
2124 dst->mb2b8_xy = src->mb2b8_xy;
2125 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2126 dst->mvd_table[0] = src->mvd_table[0];
2127 dst->mvd_table[1] = src->mvd_table[1];
2128 dst->direct_table = src->direct_table;
2129
afebe2f7
2130 dst->s.obmc_scratchpad = NULL;
2131 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2132}
2133
2134/**
2135 * Init context
2136 * Allocate buffers which are not shared amongst multiple threads.
2137 */
2138static int context_init(H264Context *h){
afebe2f7
2139 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2141
afebe2f7
2142 return 0;
2143fail:
2144 return -1; // free_tables will clean up for us
2145}
2146
98a6fff9 2147static av_cold void common_init(H264Context *h){
0da71265 2148 MpegEncContext * const s = &h->s;
0da71265
MN
2149
2150 s->width = s->avctx->width;
2151 s->height = s->avctx->height;
2152 s->codec_id= s->avctx->codec->id;
115329f1 2153
c92a30bb 2154 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2155
239ea04c 2156 h->dequant_coeff_pps= -1;
9a41c2c7 2157 s->unrestricted_mv=1;
0da71265 2158 s->decode=1; //FIXME
56edbd81 2159
a5805aa9
MN
2160 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2161
56edbd81
LM
2162 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2163 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2164}
2165
03831f46
IS
2166/**
2167 * Reset SEI values at the beginning of the frame.
2168 *
2169 * @param h H.264 context.
2170 */
2171static void reset_sei(H264Context *h) {
2172 h->sei_recovery_frame_cnt = -1;
2173 h->sei_dpb_output_delay = 0;
2174 h->sei_cpb_removal_delay = -1;
2175 h->sei_buffering_period_present = 0;
2176}
2177
98a6fff9 2178static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2179 H264Context *h= avctx->priv_data;
2180 MpegEncContext * const s = &h->s;
2181
3edcacde 2182 MPV_decode_defaults(s);
115329f1 2183
0da71265
MN
2184 s->avctx = avctx;
2185 common_init(h);
2186
2187 s->out_format = FMT_H264;
2188 s->workaround_bugs= avctx->workaround_bugs;
2189
2190 // set defaults
0da71265 2191// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2192 s->quarter_sample = 1;
47cd974a 2193 if(!avctx->has_b_frames)
0da71265 2194 s->low_delay= 1;
7a9dba3c 2195
09a9b45e 2196 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2197 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
7a9dba3c 2198 else
09a9b45e 2199 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
a05aa821 2200 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
0da71265 2201
c2212338 2202 decode_init_vlc();
115329f1 2203
26165f99
MR
2204 if(avctx->extradata_size > 0 && avctx->extradata &&
2205 *(char *)avctx->extradata == 1){
4770b1b4
RT
2206 h->is_avc = 1;
2207 h->got_avcC = 0;
26165f99
MR
2208 } else {
2209 h->is_avc = 0;
4770b1b4
RT
2210 }
2211
afebe2f7 2212 h->thread_context[0] = h;
18c7be65 2213 h->outputed_poc = INT_MIN;
e4b8f1fa 2214 h->prev_poc_msb= 1<<16;
03831f46 2215 reset_sei(h);
efd8c1f6
MN
2216 if(avctx->codec_id == CODEC_ID_H264){
2217 if(avctx->ticks_per_frame == 1){
2218 s->avctx->time_base.den *=2;
2219 }
19df37a8 2220 avctx->ticks_per_frame = 2;
efd8c1f6 2221 }
0da71265
MN
2222 return 0;
2223}
2224
af8aa846 2225static int frame_start(H264Context *h){
0da71265
MN
2226 MpegEncContext * const s = &h->s;
2227 int i;
2228
af8aa846
MN
2229 if(MPV_frame_start(s, s->avctx) < 0)
2230 return -1;
0da71265 2231 ff_er_frame_start(s);
3a22d7fa
JD
2232 /*
2233 * MPV_frame_start uses pict_type to derive key_frame.
2234 * This is incorrect for H.264; IDR markings must be used.
1412060e 2235 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2236 * See decode_nal_units().
2237 */
2238 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2239
2240 assert(s->linesize && s->uvlinesize);
2241
2242 for(i=0; i<16; i++){
2243 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2244 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2245 }
2246 for(i=0; i<4; i++){
2247 h->block_offset[16+i]=
2248 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2249 h->block_offset[24+16+i]=
2250 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2251 }
2252
934b0821
LM
2253 /* can't be in alloc_tables because linesize isn't known there.
2254 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2255 for(i = 0; i < s->avctx->thread_count; i++)
2256 if(!h->thread_context[i]->s.obmc_scratchpad)
2257 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2258
2259 /* some macroblocks will be accessed before they're available */
afebe2f7 2260 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2261 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2262
0da71265 2263// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2264
1412060e 2265 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2266 // that if we break out due to an error it can be released automatically
2267 // in the next MPV_frame_start().
2268 // SVQ3 as well as most other codecs have only last/next/current and thus
2269 // get released even with set reference, besides SVQ3 and others do not
2270 // mark frames as reference later "naturally".
2271 if(s->codec_id != CODEC_ID_SVQ3)
2272 s->current_picture_ptr->reference= 0;
357282c6
MN
2273
2274 s->current_picture_ptr->field_poc[0]=
2275 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2276 assert(s->current_picture_ptr->long_ref==0);
357282c6 2277
af8aa846 2278 return 0;
0da71265
MN
2279}
2280
93cc10fa 2281static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2282 MpegEncContext * const s = &h->s;
2283 int i;
5f7f9719
MN
2284 int step = 1;
2285 int offset = 1;
2286 int uvoffset= 1;
2287 int top_idx = 1;
2288 int skiplast= 0;
115329f1 2289
53c05b1e
MN
2290 src_y -= linesize;
2291 src_cb -= uvlinesize;
2292 src_cr -= uvlinesize;
2293
5f7f9719
MN
2294 if(!simple && FRAME_MBAFF){
2295 if(s->mb_y&1){
2296 offset = MB_MBAFF ? 1 : 17;
2297 uvoffset= MB_MBAFF ? 1 : 9;
2298 if(!MB_MBAFF){
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
49fb20cb 2301 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2304 }
2305 }
2306 }else{
2307 if(!MB_MBAFF){
2308 h->left_border[0]= h->top_borders[0][s->mb_x][15];
49fb20cb 2309 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2310 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2311 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2312 }
2313 skiplast= 1;
2314 }
2315 offset =
2316 uvoffset=
2317 top_idx = MB_MBAFF ? 0 : 1;
2318 }
2319 step= MB_MBAFF ? 2 : 1;
2320 }
2321
3b66c4c5 2322 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2323 // and the line above the bottom macroblock
5f7f9719
MN
2324 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2325 for(i=1; i<17 - skiplast; i++){
2326 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2327 }
115329f1 2328
5f7f9719
MN
2329 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2330 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2331
49fb20cb 2332 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2333 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2334 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2335 for(i=1; i<9 - skiplast; i++){
2336 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2337 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2338 }
5f7f9719
MN
2339 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2340 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2341 }
2342}
2343
93cc10fa 2344static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2345 MpegEncContext * const s = &h->s;
2346 int temp8, i;
2347 uint64_t temp64;
b69378e2
2348 int deblock_left;
2349 int deblock_top;
2350 int mb_xy;
5f7f9719
MN
2351 int step = 1;
2352 int offset = 1;
2353 int uvoffset= 1;
2354 int top_idx = 1;
2355
2356 if(!simple && FRAME_MBAFF){
2357 if(s->mb_y&1){
2358 offset = MB_MBAFF ? 1 : 17;
2359 uvoffset= MB_MBAFF ? 1 : 9;
2360 }else{
2361 offset =
2362 uvoffset=
2363 top_idx = MB_MBAFF ? 0 : 1;
2364 }
2365 step= MB_MBAFF ? 2 : 1;
2366 }
b69378e2
2367
2368 if(h->deblocking_filter == 2) {
64514ee8 2369 mb_xy = h->mb_xy;
b69378e2
2370 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2371 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2372 } else {
2373 deblock_left = (s->mb_x > 0);
6c805007 2374 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2375 }
53c05b1e
MN
2376
2377 src_y -= linesize + 1;
2378 src_cb -= uvlinesize + 1;
2379 src_cr -= uvlinesize + 1;
2380
2381#define XCHG(a,b,t,xchg)\
2382t= a;\
2383if(xchg)\
2384 a= b;\
2385b= t;
d89dc06a
LM
2386
2387 if(deblock_left){
5f7f9719
MN
2388 for(i = !deblock_top; i<16; i++){
2389 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2390 }
5f7f9719 2391 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2392 }
2393
2394 if(deblock_top){
5f7f9719
MN
2395 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2396 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2397 if(s->mb_x+1 < s->mb_width){
5f7f9719 2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2399 }
53c05b1e 2400 }
53c05b1e 2401
49fb20cb 2402 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2403 if(deblock_left){
5f7f9719
MN
2404 for(i = !deblock_top; i<8; i++){
2405 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2406 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2407 }
5f7f9719
MN
2408 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2409 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2410 }
2411 if(deblock_top){
5f7f9719
MN
2412 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2413 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2414 }
53c05b1e
MN
2415 }
2416}
2417
5a6a6cc7 2418static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2419 MpegEncContext * const s = &h->s;
2420 const int mb_x= s->mb_x;
2421 const int mb_y= s->mb_y;
64514ee8 2422 const int mb_xy= h->mb_xy;
0da71265
MN
2423 const int mb_type= s->current_picture.mb_type[mb_xy];
2424 uint8_t *dest_y, *dest_cb, *dest_cr;
2425 int linesize, uvlinesize /*dct_offset*/;
2426 int i;
6867a90b 2427 int *block_offset = &h->block_offset[0];
41e4055b 2428 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 2429 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 2430 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 2431 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2432 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2433
6120a343
MN
2434 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2435 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2436 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 2437
a957c27b
LM
2438 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2439 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2440
bd91fee3 2441 if (!simple && MB_FIELD) {
5d18eaad
LM
2442 linesize = h->mb_linesize = s->linesize * 2;
2443 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2444 block_offset = &h->block_offset[24];
1412060e 2445 if(mb_y&1){ //FIXME move out of this function?
0da71265 2446 dest_y -= s->linesize*15;
6867a90b
LLL
2447 dest_cb-= s->uvlinesize*7;
2448 dest_cr-= s->uvlinesize*7;
0da71265 2449 }
5d18eaad
LM
2450 if(FRAME_MBAFF) {
2451 int list;
3425501d 2452 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2453 if(!USES_LIST(mb_type, list))
2454 continue;
2455 if(IS_16X16(mb_type)){
2456 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2457 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2458 }else{
2459 for(i=0; i<16; i+=4){
5d18eaad
LM
2460 int ref = h->ref_cache[list][scan8[i]];
2461 if(ref >= 0)
1710856c 2462 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2463 }
2464 }
2465 }
2466 }
0da71265 2467 } else {
5d18eaad
LM
2468 linesize = h->mb_linesize = s->linesize;
2469 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2470// dct_offset = s->linesize * 16;
2471 }
115329f1 2472
bd91fee3 2473 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2474 for (i=0; i<16; i++) {
2475 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2476 }
c1708e8d
MN
2477 for (i=0; i<8; i++) {
2478 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2479 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2480 }
e7e09b49
LLL
2481 } else {
2482 if(IS_INTRA(mb_type)){
5f7f9719 2483 if(h->deblocking_filter)
93cc10fa 2484 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2485
49fb20cb 2486 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2487 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2488 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2489 }
0da71265 2490
e7e09b49 2491 if(IS_INTRA4x4(mb_type)){
bd91fee3 2492 if(simple || !s->encoding){
43efd19a 2493 if(IS_8x8DCT(mb_type)){
1eb96035
MN
2494 if(transform_bypass){
2495 idct_dc_add =
2496 idct_add = s->dsp.add_pixels8;
dae006d7 2497 }else{
1eb96035
MN
2498 idct_dc_add = s->dsp.h264_idct8_dc_add;
2499 idct_add = s->dsp.h264_idct8_add;
2500 }
43efd19a
LM
2501 for(i=0; i<16; i+=4){
2502 uint8_t * const ptr= dest_y + block_offset[i];
2503 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
2504 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2505 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2506 }else{
ac0623b2
MN
2507 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2508 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2509 (h->topright_samples_available<<i)&0x4000, linesize);
2510 if(nnz){
2511 if(nnz == 1 && h->mb[i*16])
2512 idct_dc_add(ptr, h->mb + i*16, linesize);
2513 else
2514 idct_add (ptr, h->mb + i*16, linesize);
2515 }
41e4055b 2516 }
43efd19a 2517 }
1eb96035
MN
2518 }else{
2519 if(transform_bypass){
2520 idct_dc_add =
2521 idct_add = s->dsp.add_pixels4;
2522 }else{
2523 idct_dc_add = s->dsp.h264_idct_dc_add;
2524 idct_add = s->dsp.h264_idct_add;
2525 }
aebb5d6d
MN
2526 for(i=0; i<16; i++){
2527 uint8_t * const ptr= dest_y + block_offset[i];
2528 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 2529
aebb5d6d
MN
2530 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2531 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2532 }else{
2533 uint8_t *topright;
2534 int nnz, tr;
2535 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2536 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2537 assert(mb_y || linesize <= block_offset[i]);
2538 if(!topright_avail){
2539 tr= ptr[3 - linesize]*0x01010101;
2540 topright= (uint8_t*) &tr;
2541 }else
2542 topright= ptr + 4 - linesize;
ac0623b2 2543 }else
aebb5d6d
MN
2544 topright= NULL;
2545
2546 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2547 nnz = h->non_zero_count_cache[ scan8[i] ];
2548 if(nnz){
2549 if(is_h264){
2550 if(nnz == 1 && h->mb[i*16])
2551 idct_dc_add(ptr, h->mb + i*16, linesize);
2552 else
2553 idct_add (ptr, h->mb + i*16, linesize);
2554 }else
2555 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2556 }
ac0623b2 2557 }
41e4055b 2558 }
8b82a956 2559 }
0da71265 2560 }
e7e09b49 2561 }else{
c92a30bb 2562 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2563 if(is_h264){
36940eca 2564 if(!transform_bypass)
93f0c0a4 2565 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2566 }else
e7e09b49 2567 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2568 }
5f7f9719 2569 if(h->deblocking_filter)
93cc10fa 2570 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2571 }else if(is_h264){
e7e09b49 2572 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2573 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2574 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2575 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2576 }
e7e09b49
LLL
2577
2578
2579 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2580 if(is_h264){
ef9d1d15 2581 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
2582 if(transform_bypass){
2583 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
2584 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2585 }else{
2586 for(i=0; i<16; i++){
2587 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 2588 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2589 }
2fd1f0e0
MN
2590 }
2591 }else{
2592 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 2593 }
49c084a7 2594 }else if(h->cbp&15){
2fd1f0e0 2595 if(transform_bypass){
0a8ca22f 2596 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 2597 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 2598 for(i=0; i<16; i+=di){
62bc966f 2599 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 2600 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2601 }
ef9d1d15 2602 }
2fd1f0e0
MN
2603 }else{
2604 if(IS_8x8DCT(mb_type)){
2605 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2606 }else{
2607 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2608 }
2609 }
4704097a 2610 }
e7e09b49
LLL
2611 }else{
2612 for(i=0; i<16; i++){
2613 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2614 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2615 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2616 }
4704097a 2617 }
0da71265
MN
2618 }
2619 }
0da71265 2620
49fb20cb 2621 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2622 uint8_t *dest[2] = {dest_cb, dest_cr};
2623 if(transform_bypass){
96465b90
MN
2624 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2625 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2626 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2627 }else{
c25ac15a 2628 idct_add = s->dsp.add_pixels4;
96465b90
MN
2629 for(i=16; i<16+8; i++){
2630 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2631 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2632 }
2633 }
ef9d1d15 2634 }else{
4691a77d
2635 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2636 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 2637 if(is_h264){
c25ac15a
MN
2638 idct_add = s->dsp.h264_idct_add;
2639 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
2640 for(i=16; i<16+8; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ])
2642 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2643 else if(h->mb[i*16])
2644 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2645 }
aebb5d6d
MN
2646 }else{
2647 for(i=16; i<16+8; i++){
2648 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2649 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2650 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2651 }
e7e09b49 2652 }
4704097a 2653 }
0da71265
MN
2654 }
2655 }
2656 }
c212fb0c
MN
2657 if(h->cbp || IS_INTRA(mb_type))
2658 s->dsp.clear_blocks(h->mb);
2659
53c05b1e 2660 if(h->deblocking_filter) {
5f7f9719
MN
2661 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2662 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2663 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2664 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2665 if (!simple && FRAME_MBAFF) {
5f7f9719 2666 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2667 } else {
3e20143e 2668 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2669 }
53c05b1e 2670 }
0da71265
MN
2671}
2672
0da71265 2673/**
bd91fee3
AS
2674 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2675 */
2676static void hl_decode_mb_simple(H264Context *h){
2677 hl_decode_mb_internal(h, 1);
2678}
2679
2680/**
2681 * Process a macroblock; this handles edge cases, such as interlacing.
2682 */
2683static void av_noinline hl_decode_mb_complex(H264Context *h){
2684 hl_decode_mb_internal(h, 0);
2685}
2686
2687static void hl_decode_mb(H264Context *h){
2688 MpegEncContext * const s = &h->s;
64514ee8 2689 const int mb_xy= h->mb_xy;
bd91fee3 2690 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 2691 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2692
bd91fee3
AS
2693 if (is_complex)
2694 hl_decode_mb_complex(h);
2695 else hl_decode_mb_simple(h);
2696}
2697
2143b118 2698static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2699 int i;
2700 for (i = 0; i < 4; ++i) {
2143b118 2701 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2702 pic->data[i] += pic->linesize[i];
2143b118 2703 pic->reference = parity;
11cc1d8c
JD
2704 pic->linesize[i] *= 2;
2705 }
2879c75f 2706 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2707}
2708
2709static int split_field_copy(Picture *dest, Picture *src,
2710 int parity, int id_add){
2711 int match = !!(src->reference & parity);
2712
2713 if (match) {
2714 *dest = *src;
d4f7d838 2715 if(parity != PICT_FRAME){
b3e93fd4
MN
2716 pic_as_field(dest, parity);
2717 dest->pic_id *= 2;
2718 dest->pic_id += id_add;
d4f7d838 2719 }
11cc1d8c
JD
2720 }
2721
2722 return match;
2723}
2724
d4f7d838
MN
2725static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2726 int i[2]={0};
2727 int index=0;
11cc1d8c 2728
d4f7d838
MN
2729 while(i[0]<len || i[1]<len){
2730 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2731 i[0]++;
2732 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2733 i[1]++;
2734 if(i[0] < len){
2735 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2736 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2737 }
2738 if(i[1] < len){
2739 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2740 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2741 }
2742 }
2743
d4f7d838 2744 return index;
11cc1d8c
JD
2745}
2746
d4f7d838
MN
2747static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2748 int i, best_poc;
2749 int out_i= 0;
11cc1d8c 2750
d4f7d838
MN
2751 for(;;){
2752 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2753
d4f7d838
MN
2754 for(i=0; i<len; i++){
2755 const int poc= src[i]->poc;
2756 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2757 best_poc= poc;
2758 sorted[out_i]= src[i];
2759 }
2760 }
2761 if(best_poc == (dir ? INT_MIN : INT_MAX))
2762 break;
2763 limit= sorted[out_i++]->poc - dir;
2764 }
2765 return out_i;
11cc1d8c
JD
2766}
2767
bd91fee3 2768/**
0da71265
MN
2769 * fills the default_ref_list.
2770 */
2771static int fill_default_ref_list(H264Context *h){
2772 MpegEncContext * const s = &h->s;
d4f7d838 2773 int i, len;
115329f1 2774
9f5c1037 2775 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2776 Picture *sorted[32];
2777 int cur_poc, list;
2778 int lens[2];
11cc1d8c 2779
d4f7d838
MN
2780 if(FIELD_PICTURE)
2781 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2782 else
2783 cur_poc= s->current_picture_ptr->poc;
086acdd5 2784
d4f7d838
MN
2785 for(list= 0; list<2; list++){
2786 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2787 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2788 assert(len<=32);
2789 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2790 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2791 assert(len<=32);
086acdd5 2792
d4f7d838
MN
2793 if(len < h->ref_count[list])
2794 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2795 lens[list]= len;
086acdd5
JD
2796 }
2797
d4f7d838
MN
2798 if(lens[0] == lens[1] && lens[1] > 1){
2799 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2800 if(i == lens[0])
2801 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2802 }
086acdd5 2803 }else{
d4f7d838
MN
2804 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2805 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2806 assert(len <= 32);
2807 if(len < h->ref_count[0])
2808 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2809 }
827c91bf
LLL
2810#ifdef TRACE
2811 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2812 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2813 }
9f5c1037 2814 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2815 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2816 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2817 }
2818 }
2819#endif
0da71265
MN
2820 return 0;
2821}
2822
827c91bf
LLL
2823static void print_short_term(H264Context *h);
2824static void print_long_term(H264Context *h);
2825
949da388
JD
2826/**
2827 * Extract structure information about the picture described by pic_num in
2828 * the current decoding context (frame or field). Note that pic_num is
2829 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2830 * @param pic_num picture number for which to extract structure information
2831 * @param structure one of PICT_XXX describing structure of picture
2832 * with pic_num
2833 * @return frame number (short term) or long term index of picture
2834 * described by pic_num
2835 */
2836static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2837 MpegEncContext * const s = &h->s;
2838
2839 *structure = s->picture_structure;
2840 if(FIELD_PICTURE){
2841 if (!(pic_num & 1))
2842 /* opposite field */
2843 *structure ^= PICT_FRAME;
2844 pic_num >>= 1;
2845 }
2846
2847 return pic_num;
2848}
2849
0da71265
MN
2850static int decode_ref_pic_list_reordering(H264Context *h){
2851 MpegEncContext * const s = &h->s;
949da388 2852 int list, index, pic_structure;
115329f1 2853
827c91bf
LLL
2854 print_short_term(h);
2855 print_long_term(h);
115329f1 2856
3425501d 2857 for(list=0; list<h->list_count; list++){
0da71265
MN
2858 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2859
2860 if(get_bits1(&s->gb)){
2861 int pred= h->curr_pic_num;
0da71265
MN
2862
2863 for(index=0; ; index++){
9963b332 2864 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
88e7a4d1 2865 unsigned int pic_id;
0da71265 2866 int i;
2f944356 2867 Picture *ref = NULL;
115329f1
DB
2868
2869 if(reordering_of_pic_nums_idc==3)
0bc42cad 2870 break;
115329f1 2871
0da71265 2872 if(index >= h->ref_count[list]){
9b879566 2873 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2874 return -1;
2875 }
115329f1 2876
0da71265
MN
2877 if(reordering_of_pic_nums_idc<3){
2878 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2879 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2880 int frame_num;
0da71265 2881
03d3cab8 2882 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2883 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2884 return -1;
2885 }
2886
2887 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2888 else pred+= abs_diff_pic_num;
2889 pred &= h->max_pic_num - 1;
115329f1 2890
949da388
JD
2891 frame_num = pic_num_extract(h, pred, &pic_structure);
2892
0d175622
MN
2893 for(i= h->short_ref_count-1; i>=0; i--){
2894 ref = h->short_ref[i];
949da388 2895 assert(ref->reference);
0d175622 2896 assert(!ref->long_ref);
6edac8e1 2897 if(
af8c5e08
MN
2898 ref->frame_num == frame_num &&
2899 (ref->reference & pic_structure)
6edac8e1 2900 )
0da71265
MN
2901 break;
2902 }
0d175622 2903 if(i>=0)
949da388 2904 ref->pic_id= pred;
0da71265 2905 }else{
949da388 2906 int long_idx;
0da71265 2907 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2908
2909 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2910
2911 if(long_idx>31){
88e7a4d1
MN
2912 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2913 return -1;
2914 }
949da388
JD
2915 ref = h->long_ref[long_idx];
2916 assert(!(ref && !ref->reference));
af8c5e08 2917 if(ref && (ref->reference & pic_structure)){
ac658be5 2918 ref->pic_id= pic_id;
ac658be5
FOL
2919 assert(ref->long_ref);
2920 i=0;
2921 }else{
2922 i=-1;
2923 }
0da71265
MN
2924 }
2925
0d315f28 2926 if (i < 0) {
9b879566 2927 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2928 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2929 } else {
2930 for(i=index; i+1<h->ref_count[list]; i++){
2931 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2932 break;
21be92bf
MN
2933 }
2934 for(; i > index; i--){
2935 h->ref_list[list][i]= h->ref_list[list][i-1];
2936 }
0d175622 2937 h->ref_list[list][index]= *ref;
949da388 2938 if (FIELD_PICTURE){
2143b118 2939 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2940 }
0da71265 2941 }
0bc42cad 2942 }else{
9b879566 2943 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2944 return -1;
2945 }
2946 }
2947 }
0da71265 2948 }
3425501d 2949 for(list=0; list<h->list_count; list++){
6ab87211 2950 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2951 if(!h->ref_list[list][index].data[0]){
2952 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2953 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2954 }
6ab87211 2955 }
6ab87211 2956 }
115329f1 2957
115329f1 2958 return 0;
0da71265
MN
2959}
2960
91c58c94 2961static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2962 int list, i, j;
3425501d 2963 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2964 for(i=0; i<h->ref_count[list]; i++){
2965 Picture *frame = &h->ref_list[list][i];
2966 Picture *field = &h->ref_list[list][16+2*i];
2967 field[0] = *frame;
2968 for(j=0; j<3; j++)
2969 field[0].linesize[j] <<= 1;
2143b118 2970 field[0].reference = PICT_TOP_FIELD;
078f42dd 2971 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2972 field[1] = field[0];
2973 for(j=0; j<3; j++)
2974 field[1].data[j] += frame->linesize[j];
2143b118 2975 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2976 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2977
2978 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2979 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2980 for(j=0; j<2; j++){
2981 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2982 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2983 }
2984 }
2985 }
2986 for(j=0; j<h->ref_count[1]; j++){
2987 for(i=0; i<h->ref_count[0]; i++)
2988 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2989 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2990 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2991 }
2992}
2993
0da71265
MN
2994static int pred_weight_table(H264Context *h){
2995 MpegEncContext * const s = &h->s;
2996 int list, i;
9f2d1b4f 2997 int luma_def, chroma_def;
115329f1 2998
9f2d1b4f
LM
2999 h->use_weight= 0;
3000 h->use_weight_chroma= 0;
0da71265
MN
3001 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3002 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
3003 luma_def = 1<<h->luma_log2_weight_denom;
3004 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
3005
3006 for(list=0; list<2; list++){
cb99c652
GB
3007 h->luma_weight_flag[list] = 0;
3008 h->chroma_weight_flag[list] = 0;
0da71265
MN
3009 for(i=0; i<h->ref_count[list]; i++){
3010 int luma_weight_flag, chroma_weight_flag;
115329f1 3011
0da71265
MN
3012 luma_weight_flag= get_bits1(&s->gb);
3013 if(luma_weight_flag){
3014 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3015 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f 3016 if( h->luma_weight[list][i] != luma_def
cb99c652 3017 || h->luma_offset[list][i] != 0) {
9f2d1b4f 3018 h->use_weight= 1;
cb99c652
GB
3019 h->luma_weight_flag[list]= 1;
3020 }
9f2d1b4f
LM
3021 }else{
3022 h->luma_weight[list][i]= luma_def;
3023 h->luma_offset[list][i]= 0;
0da71265
MN
3024 }
3025
0af6967e 3026 if(CHROMA){
fef744d4
MN
3027 chroma_weight_flag= get_bits1(&s->gb);
3028 if(chroma_weight_flag){
3029 int j;
3030 for(j=0; j<2; j++){
3031 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3032 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3033 if( h->chroma_weight[list][i][j] != chroma_def
cb99c652 3034 || h->chroma_offset[list][i][j] != 0) {
fef744d4 3035 h->use_weight_chroma= 1;
cb99c652
GB
3036 h->chroma_weight_flag[list]= 1;
3037 }
fef744d4
MN
3038 }
3039 }else{
3040 int j;
3041 for(j=0; j<2; j++){
3042 h->chroma_weight[list][i][j]= chroma_def;
3043 h->chroma_offset[list][i][j]= 0;
3044 }
0da71265
MN
3045 }
3046 }
3047 }
9f5c1037 3048 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 3049 }
9f2d1b4f 3050 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3051 return 0;
3052}
3053
9f2d1b4f
LM
3054static void implicit_weight_table(H264Context *h){
3055 MpegEncContext * const s = &h->s;
cb99c652 3056 int ref0, ref1, i;
9f2d1b4f
LM
3057 int cur_poc = s->current_picture_ptr->poc;
3058
ce09f927
GB
3059 for (i = 0; i < 2; i++) {
3060 h->luma_weight_flag[i] = 0;
3061 h->chroma_weight_flag[i] = 0;
3062 }
3063
9f2d1b4f
LM
3064 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3065 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3066 h->use_weight= 0;
3067 h->use_weight_chroma= 0;
3068 return;
3069 }
3070
3071 h->use_weight= 2;
3072 h->use_weight_chroma= 2;
3073 h->luma_log2_weight_denom= 5;
3074 h->chroma_log2_weight_denom= 5;
3075
9f2d1b4f
LM
3076 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3077 int poc0 = h->ref_list[0][ref0].poc;
3078 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3079 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3080 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3081 if(td){
f66e4f5f 3082 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3083 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3084 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3085 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3086 h->implicit_weight[ref0][ref1] = 32;
3087 else
3088 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3089 }else
3090 h->implicit_weight[ref0][ref1] = 32;
3091 }
3092 }
3093}
3094
8fd57a66
JD
3095/**
3096 * Mark a picture as no longer needed for reference. The refmask
3097 * argument allows unreferencing of individual fields or the whole frame.
3098 * If the picture becomes entirely unreferenced, but is being held for
3099 * display purposes, it is marked as such.
3100 * @param refmask mask of fields to unreference; the mask is bitwise
3101 * anded with the reference marking of pic
3102 * @return non-zero if pic becomes entirely unreferenced (except possibly
3103 * for display purposes) zero if one of the fields remains in
3104 * reference
3105 */
3106static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3107 int i;
8fd57a66
JD
3108 if (pic->reference &= refmask) {
3109 return 0;
3110 } else {
79f4494a
MN
3111 for(i = 0; h->delayed_pic[i]; i++)
3112 if(pic == h->delayed_pic[i]){
3113 pic->reference=DELAYED_PIC_REF;
3114 break;
3115 }
8fd57a66
JD
3116 return 1;
3117 }
4e4d983e
LM
3118}
3119
0da71265 3120/**
5175b937 3121 * instantaneous decoder refresh.
0da71265
MN
3122 */
3123static void idr(H264Context *h){
4e4d983e 3124 int i;
0da71265 3125
dc032f33 3126 for(i=0; i<16; i++){
9c0e4624 3127 remove_long(h, i, 0);
0da71265 3128 }
849b9cef 3129 assert(h->long_ref_count==0);
0da71265
MN
3130
3131 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3132 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3133 h->short_ref[i]= NULL;
3134 }
3135 h->short_ref_count=0;
a149c1a5 3136 h->prev_frame_num= 0;
80f8e035
MN
3137 h->prev_frame_num_offset= 0;
3138 h->prev_poc_msb=
3139 h->prev_poc_lsb= 0;
0da71265
MN
3140}
3141
7c33ad19
LM
3142/* forget old pics after a seek */
3143static void flush_dpb(AVCodecContext *avctx){
3144 H264Context *h= avctx->priv_data;
3145 int i;
64b9d48f 3146 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3147 if(h->delayed_pic[i])
3148 h->delayed_pic[i]->reference= 0;
7c33ad19 3149 h->delayed_pic[i]= NULL;
285b570f 3150 }
df8a7dff 3151 h->outputed_poc= INT_MIN;
7c33ad19 3152 idr(h);
ca159196
MR
3153 if(h->s.current_picture_ptr)
3154 h->s.current_picture_ptr->reference= 0;
12d96de3 3155 h->s.first_field= 0;
03831f46 3156 reset_sei(h);
e240f898 3157 ff_mpeg_flush(avctx);
7c33ad19
LM
3158}
3159
0da71265 3160/**
47e112f8
JD
3161 * Find a Picture in the short term reference list by frame number.