Store CPB count in the context.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265 34#include "golomb.h"
199436b9 35#include "mathops.h"
626464fb 36#include "rectangle.h"
369122dd 37#include "vdpau_internal.h"
0da71265 38
e5017ab8 39#include "cabac.h"
b250f9c6 40#if ARCH_X86
a6493a8f 41#include "x86/h264_i386.h"
52cb7981 42#endif
e5017ab8 43
2848ce84 44//#undef NDEBUG
0da71265
MN
45#include <assert.h>
46
2ddcf84b
JD
47/**
48 * Value of Picture.reference when Picture is not a reference picture, but
49 * is held for delayed output.
50 */
51#define DELAYED_PIC_REF 4
52
0da71265 53static VLC coeff_token_vlc[4];
910e3668
AC
54static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
55static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
56
0da71265 57static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
58static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
59static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
60
61static VLC total_zeros_vlc[15];
910e3668
AC
62static VLC_TYPE total_zeros_vlc_tables[15][512][2];
63static const int total_zeros_vlc_tables_size = 512;
64
0da71265 65static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
66static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
67static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
68
69static VLC run_vlc[6];
910e3668
AC
70static VLC_TYPE run_vlc_tables[6][8][2];
71static const int run_vlc_tables_size = 8;
72
0da71265 73static VLC run7_vlc;
910e3668
AC
74static VLC_TYPE run7_vlc_table[96][2];
75static const int run7_vlc_table_size = 96;
0da71265 76
8b82a956
MN
77static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
78static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 79static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 80static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 81static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 82
/**
 * Packs two 16-bit values into one 32-bit word.
 *
 * The halves are arranged so that, once the word is stored to memory,
 * a occupies the lower address and b the higher one on either byte order.
 * Only the low 16 bits of each argument end up in the result.
 *
 * The shift is done in unsigned arithmetic: callers may pass negative
 * values, and left-shifting a negative int is undefined behaviour in C.
 *
 * @param a value for the first (lower-address) 16 bits
 * @param b value for the second (higher-address) 16 bits
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return ((uint32_t)b&0xFFFF) + ((uint32_t)a<<16);
#else
   return ((uint32_t)a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
90
/* rem6[q] == q % 6 for q in 0..51, as a lookup table. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
94
/* div6[q] == q / 6 for q in 0..51, as a lookup table. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
98
/*
 * Per-row index maps used by fill_caches() to pick which entries of the left
 * neighbour(s) feed the four left-edge cache rows.
 *
 * Row selection in fill_caches():
 *   [0]          default (left neighbour has the same field/frame mode)
 *   [3]          current MB is field coded, left pair is frame coded
 *   [2 - bottom] current MB is frame coded, left pair is field coded
 *                (row 2 for the top MB of the pair, row 1 for the bottom MB)
 *
 * Columns 0..3 index the intra4x4 pred mode / luma nnz / mv rows of the left
 * MB; columns 4..7 are indices into the left MB's non_zero_count entries
 * (presumably the chroma ones -- confirm against the non_zero_count layout).
 */
static const uint8_t left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
acd8d10f 105
8140955d
MN
/* Number of index bits of the second dimension of cavlc_level_tab. */
#define LEVEL_TAB_BITS 8
/* Lookup table of 7 x (1<<LEVEL_TAB_BITS) two-byte entries; it has no
 * static initializer here (presumably filled at codec init -- confirm). */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
108
70abb407 109static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 110 MpegEncContext * const s = &h->s;
64514ee8 111 const int mb_xy= h->mb_xy;
0da71265
MN
112 int topleft_xy, top_xy, topright_xy, left_xy[2];
113 int topleft_type, top_type, topright_type, left_type[2];
89818988 114 const uint8_t * left_block;
02f7695b 115 int topleft_partition= -1;
0da71265
MN
116 int i;
117
36e097bc
JD
118 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
119
717b1733 120 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 121 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
122 return;
123
2cab6401
DB
124 /* Wow, what a mess, why didn't they simplify the interlacing & intra
125 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 126
6867a90b
LLL
127 topleft_xy = top_xy - 1;
128 topright_xy= top_xy + 1;
129 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 130 left_block = left_block_options[0];
5d18eaad 131 if(FRAME_MBAFF){
6867a90b
LLL
132 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
133 const int top_pair_xy = pair_xy - s->mb_stride;
134 const int topleft_pair_xy = top_pair_xy - 1;
135 const int topright_pair_xy = top_pair_xy + 1;
6f3c50f2
MN
136 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
137 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
138 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
139 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
140 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
6867a90b 141 const int bottom = (s->mb_y & 1);
6f3c50f2 142 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
60c6ba7a 143
6f3c50f2 144 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
6867a90b
LLL
145 top_xy -= s->mb_stride;
146 }
6f3c50f2 147 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
6867a90b 148 topleft_xy -= s->mb_stride;
6f3c50f2 149 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
02f7695b 150 topleft_xy += s->mb_stride;
1412060e 151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 152 topleft_partition = 0;
6867a90b 153 }
6f3c50f2 154 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
6867a90b
LLL
155 topright_xy -= s->mb_stride;
156 }
6f3c50f2 157 if (left_mb_field_flag != curr_mb_field_flag) {
6867a90b 158 left_xy[1] = left_xy[0] = pair_xy - 1;
6f3c50f2
MN
159 if (curr_mb_field_flag) {
160 left_xy[1] += s->mb_stride;
161 left_block = left_block_options[3];
162 } else {
03a035e0 163 left_block= left_block_options[2 - bottom];
6867a90b
LLL
164 }
165 }
0da71265
MN
166 }
167
826de46e
LLL
168 h->top_mb_xy = top_xy;
169 h->left_mb_xy[0] = left_xy[0];
170 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 171 if(for_deblock){
717b1733
LM
172 topleft_type = 0;
173 topright_type = 0;
b735aeea
MN
174 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
175 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
176 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 177
e248cb60 178 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 179 int list;
3425501d 180 for(list=0; list<h->list_count; list++){
e248cb60
MN
181 //These values where changed for ease of performing MC, we need to change them back
182 //FIXME maybe we can make MC and loop filter use the same values or prevent
183 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 184 if(USES_LIST(mb_type,list)){
191e8ca7 185 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
188 ref += h->b8_stride;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
191 }
192 }
193 }
46f2f05f
MN
194 }else{
195 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
196 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
197 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
198 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
199 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
200
201 if(IS_INTRA(mb_type)){
faa7e394 202 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
203 h->topleft_samples_available=
204 h->top_samples_available=
0da71265
MN
205 h->left_samples_available= 0xFFFF;
206 h->topright_samples_available= 0xEEEA;
207
faa7e394 208 if(!(top_type & type_mask)){
0da71265
MN
209 h->topleft_samples_available= 0xB3FF;
210 h->top_samples_available= 0x33FF;
211 h->topright_samples_available= 0x26EA;
212 }
d1d10e91
MN
213 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
214 if(IS_INTERLACED(mb_type)){
faa7e394 215 if(!(left_type[0] & type_mask)){
d1d10e91
MN
216 h->topleft_samples_available&= 0xDFFF;
217 h->left_samples_available&= 0x5FFF;
218 }
faa7e394 219 if(!(left_type[1] & type_mask)){
d1d10e91
MN
220 h->topleft_samples_available&= 0xFF5F;
221 h->left_samples_available&= 0xFF5F;
222 }
223 }else{
224 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
225 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
226 assert(left_xy[0] == left_xy[1]);
faa7e394 227 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
228 h->topleft_samples_available&= 0xDF5F;
229 h->left_samples_available&= 0x5F5F;
230 }
231 }
232 }else{
faa7e394 233 if(!(left_type[0] & type_mask)){
0da71265
MN
234 h->topleft_samples_available&= 0xDF5F;
235 h->left_samples_available&= 0x5F5F;
236 }
237 }
115329f1 238
faa7e394 239 if(!(topleft_type & type_mask))
0da71265 240 h->topleft_samples_available&= 0x7FFF;
115329f1 241
faa7e394 242 if(!(topright_type & type_mask))
0da71265 243 h->topright_samples_available&= 0xFBFF;
115329f1 244
0da71265
MN
245 if(IS_INTRA4x4(mb_type)){
246 if(IS_INTRA4x4(top_type)){
247 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
248 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
249 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
250 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
251 }else{
252 int pred;
faa7e394 253 if(!(top_type & type_mask))
0da71265 254 pred= -1;
6fbcaaa0
LLL
255 else{
256 pred= 2;
0da71265
MN
257 }
258 h->intra4x4_pred_mode_cache[4+8*0]=
259 h->intra4x4_pred_mode_cache[5+8*0]=
260 h->intra4x4_pred_mode_cache[6+8*0]=
261 h->intra4x4_pred_mode_cache[7+8*0]= pred;
262 }
263 for(i=0; i<2; i++){
264 if(IS_INTRA4x4(left_type[i])){
265 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
266 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
267 }else{
268 int pred;
faa7e394 269 if(!(left_type[i] & type_mask))
0da71265 270 pred= -1;
6fbcaaa0
LLL
271 else{
272 pred= 2;
0da71265
MN
273 }
274 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
275 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
276 }
277 }
278 }
279 }
29671011 280 }
115329f1
DB
281
282
0da71265 283/*
115329f1
DB
2840 . T T. T T T T
2851 L . .L . . . .
2862 L . .L . . . .
2873 . T TL . . . .
2884 L . .L . . . .
2895 L . .. . . . .
0da71265 290*/
1412060e 291//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 292 if(top_type){
6867a90b
LLL
293 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
294 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
295 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 296 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 297
6867a90b 298 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 299 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 300
6867a90b 301 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 302 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 303
0da71265 304 }else{
115329f1 305 h->non_zero_count_cache[4+8*0]=
0da71265
MN
306 h->non_zero_count_cache[5+8*0]=
307 h->non_zero_count_cache[6+8*0]=
308 h->non_zero_count_cache[7+8*0]=
115329f1 309
0da71265
MN
310 h->non_zero_count_cache[1+8*0]=
311 h->non_zero_count_cache[2+8*0]=
115329f1 312
0da71265 313 h->non_zero_count_cache[1+8*3]=
3981c385 314 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 315
0da71265 316 }
826de46e 317
6867a90b
LLL
318 for (i=0; i<2; i++) {
319 if(left_type[i]){
320 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
321 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
322 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 324 }else{
115329f1
DB
325 h->non_zero_count_cache[3+8*1 + 2*8*i]=
326 h->non_zero_count_cache[3+8*2 + 2*8*i]=
327 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 328 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
329 }
330 }
331
332 if( h->pps.cabac ) {
333 // top_cbp
334 if(top_type) {
335 h->top_cbp = h->cbp_table[top_xy];
336 } else if(IS_INTRA(mb_type)) {
337 h->top_cbp = 0x1C0;
338 } else {
339 h->top_cbp = 0;
340 }
341 // left_cbp
342 if (left_type[0]) {
343 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
344 } else if(IS_INTRA(mb_type)) {
345 h->left_cbp = 0x1C0;
346 } else {
347 h->left_cbp = 0;
348 }
349 if (left_type[0]) {
350 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
351 }
352 if (left_type[1]) {
353 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 354 }
0da71265 355 }
6867a90b 356
0da71265 357#if 1
e2e5894a 358 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 359 int list;
3425501d 360 for(list=0; list<h->list_count; list++){
e2e5894a 361 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
362 /*if(!h->mv_cache_clean[list]){
363 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
364 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
365 h->mv_cache_clean[list]= 1;
366 }*/
5ad984c9 367 continue;
0da71265
MN
368 }
369 h->mv_cache_clean[list]= 0;
115329f1 370
53b19144 371 if(USES_LIST(top_type, list)){
0da71265
MN
372 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
373 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
378 h->ref_cache[list][scan8[0] + 0 - 1*8]=
379 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
380 h->ref_cache[list][scan8[0] + 2 - 1*8]=
381 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
382 }else{
115329f1
DB
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
387 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
388 }
389
4672503d
LM
390 for(i=0; i<2; i++){
391 int cache_idx = scan8[0] - 1 + i*2*8;
392 if(USES_LIST(left_type[i], list)){
393 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
394 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
395 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
396 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
397 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
398 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
399 }else{
400 *(uint32_t*)h->mv_cache [list][cache_idx ]=
401 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
402 h->ref_cache[list][cache_idx ]=
403 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
404 }
0da71265
MN
405 }
406
0281d325 407 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
408 continue;
409
53b19144 410 if(USES_LIST(topleft_type, list)){
02f7695b
LM
411 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
412 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
413 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
414 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
415 }else{
416 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
417 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
418 }
115329f1 419
53b19144 420 if(USES_LIST(topright_type, list)){
e2e5894a
LM
421 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
422 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
423 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
424 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
425 }else{
426 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
427 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
428 }
e2e5894a 429
ae08a563 430 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 431 continue;
115329f1
DB
432
433 h->ref_cache[list][scan8[5 ]+1] =
434 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 435 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 436 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
437 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
438 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 440 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
441 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
442 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
443
444 if( h->pps.cabac ) {
445 /* XXX beurk, Load mvd */
53b19144 446 if(USES_LIST(top_type, list)){
9e528114
LA
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
452 }else{
115329f1
DB
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
457 }
53b19144 458 if(USES_LIST(left_type[0], list)){
9e528114
LA
459 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
462 }else{
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
465 }
53b19144 466 if(USES_LIST(left_type[1], list)){
9e528114
LA
467 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
470 }else{
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
473 }
474 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 476 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
477 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
478 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 479
9f5c1037 480 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
481 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
482
483 if(IS_DIRECT(top_type)){
484 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
485 }else if(IS_8X8(top_type)){
486 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
487 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
488 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
489 }else{
490 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
491 }
115329f1 492
5d18eaad
LM
493 if(IS_DIRECT(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
495 else if(IS_8X8(left_type[0]))
496 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
497 else
498 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
499
500 if(IS_DIRECT(left_type[1]))
5ad984c9 501 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
502 else if(IS_8X8(left_type[1]))
503 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
504 else
5ad984c9 505 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
506 }
507 }
508
509 if(FRAME_MBAFF){
510#define MAP_MVS\
511 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
512 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
517 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
520 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
521 if(MB_FIELD){
522#define MAP_F2F(idx, mb_type)\
523 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
524 h->ref_cache[list][idx] <<= 1;\
525 h->mv_cache[list][idx][1] /= 2;\
526 h->mvd_cache[list][idx][1] /= 2;\
527 }
528 MAP_MVS
529#undef MAP_F2F
530 }else{
531#define MAP_F2F(idx, mb_type)\
532 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
533 h->ref_cache[list][idx] >>= 1;\
534 h->mv_cache[list][idx][1] <<= 1;\
535 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 536 }
5d18eaad
LM
537 MAP_MVS
538#undef MAP_F2F
5ad984c9 539 }
9e528114 540 }
0da71265 541 }
0da71265
MN
542 }
543#endif
43efd19a
LM
544
545 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
546}
547
548static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 549 const int mb_xy= h->mb_xy;
0da71265
MN
550
551 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
552 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
553 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
554 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
555 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
556 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
557 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
558}
559
560/**
561 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
562 */
563static inline int check_intra4x4_pred_mode(H264Context *h){
564 MpegEncContext * const s = &h->s;
565 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
566 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
567 int i;
115329f1 568
0da71265
MN
569 if(!(h->top_samples_available&0x8000)){
570 for(i=0; i<4; i++){
571 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
572 if(status<0){
9b879566 573 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
574 return -1;
575 } else if(status){
576 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
577 }
578 }
579 }
115329f1 580
d1d10e91
MN
581 if((h->left_samples_available&0x8888)!=0x8888){
582 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 583 for(i=0; i<4; i++){
d1d10e91 584 if(!(h->left_samples_available&mask[i])){
26695973
MN
585 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
586 if(status<0){
587 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
588 return -1;
589 } else if(status){
590 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
591 }
d1d10e91 592 }
0da71265
MN
593 }
594 }
595
596 return 0;
597} //FIXME cleanup like next
598
599/**
600 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
601 */
602static inline int check_intra_pred_mode(H264Context *h, int mode){
603 MpegEncContext * const s = &h->s;
604 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
605 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 606
43ff0714 607 if(mode > 6U) {
5175b937 608 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 609 return -1;
5175b937 610 }
115329f1 611
0da71265
MN
612 if(!(h->top_samples_available&0x8000)){
613 mode= top[ mode ];
614 if(mode<0){
9b879566 615 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
616 return -1;
617 }
618 }
115329f1 619
d1d10e91 620 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 621 mode= left[ mode ];
d1d10e91
MN
622 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
623 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
624 }
0da71265 625 if(mode<0){
9b879566 626 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 627 return -1;
115329f1 628 }
0da71265
MN
629 }
630
631 return mode;
632}
633
634/**
635 * gets the predicted intra4x4 prediction mode.
636 */
637static inline int pred_intra_mode(H264Context *h, int n){
638 const int index8= scan8[n];
639 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
640 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 const int min= FFMIN(left, top);
642
a9c9a240 643 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
644
645 if(min<0) return DC_PRED;
646 else return min;
647}
648
649static inline void write_back_non_zero_count(H264Context *h){
64514ee8 650 const int mb_xy= h->mb_xy;
0da71265 651
6867a90b
LLL
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 659
6867a90b 660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 663
6867a90b 664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
667}
668
669/**
1412060e 670 * gets the predicted number of non-zero coefficients.
0da71265
MN
671 * @param n block index
672 */
673static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
677 int i= left + top;
115329f1 678
0da71265
MN
679 if(i<64) i= (i+1)>>1;
680
a9c9a240 681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
682
683 return i&31;
684}
685
1924f3ce
MN
/**
 * Selects the diagonal neighbour (candidate "C") used for MV prediction.
 *
 * Outside of FRAME_MBAFF the top-right cache entry (i - 8 + part_width) is
 * used when its reference is not PART_NOT_AVAILABLE; otherwise the top-left
 * entry (i - 8 - 1) is used instead.
 * With FRAME_MBAFF a few frame/field neighbour combinations are handled
 * first; they build the candidate in the cache slot scan8[0]-2 and return
 * directly from inside the SET_DIAG_MV macro.
 *
 * @param C          receives a pointer to the selected motion vector
 * @param i          cache index (a scan8[] value) of the current partition
 * @param list       reference list being predicted
 * @param part_width partition width added to the cache index to reach the
 *                   top-right neighbour
 * @return reference value belonging to *C; may be PART_NOT_AVAILABLE or
 *         LIST_NOT_USED
 */
686static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
687 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 688 MpegEncContext *s = &h->s;
1924f3ce 689
5d18eaad
LM
690 /* there is no consistent mapping of mvs to neighboring locations that will
691 * make mbaff happy, so we can't move all this logic to fill_caches */
692 if(FRAME_MBAFF){
191e8ca7 693 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
694 const int16_t *mv;
/* clear the scratch slot and make it the default candidate for the MBAFF cases */
695 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
696 *C = h->mv_cache[list][scan8[0]-2];
697
698 if(!MB_FIELD
699 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
700 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
701 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV always leaves the function: it returns LIST_NOT_USED when the
 * macroblock covering 4x4 position (X4,Y4) does not use this list; otherwise
 * it copies that position's MV into the scratch slot (MV_OP is applied to the
 * vertical component) and returns its reference index with REF_OP applied. */
702#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
703 const int x4 = X4, y4 = Y4;\
704 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 705 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
706 return LIST_NOT_USED;\
707 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
708 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
709 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
710 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
711
712 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
713 }
714 }
715 if(topright_ref == PART_NOT_AVAILABLE
716 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
717 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
718 if(!MB_FIELD
719 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
720 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
721 }
722 if(MB_FIELD
723 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
724 && i >= scan8[0]+8){
1412060e 725 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 726 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
727 }
728 }
729#undef SET_DIAG_MV
730 }
731
/* regular case: top-right neighbour if available, else the top-left one */
1924f3ce
MN
732 if(topright_ref != PART_NOT_AVAILABLE){
733 *C= h->mv_cache[list][ i - 8 + part_width ];
734 return topright_ref;
735 }else{
a9c9a240 736 tprintf(s->avctx, "topright MV not available\n");
95c26348 737
1924f3ce
MN
738 *C= h->mv_cache[list][ i - 8 - 1 ];
739 return h->ref_cache[list][ i - 8 - 1 ];
740 }
741}
742
0da71265
MN
743/**
744 * gets the predicted MV.
745 * @param n the block index
746 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
747 * @param mx the x component of the predicted motion vector
748 * @param my the y component of the predicted motion vector
749 */
750static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
751 const int index8= scan8[n];
752 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
753 const int left_ref= h->ref_cache[list][ index8 - 1 ];
754 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
755 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
756 const int16_t * C;
757 int diagonal_ref, match_count;
758
0da71265 759 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 760
0da71265 761/* mv_cache
115329f1 762 B . . A T T T T
0da71265
MN
763 U . . L . . , .
764 U . . L . . . .
765 U . . L . . , .
766 . . . L . . . .
767*/
1924f3ce
MN
768
769 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
770 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 771 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
772 if(match_count > 1){ //most common
773 *mx= mid_pred(A[0], B[0], C[0]);
774 *my= mid_pred(A[1], B[1], C[1]);
775 }else if(match_count==1){
776 if(left_ref==ref){
777 *mx= A[0];
115329f1 778 *my= A[1];
1924f3ce
MN
779 }else if(top_ref==ref){
780 *mx= B[0];
115329f1 781 *my= B[1];
0da71265 782 }else{
1924f3ce 783 *mx= C[0];
115329f1 784 *my= C[1];
0da71265
MN
785 }
786 }else{
1924f3ce 787 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 788 *mx= A[0];
115329f1 789 *my= A[1];
0da71265 790 }else{
1924f3ce
MN
791 *mx= mid_pred(A[0], B[0], C[0]);
792 *my= mid_pred(A[1], B[1], C[1]);
0da71265 793 }
0da71265 794 }
115329f1 795
a9c9a240 796 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
797}
798
799/**
800 * gets the directionally predicted 16x8 MV.
801 * @param n the block index
802 * @param mx the x component of the predicted motion vector
803 * @param my the y component of the predicted motion vector
804 */
805static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
806 if(n==0){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
809
a9c9a240 810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 811
0da71265
MN
812 if(top_ref == ref){
813 *mx= B[0];
814 *my= B[1];
815 return;
816 }
817 }else{
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 820
a9c9a240 821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
822
823 if(left_ref == ref){
824 *mx= A[0];
825 *my= A[1];
826 return;
827 }
828 }
829
830 //RARE
831 pred_motion(h, n, 4, list, ref, mx, my);
832}
833
834/**
835 * gets the directionally predicted 8x16 MV.
836 * @param n the block index
837 * @param mx the x component of the predicted motion vector
838 * @param my the y component of the predicted motion vector
839 */
840static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
841 if(n==0){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 844
a9c9a240 845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
846
847 if(left_ref == ref){
848 *mx= A[0];
849 *my= A[1];
850 return;
851 }
852 }else{
1924f3ce
MN
853 const int16_t * C;
854 int diagonal_ref;
855
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 857
a9c9a240 858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 859
115329f1 860 if(diagonal_ref == ref){
0da71265
MN
861 *mx= C[0];
862 *my= C[1];
863 return;
864 }
0da71265
MN
865 }
866
867 //RARE
868 pred_motion(h, n, 2, list, ref, mx, my);
869}
870
871static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
874
a9c9a240 875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
876
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
62ea19c0
MN
878 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
879 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
115329f1 880
0da71265
MN
881 *mx = *my = 0;
882 return;
883 }
115329f1 884
0da71265
MN
885 pred_motion(h, 0, 4, 0, 0, mx, my);
886
887 return;
888}
889
8b1fd554
MN
890static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
891 int poc0 = h->ref_list[0][i].poc;
892 int td = av_clip(poc1 - poc0, -128, 127);
893 if(td == 0 || h->ref_list[0][i].long_ref){
894 return 256;
895 }else{
896 int tb = av_clip(poc - poc0, -128, 127);
897 int tx = (16384 + (FFABS(td) >> 1)) / td;
898 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
899 }
900}
901
5ad984c9 902static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
903 MpegEncContext * const s = &h->s;
904 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 905 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
906 int i, field;
907 for(field=0; field<2; field++){
908 const int poc = h->s.current_picture_ptr->field_poc[field];
909 const int poc1 = h->ref_list[1][0].field_poc[field];
910 for(i=0; i < 2*h->ref_count[0]; i++)
911 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 912 }
8b1fd554
MN
913
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 916 }
5ad984c9 917}
f4d3382d
MN
918
919static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
920 MpegEncContext * const s = &h->s;
921 Picture * const ref1 = &h->ref_list[1][0];
922 int j, old_ref, rfield;
923 int start= mbafi ? 16 : 0;
924 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
925 int interl= mbafi || s->picture_structure != PICT_FRAME;
926
927 /* bogus; fills in for missing frames */
928 memset(map[list], 0, sizeof(map[list]));
929
930 for(rfield=0; rfield<2; rfield++){
931 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
932 int poc = ref1->ref_poc[colfield][list][old_ref];
933
934 if (!interl)
935 poc |= 3;
936 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
937 poc= (poc&~3) + rfield + 1;
938
939 for(j=start; j<end; j++){
940 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
941 int cur_ref= mbafi ? (j-16)^field : j;
942 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
943 if(rfield == field)
944 map[list][old_ref] = cur_ref;
945 break;
946 }
947 }
948 }
949 }
950}
951
2f944356
LM
952static inline void direct_ref_list_init(H264Context * const h){
953 MpegEncContext * const s = &h->s;
954 Picture * const ref1 = &h->ref_list[1][0];
955 Picture * const cur = s->current_picture_ptr;
bbc78fb4 956 int list, j, field;
f4d3382d
MN
957 int sidx= (s->picture_structure&1)^1;
958 int ref1sidx= (ref1->reference&1)^1;
aa617518 959
2f944356 960 for(list=0; list<2; list++){
2879c75f 961 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 962 for(j=0; j<h->ref_count[list]; j++)
42de393d 963 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 964 }
aa617518 965
7762cc3d 966 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
967 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
968 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 969 }
aa617518 970
48e025e5 971 cur->mbaff= FRAME_MBAFF;
aa617518 972
9701840b 973 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 974 return;
aa617518 975
2f944356 976 for(list=0; list<2; list++){
f4d3382d
MN
977 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
978 for(field=0; field<2; field++)
979 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
980 }
981}
5ad984c9
LM
982
983static inline void pred_direct_motion(H264Context * const h, int *mb_type){
984 MpegEncContext * const s = &h->s;
d00eac6c
MN
985 int b8_stride = h->b8_stride;
986 int b4_stride = h->b_stride;
987 int mb_xy = h->mb_xy;
988 int mb_type_col[2];
989 const int16_t (*l1mv0)[2], (*l1mv1)[2];
990 const int8_t *l1ref0, *l1ref1;
5ad984c9 991 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 992 unsigned int sub_mb_type;
5ad984c9
LM
993 int i8, i4;
994
5d18eaad 995#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
996
997 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 998 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
999 int cur_poc = s->current_picture_ptr->poc;
1000 int *col_poc = h->ref_list[1]->field_poc;
1001 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1002 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1003 b8_stride = 0;
60c9b24d 1004 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1005 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1006 mb_xy += s->mb_stride*fieldoff;
1007 }
1008 goto single_col;
1009 }else{ // AFL/AFR/FR/FL -> AFR/FR
1010 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1011 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1012 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1013 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1014 b8_stride *= 3;
1015 b4_stride *= 6;
1016 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1017 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1018 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1019 && !is_b8x8){
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1022 }else{
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1025 }
1026 }else{ // AFR/FR -> AFR/FR
1027single_col:
1028 mb_type_col[0] =
1029 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1030 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1031 /* FIXME save sub mb types from previous frames (or derive from MVs)
1032 * so we know exactly what block size to use */
1033 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1034 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1035 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1036 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1037 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1038 }else{
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1041 }
d00eac6c 1042 }
5ad984c9 1043 }
5ad984c9 1044
7d54ecc9
MN
1045 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1046 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1047 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1048 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1049 if(!b8_stride){
1050 if(s->mb_y&1){
1051 l1ref0 += h->b8_stride;
1052 l1ref1 += h->b8_stride;
1053 l1mv0 += 2*b4_stride;
1054 l1mv1 += 2*b4_stride;
1055 }
d00eac6c 1056 }
115329f1 1057
5ad984c9
LM
1058 if(h->direct_spatial_mv_pred){
1059 int ref[2];
1060 int mv[2][2];
1061 int list;
1062
5d18eaad
LM
1063 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1064
5ad984c9
LM
1065 /* ref = min(neighbors) */
1066 for(list=0; list<2; list++){
1067 int refa = h->ref_cache[list][scan8[0] - 1];
1068 int refb = h->ref_cache[list][scan8[0] - 8];
1069 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1070 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1071 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1072 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1073 if(ref[list] < 0)
1074 ref[list] = -1;
1075 }
1076
1077 if(ref[0] < 0 && ref[1] < 0){
1078 ref[0] = ref[1] = 0;
1079 mv[0][0] = mv[0][1] =
1080 mv[1][0] = mv[1][1] = 0;
1081 }else{
1082 for(list=0; list<2; list++){
1083 if(ref[list] >= 0)
1084 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1085 else
1086 mv[list][0] = mv[list][1] = 0;
1087 }
1088 }
1089
1090 if(ref[1] < 0){
50b3ab0f
LM
1091 if(!is_b8x8)
1092 *mb_type &= ~MB_TYPE_L1;
1093 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1094 }else if(ref[0] < 0){
50b3ab0f
LM
1095 if(!is_b8x8)
1096 *mb_type &= ~MB_TYPE_L0;
1097 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1098 }
1099
d00eac6c 1100 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1101 for(i8=0; i8<4; i8++){
1102 int x8 = i8&1;
1103 int y8 = i8>>1;
1104 int xy8 = x8+y8*b8_stride;
1105 int xy4 = 3*x8+y8*b4_stride;
1106 int a=0, b=0;
1107
1108 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1109 continue;
1110 h->sub_mb_type[i8] = sub_mb_type;
1111
1112 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1114 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1115 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1116 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1117 if(ref[0] > 0)
1118 a= pack16to32(mv[0][0],mv[0][1]);
1119 if(ref[1] > 0)
1120 b= pack16to32(mv[1][0],mv[1][1]);
1121 }else{
1122 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1124 }
1125 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1126 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1127 }
1128 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1129 int a=0, b=0;
1130
cec93959
LM
1131 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1132 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1133 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1134 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1135 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1136 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1137 if(ref[0] > 0)
d19f5acb 1138 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1139 if(ref[1] > 0)
d19f5acb 1140 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1141 }else{
d19f5acb
MN
1142 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1144 }
d19f5acb
MN
1145 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1146 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1147 }else{
1148 for(i8=0; i8<4; i8++){
1149 const int x8 = i8&1;
1150 const int y8 = i8>>1;
115329f1 1151
5ad984c9
LM
1152 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1153 continue;
1154 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1155
5ad984c9
LM
1156 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1158 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1159 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1160
5ad984c9 1161 /* col_zero_flag */
2ccd25d0
MN
1162 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1163 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1164 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1165 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1166 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1167 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1168 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1169 if(ref[0] == 0)
1170 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1171 if(ref[1] == 0)
1172 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1173 }
1174 }else
5ad984c9 1175 for(i4=0; i4<4; i4++){
2ccd25d0 1176 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1177 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1178 if(ref[0] == 0)
1179 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1180 if(ref[1] == 0)
1181 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1182 }
1183 }
1184 }
1185 }
1186 }
1187 }else{ /* direct temporal mv pred */
5d18eaad
LM
1188 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1189 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1190 int ref_offset= 0;
5d18eaad 1191
cc615d2c 1192 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1193 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1194 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1195 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1196 }
48e025e5 1197 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1198 ref_offset += 16;
48e025e5 1199
cc615d2c
MN
1200 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1201 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1202 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1203
cc615d2c
MN
1204 for(i8=0; i8<4; i8++){
1205 const int x8 = i8&1;
1206 const int y8 = i8>>1;
1207 int ref0, scale;
1208 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1209
cc615d2c
MN
1210 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1211 continue;
1212 h->sub_mb_type[i8] = sub_mb_type;
1213
1214 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 if(IS_INTRA(mb_type_col[y8])){
1216 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1217 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1218 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1219 continue;
1220 }
1221
1222 ref0 = l1ref0[x8 + y8*b8_stride];
1223 if(ref0 >= 0)
f4d3382d 1224 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1225 else{
f4d3382d 1226 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1227 l1mv= l1mv1;
1228 }
1229 scale = dist_scale_factor[ref0];
1230 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1231
1232 {
1233 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1234 int my_col = (mv_col[1]<<y_shift)/2;
1235 int mx = (scale * mv_col[0] + 128) >> 8;
1236 int my = (scale * my_col + 128) >> 8;
1237 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1238 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1239 }
5d18eaad 1240 }
cc615d2c
MN
1241 return;
1242 }
5d18eaad
LM
1243
1244 /* one-to-one mv scaling */
1245
5ad984c9 1246 if(IS_16X16(*mb_type)){
fda51641
MN
1247 int ref, mv0, mv1;
1248
5ad984c9 1249 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1250 if(IS_INTRA(mb_type_col[0])){
fda51641 1251 ref=mv0=mv1=0;
5ad984c9 1252 }else{
f4d3382d
MN
1253 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1254 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1255 const int scale = dist_scale_factor[ref0];
8583bef8 1256 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1257 int mv_l0[2];
5d18eaad
LM
1258 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1259 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1260 ref= ref0;
1261 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1262 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1263 }
fda51641
MN
1264 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1265 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1266 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1267 }else{
1268 for(i8=0; i8<4; i8++){
1269 const int x8 = i8&1;
1270 const int y8 = i8>>1;
5d18eaad 1271 int ref0, scale;
bf4e3bd2 1272 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1273
5ad984c9
LM
1274 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1275 continue;
1276 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1277 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1278 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1279 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1280 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1281 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1282 continue;
1283 }
115329f1 1284
f4d3382d 1285 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1286 if(ref0 >= 0)
5d18eaad 1287 ref0 = map_col_to_list0[0][ref0];
8583bef8 1288 else{
f4d3382d 1289 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1290 l1mv= l1mv1;
1291 }
5d18eaad 1292 scale = dist_scale_factor[ref0];
115329f1 1293
5ad984c9 1294 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1295 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1296 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1297 int mx = (scale * mv_col[0] + 128) >> 8;
1298 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1301 }else
5ad984c9 1302 for(i4=0; i4<4; i4++){
2ccd25d0 1303 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1304 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1305 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1306 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1307 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1308 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1309 }
1310 }
1311 }
1312 }
1313}
1314
0da71265
MN
1315static inline void write_back_motion(H264Context *h, int mb_type){
1316 MpegEncContext * const s = &h->s;
0da71265
MN
1317 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1318 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1319 int list;
1320
2ea39252
LM
1321 if(!USES_LIST(mb_type, 0))
1322 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1323
3425501d 1324 for(list=0; list<h->list_count; list++){
0da71265 1325 int y;
53b19144 1326 if(!USES_LIST(mb_type, list))
5ad984c9 1327 continue;
115329f1 1328
0da71265
MN
1329 for(y=0; y<4; y++){
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1332 }
9e528114 1333 if( h->pps.cabac ) {
e6e77eb6
LM
1334 if(IS_SKIP(mb_type))
1335 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1336 else
9e528114
LA
1337 for(y=0; y<4; y++){
1338 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1340 }
1341 }
53b19144
LM
1342
1343 {
191e8ca7 1344 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1345 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1346 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1347 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1348 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1349 }
1350 }
115329f1 1351
9f5c1037 1352 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1353 if(IS_8X8(mb_type)){
53b19144
LM
1354 uint8_t *direct_table = &h->direct_table[b8_xy];
1355 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1356 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1357 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1358 }
1359 }
0da71265
MN
1360}
1361
1362/**
1363 * Decodes a network abstraction layer unit.
1364 * @param consumed is the number of bytes used as input
1365 * @param length is the length of the array
3b66c4c5 1366 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1367 * @returns decoded bytes, might be src+1 if no escapes
0da71265 1368 */
30317501 1369static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1370 int i, si, di;
1371 uint8_t *dst;
24456882 1372 int bufidx;
0da71265 1373
bb270c08 1374// src[0]&0x80; //forbidden bit
0da71265
MN
1375 h->nal_ref_idc= src[0]>>5;
1376 h->nal_unit_type= src[0]&0x1F;
1377
1378 src++; length--;
115329f1 1379#if 0
0da71265
MN
1380 for(i=0; i<length; i++)
1381 printf("%2X ", src[i]);
1382#endif
e08715d3 1383
b250f9c6
AJ
1384#if HAVE_FAST_UNALIGNED
1385# if HAVE_FAST_64BIT
e08715d3
MN
1386# define RS 7
1387 for(i=0; i+1<length; i+=9){
1388 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1389# else
1390# define RS 3
1391 for(i=0; i+1<length; i+=5){
1392 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1393# endif
1394 continue;
1395 if(i>0 && !src[i]) i--;
1396 while(src[i]) i++;
1397#else
1398# define RS 0
0da71265
MN
1399 for(i=0; i+1<length; i+=2){
1400 if(src[i]) continue;
1401 if(i>0 && src[i-1]==0) i--;
e08715d3 1402#endif
0da71265
MN
1403 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1404 if(src[i+2]!=3){
1405 /* startcode, so we must be past the end */
1406 length=i;
1407 }
1408 break;
1409 }
abb27cfb 1410 i-= RS;
0da71265
MN
1411 }
1412
1413 if(i>=length-1){ //no escaped 0
1414 *dst_length= length;
1415 *consumed= length+1; //+1 for the header
115329f1 1416 return src;
0da71265
MN
1417 }
1418
24456882 1419 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
d4369630 1420 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 1421 dst= h->rbsp_buffer[bufidx];
0da71265 1422
ac658be5
FOL
1423 if (dst == NULL){
1424 return NULL;
1425 }
1426
3b66c4c5 1427//printf("decoding esc\n");
593af7cd
MN
1428 memcpy(dst, src, i);
1429 si=di=i;
1430 while(si+2<length){
0da71265 1431 //remove escapes (very rare 1:2^22)
593af7cd
MN
1432 if(src[si+2]>3){
1433 dst[di++]= src[si++];
1434 dst[di++]= src[si++];
1435 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
1436 if(src[si+2]==3){ //escape
1437 dst[di++]= 0;
1438 dst[di++]= 0;
1439 si+=3;
c8470cc1 1440 continue;
0da71265 1441 }else //next start code
593af7cd 1442 goto nsc;
0da71265
MN
1443 }
1444
1445 dst[di++]= src[si++];
1446 }
593af7cd
MN
1447 while(si<length)
1448 dst[di++]= src[si++];
1449nsc:
0da71265 1450
d4369630
AS
1451 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1452
0da71265
MN
1453 *dst_length= di;
1454 *consumed= si + 1;//+1 for the header
90b5b51e 1455//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1456 return dst;
1457}
1458
0da71265
MN
1459/**
1460 * identifies the exact end of the bitstream
1461 * @return the length of the trailing, or 0 if damaged
1462 */
30317501 1463static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1464 int v= *src;
1465 int r;
1466
a9c9a240 1467 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1468
1469 for(r=1; r<9; r++){
1470 if(v&1) return r;
1471 v>>=1;
1472 }
1473 return 0;
1474}
1475
1476/**
1412060e 1477 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1478 * @param qp quantization parameter
1479 */
239ea04c 1480static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1481#define stride 16
1482 int i;
1483 int temp[16]; //FIXME check if this is a good idea
1484 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1485 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1486
1487//memset(block, 64, 2*256);
1488//return;
1489 for(i=0; i<4; i++){
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
1495
1496 temp[4*i+0]= z0+z3;
1497 temp[4*i+1]= z1+z2;
1498 temp[4*i+2]= z1-z2;
1499 temp[4*i+3]= z0-z3;
1500 }
1501
1502 for(i=0; i<4; i++){
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1508
1412060e 1509 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1510 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1511 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1512 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1513 }
1514}
1515
#if 0
/**
 * DCT transforms the 16 dc values.
 * Currently compiled out; relies on the "stride" macro still being defined.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int tmp[16]; //FIXME check if this is a good idea
    int k;

    /* first pass: butterflies along one dimension */
    for(k=0; k<4; k++){
        const DCTELEM * const in= block + y_offset[k];
        const int sum04 = in[stride*0] + in[stride*4];
        const int dif04 = in[stride*0] - in[stride*4];
        const int dif15 = in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        tmp[4*k+0]= sum04+sum15;
        tmp[4*k+1]= dif04+dif15;
        tmp[4*k+2]= dif04-dif15;
        tmp[4*k+3]= sum04-sum15;
    }

    /* second pass along the other dimension, halving the result */
    for(k=0; k<4; k++){
        DCTELEM * const out= block + x_offset[k];
        const int sum02 = tmp[4*0+k] + tmp[4*2+k];
        const int dif02 = tmp[4*0+k] - tmp[4*2+k];
        const int dif13 = tmp[4*1+k] - tmp[4*3+k];
        const int sum13 = tmp[4*1+k] + tmp[4*3+k];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
1555
0da71265
MN
1556#undef xStride
1557#undef stride
1558
239ea04c 1559static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1560 const int stride= 16*2;
1561 const int xStride= 16;
1562 int a,b,c,d,e;
1563
1564 a= block[stride*0 + xStride*0];
1565 b= block[stride*0 + xStride*1];
1566 c= block[stride*1 + xStride*0];
1567 d= block[stride*1 + xStride*1];
1568
1569 e= a-b;
1570 a= a+b;
1571 b= c-d;
1572 c= c+d;
1573
239ea04c
LM
1574 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1575 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1576 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1577 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1578}
1579
e5017ab8 1580#if 0
0da71265
MN
1581static void chroma_dc_dct_c(DCTELEM *block){
1582 const int stride= 16*2;
1583 const int xStride= 16;
1584 int a,b,c,d,e;
1585
1586 a= block[stride*0 + xStride*0];
1587 b= block[stride*0 + xStride*1];
1588 c= block[stride*1 + xStride*0];
1589 d= block[stride*1 + xStride*1];
1590
1591 e= a-b;
1592 a= a+b;
1593 b= c-d;
1594 c= c+d;
1595
1596 block[stride*0 + xStride*0]= (a+c);
1597 block[stride*0 + xStride*1]= (e+b);
1598 block[stride*1 + xStride*0]= (a-c);
1599 block[stride*1 + xStride*1]= (e-b);
1600}
e5017ab8 1601#endif
0da71265
MN
1602
1603/**
1604 * gets the chroma qp.
1605 */
4691a77d 1606static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1607 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1608}
1609
0da71265
MN
1610static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1611 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1612 int src_x_offset, int src_y_offset,
1613 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1614 MpegEncContext * const s = &h->s;
1615 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1616 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1617 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1618 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1619 uint8_t * src_cb, * src_cr;
1620 int extra_width= h->emu_edge_width;
1621 int extra_height= h->emu_edge_height;
0da71265
MN
1622 int emu=0;
1623 const int full_mx= mx>>2;
1624 const int full_my= my>>2;
fbd312fd 1625 const int pic_width = 16*s->mb_width;
0d43dd8c 1626 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1627
0da71265
MN
1628 if(mx&7) extra_width -= 3;
1629 if(my&7) extra_height -= 3;
115329f1
DB
1630
1631 if( full_mx < 0-extra_width
1632 || full_my < 0-extra_height
1633 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1634 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1635 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1636 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1637 emu=1;
1638 }
115329f1 1639
5d18eaad 1640 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1641 if(!square){
5d18eaad 1642 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1643 }
115329f1 1644
49fb20cb 1645 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1646
0d43dd8c 1647 if(MB_FIELD){
5d18eaad 1648 // chroma offset when predicting from a field of opposite parity
2143b118 1649 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1650 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1651 }
1652 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1654
0da71265 1655 if(emu){
5d18eaad 1656 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1657 src_cb= s->edge_emu_buffer;
1658 }
5d18eaad 1659 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1660
1661 if(emu){
5d18eaad 1662 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1663 src_cr= s->edge_emu_buffer;
1664 }
5d18eaad 1665 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1666}
1667
9f2d1b4f 1668static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1669 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1670 int x_offset, int y_offset,
1671 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1672 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1673 int list0, int list1){
1674 MpegEncContext * const s = &h->s;
1675 qpel_mc_func *qpix_op= qpix_put;
1676 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1677
5d18eaad
LM
1678 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1679 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1680 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1681 x_offset += 8*s->mb_x;
0d43dd8c 1682 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1683
0da71265 1684 if(list0){
1924f3ce 1685 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
1689
1690 qpix_op= qpix_avg;
1691 chroma_op= chroma_avg;
1692 }
1693
1694 if(list1){
1924f3ce 1695 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1696 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1697 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1698 qpix_op, chroma_op);
1699 }
1700}
1701
9f2d1b4f
LM
1702static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1703 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1704 int x_offset, int y_offset,
1705 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1706 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1707 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1708 int list0, int list1){
1709 MpegEncContext * const s = &h->s;
1710
5d18eaad
LM
1711 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1712 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1713 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1714 x_offset += 8*s->mb_x;
0d43dd8c 1715 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1716
9f2d1b4f
LM
1717 if(list0 && list1){
1718 /* don't optimize for luma-only case, since B-frames usually
1719 * use implicit weights => chroma too. */
1720 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1721 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1722 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1723 int refn0 = h->ref_cache[0][ scan8[n] ];
1724 int refn1 = h->ref_cache[1][ scan8[n] ];
1725
1726 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1727 dest_y, dest_cb, dest_cr,
1728 x_offset, y_offset, qpix_put, chroma_put);
1729 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1730 tmp_y, tmp_cb, tmp_cr,
1731 x_offset, y_offset, qpix_put, chroma_put);
1732
1733 if(h->use_weight == 2){
1734 int weight0 = h->implicit_weight[refn0][refn1];
1735 int weight1 = 64 - weight0;
5d18eaad
LM
1736 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1737 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1739 }else{
5d18eaad 1740 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1741 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1742 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1743 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1744 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1745 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1746 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1747 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1748 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1749 }
1750 }else{
1751 int list = list1 ? 1 : 0;
1752 int refn = h->ref_cache[list][ scan8[n] ];
1753 Picture *ref= &h->ref_list[list][refn];
1754 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1755 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1756 qpix_put, chroma_put);
1757
5d18eaad 1758 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1759 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1760 if(h->use_weight_chroma){
5d18eaad 1761 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1762 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1763 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1764 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1765 }
1766 }
1767}
1768
1769static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1770 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1771 int x_offset, int y_offset,
1772 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1773 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1774 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1775 int list0, int list1){
1776 if((h->use_weight==2 && list0 && list1
1777 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1778 || h->use_weight==1)
1779 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1780 x_offset, y_offset, qpix_put, chroma_put,
1781 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1782 else
1783 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1784 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1785}
1786
513fbd8e
LM
1787static inline void prefetch_motion(H264Context *h, int list){
1788 /* fetch pixels for estimated mv 4 macroblocks ahead
1789 * optimized for 64byte cache lines */
1790 MpegEncContext * const s = &h->s;
1791 const int refn = h->ref_cache[list][scan8[0]];
1792 if(refn >= 0){
1793 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1794 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1795 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1796 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1797 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1798 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1799 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1800 }
1801}
1802
0da71265
MN
1803static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1805 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1806 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1807 MpegEncContext * const s = &h->s;
64514ee8 1808 const int mb_xy= h->mb_xy;
0da71265 1809 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1810
0da71265 1811 assert(IS_INTER(mb_type));
115329f1 1812
513fbd8e
LM
1813 prefetch_motion(h, 0);
1814
0da71265
MN
1815 if(IS_16X16(mb_type)){
1816 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1817 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1818 &weight_op[0], &weight_avg[0],
0da71265
MN
1819 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1820 }else if(IS_16X8(mb_type)){
1821 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1822 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1823 &weight_op[1], &weight_avg[1],
0da71265
MN
1824 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1825 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1826 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1827 &weight_op[1], &weight_avg[1],
0da71265
MN
1828 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1829 }else if(IS_8X16(mb_type)){
5d18eaad 1830 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1831 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1832 &weight_op[2], &weight_avg[2],
0da71265 1833 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1834 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1835 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1836 &weight_op[2], &weight_avg[2],
0da71265
MN
1837 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1838 }else{
1839 int i;
115329f1 1840
0da71265
MN
1841 assert(IS_8X8(mb_type));
1842
1843 for(i=0; i<4; i++){
1844 const int sub_mb_type= h->sub_mb_type[i];
1845 const int n= 4*i;
1846 int x_offset= (i&1)<<2;
1847 int y_offset= (i&2)<<1;
1848
1849 if(IS_SUB_8X8(sub_mb_type)){
1850 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1851 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1852 &weight_op[3], &weight_avg[3],
0da71265
MN
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 }else if(IS_SUB_8X4(sub_mb_type)){
1855 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1856 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1857 &weight_op[4], &weight_avg[4],
0da71265
MN
1858 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1859 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1860 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1861 &weight_op[4], &weight_avg[4],
0da71265
MN
1862 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1864 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1865 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1866 &weight_op[5], &weight_avg[5],
0da71265 1867 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1868 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1870 &weight_op[5], &weight_avg[5],
0da71265
MN
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1872 }else{
1873 int j;
1874 assert(IS_SUB_4X4(sub_mb_type));
1875 for(j=0; j<4; j++){
1876 int sub_x_offset= x_offset + 2*(j&1);
1877 int sub_y_offset= y_offset + (j&2);
1878 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1879 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1880 &weight_op[6], &weight_avg[6],
0da71265
MN
1881 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1882 }
1883 }
1884 }
1885 }
513fbd8e
LM
1886
1887 prefetch_motion(h, 1);
0da71265
MN
1888}
1889
8140955d
MN
1890static av_cold void init_cavlc_level_tab(void){
1891 int suffix_length, mask;
1892 unsigned int i;
1893
1894 for(suffix_length=0; suffix_length<7; suffix_length++){
1895 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1896 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1897 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1898
1899 mask= -(level_code&1);
1900 level_code= (((2+level_code)>>1) ^ mask) - mask;
1901 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1902 cavlc_level_tab[suffix_length][i][0]= level_code;
1903 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1904 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1905 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1906 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1907 }else{
1908 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1909 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1910 }
1911 }
1912 }
1913}
1914
98a6fff9 1915static av_cold void decode_init_vlc(void){
0da71265
MN
1916 static int done = 0;
1917
1918 if (!done) {
1919 int i;
910e3668 1920 int offset;
0da71265
MN
1921 done = 1;
1922
910e3668
AC
1923 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1924 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1925 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1926 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1927 &chroma_dc_coeff_token_bits[0], 1, 1,
1928 INIT_VLC_USE_NEW_STATIC);
0da71265 1929
910e3668 1930 offset = 0;
0da71265 1931 for(i=0; i<4; i++){
910e3668
AC
1932 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1933 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1934 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1935 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1936 &coeff_token_bits[i][0], 1, 1,
1937 INIT_VLC_USE_NEW_STATIC);
1938 offset += coeff_token_vlc_tables_size[i];
0da71265 1939 }
910e3668
AC
1940 /*
1941 * This is a one time safety check to make sure that
1942 * the packed static coeff_token_vlc table sizes
1943 * were initialized correctly.
1944 */
37d3e066 1945 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1946
1947 for(i=0; i<3; i++){
910e3668
AC
1948 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1949 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1950 init_vlc(&chroma_dc_total_zeros_vlc[i],
1951 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1952 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1953 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1954 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1955 }
1956 for(i=0; i<15; i++){
910e3668
AC
1957 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1958 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1959 init_vlc(&total_zeros_vlc[i],
1960 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1961 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1962 &total_zeros_bits[i][0], 1, 1,
1963 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1964 }
1965
1966 for(i=0; i<6; i++){
910e3668
AC
1967 run_vlc[i].table = run_vlc_tables[i];
1968 run_vlc[i].table_allocated = run_vlc_tables_size;
1969 init_vlc(&run_vlc[i],
1970 RUN_VLC_BITS, 7,
0da71265 1971 &run_len [i][0], 1, 1,
910e3668
AC
1972 &run_bits[i][0], 1, 1,
1973 INIT_VLC_USE_NEW_STATIC);
0da71265 1974 }
910e3668
AC
1975 run7_vlc.table = run7_vlc_table,
1976 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1977 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1978 &run_len [6][0], 1, 1,
910e3668
AC
1979 &run_bits[6][0], 1, 1,
1980 INIT_VLC_USE_NEW_STATIC);
8140955d
MN
1981
1982 init_cavlc_level_tab();
0da71265
MN
1983 }
1984}
1985
0da71265 1986static void free_tables(H264Context *h){
7978debd 1987 int i;
afebe2f7 1988 H264Context *hx;
0da71265 1989 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1990 av_freep(&h->chroma_pred_mode_table);
1991 av_freep(&h->cbp_table);
9e528114
LA
1992 av_freep(&h->mvd_table[0]);
1993 av_freep(&h->mvd_table[1]);
5ad984c9 1994 av_freep(&h->direct_table);
0da71265
MN
1995 av_freep(&h->non_zero_count);
1996 av_freep(&h->slice_table_base);
1997 h->slice_table= NULL;
e5017ab8 1998
0da71265
MN
1999 av_freep(&h->mb2b_xy);
2000 av_freep(&h->mb2b8_xy);
9f2d1b4f 2001
afebe2f7
2002 for(i = 0; i < h->s.avctx->thread_count; i++) {
2003 hx = h->thread_context[i];
2004 if(!hx) continue;
2005 av_freep(&hx->top_borders[1]);
2006 av_freep(&hx->top_borders[0]);
2007 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 2008 }
0da71265
MN
2009}
2010
239ea04c
LM
2011static void init_dequant8_coeff_table(H264Context *h){
2012 int i,q,x;
548a1c8a 2013 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2014 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2015 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2016
2017 for(i=0; i<2; i++ ){
2018 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2019 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2020 break;
2021 }
2022
2023 for(q=0; q<52; q++){
d9ec210b
DP
2024 int shift = div6[q];
2025 int idx = rem6[q];
239ea04c 2026 for(x=0; x<64; x++)
548a1c8a
LM
2027 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2028 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2029 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2030 }
2031 }
2032}
2033
2034static void init_dequant4_coeff_table(H264Context *h){
2035 int i,j,q,x;
ab2e3e2c 2036 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2037 for(i=0; i<6; i++ ){
2038 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2039 for(j=0; j<i; j++){
2040 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2041 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2042 break;
2043 }
2044 }
2045 if(j<i)
2046 continue;
2047
2048 for(q=0; q<52; q++){
d9ec210b
DP
2049 int shift = div6[q] + 2;
2050 int idx = rem6[q];
239ea04c 2051 for(x=0; x<16; x++)
ab2e3e2c
LM
2052 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2053 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2054 h->pps.scaling_matrix4[i][x]) << shift;
2055 }
2056 }
2057}
2058
2059static void init_dequant_tables(H264Context *h){
2060 int i,x;
2061 init_dequant4_coeff_table(h);
2062 if(h->pps.transform_8x8_mode)
2063 init_dequant8_coeff_table(h);
2064 if(h->sps.transform_bypass){
2065 for(i=0; i<6; i++)
2066 for(x=0; x<16; x++)
2067 h->dequant4_coeff[i][0][x] = 1<<6;
2068 if(h->pps.transform_8x8_mode)
2069 for(i=0; i<2; i++)
2070 for(x=0; x<64; x++)
2071 h->dequant8_coeff[i][0][x] = 1<<6;
2072 }
2073}
2074
2075
0da71265
MN
2076/**
2077 * allocates tables.
3b66c4c5 2078 * needs width/height
0da71265
MN
2079 */
2080static int alloc_tables(H264Context *h){
2081 MpegEncContext * const s = &h->s;
7bc9090a 2082 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2083 int x,y;
0da71265
MN
2084
2085 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2086
53c05b1e 2087 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2088 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2089 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2090
7526ade2
MN
2091 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2092 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2093 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2094 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2095
b735aeea 2096 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2097 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2098
a55f20bd
LM
2099 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2100 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2101 for(y=0; y<s->mb_height; y++){
2102 for(x=0; x<s->mb_width; x++){
7bc9090a 2103 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2104 const int b_xy = 4*x + 4*y*h->b_stride;
2105 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2106
0da71265
MN
2107 h->mb2b_xy [mb_xy]= b_xy;
2108 h->mb2b8_xy[mb_xy]= b8_xy;
2109 }
2110 }
9f2d1b4f 2111
9c6221ae
GV
2112 s->obmc_scratchpad = NULL;
2113
56edbd81
LM
2114 if(!h->dequant4_coeff[0])
2115 init_dequant_tables(h);
2116
0da71265
MN
2117 return 0;
2118fail:
2119 free_tables(h);
2120 return -1;
2121}
2122
afebe2f7
2123/**
2124 * Mimic alloc_tables(), but for every context thread.
2125 */
2126static void clone_tables(H264Context *dst, H264Context *src){
2127 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2128 dst->non_zero_count = src->non_zero_count;
2129 dst->slice_table = src->slice_table;
2130 dst->cbp_table = src->cbp_table;
2131 dst->mb2b_xy = src->mb2b_xy;
2132 dst->mb2b8_xy = src->mb2b8_xy;
2133 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2134 dst->mvd_table[0] = src->mvd_table[0];
2135 dst->mvd_table[1] = src->mvd_table[1];
2136 dst->direct_table = src->direct_table;
2137
afebe2f7
2138 dst->s.obmc_scratchpad = NULL;
2139 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2140}
2141
2142/**
2143 * Init context
2144 * Allocate buffers which are not shared amongst multiple threads.
2145 */
2146static int context_init(H264Context *h){
afebe2f7
2147 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2149
afebe2f7
2150 return 0;
2151fail:
2152 return -1; // free_tables will clean up for us
2153}
2154
98a6fff9 2155static av_cold void common_init(H264Context *h){
0da71265 2156 MpegEncContext * const s = &h->s;
0da71265
MN
2157
2158 s->width = s->avctx->width;
2159 s->height = s->avctx->height;
2160 s->codec_id= s->avctx->codec->id;
115329f1 2161
c92a30bb 2162 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2163
239ea04c 2164 h->dequant_coeff_pps= -1;
9a41c2c7 2165 s->unrestricted_mv=1;
0da71265 2166 s->decode=1; //FIXME
56edbd81 2167
a5805aa9
MN
2168 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2169
56edbd81
LM
2170 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2171 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2172}
2173
98a6fff9 2174static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2175 H264Context *h= avctx->priv_data;
2176 MpegEncContext * const s = &h->s;
2177
3edcacde 2178 MPV_decode_defaults(s);
115329f1 2179
0da71265
MN
2180 s->avctx = avctx;
2181 common_init(h);
2182
2183 s->out_format = FMT_H264;
2184 s->workaround_bugs= avctx->workaround_bugs;
2185
2186 // set defaults
0da71265 2187// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2188 s->quarter_sample = 1;
0da71265 2189 s->low_delay= 1;
7a9dba3c
MN
2190
2191 if(avctx->codec_id == CODEC_ID_SVQ3)
2192 avctx->pix_fmt= PIX_FMT_YUVJ420P;
0d3d172f 2193 else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2194 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
7a9dba3c 2195 else
1d42f410 2196 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2197
c2212338 2198 decode_init_vlc();
115329f1 2199
26165f99
MR
2200 if(avctx->extradata_size > 0 && avctx->extradata &&
2201 *(char *)avctx->extradata == 1){
4770b1b4
RT
2202 h->is_avc = 1;
2203 h->got_avcC = 0;
26165f99
MR
2204 } else {
2205 h->is_avc = 0;
4770b1b4
RT
2206 }
2207
afebe2f7 2208 h->thread_context[0] = h;
18c7be65 2209 h->outputed_poc = INT_MIN;
e4b8f1fa 2210 h->prev_poc_msb= 1<<16;
37a558fe 2211 h->sei_recovery_frame_cnt = -1;
ff594f81 2212 h->sei_dpb_output_delay = 0;
cf6065ca 2213 h->sei_cpb_removal_delay = -1;
0da71265
MN
2214 return 0;
2215}
2216
af8aa846 2217static int frame_start(H264Context *h){
0da71265
MN
2218 MpegEncContext * const s = &h->s;
2219 int i;
2220
af8aa846
MN
2221 if(MPV_frame_start(s, s->avctx) < 0)
2222 return -1;
0da71265 2223 ff_er_frame_start(s);
3a22d7fa
JD
2224 /*
2225 * MPV_frame_start uses pict_type to derive key_frame.
2226 * This is incorrect for H.264; IDR markings must be used.
1412060e 2227 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2228 * See decode_nal_units().
2229 */
2230 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2231
2232 assert(s->linesize && s->uvlinesize);
2233
2234 for(i=0; i<16; i++){
2235 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2236 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2237 }
2238 for(i=0; i<4; i++){
2239 h->block_offset[16+i]=
2240 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2241 h->block_offset[24+16+i]=
2242 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2243 }
2244
934b0821
LM
2245 /* can't be in alloc_tables because linesize isn't known there.
2246 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2247 for(i = 0; i < s->avctx->thread_count; i++)
2248 if(!h->thread_context[i]->s.obmc_scratchpad)
2249 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2250
2251 /* some macroblocks will be accessed before they're available */
afebe2f7 2252 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2253 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2254
0da71265 2255// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2256
1412060e 2257 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2258 // that if we break out due to an error it can be released automatically
2259 // in the next MPV_frame_start().
2260 // SVQ3 as well as most other codecs have only last/next/current and thus
2261 // get released even with set reference, besides SVQ3 and others do not
2262 // mark frames as reference later "naturally".
2263 if(s->codec_id != CODEC_ID_SVQ3)
2264 s->current_picture_ptr->reference= 0;
357282c6
MN
2265
2266 s->current_picture_ptr->field_poc[0]=
2267 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2268 assert(s->current_picture_ptr->long_ref==0);
357282c6 2269
af8aa846 2270 return 0;
0da71265
MN
2271}
2272
93cc10fa 2273static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2274 MpegEncContext * const s = &h->s;
2275 int i;
5f7f9719
MN
2276 int step = 1;
2277 int offset = 1;
2278 int uvoffset= 1;
2279 int top_idx = 1;
2280 int skiplast= 0;
115329f1 2281
53c05b1e
MN
2282 src_y -= linesize;
2283 src_cb -= uvlinesize;
2284 src_cr -= uvlinesize;
2285
5f7f9719
MN
2286 if(!simple && FRAME_MBAFF){
2287 if(s->mb_y&1){
2288 offset = MB_MBAFF ? 1 : 17;
2289 uvoffset= MB_MBAFF ? 1 : 9;
2290 if(!MB_MBAFF){
2291 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2292 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
49fb20cb 2293 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2294 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2295 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2296 }
2297 }
2298 }else{
2299 if(!MB_MBAFF){
2300 h->left_border[0]= h->top_borders[0][s->mb_x][15];
49fb20cb 2301 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2302 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2303 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2304 }
2305 skiplast= 1;
2306 }
2307 offset =
2308 uvoffset=
2309 top_idx = MB_MBAFF ? 0 : 1;
2310 }
2311 step= MB_MBAFF ? 2 : 1;
2312 }
2313
3b66c4c5 2314 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2315 // and the line above the bottom macroblock
5f7f9719
MN
2316 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2317 for(i=1; i<17 - skiplast; i++){
2318 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2319 }
115329f1 2320
5f7f9719
MN
2321 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2322 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2323
49fb20cb 2324 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2325 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2326 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2327 for(i=1; i<9 - skiplast; i++){
2328 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2329 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2330 }
5f7f9719
MN
2331 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2332 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2333 }
2334}
2335
93cc10fa 2336static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2337 MpegEncContext * const s = &h->s;
2338 int temp8, i;
2339 uint64_t temp64;
b69378e2
2340 int deblock_left;
2341 int deblock_top;
2342 int mb_xy;
5f7f9719
MN
2343 int step = 1;
2344 int offset = 1;
2345 int uvoffset= 1;
2346 int top_idx = 1;
2347
2348 if(!simple && FRAME_MBAFF){
2349 if(s->mb_y&1){
2350 offset = MB_MBAFF ? 1 : 17;
2351 uvoffset= MB_MBAFF ? 1 : 9;
2352 }else{
2353 offset =
2354 uvoffset=
2355 top_idx = MB_MBAFF ? 0 : 1;
2356 }
2357 step= MB_MBAFF ? 2 : 1;
2358 }
b69378e2
2359
2360 if(h->deblocking_filter == 2) {
64514ee8 2361 mb_xy = h->mb_xy;
b69378e2
2362 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2363 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2364 } else {
2365 deblock_left = (s->mb_x > 0);
6c805007 2366 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2367 }
53c05b1e
MN
2368
2369 src_y -= linesize + 1;
2370 src_cb -= uvlinesize + 1;
2371 src_cr -= uvlinesize + 1;
2372
2373#define XCHG(a,b,t,xchg)\
2374t= a;\
2375if(xchg)\
2376 a= b;\
2377b= t;
d89dc06a
LM
2378
2379 if(deblock_left){
5f7f9719
MN
2380 for(i = !deblock_top; i<16; i++){
2381 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2382 }
5f7f9719 2383 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2384 }
2385
2386 if(deblock_top){
5f7f9719
MN
2387 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2388 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2389 if(s->mb_x+1 < s->mb_width){
5f7f9719 2390 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2391 }
53c05b1e 2392 }
53c05b1e 2393
49fb20cb 2394 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2395 if(deblock_left){
5f7f9719
MN
2396 for(i = !deblock_top; i<8; i++){
2397 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2398 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2399 }
5f7f9719
MN
2400 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2401 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2402 }
2403 if(deblock_top){
5f7f9719
MN
2404 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2405 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2406 }
53c05b1e
MN
2407 }
2408}
2409
/**
 * Reconstruct the current macroblock into s->current_picture: intra
 * prediction or motion compensation, residual addition and, when
 * h->deblocking_filter is set, border backup plus loop filtering.
 *
 * @param h      decoder context; the macroblock is selected by s->mb_x,
 *               s->mb_y and h->mb_xy
 * @param simple when non-zero, every "!simple && ..." branch below (field
 *               macroblocks, MBAFF, intra PCM, transform bypass) is skipped
 *               and the gray-flag / codec-id checks are short-circuited.
 *               The two wrappers hl_decode_mb_simple() and
 *               hl_decode_mb_complex() pass the constants 1 and 0.
 */
5a6a6cc7 2410static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2411 MpegEncContext * const s = &h->s;
2412 const int mb_x= s->mb_x;
2413 const int mb_y= s->mb_y;
64514ee8 2414 const int mb_xy= h->mb_xy;
0da71265
MN
2415 const int mb_type= s->current_picture.mb_type[mb_xy];
2416 uint8_t *dest_y, *dest_cb, *dest_cr;
2417 int linesize, uvlinesize /*dct_offset*/;
2418 int i;
6867a90b 2419 int *block_offset = &h->block_offset[0];
41e4055b 2420 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 2421 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 2422 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 2423 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2424 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2425
6120a343
MN
/* Destination of the 16x16 luma block and the two 8x8 chroma blocks. */
2426 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2427 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2428 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 2429
a957c27b
LM
2430 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2431 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2432
/* Field macroblock: doubled strides and the block offsets starting at
 * index 24; on odd macroblock rows the destinations are moved up by
 * 15 luma / 7 chroma lines. */
bd91fee3 2433 if (!simple && MB_FIELD) {
5d18eaad
LM
2434 linesize = h->mb_linesize = s->linesize * 2;
2435 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2436 block_offset = &h->block_offset[24];
1412060e 2437 if(mb_y&1){ //FIXME move out of this function?
0da71265 2438 dest_y -= s->linesize*15;
6867a90b
LLL
2439 dest_cb-= s->uvlinesize*7;
2440 dest_cr-= s->uvlinesize*7;
0da71265 2441 }
5d18eaad
LM
2442 if(FRAME_MBAFF) {
2443 int list;
/* Rewrite the cached reference indices to (16+ref)^(mb_y&1) for every
 * list this macroblock uses. */
3425501d 2444 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2445 if(!USES_LIST(mb_type, list))
2446 continue;
2447 if(IS_16X16(mb_type)){
2448 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2449 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2450 }else{
2451 for(i=0; i<16; i+=4){
5d18eaad
LM
2452 int ref = h->ref_cache[list][scan8[i]];
2453 if(ref >= 0)
1710856c 2454 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2455 }
2456 }
2457 }
2458 }
0da71265 2459 } else {
5d18eaad
LM
2460 linesize = h->mb_linesize = s->linesize;
2461 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2462// dct_offset = s->linesize * 16;
2463 }
115329f1 2464
/* Intra PCM: the samples are copied straight out of h->mb. */
bd91fee3 2465 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2466 for (i=0; i<16; i++) {
2467 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2468 }
c1708e8d
MN
2469 for (i=0; i<8; i++) {
2470 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2471 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2472 }
e7e09b49
LLL
2473 } else {
2474 if(IS_INTRA(mb_type)){
/* With the loop filter enabled, swap in the saved border samples
 * (xchg=1) before predicting; they are restored after prediction. */
5f7f9719 2475 if(h->deblocking_filter)
93cc10fa 2476 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2477
49fb20cb 2478 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2479 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2480 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2481 }
0da71265 2482
e7e09b49 2483 if(IS_INTRA4x4(mb_type)){
bd91fee3 2484 if(simple || !s->encoding){
43efd19a 2485 if(IS_8x8DCT(mb_type)){
1eb96035
MN
2486 if(transform_bypass){
2487 idct_dc_add =
2488 idct_add = s->dsp.add_pixels8;
dae006d7 2489 }else{
1eb96035
MN
2490 idct_dc_add = s->dsp.h264_idct8_dc_add;
2491 idct_add = s->dsp.h264_idct8_add;
2492 }
43efd19a
LM
2493 for(i=0; i<16; i+=4){
2494 uint8_t * const ptr= dest_y + block_offset[i];
2495 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
2496 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2497 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2498 }else{
ac0623b2
MN
2499 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2500 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2501 (h->topright_samples_available<<i)&0x4000, linesize);
2502 if(nnz){
2503 if(nnz == 1 && h->mb[i*16])
2504 idct_dc_add(ptr, h->mb + i*16, linesize);
2505 else
2506 idct_add (ptr, h->mb + i*16, linesize);
2507 }
41e4055b 2508 }
43efd19a 2509 }
1eb96035
MN
2510 }else{
2511 if(transform_bypass){
2512 idct_dc_add =
2513 idct_add = s->dsp.add_pixels4;
2514 }else{
2515 idct_dc_add = s->dsp.h264_idct_dc_add;
2516 idct_add = s->dsp.h264_idct_add;
2517 }
aebb5d6d
MN
2518 for(i=0; i<16; i++){
2519 uint8_t * const ptr= dest_y + block_offset[i];
2520 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 2521
aebb5d6d
MN
2522 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2523 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2524 }else{
2525 uint8_t *topright;
2526 int nnz, tr;
2527 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2528 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2529 assert(mb_y || linesize <= block_offset[i]);
/* No top-right samples: replicate the last sample of the row above
 * four times into tr and predict from that instead. */
2530 if(!topright_avail){
2531 tr= ptr[3 - linesize]*0x01010101;
2532 topright= (uint8_t*) &tr;
2533 }else
2534 topright= ptr + 4 - linesize;
ac0623b2 2535 }else
aebb5d6d
MN
2536 topright= NULL;
2537
2538 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2539 nnz = h->non_zero_count_cache[ scan8[i] ];
2540 if(nnz){
2541 if(is_h264){
2542 if(nnz == 1 && h->mb[i*16])
2543 idct_dc_add(ptr, h->mb + i*16, linesize);
2544 else
2545 idct_add (ptr, h->mb + i*16, linesize);
2546 }else
2547 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2548 }
ac0623b2 2549 }
41e4055b 2550 }
8b82a956 2551 }
0da71265 2552 }
e7e09b49 2553 }else{
c92a30bb 2554 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2555 if(is_h264){
36940eca 2556 if(!transform_bypass)
93f0c0a4 2557 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2558 }else
e7e09b49 2559 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2560 }
/* Second border pass after intra prediction, this time with xchg=0. */
5f7f9719 2561 if(h->deblocking_filter)
93cc10fa 2562 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2563 }else if(is_h264){
e7e09b49 2564 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2565 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2566 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2567 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2568 }
e7e09b49
LLL
2569
2570
/* Luma residual for everything except intra 4x4, whose residual was
 * already added block by block above. */
2571 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2572 if(is_h264){
ef9d1d15 2573 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
2574 if(transform_bypass){
2575 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
2576 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2577 }else{
2578 for(i=0; i<16; i++){
2579 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 2580 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2581 }
2fd1f0e0
MN
2582 }
2583 }else{
2584 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 2585 }
49c084a7 2586 }else if(h->cbp&15){
2fd1f0e0 2587 if(transform_bypass){
0a8ca22f 2588 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 2589 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 2590 for(i=0; i<16; i+=di){
62bc966f 2591 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 2592 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2593 }
ef9d1d15 2594 }
2fd1f0e0
MN
2595 }else{
2596 if(IS_8x8DCT(mb_type)){
2597 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2598 }else{
2599 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2600 }
2601 }
4704097a 2602 }
e7e09b49
LLL
2603 }else{
2604 for(i=0; i<16; i++){
2605 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2606 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2607 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2608 }
4704097a 2609 }
0da71265
MN
2610 }
2611 }
0da71265 2612
/* Chroma residual: only when chroma is decoded and cbp has a bit set in
 * 0x30; blocks 16..19 go to dest_cb and 20..23 to dest_cr. */
49fb20cb 2613 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2614 uint8_t *dest[2] = {dest_cb, dest_cr};
2615 if(transform_bypass){
96465b90
MN
2616 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2617 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2618 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2619 }else{
c25ac15a 2620 idct_add = s->dsp.add_pixels4;
96465b90
MN
2621 for(i=16; i<16+8; i++){
2622 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2623 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2624 }
2625 }
ef9d1d15 2626 }else{
4691a77d
2627 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2628 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 2629 if(is_h264){
c25ac15a
MN
2630 idct_add = s->dsp.h264_idct_add;
2631 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
2632 for(i=16; i<16+8; i++){
2633 if(h->non_zero_count_cache[ scan8[i] ])
2634 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2635 else if(h->mb[i*16])
2636 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2637 }
aebb5d6d
MN
2638 }else{
2639 for(i=16; i<16+8; i++){
2640 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2641 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2642 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2643 }
e7e09b49 2644 }
4704097a 2645 }
0da71265
MN
2646 }
2647 }
2648 }
c212fb0c
MN
2649 if(h->cbp || IS_INTRA(mb_type))
2650 s->dsp.clear_blocks(h->mb);
2651
/* Loop filter: save this macroblock's unfiltered border first, refill the
 * caches, recompute both chroma QPs from the stored qscale, then filter.
 * The general filter_mb() is used only on the !simple MBAFF path. */
53c05b1e 2652 if(h->deblocking_filter) {
5f7f9719
MN
2653 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2654 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2655 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2656 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2657 if (!simple && FRAME_MBAFF) {
5f7f9719 2658 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2659 } else {
3e20143e 2660 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2661 }
53c05b1e 2662 }
0da71265
MN
2663}
2664
0da71265 2665/**
bd91fee3
AS
2666 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2667 */
2668static void hl_decode_mb_simple(H264Context *h){
2669 hl_decode_mb_internal(h, 1);
2670}
2671
2672/**
2673 * Process a macroblock; this handles edge cases, such as interlacing.
2674 */
2675static void av_noinline hl_decode_mb_complex(H264Context *h){
2676 hl_decode_mb_internal(h, 0);
2677}
2678
2679static void hl_decode_mb(H264Context *h){
2680 MpegEncContext * const s = &h->s;
64514ee8 2681 const int mb_xy= h->mb_xy;
bd91fee3 2682 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 2683 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2684
bd91fee3
AS
2685 if (is_complex)
2686 hl_decode_mb_complex(h);
2687 else hl_decode_mb_simple(h);
2688}
2689
2143b118 2690static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2691 int i;
2692 for (i = 0; i < 4; ++i) {
2143b118 2693 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2694 pic->data[i] += pic->linesize[i];
2143b118 2695 pic->reference = parity;
11cc1d8c
JD
2696 pic->linesize[i] *= 2;
2697 }
2879c75f 2698 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2699}
2700
2701static int split_field_copy(Picture *dest, Picture *src,
2702 int parity, int id_add){
2703 int match = !!(src->reference & parity);
2704
2705 if (match) {
2706 *dest = *src;
d4f7d838 2707 if(parity != PICT_FRAME){
b3e93fd4
MN
2708 pic_as_field(dest, parity);
2709 dest->pic_id *= 2;
2710 dest->pic_id += id_add;
d4f7d838 2711 }
11cc1d8c
JD
2712 }
2713
2714 return match;
2715}
2716
d4f7d838
MN
2717static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2718 int i[2]={0};
2719 int index=0;
11cc1d8c 2720
d4f7d838
MN
2721 while(i[0]<len || i[1]<len){
2722 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2723 i[0]++;
2724 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2725 i[1]++;
2726 if(i[0] < len){
2727 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2728 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2729 }
2730 if(i[1] < len){
2731 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2732 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2733 }
2734 }
2735
d4f7d838 2736 return index;
11cc1d8c
JD
2737}
2738
d4f7d838
MN
2739static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2740 int i, best_poc;
2741 int out_i= 0;
11cc1d8c 2742
d4f7d838
MN
2743 for(;;){
2744 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2745
d4f7d838
MN
2746 for(i=0; i<len; i++){
2747 const int poc= src[i]->poc;
2748 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2749 best_poc= poc;
2750 sorted[out_i]= src[i];
2751 }
2752 }
2753 if(best_poc == (dir ? INT_MIN : INT_MAX))
2754 break;
2755 limit= sorted[out_i++]->poc - dir;
2756 }
2757 return out_i;
11cc1d8c
JD
2758}
2759
bd91fee3 2760/**
0da71265
MN
2761 * fills the default_ref_list.
2762 */
2763static int fill_default_ref_list(H264Context *h){
2764 MpegEncContext * const s = &h->s;
d4f7d838 2765 int i, len;
115329f1 2766
9f5c1037 2767 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2768 Picture *sorted[32];
2769 int cur_poc, list;
2770 int lens[2];
11cc1d8c 2771
d4f7d838
MN
2772 if(FIELD_PICTURE)
2773 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2774 else
2775 cur_poc= s->current_picture_ptr->poc;
086acdd5 2776
d4f7d838
MN
2777 for(list= 0; list<2; list++){
2778 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2779 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2780 assert(len<=32);
2781 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2782 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2783 assert(len<=32);
086acdd5 2784
d4f7d838
MN
2785 if(len < h->ref_count[list])
2786 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2787 lens[list]= len;
086acdd5
JD
2788 }
2789
d4f7d838
MN
2790 if(lens[0] == lens[1] && lens[1] > 1){
2791 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2792 if(i == lens[0])
2793 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2794 }
086acdd5 2795 }else{
d4f7d838
MN
2796 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2797 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2798 assert(len <= 32);
2799 if(len < h->ref_count[0])
2800 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2801 }
827c91bf
LLL
2802#ifdef TRACE
2803 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2804 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2805 }
9f5c1037 2806 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2807 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2808 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2809 }
2810 }
2811#endif
0da71265
MN
2812 return 0;
2813}
2814
827c91bf
LLL
2815static void print_short_term(H264Context *h);
2816static void print_long_term(H264Context *h);
2817
949da388
JD
2818/**
2819 * Extract structure information about the picture described by pic_num in
2820 * the current decoding context (frame or field). Note that pic_num is
2821 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2822 * @param pic_num picture number for which to extract structure information
2823 * @param structure one of PICT_XXX describing structure of picture
2824 * with pic_num
2825 * @return frame number (short term) or long term index of picture
2826 * described by pic_num
2827 */
2828static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2829 MpegEncContext * const s = &h->s;
2830
2831 *structure = s->picture_structure;
2832 if(FIELD_PICTURE){
2833 if (!(pic_num & 1))
2834 /* opposite field */
2835 *structure ^= PICT_FRAME;
2836 pic_num >>= 1;
2837 }
2838
2839 return pic_num;
2840}
2841
0da71265
MN
2842static int decode_ref_pic_list_reordering(H264Context *h){
2843 MpegEncContext * const s = &h->s;
949da388 2844 int list, index, pic_structure;
115329f1 2845
827c91bf
LLL
2846 print_short_term(h);
2847 print_long_term(h);
115329f1 2848
3425501d 2849 for(list=0; list<h->list_count; list++){
0da71265
MN
2850 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2851
2852 if(get_bits1(&s->gb)){
2853 int pred= h->curr_pic_num;
0da71265
MN
2854
2855 for(index=0; ; index++){
9963b332 2856 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
88e7a4d1 2857 unsigned int pic_id;
0da71265 2858 int i;
2f944356 2859 Picture *ref = NULL;
115329f1
DB
2860
2861 if(reordering_of_pic_nums_idc==3)
0bc42cad 2862 break;
115329f1 2863
0da71265 2864 if(index >= h->ref_count[list]){
9b879566 2865 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2866 return -1;
2867 }
115329f1 2868
0da71265
MN
2869 if(reordering_of_pic_nums_idc<3){
2870 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2871 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2872 int frame_num;
0da71265 2873
03d3cab8 2874 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2875 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2876 return -1;
2877 }
2878
2879 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2880 else pred+= abs_diff_pic_num;
2881 pred &= h->max_pic_num - 1;
115329f1 2882
949da388
JD
2883 frame_num = pic_num_extract(h, pred, &pic_structure);
2884
0d175622
MN
2885 for(i= h->short_ref_count-1; i>=0; i--){
2886 ref = h->short_ref[i];
949da388 2887 assert(ref->reference);
0d175622 2888 assert(!ref->long_ref);
6edac8e1 2889 if(
af8c5e08
MN
2890 ref->frame_num == frame_num &&
2891 (ref->reference & pic_structure)
6edac8e1 2892 )
0da71265
MN
2893 break;
2894 }
0d175622 2895 if(i>=0)
949da388 2896 ref->pic_id= pred;
0da71265 2897 }else{
949da388 2898 int long_idx;
0da71265 2899 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2900
2901 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2902
2903 if(long_idx>31){
88e7a4d1
MN
2904 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2905 return -1;
2906 }
949da388
JD
2907 ref = h->long_ref[long_idx];
2908 assert(!(ref && !ref->reference));
af8c5e08 2909 if(ref && (ref->reference & pic_structure)){
ac658be5 2910 ref->pic_id= pic_id;
ac658be5
FOL
2911 assert(ref->long_ref);
2912 i=0;
2913 }else{
2914 i=-1;
2915 }
0da71265
MN
2916 }
2917
0d315f28 2918 if (i < 0) {
9b879566 2919 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2920 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2921 } else {
2922 for(i=index; i+1<h->ref_count[list]; i++){
2923 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2924 break;
21be92bf
MN
2925 }
2926 for(; i > index; i--){
2927 h->ref_list[list][i]= h->ref_list[list][i-1];
2928 }
0d175622 2929 h->ref_list[list][index]= *ref;
949da388 2930 if (FIELD_PICTURE){
2143b118 2931 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2932 }
0da71265 2933 }
0bc42cad 2934 }else{
9b879566 2935 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2936 return -1;
2937 }
2938 }
2939 }
0da71265 2940 }
3425501d 2941 for(list=0; list<h->list_count; list++){
6ab87211 2942 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2943 if(!h->ref_list[list][index].data[0]){
2944 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2945 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2946 }
6ab87211 2947 }
6ab87211 2948 }
115329f1 2949
115329f1 2950 return 0;
0da71265
MN
2951}
2952
91c58c94 2953static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2954 int list, i, j;
3425501d 2955 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2956 for(i=0; i<h->ref_count[list]; i++){
2957 Picture *frame = &h->ref_list[list][i];
2958 Picture *field = &h->ref_list[list][16+2*i];
2959 field[0] = *frame;
2960 for(j=0; j<3; j++)
2961 field[0].linesize[j] <<= 1;
2143b118 2962 field[0].reference = PICT_TOP_FIELD;
078f42dd 2963 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2964 field[1] = field[0];
2965 for(j=0; j<3; j++)
2966 field[1].data[j] += frame->linesize[j];
2143b118 2967 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2968 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2969
2970 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2971 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2972 for(j=0; j<2; j++){
2973 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2974 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2975 }
2976 }
2977 }
2978 for(j=0; j<h->ref_count[1]; j++){
2979 for(i=0; i<h->ref_count[0]; i++)
2980 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2981 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2982 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2983 }
2984}
2985
0da71265
MN
2986static int pred_weight_table(H264Context *h){
2987 MpegEncContext * const s = &h->s;
2988 int list, i;
9f2d1b4f 2989 int luma_def, chroma_def;
115329f1 2990
9f2d1b4f
LM
2991 h->use_weight= 0;
2992 h->use_weight_chroma= 0;
0da71265
MN
2993 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2994 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2995 luma_def = 1<<h->luma_log2_weight_denom;
2996 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2997
2998 for(list=0; list<2; list++){
cb99c652
GB
2999 h->luma_weight_flag[list] = 0;
3000 h->chroma_weight_flag[list] = 0;
0da71265
MN
3001 for(i=0; i<h->ref_count[list]; i++){
3002 int luma_weight_flag, chroma_weight_flag;
115329f1 3003
0da71265
MN
3004 luma_weight_flag= get_bits1(&s->gb);
3005 if(luma_weight_flag){
3006 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3007 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f 3008 if( h->luma_weight[list][i] != luma_def
cb99c652 3009 || h->luma_offset[list][i] != 0) {
9f2d1b4f 3010 h->use_weight= 1;
cb99c652
GB
3011 h->luma_weight_flag[list]= 1;
3012 }
9f2d1b4f
LM
3013 }else{
3014 h->luma_weight[list][i]= luma_def;
3015 h->luma_offset[list][i]= 0;
0da71265
MN
3016 }
3017
0af6967e 3018 if(CHROMA){
fef744d4
MN
3019 chroma_weight_flag= get_bits1(&s->gb);
3020 if(chroma_weight_flag){
3021 int j;
3022 for(j=0; j<2; j++){
3023 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3024 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3025 if( h->chroma_weight[list][i][j] != chroma_def
cb99c652 3026 || h->chroma_offset[list][i][j] != 0) {
fef744d4 3027 h->use_weight_chroma= 1;
cb99c652
GB
3028 h->chroma_weight_flag[list]= 1;
3029 }
fef744d4
MN
3030 }
3031 }else{
3032 int j;
3033 for(j=0; j<2; j++){
3034 h->chroma_weight[list][i][j]= chroma_def;
3035 h->chroma_offset[list][i][j]= 0;
3036 }
0da71265
MN
3037 }
3038 }
3039 }
9f5c1037 3040 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 3041 }
9f2d1b4f 3042 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3043 return 0;
3044}
3045
9f2d1b4f
LM
3046static void implicit_weight_table(H264Context *h){
3047 MpegEncContext * const s = &h->s;
cb99c652 3048 int ref0, ref1, i;
9f2d1b4f
LM
3049 int cur_poc = s->current_picture_ptr->poc;
3050
ce09f927
GB
3051 for (i = 0; i < 2; i++) {
3052 h->luma_weight_flag[i] = 0;
3053 h->chroma_weight_flag[i] = 0;
3054 }
3055
9f2d1b4f
LM
3056 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3057 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3058 h->use_weight= 0;
3059 h->use_weight_chroma= 0;
3060 return;
3061 }
3062
3063 h->use_weight= 2;
3064 h->use_weight_chroma= 2;
3065 h->luma_log2_weight_denom= 5;
3066 h->chroma_log2_weight_denom= 5;
3067
9f2d1b4f
LM
3068 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3069 int poc0 = h->ref_list[0][ref0].poc;
3070 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3071 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3072 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3073 if(td){
f66e4f5f 3074 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3075 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3076 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3077 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3078 h->implicit_weight[ref0][ref1] = 32;
3079 else
3080 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3081 }else
3082 h->implicit_weight[ref0][ref1] = 32;
3083 }
3084 }
3085}
3086
8fd57a66
JD
3087/**
3088 * Mark a picture as no longer needed for reference. The refmask
3089 * argument allows unreferencing of individual fields or the whole frame.
3090 * If the picture becomes entirely unreferenced, but is being held for
3091 * display purposes, it is marked as such.
3092 * @param refmask mask of fields to unreference; the mask is bitwise
3093 * anded with the reference marking of pic
3094 * @return non-zero if pic becomes entirely unreferenced (except possibly
3095 * for display purposes) zero if one of the fields remains in
3096 * reference
3097 */
3098static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3099 int i;
8fd57a66
JD
3100 if (pic->reference &= refmask) {
3101 return 0;
3102 } else {
79f4494a
MN
3103 for(i = 0; h->delayed_pic[i]; i++)
3104 if(pic == h->delayed_pic[i]){
3105 pic->reference=DELAYED_PIC_REF;
3106 break;
3107 }
8fd57a66
JD
3108 return 1;
3109 }
4e4d983e
LM
3110}
3111
0da71265 3112/**
5175b937 3113 * instantaneous decoder refresh.
0da71265
MN
3114 */
3115static void idr(H264Context *h){
4e4d983e 3116 int i;
0da71265 3117
dc032f33 3118 for(i=0; i<16; i++){
9c0e4624 3119 remove_long(h, i, 0);
0da71265 3120 }
849b9cef 3121 assert(h->long_ref_count==0);
0da71265
MN
3122
3123 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3124 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3125 h->short_ref[i]= NULL;
3126 }
3127 h->short_ref_count=0;
a149c1a5 3128 h->prev_frame_num= 0;
80f8e035
MN
3129 h->prev_frame_num_offset= 0;
3130 h->prev_poc_msb=
3131 h->prev_poc_lsb= 0;
0da71265
MN
3132}
3133
7c33ad19
LM
3134/* forget old pics after a seek */
3135static void flush_dpb(AVCodecContext *avctx){
3136 H264Context *h= avctx->priv_data;
3137 int i;
64b9d48f 3138 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3139 if(h->delayed_pic[i])
3140 h->delayed_pic[i]->reference= 0;
7c33ad19 3141 h->delayed_pic[i]= NULL;
285b570f 3142 }
df8a7dff 3143 h->outputed_poc= INT_MIN;
7c33ad19 3144 idr(h);
ca159196
MR
3145 if(h->s.current_picture_ptr)
3146 h->s.current_picture_ptr->reference= 0;
12d96de3 3147 h->s.first_field= 0;
37a558fe 3148 h->sei_recovery_frame_cnt = -1;
ff594f81 3149 h->sei_dpb_output_delay = 0;
cf6065ca 3150 h->sei_cpb_removal_delay = -1;
e240f898 3151 ff_mpeg_flush(avctx);
7c33ad19
LM
3152}
3153
0da71265 3154/**
47e112f8
JD
3155 * Find a Picture in the short term reference list by frame number.
3156 * @param frame_num frame number to search for
3157 * @param idx the index into h->short_ref where returned picture is found
3158 * undefined if no picture found.
3159 * @return pointer to the found picture, or NULL if no pic with the provided
3160 * frame number is found
0da71265 3161 */
47e112f8 3162static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce