Integrate get_te0_golomb() calls into the code, this allows some checks
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265
MN
22/**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265 34#include "golomb.h"
626464fb 35#include "rectangle.h"
0da71265 36
e5017ab8 37#include "cabac.h"
52cb7981 38#ifdef ARCH_X86
a6493a8f 39#include "x86/h264_i386.h"
52cb7981 40#endif
e5017ab8 41
2848ce84 42//#undef NDEBUG
0da71265
MN
43#include <assert.h>
44
2ddcf84b
JD
45/**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
48 */
49#define DELAYED_PIC_REF 4
50
0da71265 51static VLC coeff_token_vlc[4];
910e3668
AC
52static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
54
0da71265 55static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
56static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
58
59static VLC total_zeros_vlc[15];
910e3668
AC
60static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61static const int total_zeros_vlc_tables_size = 512;
62
0da71265 63static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
64static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
66
67static VLC run_vlc[6];
910e3668
AC
68static VLC_TYPE run_vlc_tables[6][8][2];
69static const int run_vlc_tables_size = 8;
70
0da71265 71static VLC run7_vlc;
910e3668
AC
72static VLC_TYPE run7_vlc_table[96][2];
73static const int run7_vlc_table_size = 96;
0da71265 74
8b82a956
MN
75static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 77static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 78static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 79static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 80
/**
 * Packs the low 16 bits of two values into one 32-bit word.
 * On both endiannesses a lands in the half of the word that comes first in
 * memory and b in the half that comes second.
 *
 * The shift is done in uint32_t: left-shifting a negative int is undefined
 * behaviour, and callers do pass negative values (e.g. reference indices).
 *
 * @param a value for the first 16 bits in memory order
 * @param b value for the second 16 bits in memory order
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + ((uint32_t)a<<16);
#else
   return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
88
/** rem6[i] == i % 6 for i in 0..51. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/** div6[i] == i / 6 for i in 0..51. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

/**
 * Index tables for reading data of the left neighbour(s).
 * fill_caches() selects one row as its left_block pointer.
 */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};

#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
106
70abb407 107static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 108 MpegEncContext * const s = &h->s;
64514ee8 109 const int mb_xy= h->mb_xy;
0da71265
MN
110 int topleft_xy, top_xy, topright_xy, left_xy[2];
111 int topleft_type, top_type, topright_type, left_type[2];
cac55c91 112 const int * left_block;
02f7695b 113 int topleft_partition= -1;
0da71265
MN
114 int i;
115
36e097bc
JD
116 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
117
717b1733 118 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 119 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
120 return;
121
2cab6401
DB
122 /* Wow, what a mess, why didn't they simplify the interlacing & intra
123 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 124
6867a90b
LLL
125 topleft_xy = top_xy - 1;
126 topright_xy= top_xy + 1;
127 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 128 left_block = left_block_options[0];
5d18eaad 129 if(FRAME_MBAFF){
6867a90b
LLL
130 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
131 const int top_pair_xy = pair_xy - s->mb_stride;
132 const int topleft_pair_xy = top_pair_xy - 1;
133 const int topright_pair_xy = top_pair_xy + 1;
6f3c50f2
MN
134 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
135 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
136 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
137 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
138 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
6867a90b 139 const int bottom = (s->mb_y & 1);
6f3c50f2 140 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
60c6ba7a 141
6f3c50f2 142 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
6867a90b
LLL
143 top_xy -= s->mb_stride;
144 }
6f3c50f2 145 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
6867a90b 146 topleft_xy -= s->mb_stride;
6f3c50f2 147 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
02f7695b 148 topleft_xy += s->mb_stride;
1412060e 149 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 150 topleft_partition = 0;
6867a90b 151 }
6f3c50f2 152 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
6867a90b
LLL
153 topright_xy -= s->mb_stride;
154 }
6f3c50f2 155 if (left_mb_field_flag != curr_mb_field_flag) {
6867a90b 156 left_xy[1] = left_xy[0] = pair_xy - 1;
6f3c50f2
MN
157 if (curr_mb_field_flag) {
158 left_xy[1] += s->mb_stride;
159 left_block = left_block_options[3];
160 } else {
03a035e0 161 left_block= left_block_options[2 - bottom];
6867a90b
LLL
162 }
163 }
0da71265
MN
164 }
165
826de46e
LLL
166 h->top_mb_xy = top_xy;
167 h->left_mb_xy[0] = left_xy[0];
168 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 169 if(for_deblock){
717b1733
LM
170 topleft_type = 0;
171 topright_type = 0;
b735aeea
MN
172 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
173 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
174 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 175
e248cb60 176 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 177 int list;
3425501d 178 for(list=0; list<h->list_count; list++){
e248cb60
MN
179 //These values where changed for ease of performing MC, we need to change them back
180 //FIXME maybe we can make MC and loop filter use the same values or prevent
181 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 182 if(USES_LIST(mb_type,list)){
191e8ca7 183 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 184 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 185 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
186 ref += h->b8_stride;
187 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 188 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
189 }
190 }
191 }
46f2f05f
MN
192 }else{
193 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
194 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
195 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
196 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
197 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
198
199 if(IS_INTRA(mb_type)){
faa7e394 200 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
201 h->topleft_samples_available=
202 h->top_samples_available=
0da71265
MN
203 h->left_samples_available= 0xFFFF;
204 h->topright_samples_available= 0xEEEA;
205
faa7e394 206 if(!(top_type & type_mask)){
0da71265
MN
207 h->topleft_samples_available= 0xB3FF;
208 h->top_samples_available= 0x33FF;
209 h->topright_samples_available= 0x26EA;
210 }
d1d10e91
MN
211 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
212 if(IS_INTERLACED(mb_type)){
faa7e394 213 if(!(left_type[0] & type_mask)){
d1d10e91
MN
214 h->topleft_samples_available&= 0xDFFF;
215 h->left_samples_available&= 0x5FFF;
216 }
faa7e394 217 if(!(left_type[1] & type_mask)){
d1d10e91
MN
218 h->topleft_samples_available&= 0xFF5F;
219 h->left_samples_available&= 0xFF5F;
220 }
221 }else{
222 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
223 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
224 assert(left_xy[0] == left_xy[1]);
faa7e394 225 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
226 h->topleft_samples_available&= 0xDF5F;
227 h->left_samples_available&= 0x5F5F;
228 }
229 }
230 }else{
faa7e394 231 if(!(left_type[0] & type_mask)){
0da71265
MN
232 h->topleft_samples_available&= 0xDF5F;
233 h->left_samples_available&= 0x5F5F;
234 }
235 }
115329f1 236
faa7e394 237 if(!(topleft_type & type_mask))
0da71265 238 h->topleft_samples_available&= 0x7FFF;
115329f1 239
faa7e394 240 if(!(topright_type & type_mask))
0da71265 241 h->topright_samples_available&= 0xFBFF;
115329f1 242
0da71265
MN
243 if(IS_INTRA4x4(mb_type)){
244 if(IS_INTRA4x4(top_type)){
245 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
246 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
247 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
248 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
249 }else{
250 int pred;
faa7e394 251 if(!(top_type & type_mask))
0da71265 252 pred= -1;
6fbcaaa0
LLL
253 else{
254 pred= 2;
0da71265
MN
255 }
256 h->intra4x4_pred_mode_cache[4+8*0]=
257 h->intra4x4_pred_mode_cache[5+8*0]=
258 h->intra4x4_pred_mode_cache[6+8*0]=
259 h->intra4x4_pred_mode_cache[7+8*0]= pred;
260 }
261 for(i=0; i<2; i++){
262 if(IS_INTRA4x4(left_type[i])){
263 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
264 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
265 }else{
266 int pred;
faa7e394 267 if(!(left_type[i] & type_mask))
0da71265 268 pred= -1;
6fbcaaa0
LLL
269 else{
270 pred= 2;
0da71265
MN
271 }
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
274 }
275 }
276 }
277 }
29671011 278 }
115329f1
DB
279
280
0da71265 281/*
115329f1
DB
2820 . T T. T T T T
2831 L . .L . . . .
2842 L . .L . . . .
2853 . T TL . . . .
2864 L . .L . . . .
2875 L . .. . . . .
0da71265 288*/
1412060e 289//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 290 if(top_type){
6867a90b
LLL
291 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
292 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
293 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 294 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 295
6867a90b 296 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 297 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 298
6867a90b 299 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 300 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 301
0da71265 302 }else{
115329f1 303 h->non_zero_count_cache[4+8*0]=
0da71265
MN
304 h->non_zero_count_cache[5+8*0]=
305 h->non_zero_count_cache[6+8*0]=
306 h->non_zero_count_cache[7+8*0]=
115329f1 307
0da71265
MN
308 h->non_zero_count_cache[1+8*0]=
309 h->non_zero_count_cache[2+8*0]=
115329f1 310
0da71265 311 h->non_zero_count_cache[1+8*3]=
3981c385 312 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 313
0da71265 314 }
826de46e 315
6867a90b
LLL
316 for (i=0; i<2; i++) {
317 if(left_type[i]){
318 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
319 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
320 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
321 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 322 }else{
115329f1
DB
323 h->non_zero_count_cache[3+8*1 + 2*8*i]=
324 h->non_zero_count_cache[3+8*2 + 2*8*i]=
325 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 326 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
327 }
328 }
329
330 if( h->pps.cabac ) {
331 // top_cbp
332 if(top_type) {
333 h->top_cbp = h->cbp_table[top_xy];
334 } else if(IS_INTRA(mb_type)) {
335 h->top_cbp = 0x1C0;
336 } else {
337 h->top_cbp = 0;
338 }
339 // left_cbp
340 if (left_type[0]) {
341 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
342 } else if(IS_INTRA(mb_type)) {
343 h->left_cbp = 0x1C0;
344 } else {
345 h->left_cbp = 0;
346 }
347 if (left_type[0]) {
348 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
349 }
350 if (left_type[1]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 352 }
0da71265 353 }
6867a90b 354
0da71265 355#if 1
e2e5894a 356 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 357 int list;
3425501d 358 for(list=0; list<h->list_count; list++){
e2e5894a 359 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
360 /*if(!h->mv_cache_clean[list]){
361 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
362 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
363 h->mv_cache_clean[list]= 1;
364 }*/
5ad984c9 365 continue;
0da71265
MN
366 }
367 h->mv_cache_clean[list]= 0;
115329f1 368
53b19144 369 if(USES_LIST(top_type, list)){
0da71265
MN
370 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
371 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
376 h->ref_cache[list][scan8[0] + 0 - 1*8]=
377 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
378 h->ref_cache[list][scan8[0] + 2 - 1*8]=
379 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
380 }else{
115329f1
DB
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
385 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
386 }
387
4672503d
LM
388 for(i=0; i<2; i++){
389 int cache_idx = scan8[0] - 1 + i*2*8;
390 if(USES_LIST(left_type[i], list)){
391 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
392 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
393 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
394 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
395 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
396 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
397 }else{
398 *(uint32_t*)h->mv_cache [list][cache_idx ]=
399 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
400 h->ref_cache[list][cache_idx ]=
401 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
402 }
0da71265
MN
403 }
404
0281d325 405 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
406 continue;
407
53b19144 408 if(USES_LIST(topleft_type, list)){
02f7695b
LM
409 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
410 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
413 }else{
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
416 }
115329f1 417
53b19144 418 if(USES_LIST(topright_type, list)){
e2e5894a
LM
419 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
420 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
421 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 }else{
424 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
426 }
e2e5894a 427
ae08a563 428 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 429 continue;
115329f1
DB
430
431 h->ref_cache[list][scan8[5 ]+1] =
432 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 433 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 434 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
435 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
436 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
437 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 438 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
439 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
440 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
441
442 if( h->pps.cabac ) {
443 /* XXX beurk, Load mvd */
53b19144 444 if(USES_LIST(top_type, list)){
9e528114
LA
445 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
450 }else{
115329f1
DB
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
455 }
53b19144 456 if(USES_LIST(left_type[0], list)){
9e528114
LA
457 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
458 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
460 }else{
461 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
463 }
53b19144 464 if(USES_LIST(left_type[1], list)){
9e528114
LA
465 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
468 }else{
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
471 }
472 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
473 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 474 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
475 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
476 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 477
9f5c1037 478 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
479 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
480
481 if(IS_DIRECT(top_type)){
482 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
483 }else if(IS_8X8(top_type)){
484 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
485 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
486 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
487 }else{
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
489 }
115329f1 490
5d18eaad
LM
491 if(IS_DIRECT(left_type[0]))
492 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
493 else if(IS_8X8(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
495 else
496 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
497
498 if(IS_DIRECT(left_type[1]))
5ad984c9 499 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
500 else if(IS_8X8(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
502 else
5ad984c9 503 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
504 }
505 }
506
507 if(FRAME_MBAFF){
508#define MAP_MVS\
509 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
510 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
515 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
516 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
518 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
519 if(MB_FIELD){
520#define MAP_F2F(idx, mb_type)\
521 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
522 h->ref_cache[list][idx] <<= 1;\
523 h->mv_cache[list][idx][1] /= 2;\
524 h->mvd_cache[list][idx][1] /= 2;\
525 }
526 MAP_MVS
527#undef MAP_F2F
528 }else{
529#define MAP_F2F(idx, mb_type)\
530 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] >>= 1;\
532 h->mv_cache[list][idx][1] <<= 1;\
533 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 534 }
5d18eaad
LM
535 MAP_MVS
536#undef MAP_F2F
5ad984c9 537 }
9e528114 538 }
0da71265 539 }
0da71265
MN
540 }
541#endif
43efd19a
LM
542
543 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
544}
545
546static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 547 const int mb_xy= h->mb_xy;
0da71265
MN
548
549 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
550 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
551 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
552 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
553 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
554 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
555 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
556}
557
558/**
559 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
560 */
561static inline int check_intra4x4_pred_mode(H264Context *h){
562 MpegEncContext * const s = &h->s;
563 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
564 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
565 int i;
115329f1 566
0da71265
MN
567 if(!(h->top_samples_available&0x8000)){
568 for(i=0; i<4; i++){
569 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
570 if(status<0){
9b879566 571 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
572 return -1;
573 } else if(status){
574 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
575 }
576 }
577 }
115329f1 578
d1d10e91
MN
579 if((h->left_samples_available&0x8888)!=0x8888){
580 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 581 for(i=0; i<4; i++){
d1d10e91 582 if(!(h->left_samples_available&mask[i])){
26695973
MN
583 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
584 if(status<0){
585 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
586 return -1;
587 } else if(status){
588 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
589 }
d1d10e91 590 }
0da71265
MN
591 }
592 }
593
594 return 0;
595} //FIXME cleanup like next
596
597/**
598 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
599 */
600static inline int check_intra_pred_mode(H264Context *h, int mode){
601 MpegEncContext * const s = &h->s;
602 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
603 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 604
43ff0714 605 if(mode > 6U) {
5175b937 606 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 607 return -1;
5175b937 608 }
115329f1 609
0da71265
MN
610 if(!(h->top_samples_available&0x8000)){
611 mode= top[ mode ];
612 if(mode<0){
9b879566 613 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
614 return -1;
615 }
616 }
115329f1 617
d1d10e91 618 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 619 mode= left[ mode ];
d1d10e91
MN
620 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
621 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
622 }
0da71265 623 if(mode<0){
9b879566 624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 625 return -1;
115329f1 626 }
0da71265
MN
627 }
628
629 return mode;
630}
631
632/**
633 * gets the predicted intra4x4 prediction mode.
634 */
635static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
640
a9c9a240 641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
642
643 if(min<0) return DC_PRED;
644 else return min;
645}
646
647static inline void write_back_non_zero_count(H264Context *h){
64514ee8 648 const int mb_xy= h->mb_xy;
0da71265 649
6867a90b
LLL
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 657
6867a90b 658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 661
6867a90b 662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
665}
666
667/**
1412060e 668 * gets the predicted number of non-zero coefficients.
0da71265
MN
669 * @param n block index
670 */
671static inline int pred_non_zero_count(H264Context *h, int n){
672 const int index8= scan8[n];
673 const int left= h->non_zero_count_cache[index8 - 1];
674 const int top = h->non_zero_count_cache[index8 - 8];
675 int i= left + top;
115329f1 676
0da71265
MN
677 if(i<64) i= (i+1)>>1;
678
a9c9a240 679 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
680
681 return i&31;
682}
683
1924f3ce
MN
684static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
685 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 686 MpegEncContext *s = &h->s;
1924f3ce 687
5d18eaad
LM
688 /* there is no consistent mapping of mvs to neighboring locations that will
689 * make mbaff happy, so we can't move all this logic to fill_caches */
690 if(FRAME_MBAFF){
191e8ca7 691 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
692 const int16_t *mv;
693 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
694 *C = h->mv_cache[list][scan8[0]-2];
695
696 if(!MB_FIELD
697 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
698 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
699 if(IS_INTERLACED(mb_types[topright_xy])){
700#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
701 const int x4 = X4, y4 = Y4;\
702 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 703 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
704 return LIST_NOT_USED;\
705 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
706 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
707 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
708 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
709
710 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
711 }
712 }
713 if(topright_ref == PART_NOT_AVAILABLE
714 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
715 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
716 if(!MB_FIELD
717 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
718 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
719 }
720 if(MB_FIELD
721 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
722 && i >= scan8[0]+8){
1412060e 723 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 724 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
725 }
726 }
727#undef SET_DIAG_MV
728 }
729
1924f3ce
MN
730 if(topright_ref != PART_NOT_AVAILABLE){
731 *C= h->mv_cache[list][ i - 8 + part_width ];
732 return topright_ref;
733 }else{
a9c9a240 734 tprintf(s->avctx, "topright MV not available\n");
95c26348 735
1924f3ce
MN
736 *C= h->mv_cache[list][ i - 8 - 1 ];
737 return h->ref_cache[list][ i - 8 - 1 ];
738 }
739}
740
0da71265
MN
741/**
742 * gets the predicted MV.
743 * @param n the block index
744 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
745 * @param mx the x component of the predicted motion vector
746 * @param my the y component of the predicted motion vector
747 */
748static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
749 const int index8= scan8[n];
750 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
751 const int left_ref= h->ref_cache[list][ index8 - 1 ];
752 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
753 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
754 const int16_t * C;
755 int diagonal_ref, match_count;
756
0da71265 757 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 758
0da71265 759/* mv_cache
115329f1 760 B . . A T T T T
0da71265
MN
761 U . . L . . , .
762 U . . L . . . .
763 U . . L . . , .
764 . . . L . . . .
765*/
1924f3ce
MN
766
767 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
768 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 769 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
770 if(match_count > 1){ //most common
771 *mx= mid_pred(A[0], B[0], C[0]);
772 *my= mid_pred(A[1], B[1], C[1]);
773 }else if(match_count==1){
774 if(left_ref==ref){
775 *mx= A[0];
115329f1 776 *my= A[1];
1924f3ce
MN
777 }else if(top_ref==ref){
778 *mx= B[0];
115329f1 779 *my= B[1];
0da71265 780 }else{
1924f3ce 781 *mx= C[0];
115329f1 782 *my= C[1];
0da71265
MN
783 }
784 }else{
1924f3ce 785 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 786 *mx= A[0];
115329f1 787 *my= A[1];
0da71265 788 }else{
1924f3ce
MN
789 *mx= mid_pred(A[0], B[0], C[0]);
790 *my= mid_pred(A[1], B[1], C[1]);
0da71265 791 }
0da71265 792 }
115329f1 793
a9c9a240 794 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
795}
796
797/**
798 * gets the directionally predicted 16x8 MV.
799 * @param n the block index
800 * @param mx the x component of the predicted motion vector
801 * @param my the y component of the predicted motion vector
802 */
803static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
804 if(n==0){
805 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
806 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
807
a9c9a240 808 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 809
0da71265
MN
810 if(top_ref == ref){
811 *mx= B[0];
812 *my= B[1];
813 return;
814 }
815 }else{
816 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
817 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 818
a9c9a240 819 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
820
821 if(left_ref == ref){
822 *mx= A[0];
823 *my= A[1];
824 return;
825 }
826 }
827
828 //RARE
829 pred_motion(h, n, 4, list, ref, mx, my);
830}
831
832/**
833 * gets the directionally predicted 8x16 MV.
834 * @param n the block index
835 * @param mx the x component of the predicted motion vector
836 * @param my the y component of the predicted motion vector
837 */
838static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
839 if(n==0){
840 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
841 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 842
a9c9a240 843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
844
845 if(left_ref == ref){
846 *mx= A[0];
847 *my= A[1];
848 return;
849 }
850 }else{
1924f3ce
MN
851 const int16_t * C;
852 int diagonal_ref;
853
854 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 855
a9c9a240 856 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 857
115329f1 858 if(diagonal_ref == ref){
0da71265
MN
859 *mx= C[0];
860 *my= C[1];
861 return;
862 }
0da71265
MN
863 }
864
865 //RARE
866 pred_motion(h, n, 2, list, ref, mx, my);
867}
868
869static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
870 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
871 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
872
a9c9a240 873 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
874
875 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
876 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
877 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
115329f1 878
0da71265
MN
879 *mx = *my = 0;
880 return;
881 }
115329f1 882
0da71265
MN
883 pred_motion(h, 0, 4, 0, 0, mx, my);
884
885 return;
886}
887
8b1fd554
MN
888static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
889 int poc0 = h->ref_list[0][i].poc;
890 int td = av_clip(poc1 - poc0, -128, 127);
891 if(td == 0 || h->ref_list[0][i].long_ref){
892 return 256;
893 }else{
894 int tb = av_clip(poc - poc0, -128, 127);
895 int tx = (16384 + (FFABS(td) >> 1)) / td;
896 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
897 }
898}
899
5ad984c9 900static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
901 MpegEncContext * const s = &h->s;
902 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 903 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
904 int i, field;
905 for(field=0; field<2; field++){
906 const int poc = h->s.current_picture_ptr->field_poc[field];
907 const int poc1 = h->ref_list[1][0].field_poc[field];
908 for(i=0; i < 2*h->ref_count[0]; i++)
909 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 910 }
8b1fd554
MN
911
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 914 }
5ad984c9 915}
f4d3382d
MN
916
917static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
918 MpegEncContext * const s = &h->s;
919 Picture * const ref1 = &h->ref_list[1][0];
920 int j, old_ref, rfield;
921 int start= mbafi ? 16 : 0;
922 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
923 int interl= mbafi || s->picture_structure != PICT_FRAME;
924
925 /* bogus; fills in for missing frames */
926 memset(map[list], 0, sizeof(map[list]));
927
928 for(rfield=0; rfield<2; rfield++){
929 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
930 int poc = ref1->ref_poc[colfield][list][old_ref];
931
932 if (!interl)
933 poc |= 3;
934 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
935 poc= (poc&~3) + rfield + 1;
936
937 for(j=start; j<end; j++){
938 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
939 int cur_ref= mbafi ? (j-16)^field : j;
940 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
941 if(rfield == field)
942 map[list][old_ref] = cur_ref;
943 break;
944 }
945 }
946 }
947 }
948}
949
2f944356
LM
950static inline void direct_ref_list_init(H264Context * const h){
951 MpegEncContext * const s = &h->s;
952 Picture * const ref1 = &h->ref_list[1][0];
953 Picture * const cur = s->current_picture_ptr;
bbc78fb4 954 int list, j, field;
f4d3382d
MN
955 int sidx= (s->picture_structure&1)^1;
956 int ref1sidx= (ref1->reference&1)^1;
aa617518 957
2f944356 958 for(list=0; list<2; list++){
2879c75f 959 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 960 for(j=0; j<h->ref_count[list]; j++)
42de393d 961 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 962 }
aa617518 963
7762cc3d 964 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
965 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
966 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 967 }
aa617518 968
48e025e5 969 cur->mbaff= FRAME_MBAFF;
aa617518 970
9701840b 971 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 972 return;
aa617518 973
2f944356 974 for(list=0; list<2; list++){
f4d3382d
MN
975 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
976 for(field=0; field<2; field++)
977 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
978 }
979}
5ad984c9
LM
980
981static inline void pred_direct_motion(H264Context * const h, int *mb_type){
982 MpegEncContext * const s = &h->s;
d00eac6c
MN
983 int b8_stride = h->b8_stride;
984 int b4_stride = h->b_stride;
985 int mb_xy = h->mb_xy;
986 int mb_type_col[2];
987 const int16_t (*l1mv0)[2], (*l1mv1)[2];
988 const int8_t *l1ref0, *l1ref1;
5ad984c9 989 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 990 unsigned int sub_mb_type;
5ad984c9
LM
991 int i8, i4;
992
5d18eaad 993#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
994
995 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 996 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
997 int cur_poc = s->current_picture_ptr->poc;
998 int *col_poc = h->ref_list[1]->field_poc;
999 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1000 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1001 b8_stride = 0;
60c9b24d 1002 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1003 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1004 mb_xy += s->mb_stride*fieldoff;
1005 }
1006 goto single_col;
1007 }else{ // AFL/AFR/FR/FL -> AFR/FR
1008 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1009 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1010 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1011 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1012 b8_stride *= 3;
1013 b4_stride *= 6;
1014 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1015 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1016 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1017 && !is_b8x8){
1018 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1019 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1020 }else{
1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1023 }
1024 }else{ // AFR/FR -> AFR/FR
1025single_col:
1026 mb_type_col[0] =
1027 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1028 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1029 /* FIXME save sub mb types from previous frames (or derive from MVs)
1030 * so we know exactly what block size to use */
1031 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1032 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1034 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1035 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1036 }else{
1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1039 }
d00eac6c 1040 }
5ad984c9 1041 }
5ad984c9 1042
7d54ecc9
MN
1043 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1044 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1045 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1046 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1047 if(!b8_stride){
1048 if(s->mb_y&1){
1049 l1ref0 += h->b8_stride;
1050 l1ref1 += h->b8_stride;
1051 l1mv0 += 2*b4_stride;
1052 l1mv1 += 2*b4_stride;
1053 }
d00eac6c 1054 }
115329f1 1055
5ad984c9
LM
1056 if(h->direct_spatial_mv_pred){
1057 int ref[2];
1058 int mv[2][2];
1059 int list;
1060
5d18eaad
LM
1061 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1062
5ad984c9
LM
1063 /* ref = min(neighbors) */
1064 for(list=0; list<2; list++){
1065 int refa = h->ref_cache[list][scan8[0] - 1];
1066 int refb = h->ref_cache[list][scan8[0] - 8];
1067 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1068 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1069 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1070 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1071 if(ref[list] < 0)
1072 ref[list] = -1;
1073 }
1074
1075 if(ref[0] < 0 && ref[1] < 0){
1076 ref[0] = ref[1] = 0;
1077 mv[0][0] = mv[0][1] =
1078 mv[1][0] = mv[1][1] = 0;
1079 }else{
1080 for(list=0; list<2; list++){
1081 if(ref[list] >= 0)
1082 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1083 else
1084 mv[list][0] = mv[list][1] = 0;
1085 }
1086 }
1087
1088 if(ref[1] < 0){
50b3ab0f
LM
1089 if(!is_b8x8)
1090 *mb_type &= ~MB_TYPE_L1;
1091 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1092 }else if(ref[0] < 0){
50b3ab0f
LM
1093 if(!is_b8x8)
1094 *mb_type &= ~MB_TYPE_L0;
1095 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1096 }
1097
d00eac6c 1098 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1099 for(i8=0; i8<4; i8++){
1100 int x8 = i8&1;
1101 int y8 = i8>>1;
1102 int xy8 = x8+y8*b8_stride;
1103 int xy4 = 3*x8+y8*b4_stride;
1104 int a=0, b=0;
1105
1106 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1107 continue;
1108 h->sub_mb_type[i8] = sub_mb_type;
1109
1110 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1112 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1113 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1114 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1115 if(ref[0] > 0)
1116 a= pack16to32(mv[0][0],mv[0][1]);
1117 if(ref[1] > 0)
1118 b= pack16to32(mv[1][0],mv[1][1]);
1119 }else{
1120 a= pack16to32(mv[0][0],mv[0][1]);
1121 b= pack16to32(mv[1][0],mv[1][1]);
1122 }
1123 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1124 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1125 }
1126 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1127 int a=0, b=0;
1128
cec93959
LM
1129 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1130 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1131 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1132 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1133 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1134 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1135 if(ref[0] > 0)
d19f5acb 1136 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1137 if(ref[1] > 0)
d19f5acb 1138 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1139 }else{
d19f5acb
MN
1140 a= pack16to32(mv[0][0],mv[0][1]);
1141 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1142 }
d19f5acb
MN
1143 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1144 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1145 }else{
1146 for(i8=0; i8<4; i8++){
1147 const int x8 = i8&1;
1148 const int y8 = i8>>1;
115329f1 1149
5ad984c9
LM
1150 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1151 continue;
1152 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1153
5ad984c9
LM
1154 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1155 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1156 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1157 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1158
5ad984c9 1159 /* col_zero_flag */
2ccd25d0
MN
1160 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1161 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1162 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1163 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1164 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1165 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1166 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1167 if(ref[0] == 0)
1168 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1169 if(ref[1] == 0)
1170 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1171 }
1172 }else
5ad984c9 1173 for(i4=0; i4<4; i4++){
2ccd25d0 1174 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1176 if(ref[0] == 0)
1177 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1178 if(ref[1] == 0)
1179 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1180 }
1181 }
1182 }
1183 }
1184 }
1185 }else{ /* direct temporal mv pred */
5d18eaad
LM
1186 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1187 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1188 int ref_offset= 0;
5d18eaad 1189
cc615d2c 1190 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1191 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1192 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1193 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1194 }
48e025e5 1195 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1196 ref_offset += 16;
48e025e5 1197
cc615d2c
MN
1198 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1199 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1200 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1201
cc615d2c
MN
1202 for(i8=0; i8<4; i8++){
1203 const int x8 = i8&1;
1204 const int y8 = i8>>1;
1205 int ref0, scale;
1206 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1207
cc615d2c
MN
1208 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1209 continue;
1210 h->sub_mb_type[i8] = sub_mb_type;
1211
1212 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1213 if(IS_INTRA(mb_type_col[y8])){
1214 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1216 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1217 continue;
1218 }
1219
1220 ref0 = l1ref0[x8 + y8*b8_stride];
1221 if(ref0 >= 0)
f4d3382d 1222 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1223 else{
f4d3382d 1224 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1225 l1mv= l1mv1;
1226 }
1227 scale = dist_scale_factor[ref0];
1228 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1229
1230 {
1231 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1232 int my_col = (mv_col[1]<<y_shift)/2;
1233 int mx = (scale * mv_col[0] + 128) >> 8;
1234 int my = (scale * my_col + 128) >> 8;
1235 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1236 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1237 }
5d18eaad 1238 }
cc615d2c
MN
1239 return;
1240 }
5d18eaad
LM
1241
1242 /* one-to-one mv scaling */
1243
5ad984c9 1244 if(IS_16X16(*mb_type)){
fda51641
MN
1245 int ref, mv0, mv1;
1246
5ad984c9 1247 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1248 if(IS_INTRA(mb_type_col[0])){
fda51641 1249 ref=mv0=mv1=0;
5ad984c9 1250 }else{
f4d3382d
MN
1251 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1252 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1253 const int scale = dist_scale_factor[ref0];
8583bef8 1254 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1255 int mv_l0[2];
5d18eaad
LM
1256 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1257 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1258 ref= ref0;
1259 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1260 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1261 }
fda51641
MN
1262 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1263 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1264 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1265 }else{
1266 for(i8=0; i8<4; i8++){
1267 const int x8 = i8&1;
1268 const int y8 = i8>>1;
5d18eaad 1269 int ref0, scale;
bf4e3bd2 1270 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1271
5ad984c9
LM
1272 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1273 continue;
1274 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1275 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1276 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1277 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1278 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1279 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1280 continue;
1281 }
115329f1 1282
f4d3382d 1283 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1284 if(ref0 >= 0)
5d18eaad 1285 ref0 = map_col_to_list0[0][ref0];
8583bef8 1286 else{
f4d3382d 1287 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1288 l1mv= l1mv1;
1289 }
5d18eaad 1290 scale = dist_scale_factor[ref0];
115329f1 1291
5ad984c9 1292 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1293 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1294 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1295 int mx = (scale * mv_col[0] + 128) >> 8;
1296 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1297 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1298 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1299 }else
5ad984c9 1300 for(i4=0; i4<4; i4++){
2ccd25d0 1301 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1302 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1303 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1304 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1305 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1306 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1307 }
1308 }
1309 }
1310 }
1311}
1312
0da71265
MN
1313static inline void write_back_motion(H264Context *h, int mb_type){
1314 MpegEncContext * const s = &h->s;
0da71265
MN
1315 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1316 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1317 int list;
1318
2ea39252
LM
1319 if(!USES_LIST(mb_type, 0))
1320 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1321
3425501d 1322 for(list=0; list<h->list_count; list++){
0da71265 1323 int y;
53b19144 1324 if(!USES_LIST(mb_type, list))
5ad984c9 1325 continue;
115329f1 1326
0da71265
MN
1327 for(y=0; y<4; y++){
1328 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1329 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1330 }
9e528114 1331 if( h->pps.cabac ) {
e6e77eb6
LM
1332 if(IS_SKIP(mb_type))
1333 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1334 else
9e528114
LA
1335 for(y=0; y<4; y++){
1336 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1337 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1338 }
1339 }
53b19144
LM
1340
1341 {
191e8ca7 1342 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1343 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1344 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1345 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1346 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1347 }
1348 }
115329f1 1349
9f5c1037 1350 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1351 if(IS_8X8(mb_type)){
53b19144
LM
1352 uint8_t *direct_table = &h->direct_table[b8_xy];
1353 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1354 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1355 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1356 }
1357 }
0da71265
MN
1358}
1359
1360/**
1361 * Decodes a network abstraction layer unit.
1362 * @param consumed is the number of bytes used as input
1363 * @param length is the length of the array
3b66c4c5 1364 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1365 * @returns decoded bytes, might be src+1 if no escapes
0da71265 1366 */
30317501 1367static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1368 int i, si, di;
1369 uint8_t *dst;
24456882 1370 int bufidx;
0da71265 1371
bb270c08 1372// src[0]&0x80; //forbidden bit
0da71265
MN
1373 h->nal_ref_idc= src[0]>>5;
1374 h->nal_unit_type= src[0]&0x1F;
1375
1376 src++; length--;
115329f1 1377#if 0
0da71265
MN
1378 for(i=0; i<length; i++)
1379 printf("%2X ", src[i]);
1380#endif
e08715d3
MN
1381
1382#ifdef HAVE_FAST_UNALIGNED
1383# ifdef HAVE_FAST_64BIT
1384# define RS 7
1385 for(i=0; i+1<length; i+=9){
1386 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1387# else
1388# define RS 3
1389 for(i=0; i+1<length; i+=5){
1390 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1391# endif
1392 continue;
1393 if(i>0 && !src[i]) i--;
1394 while(src[i]) i++;
1395#else
1396# define RS 0
0da71265
MN
1397 for(i=0; i+1<length; i+=2){
1398 if(src[i]) continue;
1399 if(i>0 && src[i-1]==0) i--;
e08715d3 1400#endif
0da71265
MN
1401 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1402 if(src[i+2]!=3){
1403 /* startcode, so we must be past the end */
1404 length=i;
1405 }
1406 break;
1407 }
abb27cfb 1408 i-= RS;
0da71265
MN
1409 }
1410
1411 if(i>=length-1){ //no escaped 0
1412 *dst_length= length;
1413 *consumed= length+1; //+1 for the header
115329f1 1414 return src;
0da71265
MN
1415 }
1416
24456882 1417 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
d4369630 1418 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 1419 dst= h->rbsp_buffer[bufidx];
0da71265 1420
ac658be5
FOL
1421 if (dst == NULL){
1422 return NULL;
1423 }
1424
3b66c4c5 1425//printf("decoding esc\n");
593af7cd
MN
1426 memcpy(dst, src, i);
1427 si=di=i;
1428 while(si+2<length){
0da71265 1429 //remove escapes (very rare 1:2^22)
593af7cd
MN
1430 if(src[si+2]>3){
1431 dst[di++]= src[si++];
1432 dst[di++]= src[si++];
1433 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
1434 if(src[si+2]==3){ //escape
1435 dst[di++]= 0;
1436 dst[di++]= 0;
1437 si+=3;
c8470cc1 1438 continue;
0da71265 1439 }else //next start code
593af7cd 1440 goto nsc;
0da71265
MN
1441 }
1442
1443 dst[di++]= src[si++];
1444 }
593af7cd
MN
1445 while(si<length)
1446 dst[di++]= src[si++];
1447nsc:
0da71265 1448
d4369630
AS
1449 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1450
0da71265
MN
1451 *dst_length= di;
1452 *consumed= si + 1;//+1 for the header
90b5b51e 1453//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1454 return dst;
1455}
1456
0da71265
MN
1457/**
1458 * identifies the exact end of the bitstream
1459 * @return the length of the trailing, or 0 if damaged
1460 */
30317501 1461static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1462 int v= *src;
1463 int r;
1464
a9c9a240 1465 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1466
1467 for(r=1; r<9; r++){
1468 if(v&1) return r;
1469 v>>=1;
1470 }
1471 return 0;
1472}
1473
1474/**
1412060e 1475 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1476 * @param qp quantization parameter
1477 */
239ea04c 1478static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1479#define stride 16
1480 int i;
1481 int temp[16]; //FIXME check if this is a good idea
1482 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1483 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1484
1485//memset(block, 64, 2*256);
1486//return;
1487 for(i=0; i<4; i++){
1488 const int offset= y_offset[i];
1489 const int z0= block[offset+stride*0] + block[offset+stride*4];
1490 const int z1= block[offset+stride*0] - block[offset+stride*4];
1491 const int z2= block[offset+stride*1] - block[offset+stride*5];
1492 const int z3= block[offset+stride*1] + block[offset+stride*5];
1493
1494 temp[4*i+0]= z0+z3;
1495 temp[4*i+1]= z1+z2;
1496 temp[4*i+2]= z1-z2;
1497 temp[4*i+3]= z0-z3;
1498 }
1499
1500 for(i=0; i<4; i++){
1501 const int offset= x_offset[i];
1502 const int z0= temp[4*0+i] + temp[4*2+i];
1503 const int z1= temp[4*0+i] - temp[4*2+i];
1504 const int z2= temp[4*1+i] - temp[4*3+i];
1505 const int z3= temp[4*1+i] + temp[4*3+i];
1506
1412060e 1507 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1508 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1509 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1510 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1511 }
1512}
1513
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; it relies on the stride macro defined by
 * h264_luma_dc_dequant_idct_c().
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    /* first pass: butterflies into temp[] */
    for(i=0; i<4; i++){
        const DCTELEM *in = block + y_offset[i];
        const int sum04  = in[stride*0] + in[stride*4];
        const int diff04 = in[stride*0] - in[stride*4];
        const int diff15 = in[stride*1] - in[stride*5];
        const int sum15  = in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04 +sum15;
        temp[4*i+1]= diff04+diff15;
        temp[4*i+2]= diff04-diff15;
        temp[4*i+3]= sum04 -sum15;
    }

    /* second pass: butterflies across temp[], halve the results */
    for(i=0; i<4; i++){
        DCTELEM *out = block + x_offset[i];
        const int sum02  = temp[4*0+i] + temp[4*2+i];
        const int diff02 = temp[4*0+i] - temp[4*2+i];
        const int diff13 = temp[4*1+i] - temp[4*3+i];
        const int sum13  = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02  + sum13 )>>1;
        out[stride*2 ]= (diff02 + diff13)>>1;
        out[stride*8 ]= (diff02 - diff13)>>1;
        out[stride*10]= (sum02  - sum13 )>>1;
    }
}
#endif
1553
0da71265
MN
1554#undef xStride
1555#undef stride
1556
239ea04c 1557static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1558 const int stride= 16*2;
1559 const int xStride= 16;
1560 int a,b,c,d,e;
1561
1562 a= block[stride*0 + xStride*0];
1563 b= block[stride*0 + xStride*1];
1564 c= block[stride*1 + xStride*0];
1565 d= block[stride*1 + xStride*1];
1566
1567 e= a-b;
1568 a= a+b;
1569 b= c-d;
1570 c= c+d;
1571
239ea04c
LM
1572 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1573 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1574 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1575 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1576}
1577
#if 0
/**
 * In-place 2x2 forward transform of the four chroma DC values (compiled out).
 * @param block coefficient buffer; the values at offsets 0, 16, 32 and 48
 *              are read and overwritten
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    const int v00= block[stride*0 + xStride*0];
    const int v01= block[stride*0 + xStride*1];
    const int v10= block[stride*1 + xStride*0];
    const int v11= block[stride*1 + xStride*1];
    const int top_sum= v00+v01;
    const int top_dif= v00-v01;
    const int bot_sum= v10+v11;
    const int bot_dif= v10-v11;

    block[stride*0 + xStride*0]= (top_sum+bot_sum);
    block[stride*0 + xStride*1]= (top_dif+bot_dif);
    block[stride*1 + xStride*0]= (top_sum-bot_sum);
    block[stride*1 + xStride*1]= (top_dif-bot_dif);
}
#endif
0da71265
MN
1600
1601/**
1602 * gets the chroma qp.
1603 */
4691a77d 1604static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1605 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1606}
1607
0da71265
MN
1608static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1609 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1610 int src_x_offset, int src_y_offset,
1611 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1612 MpegEncContext * const s = &h->s;
1613 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1614 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1615 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1616 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1617 uint8_t * src_cb, * src_cr;
1618 int extra_width= h->emu_edge_width;
1619 int extra_height= h->emu_edge_height;
0da71265
MN
1620 int emu=0;
1621 const int full_mx= mx>>2;
1622 const int full_my= my>>2;
fbd312fd 1623 const int pic_width = 16*s->mb_width;
0d43dd8c 1624 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1625
0da71265
MN
1626 if(mx&7) extra_width -= 3;
1627 if(my&7) extra_height -= 3;
115329f1
DB
1628
1629 if( full_mx < 0-extra_width
1630 || full_my < 0-extra_height
1631 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1632 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1633 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1634 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1635 emu=1;
1636 }
115329f1 1637
5d18eaad 1638 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1639 if(!square){
5d18eaad 1640 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1641 }
115329f1 1642
87352549 1643 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1644
0d43dd8c 1645 if(MB_FIELD){
5d18eaad 1646 // chroma offset when predicting from a field of opposite parity
2143b118 1647 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1648 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1649 }
1650 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1651 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1652
0da71265 1653 if(emu){
5d18eaad 1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1655 src_cb= s->edge_emu_buffer;
1656 }
5d18eaad 1657 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1658
1659 if(emu){
5d18eaad 1660 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1661 src_cr= s->edge_emu_buffer;
1662 }
5d18eaad 1663 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1664}
1665
9f2d1b4f 1666static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1667 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1668 int x_offset, int y_offset,
1669 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1670 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1671 int list0, int list1){
1672 MpegEncContext * const s = &h->s;
1673 qpel_mc_func *qpix_op= qpix_put;
1674 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1675
5d18eaad
LM
1676 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1677 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1678 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1679 x_offset += 8*s->mb_x;
0d43dd8c 1680 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1681
0da71265 1682 if(list0){
1924f3ce 1683 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1684 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1685 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1686 qpix_op, chroma_op);
1687
1688 qpix_op= qpix_avg;
1689 chroma_op= chroma_avg;
1690 }
1691
1692 if(list1){
1924f3ce 1693 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1694 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1695 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1696 qpix_op, chroma_op);
1697 }
1698}
1699
9f2d1b4f
LM
1700static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1701 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1702 int x_offset, int y_offset,
1703 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1704 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1705 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1706 int list0, int list1){
1707 MpegEncContext * const s = &h->s;
1708
5d18eaad
LM
1709 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1710 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1711 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1712 x_offset += 8*s->mb_x;
0d43dd8c 1713 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1714
9f2d1b4f
LM
1715 if(list0 && list1){
1716 /* don't optimize for luma-only case, since B-frames usually
1717 * use implicit weights => chroma too. */
1718 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1719 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1720 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1721 int refn0 = h->ref_cache[0][ scan8[n] ];
1722 int refn1 = h->ref_cache[1][ scan8[n] ];
1723
1724 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1725 dest_y, dest_cb, dest_cr,
1726 x_offset, y_offset, qpix_put, chroma_put);
1727 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1728 tmp_y, tmp_cb, tmp_cr,
1729 x_offset, y_offset, qpix_put, chroma_put);
1730
1731 if(h->use_weight == 2){
1732 int weight0 = h->implicit_weight[refn0][refn1];
1733 int weight1 = 64 - weight0;
5d18eaad
LM
1734 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1736 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1737 }else{
5d18eaad 1738 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1739 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1740 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1741 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1742 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1743 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1744 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1745 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1746 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1747 }
1748 }else{
1749 int list = list1 ? 1 : 0;
1750 int refn = h->ref_cache[list][ scan8[n] ];
1751 Picture *ref= &h->ref_list[list][refn];
1752 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1753 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1754 qpix_put, chroma_put);
1755
5d18eaad 1756 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1757 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1758 if(h->use_weight_chroma){
5d18eaad 1759 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1760 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1761 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1762 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1763 }
1764 }
1765}
1766
1767static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1768 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1769 int x_offset, int y_offset,
1770 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1771 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1772 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1773 int list0, int list1){
1774 if((h->use_weight==2 && list0 && list1
1775 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1776 || h->use_weight==1)
1777 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1778 x_offset, y_offset, qpix_put, chroma_put,
1779 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1780 else
1781 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1782 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1783}
1784
513fbd8e
LM
1785static inline void prefetch_motion(H264Context *h, int list){
1786 /* fetch pixels for estimated mv 4 macroblocks ahead
1787 * optimized for 64byte cache lines */
1788 MpegEncContext * const s = &h->s;
1789 const int refn = h->ref_cache[list][scan8[0]];
1790 if(refn >= 0){
1791 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1792 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1793 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1794 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1795 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1796 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1797 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1798 }
1799}
1800
0da71265
MN
1801static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1802 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1803 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1804 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1805 MpegEncContext * const s = &h->s;
64514ee8 1806 const int mb_xy= h->mb_xy;
0da71265 1807 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1808
0da71265 1809 assert(IS_INTER(mb_type));
115329f1 1810
513fbd8e
LM
1811 prefetch_motion(h, 0);
1812
0da71265
MN
1813 if(IS_16X16(mb_type)){
1814 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1815 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1816 &weight_op[0], &weight_avg[0],
0da71265
MN
1817 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1818 }else if(IS_16X8(mb_type)){
1819 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1820 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1821 &weight_op[1], &weight_avg[1],
0da71265
MN
1822 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1823 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1824 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1825 &weight_op[1], &weight_avg[1],
0da71265
MN
1826 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1827 }else if(IS_8X16(mb_type)){
5d18eaad 1828 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1829 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1830 &weight_op[2], &weight_avg[2],
0da71265 1831 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1832 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1833 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1834 &weight_op[2], &weight_avg[2],
0da71265
MN
1835 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1836 }else{
1837 int i;
115329f1 1838
0da71265
MN
1839 assert(IS_8X8(mb_type));
1840
1841 for(i=0; i<4; i++){
1842 const int sub_mb_type= h->sub_mb_type[i];
1843 const int n= 4*i;
1844 int x_offset= (i&1)<<2;
1845 int y_offset= (i&2)<<1;
1846
1847 if(IS_SUB_8X8(sub_mb_type)){
1848 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1849 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1850 &weight_op[3], &weight_avg[3],
0da71265
MN
1851 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1852 }else if(IS_SUB_8X4(sub_mb_type)){
1853 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1854 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1855 &weight_op[4], &weight_avg[4],
0da71265
MN
1856 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1857 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1858 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1859 &weight_op[4], &weight_avg[4],
0da71265
MN
1860 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1861 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1862 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1863 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1864 &weight_op[5], &weight_avg[5],
0da71265 1865 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1866 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1867 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1868 &weight_op[5], &weight_avg[5],
0da71265
MN
1869 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1870 }else{
1871 int j;
1872 assert(IS_SUB_4X4(sub_mb_type));
1873 for(j=0; j<4; j++){
1874 int sub_x_offset= x_offset + 2*(j&1);
1875 int sub_y_offset= y_offset + (j&2);
1876 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1877 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1878 &weight_op[6], &weight_avg[6],
0da71265
MN
1879 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1880 }
1881 }
1882 }
1883 }
513fbd8e
LM
1884
1885 prefetch_motion(h, 1);
0da71265
MN
1886}
1887
8140955d
MN
1888static av_cold void init_cavlc_level_tab(void){
1889 int suffix_length, mask;
1890 unsigned int i;
1891
1892 for(suffix_length=0; suffix_length<7; suffix_length++){
1893 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1894 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1895 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1896
1897 mask= -(level_code&1);
1898 level_code= (((2+level_code)>>1) ^ mask) - mask;
1899 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1900 cavlc_level_tab[suffix_length][i][0]= level_code;
1901 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1902 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1903 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1904 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1905 }else{
1906 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1907 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1908 }
1909 }
1910 }
1911}
1912
98a6fff9 1913static av_cold void decode_init_vlc(void){
0da71265
MN
1914 static int done = 0;
1915
1916 if (!done) {
1917 int i;
910e3668 1918 int offset;
0da71265
MN
1919 done = 1;
1920
910e3668
AC
1921 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1922 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1923 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1924 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1925 &chroma_dc_coeff_token_bits[0], 1, 1,
1926 INIT_VLC_USE_NEW_STATIC);
0da71265 1927
910e3668 1928 offset = 0;
0da71265 1929 for(i=0; i<4; i++){
910e3668
AC
1930 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1931 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1932 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1933 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1934 &coeff_token_bits[i][0], 1, 1,
1935 INIT_VLC_USE_NEW_STATIC);
1936 offset += coeff_token_vlc_tables_size[i];
0da71265 1937 }
910e3668
AC
1938 /*
1939 * This is a one time safety check to make sure that
1940 * the packed static coeff_token_vlc table sizes
1941 * were initialized correctly.
1942 */
37d3e066 1943 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1944
1945 for(i=0; i<3; i++){
910e3668
AC
1946 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1947 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1948 init_vlc(&chroma_dc_total_zeros_vlc[i],
1949 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1950 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1951 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1952 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1953 }
1954 for(i=0; i<15; i++){
910e3668
AC
1955 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1956 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1957 init_vlc(&total_zeros_vlc[i],
1958 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1959 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1960 &total_zeros_bits[i][0], 1, 1,
1961 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1962 }
1963
1964 for(i=0; i<6; i++){
910e3668
AC
1965 run_vlc[i].table = run_vlc_tables[i];
1966 run_vlc[i].table_allocated = run_vlc_tables_size;
1967 init_vlc(&run_vlc[i],
1968 RUN_VLC_BITS, 7,
0da71265 1969 &run_len [i][0], 1, 1,
910e3668
AC
1970 &run_bits[i][0], 1, 1,
1971 INIT_VLC_USE_NEW_STATIC);
0da71265 1972 }
910e3668
AC
1973 run7_vlc.table = run7_vlc_table,
1974 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1975 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1976 &run_len [6][0], 1, 1,
910e3668
AC
1977 &run_bits[6][0], 1, 1,
1978 INIT_VLC_USE_NEW_STATIC);
8140955d
MN
1979
1980 init_cavlc_level_tab();
0da71265
MN
1981 }
1982}
1983
0da71265 1984static void free_tables(H264Context *h){
7978debd 1985 int i;
afebe2f7 1986 H264Context *hx;
0da71265 1987 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1988 av_freep(&h->chroma_pred_mode_table);
1989 av_freep(&h->cbp_table);
9e528114
LA
1990 av_freep(&h->mvd_table[0]);
1991 av_freep(&h->mvd_table[1]);
5ad984c9 1992 av_freep(&h->direct_table);
0da71265
MN
1993 av_freep(&h->non_zero_count);
1994 av_freep(&h->slice_table_base);
1995 h->slice_table= NULL;
e5017ab8 1996
0da71265
MN
1997 av_freep(&h->mb2b_xy);
1998 av_freep(&h->mb2b8_xy);
9f2d1b4f 1999
afebe2f7
2000 for(i = 0; i < h->s.avctx->thread_count; i++) {
2001 hx = h->thread_context[i];
2002 if(!hx) continue;
2003 av_freep(&hx->top_borders[1]);
2004 av_freep(&hx->top_borders[0]);
2005 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 2006 }
0da71265
MN
2007}
2008
239ea04c
LM
2009static void init_dequant8_coeff_table(H264Context *h){
2010 int i,q,x;
548a1c8a 2011 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2012 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2013 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2014
2015 for(i=0; i<2; i++ ){
2016 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2017 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2018 break;
2019 }
2020
2021 for(q=0; q<52; q++){
d9ec210b
DP
2022 int shift = div6[q];
2023 int idx = rem6[q];
239ea04c 2024 for(x=0; x<64; x++)
548a1c8a
LM
2025 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2026 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2027 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2028 }
2029 }
2030}
2031
2032static void init_dequant4_coeff_table(H264Context *h){
2033 int i,j,q,x;
ab2e3e2c 2034 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2035 for(i=0; i<6; i++ ){
2036 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2037 for(j=0; j<i; j++){
2038 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2039 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2040 break;
2041 }
2042 }
2043 if(j<i)
2044 continue;
2045
2046 for(q=0; q<52; q++){
d9ec210b
DP
2047 int shift = div6[q] + 2;
2048 int idx = rem6[q];
239ea04c 2049 for(x=0; x<16; x++)
ab2e3e2c
LM
2050 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2051 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2052 h->pps.scaling_matrix4[i][x]) << shift;
2053 }
2054 }
2055}
2056
2057static void init_dequant_tables(H264Context *h){
2058 int i,x;
2059 init_dequant4_coeff_table(h);
2060 if(h->pps.transform_8x8_mode)
2061 init_dequant8_coeff_table(h);
2062 if(h->sps.transform_bypass){
2063 for(i=0; i<6; i++)
2064 for(x=0; x<16; x++)
2065 h->dequant4_coeff[i][0][x] = 1<<6;
2066 if(h->pps.transform_8x8_mode)
2067 for(i=0; i<2; i++)
2068 for(x=0; x<64; x++)
2069 h->dequant8_coeff[i][0][x] = 1<<6;
2070 }
2071}
2072
2073
0da71265
MN
2074/**
2075 * allocates tables.
3b66c4c5 2076 * needs width/height
0da71265
MN
2077 */
2078static int alloc_tables(H264Context *h){
2079 MpegEncContext * const s = &h->s;
7bc9090a 2080 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2081 int x,y;
0da71265
MN
2082
2083 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2084
53c05b1e 2085 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2086 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2087 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2088
7526ade2
MN
2089 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2090 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2091 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2092 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2093
b735aeea 2094 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2095 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2096
a55f20bd
LM
2097 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2098 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2099 for(y=0; y<s->mb_height; y++){
2100 for(x=0; x<s->mb_width; x++){
7bc9090a 2101 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2102 const int b_xy = 4*x + 4*y*h->b_stride;
2103 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2104
0da71265
MN
2105 h->mb2b_xy [mb_xy]= b_xy;
2106 h->mb2b8_xy[mb_xy]= b8_xy;
2107 }
2108 }
9f2d1b4f 2109
9c6221ae
GV
2110 s->obmc_scratchpad = NULL;
2111
56edbd81
LM
2112 if(!h->dequant4_coeff[0])
2113 init_dequant_tables(h);
2114
0da71265
MN
2115 return 0;
2116fail:
2117 free_tables(h);
2118 return -1;
2119}
2120
afebe2f7
2121/**
2122 * Mimic alloc_tables(), but for every context thread.
2123 */
2124static void clone_tables(H264Context *dst, H264Context *src){
2125 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2126 dst->non_zero_count = src->non_zero_count;
2127 dst->slice_table = src->slice_table;
2128 dst->cbp_table = src->cbp_table;
2129 dst->mb2b_xy = src->mb2b_xy;
2130 dst->mb2b8_xy = src->mb2b8_xy;
2131 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2132 dst->mvd_table[0] = src->mvd_table[0];
2133 dst->mvd_table[1] = src->mvd_table[1];
2134 dst->direct_table = src->direct_table;
2135
afebe2f7
2136 dst->s.obmc_scratchpad = NULL;
2137 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2138}
2139
2140/**
2141 * Init context
2142 * Allocate buffers which are not shared amongst multiple threads.
2143 */
2144static int context_init(H264Context *h){
afebe2f7
2145 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2146 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2147
afebe2f7
2148 return 0;
2149fail:
2150 return -1; // free_tables will clean up for us
2151}
2152
98a6fff9 2153static av_cold void common_init(H264Context *h){
0da71265 2154 MpegEncContext * const s = &h->s;
0da71265
MN
2155
2156 s->width = s->avctx->width;
2157 s->height = s->avctx->height;
2158 s->codec_id= s->avctx->codec->id;
115329f1 2159
c92a30bb 2160 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2161
239ea04c 2162 h->dequant_coeff_pps= -1;
9a41c2c7 2163 s->unrestricted_mv=1;
0da71265 2164 s->decode=1; //FIXME
56edbd81 2165
a5805aa9
MN
2166 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2167
56edbd81
LM
2168 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2169 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2170}
2171
98a6fff9 2172static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2173 H264Context *h= avctx->priv_data;
2174 MpegEncContext * const s = &h->s;
2175
3edcacde 2176 MPV_decode_defaults(s);
115329f1 2177
0da71265
MN
2178 s->avctx = avctx;
2179 common_init(h);
2180
2181 s->out_format = FMT_H264;
2182 s->workaround_bugs= avctx->workaround_bugs;
2183
2184 // set defaults
0da71265 2185// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2186 s->quarter_sample = 1;
0da71265 2187 s->low_delay= 1;
7a9dba3c
MN
2188
2189 if(avctx->codec_id == CODEC_ID_SVQ3)
2190 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2191 else
1d42f410 2192 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2193
c2212338 2194 decode_init_vlc();
115329f1 2195
26165f99
MR
2196 if(avctx->extradata_size > 0 && avctx->extradata &&
2197 *(char *)avctx->extradata == 1){
4770b1b4
RT
2198 h->is_avc = 1;
2199 h->got_avcC = 0;
26165f99
MR
2200 } else {
2201 h->is_avc = 0;
4770b1b4
RT
2202 }
2203
afebe2f7 2204 h->thread_context[0] = h;
18c7be65 2205 h->outputed_poc = INT_MIN;
e4b8f1fa 2206 h->prev_poc_msb= 1<<16;
0da71265
MN
2207 return 0;
2208}
2209
af8aa846 2210static int frame_start(H264Context *h){
0da71265
MN
2211 MpegEncContext * const s = &h->s;
2212 int i;
2213
af8aa846
MN
2214 if(MPV_frame_start(s, s->avctx) < 0)
2215 return -1;
0da71265 2216 ff_er_frame_start(s);
3a22d7fa
JD
2217 /*
2218 * MPV_frame_start uses pict_type to derive key_frame.
2219 * This is incorrect for H.264; IDR markings must be used.
1412060e 2220 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2221 * See decode_nal_units().
2222 */
2223 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2224
2225 assert(s->linesize && s->uvlinesize);
2226
2227 for(i=0; i<16; i++){
2228 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2229 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2230 }
2231 for(i=0; i<4; i++){
2232 h->block_offset[16+i]=
2233 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2234 h->block_offset[24+16+i]=
2235 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2236 }
2237
934b0821
LM
2238 /* can't be in alloc_tables because linesize isn't known there.
2239 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2240 for(i = 0; i < s->avctx->thread_count; i++)
2241 if(!h->thread_context[i]->s.obmc_scratchpad)
2242 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2243
2244 /* some macroblocks will be accessed before they're available */
afebe2f7 2245 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2246 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2247
0da71265 2248// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2249
1412060e 2250 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2251 // that if we break out due to an error it can be released automatically
2252 // in the next MPV_frame_start().
2253 // SVQ3 as well as most other codecs have only last/next/current and thus
2254 // get released even with set reference, besides SVQ3 and others do not
2255 // mark frames as reference later "naturally".
2256 if(s->codec_id != CODEC_ID_SVQ3)
2257 s->current_picture_ptr->reference= 0;
357282c6
MN
2258
2259 s->current_picture_ptr->field_poc[0]=
2260 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2261 assert(s->current_picture_ptr->long_ref==0);
357282c6 2262
af8aa846 2263 return 0;
0da71265
MN
2264}
2265
93cc10fa 2266static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2267 MpegEncContext * const s = &h->s;
2268 int i;
5f7f9719
MN
2269 int step = 1;
2270 int offset = 1;
2271 int uvoffset= 1;
2272 int top_idx = 1;
2273 int skiplast= 0;
115329f1 2274
53c05b1e
MN
2275 src_y -= linesize;
2276 src_cb -= uvlinesize;
2277 src_cr -= uvlinesize;
2278
5f7f9719
MN
2279 if(!simple && FRAME_MBAFF){
2280 if(s->mb_y&1){
2281 offset = MB_MBAFF ? 1 : 17;
2282 uvoffset= MB_MBAFF ? 1 : 9;
2283 if(!MB_MBAFF){
2284 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2286 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2287 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2288 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2289 }
2290 }
2291 }else{
2292 if(!MB_MBAFF){
2293 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2294 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2295 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2296 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2297 }
2298 skiplast= 1;
2299 }
2300 offset =
2301 uvoffset=
2302 top_idx = MB_MBAFF ? 0 : 1;
2303 }
2304 step= MB_MBAFF ? 2 : 1;
2305 }
2306
3b66c4c5 2307 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2308 // and the line above the bottom macroblock
5f7f9719
MN
2309 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2310 for(i=1; i<17 - skiplast; i++){
2311 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2312 }
115329f1 2313
5f7f9719
MN
2314 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2315 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2316
87352549 2317 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2318 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2319 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2320 for(i=1; i<9 - skiplast; i++){
2321 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2322 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2323 }
5f7f9719
MN
2324 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2325 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2326 }
2327}
2328
93cc10fa 2329static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2330 MpegEncContext * const s = &h->s;
2331 int temp8, i;
2332 uint64_t temp64;
b69378e2
2333 int deblock_left;
2334 int deblock_top;
2335 int mb_xy;
5f7f9719
MN
2336 int step = 1;
2337 int offset = 1;
2338 int uvoffset= 1;
2339 int top_idx = 1;
2340
2341 if(!simple && FRAME_MBAFF){
2342 if(s->mb_y&1){
2343 offset = MB_MBAFF ? 1 : 17;
2344 uvoffset= MB_MBAFF ? 1 : 9;
2345 }else{
2346 offset =
2347 uvoffset=
2348 top_idx = MB_MBAFF ? 0 : 1;
2349 }
2350 step= MB_MBAFF ? 2 : 1;
2351 }
b69378e2
2352
2353 if(h->deblocking_filter == 2) {
64514ee8 2354 mb_xy = h->mb_xy;
b69378e2
2355 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2356 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2357 } else {
2358 deblock_left = (s->mb_x > 0);
6c805007 2359 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2360 }
53c05b1e
MN
2361
2362 src_y -= linesize + 1;
2363 src_cb -= uvlinesize + 1;
2364 src_cr -= uvlinesize + 1;
2365
2366#define XCHG(a,b,t,xchg)\
2367t= a;\
2368if(xchg)\
2369 a= b;\
2370b= t;
d89dc06a
LM
2371
2372 if(deblock_left){
5f7f9719
MN
2373 for(i = !deblock_top; i<16; i++){
2374 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2375 }
5f7f9719 2376 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2377 }
2378
2379 if(deblock_top){
5f7f9719
MN
2380 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2381 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2382 if(s->mb_x+1 < s->mb_width){
5f7f9719 2383 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2384 }
53c05b1e 2385 }
53c05b1e 2386
87352549 2387 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2388 if(deblock_left){
5f7f9719
MN
2389 for(i = !deblock_top; i<8; i++){
2390 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2391 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2392 }
5f7f9719
MN
2393 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2394 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2395 }
2396 if(deblock_top){
5f7f9719
MN
2397 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2399 }
53c05b1e
MN
2400 }
2401}
2402
5a6a6cc7 2403static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2404 MpegEncContext * const s = &h->s;
2405 const int mb_x= s->mb_x;
2406 const int mb_y= s->mb_y;
64514ee8 2407 const int mb_xy= h->mb_xy;
0da71265
MN
2408 const int mb_type= s->current_picture.mb_type[mb_xy];
2409 uint8_t *dest_y, *dest_cb, *dest_cr;
2410 int linesize, uvlinesize /*dct_offset*/;
2411 int i;
6867a90b 2412 int *block_offset = &h->block_offset[0];
41e4055b
MN
2413 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2414 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
36940eca 2415 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2416 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2417
6120a343
MN
2418 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2419 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2420 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 2421
a957c27b
LM
2422 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2423 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2424
bd91fee3 2425 if (!simple && MB_FIELD) {
5d18eaad
LM
2426 linesize = h->mb_linesize = s->linesize * 2;
2427 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2428 block_offset = &h->block_offset[24];
1412060e 2429 if(mb_y&1){ //FIXME move out of this function?
0da71265 2430 dest_y -= s->linesize*15;
6867a90b
LLL
2431 dest_cb-= s->uvlinesize*7;
2432 dest_cr-= s->uvlinesize*7;
0da71265 2433 }
5d18eaad
LM
2434 if(FRAME_MBAFF) {
2435 int list;
3425501d 2436 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2437 if(!USES_LIST(mb_type, list))
2438 continue;
2439 if(IS_16X16(mb_type)){
2440 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2441 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2442 }else{
2443 for(i=0; i<16; i+=4){
5d18eaad
LM
2444 int ref = h->ref_cache[list][scan8[i]];
2445 if(ref >= 0)
1710856c 2446 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2447 }
2448 }
2449 }
2450 }
0da71265 2451 } else {
5d18eaad
LM
2452 linesize = h->mb_linesize = s->linesize;
2453 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2454// dct_offset = s->linesize * 16;
2455 }
115329f1 2456
bd91fee3 2457 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2458 for (i=0; i<16; i++) {
2459 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2460 }
c1708e8d
MN
2461 for (i=0; i<8; i++) {
2462 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2463 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2464 }
e7e09b49
LLL
2465 } else {
2466 if(IS_INTRA(mb_type)){
5f7f9719 2467 if(h->deblocking_filter)
93cc10fa 2468 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2469
87352549 2470 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2471 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2472 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2473 }
0da71265 2474
e7e09b49 2475 if(IS_INTRA4x4(mb_type)){
bd91fee3 2476 if(simple || !s->encoding){
43efd19a 2477 if(IS_8x8DCT(mb_type)){
1eb96035
MN
2478 if(transform_bypass){
2479 idct_dc_add =
2480 idct_add = s->dsp.add_pixels8;
dae006d7 2481 }else{
1eb96035
MN
2482 idct_dc_add = s->dsp.h264_idct8_dc_add;
2483 idct_add = s->dsp.h264_idct8_add;
2484 }
43efd19a
LM
2485 for(i=0; i<16; i+=4){
2486 uint8_t * const ptr= dest_y + block_offset[i];
2487 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
2488 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2489 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2490 }else{
ac0623b2
MN
2491 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2492 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2493 (h->topright_samples_available<<i)&0x4000, linesize);
2494 if(nnz){
2495 if(nnz == 1 && h->mb[i*16])
2496 idct_dc_add(ptr, h->mb + i*16, linesize);
2497 else
2498 idct_add (ptr, h->mb + i*16, linesize);
2499 }
41e4055b 2500 }
43efd19a 2501 }
1eb96035
MN
2502 }else{
2503 if(transform_bypass){
2504 idct_dc_add =
2505 idct_add = s->dsp.add_pixels4;
2506 }else{
2507 idct_dc_add = s->dsp.h264_idct_dc_add;
2508 idct_add = s->dsp.h264_idct_add;
2509 }
aebb5d6d
MN
2510 for(i=0; i<16; i++){
2511 uint8_t * const ptr= dest_y + block_offset[i];
2512 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 2513
aebb5d6d
MN
2514 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2515 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2516 }else{
2517 uint8_t *topright;
2518 int nnz, tr;
2519 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2520 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2521 assert(mb_y || linesize <= block_offset[i]);
2522 if(!topright_avail){
2523 tr= ptr[3 - linesize]*0x01010101;
2524 topright= (uint8_t*) &tr;
2525 }else
2526 topright= ptr + 4 - linesize;
ac0623b2 2527 }else
aebb5d6d
MN
2528 topright= NULL;
2529
2530 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2531 nnz = h->non_zero_count_cache[ scan8[i] ];
2532 if(nnz){
2533 if(is_h264){
2534 if(nnz == 1 && h->mb[i*16])
2535 idct_dc_add(ptr, h->mb + i*16, linesize);
2536 else
2537 idct_add (ptr, h->mb + i*16, linesize);
2538 }else
2539 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2540 }
ac0623b2 2541 }
41e4055b 2542 }
8b82a956 2543 }
0da71265 2544 }
e7e09b49 2545 }else{
c92a30bb 2546 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2547 if(is_h264){
36940eca 2548 if(!transform_bypass)
93f0c0a4 2549 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2550 }else
e7e09b49 2551 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2552 }
5f7f9719 2553 if(h->deblocking_filter)
93cc10fa 2554 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2555 }else if(is_h264){
e7e09b49 2556 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2557 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2558 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2559 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2560 }
e7e09b49
LLL
2561
2562
2563 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2564 if(is_h264){
ef9d1d15 2565 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
2566 if(transform_bypass){
2567 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
2568 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2569 }else{
2570 for(i=0; i<16; i++){
2571 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 2572 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2573 }
2fd1f0e0
MN
2574 }
2575 }else{
2576 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 2577 }
49c084a7 2578 }else if(h->cbp&15){
2fd1f0e0 2579 if(transform_bypass){
0a8ca22f 2580 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 2581 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 2582 for(i=0; i<16; i+=di){
62bc966f 2583 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 2584 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2585 }
ef9d1d15 2586 }
2fd1f0e0
MN
2587 }else{
2588 if(IS_8x8DCT(mb_type)){
2589 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2590 }else{
2591 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2592 }
2593 }
4704097a 2594 }
e7e09b49
LLL
2595 }else{
2596 for(i=0; i<16; i++){
2597 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2598 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2599 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2600 }
4704097a 2601 }
0da71265
MN
2602 }
2603 }
0da71265 2604
621561cd 2605 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2606 uint8_t *dest[2] = {dest_cb, dest_cr};
2607 if(transform_bypass){
96465b90
MN
2608 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2609 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2610 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2611 }else{
c25ac15a 2612 idct_add = s->dsp.add_pixels4;
96465b90
MN
2613 for(i=16; i<16+8; i++){
2614 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2615 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2616 }
2617 }
ef9d1d15 2618 }else{
4691a77d
2619 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2620 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 2621 if(is_h264){
c25ac15a
MN
2622 idct_add = s->dsp.h264_idct_add;
2623 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
2624 for(i=16; i<16+8; i++){
2625 if(h->non_zero_count_cache[ scan8[i] ])
2626 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2627 else if(h->mb[i*16])
2628 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2629 }
aebb5d6d
MN
2630 }else{
2631 for(i=16; i<16+8; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2633 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2634 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2635 }
e7e09b49 2636 }
4704097a 2637 }
0da71265
MN
2638 }
2639 }
2640 }
c212fb0c
MN
2641 if(h->cbp || IS_INTRA(mb_type))
2642 s->dsp.clear_blocks(h->mb);
2643
53c05b1e 2644 if(h->deblocking_filter) {
5f7f9719
MN
2645 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2646 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2647 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2648 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2649 if (!simple && FRAME_MBAFF) {
5f7f9719 2650 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2651 } else {
3e20143e 2652 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2653 }
53c05b1e 2654 }
0da71265
MN
2655}
2656
0da71265 2657/**
bd91fee3
AS
2658 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2659 */
2660static void hl_decode_mb_simple(H264Context *h){
2661 hl_decode_mb_internal(h, 1);
2662}
2663
2664/**
2665 * Process a macroblock; this handles edge cases, such as interlacing.
2666 */
2667static void av_noinline hl_decode_mb_complex(H264Context *h){
2668 hl_decode_mb_internal(h, 0);
2669}
2670
2671static void hl_decode_mb(H264Context *h){
2672 MpegEncContext * const s = &h->s;
64514ee8 2673 const int mb_xy= h->mb_xy;
bd91fee3 2674 const int mb_type= s->current_picture.mb_type[mb_xy];
1dd488e9 2675 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2676
fedec603 2677 if(ENABLE_H264_ENCODER && !s->decode)
bd91fee3
AS
2678 return;
2679
2680 if (is_complex)
2681 hl_decode_mb_complex(h);
2682 else hl_decode_mb_simple(h);
2683}
2684
2143b118 2685static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2686 int i;
2687 for (i = 0; i < 4; ++i) {
2143b118 2688 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2689 pic->data[i] += pic->linesize[i];
2143b118 2690 pic->reference = parity;
11cc1d8c
JD
2691 pic->linesize[i] *= 2;
2692 }
2879c75f 2693 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2694}
2695
2696static int split_field_copy(Picture *dest, Picture *src,
2697 int parity, int id_add){
2698 int match = !!(src->reference & parity);
2699
2700 if (match) {
2701 *dest = *src;
d4f7d838 2702 if(parity != PICT_FRAME){
b3e93fd4
MN
2703 pic_as_field(dest, parity);
2704 dest->pic_id *= 2;
2705 dest->pic_id += id_add;
d4f7d838 2706 }
11cc1d8c
JD
2707 }
2708
2709 return match;
2710}
2711
d4f7d838
MN
2712static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2713 int i[2]={0};
2714 int index=0;
11cc1d8c 2715
d4f7d838
MN
2716 while(i[0]<len || i[1]<len){
2717 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2718 i[0]++;
2719 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2720 i[1]++;
2721 if(i[0] < len){
2722 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2724 }
2725 if(i[1] < len){
2726 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2728 }
2729 }
2730
d4f7d838 2731 return index;
11cc1d8c
JD
2732}
2733
d4f7d838
MN
2734static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2735 int i, best_poc;
2736 int out_i= 0;
11cc1d8c 2737
d4f7d838
MN
2738 for(;;){
2739 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2740
d4f7d838
MN
2741 for(i=0; i<len; i++){
2742 const int poc= src[i]->poc;
2743 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2744 best_poc= poc;
2745 sorted[out_i]= src[i];
2746 }
2747 }
2748 if(best_poc == (dir ? INT_MIN : INT_MAX))
2749 break;
2750 limit= sorted[out_i++]->poc - dir;
2751 }
2752 return out_i;
11cc1d8c
JD
2753}
2754
bd91fee3 2755/**
0da71265
MN
2756 * fills the default_ref_list.
2757 */
2758static int fill_default_ref_list(H264Context *h){
2759 MpegEncContext * const s = &h->s;
d4f7d838 2760 int i, len;
115329f1 2761
9f5c1037 2762 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2763 Picture *sorted[32];
2764 int cur_poc, list;
2765 int lens[2];
11cc1d8c 2766
d4f7d838
MN
2767 if(FIELD_PICTURE)
2768 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2769 else
2770 cur_poc= s->current_picture_ptr->poc;
086acdd5 2771
d4f7d838
MN
2772 for(list= 0; list<2; list++){
2773 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2774 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2775 assert(len<=32);
2776 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2777 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2778 assert(len<=32);
086acdd5 2779
d4f7d838
MN
2780 if(len < h->ref_count[list])
2781 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2782 lens[list]= len;
086acdd5
JD
2783 }
2784
d4f7d838
MN
2785 if(lens[0] == lens[1] && lens[1] > 1){
2786 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2787 if(i == lens[0])
2788 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2789 }
086acdd5 2790 }else{
d4f7d838
MN
2791 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2792 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2793 assert(len <= 32);
2794 if(len < h->ref_count[0])
2795 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2796 }
827c91bf
LLL
2797#ifdef TRACE
2798 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2799 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2800 }
9f5c1037 2801 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2802 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2803 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2804 }
2805 }
2806#endif
0da71265
MN
2807 return 0;
2808}
2809
827c91bf
LLL
2810static void print_short_term(H264Context *h);
2811static void print_long_term(H264Context *h);
2812
949da388
JD
2813/**
2814 * Extract structure information about the picture described by pic_num in
2815 * the current decoding context (frame or field). Note that pic_num is
2816 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2817 * @param pic_num picture number for which to extract structure information
2818 * @param structure one of PICT_XXX describing structure of picture
2819 * with pic_num
2820 * @return frame number (short term) or long term index of picture
2821 * described by pic_num
2822 */
2823static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2824 MpegEncContext * const s = &h->s;
2825
2826 *structure = s->picture_structure;
2827 if(FIELD_PICTURE){
2828 if (!(pic_num & 1))
2829 /* opposite field */
2830 *structure ^= PICT_FRAME;
2831 pic_num >>= 1;
2832 }
2833
2834 return pic_num;
2835}
2836
0da71265
MN
2837static int decode_ref_pic_list_reordering(H264Context *h){
2838 MpegEncContext * const s = &h->s;
949da388 2839 int list, index, pic_structure;
115329f1 2840
827c91bf
LLL
2841 print_short_term(h);
2842 print_long_term(h);
115329f1 2843
3425501d 2844 for(list=0; list<h->list_count; list++){
0da71265
MN
2845 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2846
2847 if(get_bits1(&s->gb)){
2848 int pred= h->curr_pic_num;
0da71265
MN
2849
2850 for(index=0; ; index++){
9963b332 2851 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
88e7a4d1 2852 unsigned int pic_id;
0da71265 2853 int i;
2f944356 2854 Picture *ref = NULL;
115329f1
DB
2855
2856 if(reordering_of_pic_nums_idc==3)
0bc42cad 2857 break;
115329f1 2858
0da71265 2859 if(index >= h->ref_count[list]){
9b879566 2860 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2861 return -1;
2862 }
115329f1 2863
0da71265
MN
2864 if(reordering_of_pic_nums_idc<3){
2865 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2866 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2867 int frame_num;
0da71265 2868
03d3cab8 2869 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2870 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2871 return -1;
2872 }
2873
2874 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2875 else pred+= abs_diff_pic_num;
2876 pred &= h->max_pic_num - 1;
115329f1 2877
949da388
JD
2878 frame_num = pic_num_extract(h, pred, &pic_structure);
2879
0d175622
MN
2880 for(i= h->short_ref_count-1; i>=0; i--){
2881 ref = h->short_ref[i];
949da388 2882 assert(ref->reference);
0d175622 2883 assert(!ref->long_ref);
6edac8e1 2884 if(
af8c5e08
MN
2885 ref->frame_num == frame_num &&
2886 (ref->reference & pic_structure)
6edac8e1 2887 )
0da71265
MN
2888 break;
2889 }
0d175622 2890 if(i>=0)
949da388 2891 ref->pic_id= pred;
0da71265 2892 }else{
949da388 2893 int long_idx;
0da71265 2894 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2895
2896 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2897
2898 if(long_idx>31){
88e7a4d1
MN
2899 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2900 return -1;
2901 }
949da388
JD
2902 ref = h->long_ref[long_idx];
2903 assert(!(ref && !ref->reference));
af8c5e08 2904 if(ref && (ref->reference & pic_structure)){
ac658be5 2905 ref->pic_id= pic_id;
ac658be5
FOL
2906 assert(ref->long_ref);
2907 i=0;
2908 }else{
2909 i=-1;
2910 }
0da71265
MN
2911 }
2912
0d315f28 2913 if (i < 0) {
9b879566 2914 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2915 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2916 } else {
2917 for(i=index; i+1<h->ref_count[list]; i++){
2918 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2919 break;
21be92bf
MN
2920 }
2921 for(; i > index; i--){
2922 h->ref_list[list][i]= h->ref_list[list][i-1];
2923 }
0d175622 2924 h->ref_list[list][index]= *ref;
949da388 2925 if (FIELD_PICTURE){
2143b118 2926 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2927 }
0da71265 2928 }
0bc42cad 2929 }else{
9b879566 2930 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2931 return -1;
2932 }
2933 }
2934 }
0da71265 2935 }
3425501d 2936 for(list=0; list<h->list_count; list++){
6ab87211 2937 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2938 if(!h->ref_list[list][index].data[0]){
2939 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2940 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2941 }
6ab87211 2942 }
6ab87211 2943 }
115329f1 2944
115329f1 2945 return 0;
0da71265
MN
2946}
2947
91c58c94 2948static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2949 int list, i, j;
3425501d 2950 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2951 for(i=0; i<h->ref_count[list]; i++){
2952 Picture *frame = &h->ref_list[list][i];
2953 Picture *field = &h->ref_list[list][16+2*i];
2954 field[0] = *frame;
2955 for(j=0; j<3; j++)
2956 field[0].linesize[j] <<= 1;
2143b118 2957 field[0].reference = PICT_TOP_FIELD;
078f42dd 2958 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2959 field[1] = field[0];
2960 for(j=0; j<3; j++)
2961 field[1].data[j] += frame->linesize[j];
2143b118 2962 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2963 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2964
2965 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2966 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2967 for(j=0; j<2; j++){
2968 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2969 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2970 }
2971 }
2972 }
2973 for(j=0; j<h->ref_count[1]; j++){
2974 for(i=0; i<h->ref_count[0]; i++)
2975 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2976 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2977 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2978 }
2979}
2980
0da71265
MN
2981static int pred_weight_table(H264Context *h){
2982 MpegEncContext * const s = &h->s;
2983 int list, i;
9f2d1b4f 2984 int luma_def, chroma_def;
115329f1 2985
9f2d1b4f
LM
2986 h->use_weight= 0;
2987 h->use_weight_chroma= 0;
0da71265
MN
2988 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2989 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2990 luma_def = 1<<h->luma_log2_weight_denom;
2991 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2992
2993 for(list=0; list<2; list++){
2994 for(i=0; i<h->ref_count[list]; i++){
2995 int luma_weight_flag, chroma_weight_flag;
115329f1 2996
0da71265
MN
2997 luma_weight_flag= get_bits1(&s->gb);
2998 if(luma_weight_flag){
2999 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3000 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f
LM
3001 if( h->luma_weight[list][i] != luma_def
3002 || h->luma_offset[list][i] != 0)
3003 h->use_weight= 1;
3004 }else{
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
0da71265
MN
3007 }
3008
0af6967e 3009 if(CHROMA){
fef744d4
MN
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3012 int j;
3013 for(j=0; j<2; j++){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
3017 || h->chroma_offset[list][i][j] != 0)
3018 h->use_weight_chroma= 1;
3019 }
3020 }else{
3021 int j;
3022 for(j=0; j<2; j++){
3023 h->chroma_weight[list][i][j]= chroma_def;
3024 h->chroma_offset[list][i][j]= 0;
3025 }
0da71265
MN
3026 }
3027 }
3028 }
9f5c1037 3029 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 3030 }
9f2d1b4f 3031 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3032 return 0;
3033}
3034
9f2d1b4f
LM
3035static void implicit_weight_table(H264Context *h){
3036 MpegEncContext * const s = &h->s;
9f2d1b4f
LM
3037 int ref0, ref1;
3038 int cur_poc = s->current_picture_ptr->poc;
3039
3040 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3041 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3042 h->use_weight= 0;
3043 h->use_weight_chroma= 0;
3044 return;
3045 }
3046
3047 h->use_weight= 2;
3048 h->use_weight_chroma= 2;
3049 h->luma_log2_weight_denom= 5;
3050 h->chroma_log2_weight_denom= 5;
3051
9f2d1b4f
LM
3052 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3053 int poc0 = h->ref_list[0][ref0].poc;
3054 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3055 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3056 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3057 if(td){
f66e4f5f 3058 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3059 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3060 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3061 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3062 h->implicit_weight[ref0][ref1] = 32;
3063 else
3064 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3065 }else
3066 h->implicit_weight[ref0][ref1] = 32;
3067 }
3068 }
3069}
3070
8fd57a66
JD
3071/**
3072 * Mark a picture as no longer needed for reference. The refmask
3073 * argument allows unreferencing of individual fields or the whole frame.
3074 * If the picture becomes entirely unreferenced, but is being held for
3075 * display purposes, it is marked as such.
3076 * @param refmask mask of fields to unreference; the mask is bitwise
3077 * anded with the reference marking of pic
3078 * @return non-zero if pic becomes entirely unreferenced (except possibly
3079 * for display purposes) zero if one of the fields remains in
3080 * reference
3081 */
3082static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3083 int i;
8fd57a66
JD
3084 if (pic->reference &= refmask) {
3085 return 0;
3086 } else {
79f4494a
MN
3087 for(i = 0; h->delayed_pic[i]; i++)
3088 if(pic == h->delayed_pic[i]){
3089 pic->reference=DELAYED_PIC_REF;
3090 break;
3091 }
8fd57a66
JD
3092 return 1;
3093 }
4e4d983e
LM
3094}
3095
0da71265 3096/**
5175b937 3097 * instantaneous decoder refresh.
0da71265
MN
3098 */
3099static void idr(H264Context *h){
4e4d983e 3100 int i;
0da71265 3101
dc032f33 3102 for(i=0; i<16; i++){
9c0e4624 3103 remove_long(h, i, 0);
0da71265 3104 }
849b9cef 3105 assert(h->long_ref_count==0);
0da71265
MN
3106
3107 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3108 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3109 h->short_ref[i]= NULL;
3110 }
3111 h->short_ref_count=0;
a149c1a5 3112 h->prev_frame_num= 0;
80f8e035
MN
3113 h->prev_frame_num_offset= 0;
3114 h->prev_poc_msb=
3115 h->prev_poc_lsb= 0;
0da71265
MN
3116}
3117
7c33ad19
LM
3118/* forget old pics after a seek */
3119static void flush_dpb(AVCodecContext *avctx){
3120 H264Context *h= avctx->priv_data;
3121 int i;
64b9d48f 3122 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3123 if(h->delayed_pic[i])
3124 h->delayed_pic[i]->reference= 0;
7c33ad19 3125 h->delayed_pic[i]= NULL;
285b570f 3126 }
df8a7dff 3127 h->outputed_poc= INT_MIN;
7c33ad19 3128 idr(h);
ca159196
MR
3129 if(h->s.current_picture_ptr)
3130 h->s.current_picture_ptr->reference= 0;
12d96de3 3131 h->s.first_field= 0;
e240f898 3132 ff_mpeg_flush(avctx);
7c33ad19
LM
3133}
3134
0da71265 3135/**
47e112f8
JD
3136 * Find a Picture in the short term reference list by frame number.
3137 * @param frame_num frame number to search for
3138 * @param idx the index into h->short_ref where returned picture is found
3139 * undefined if no picture found.
3140 * @return pointer to the found picture, or NULL if no pic with the provided
3141 * frame number is found
0da71265 3142 */
47e112f8 3143static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce 3144 MpegEncContext * const s = &h->s;
0da71265 3145 int i;
115329f1 3146
0da71265
MN
3147 for(i=0; i<h->short_ref_count; i++){
3148 Picture *pic= h->short_ref[i];
1924f3ce 3149 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3150 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
47e112f8
JD
3151 if(pic->frame_num == frame_num) {
3152 *idx = i;
0da71265
MN
3153 return pic;
3154 }
3155 }
3156 return NULL;
3157}
3158
3159/**
47e112f8
JD
3160 * Remove a picture from the short term reference list by its index in
3161 * that list. This does no checking on the provided index; it is assumed
3162 * to be valid. Other list entries are shifted down.
3163 * @param i index into h->short_ref of picture to remove.
3164 */
3165static void remove_short_at_index(H264Context *h, int i){
e1f15d38 3166 assert(i >= 0 && i < h->short_ref_count);
47e112f8
JD
3167 h->short_ref[i]= NULL;
3168 if (--h->short_ref_count)
3169 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3170}
3171
3172/**
3173 *
3174 * @return the removed picture or NULL if an error occurs
3175 */
d9e32422 3176static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
47e112f8
JD
3177 MpegEncContext * const s = &h->s;
3178 Picture *pic;
3179 int i;