Use ref_poc and ref_count from the correct field.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265
MN
22/**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265 34#include "golomb.h"
626464fb 35#include "rectangle.h"
0da71265 36
e5017ab8 37#include "cabac.h"
52cb7981
JD
38#ifdef ARCH_X86
39#include "i386/h264_i386.h"
40#endif
e5017ab8 41
2848ce84 42//#undef NDEBUG
0da71265
MN
43#include <assert.h>
44
2ddcf84b
JD
45/**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
48 */
49#define DELAYED_PIC_REF 4
50
0da71265
MN
51static VLC coeff_token_vlc[4];
52static VLC chroma_dc_coeff_token_vlc;
53
54static VLC total_zeros_vlc[15];
55static VLC chroma_dc_total_zeros_vlc[3];
56
57static VLC run_vlc[6];
58static VLC run7_vlc;
59
8b82a956
MN
60static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 62static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 63static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 64static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 65
849f1035 66static av_always_inline uint32_t pack16to32(int a, int b){
377ec888
MN
67#ifdef WORDS_BIGENDIAN
68 return (b&0xFFFF) + (a<<16);
69#else
70 return (a&0xFFFF) + (b<<16);
71#endif
72}
73
acd8d10f
PI
/* Lookup table: ff_rem6[i] == i % 6 for i in 0..51 (one period per row). */
const uint8_t ff_rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
77
/* Lookup table: ff_div6[i] == i / 6 for i in 0..51 (one quotient per row). */
const uint8_t ff_div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
81
143d7f14
PK
/*
 * Index sets used by fill_caches() to pick entries out of the left
 * neighbour(s). Row 0 is the default; rows 1..3 are selected in the
 * FRAME_MBAFF case when the left pair and the current macroblock differ in
 * frame/field coding.
 * Columns 0..3 index luma rows, columns 4..7 index non_zero_count entries.
 */
static const int left_block_options[4][8]={
    /* [0] */ { 0, 1, 2, 3,   7, 10,  8, 11 },
    /* [1] */ { 2, 2, 3, 3,   8, 11,  8, 11 },
    /* [2] */ { 0, 0, 1, 1,   7, 10,  7, 10 },
    /* [3] */ { 0, 2, 0, 2,   7, 10,  7, 10 },
};
acd8d10f 88
70abb407 89static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 90 MpegEncContext * const s = &h->s;
64514ee8 91 const int mb_xy= h->mb_xy;
0da71265
MN
92 int topleft_xy, top_xy, topright_xy, left_xy[2];
93 int topleft_type, top_type, topright_type, left_type[2];
143d7f14 94 int * left_block;
02f7695b 95 int topleft_partition= -1;
0da71265
MN
96 int i;
97
36e097bc
JD
98 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
99
717b1733 100 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 101 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
102 return;
103
2cab6401
DB
104 /* Wow, what a mess, why didn't they simplify the interlacing & intra
105 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 106
6867a90b
LLL
107 topleft_xy = top_xy - 1;
108 topright_xy= top_xy + 1;
109 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 110 left_block = left_block_options[0];
5d18eaad 111 if(FRAME_MBAFF){
6867a90b
LLL
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
a9c9a240 122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
6867a90b
LLL
123 if (bottom
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
126 ) {
127 top_xy -= s->mb_stride;
128 }
129 if (bottom
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
132 ) {
133 topleft_xy -= s->mb_stride;
02f7695b
LM
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
1412060e 136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 137 topleft_partition = 0;
6867a90b
LLL
138 }
139 if (bottom
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
142 ) {
143 topright_xy -= s->mb_stride;
144 }
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
148 if (bottom) {
143d7f14 149 left_block = left_block_options[1];
6867a90b 150 } else {
143d7f14 151 left_block= left_block_options[2];
6867a90b
LLL
152 }
153 } else {
154 left_xy[1] += s->mb_stride;
143d7f14 155 left_block = left_block_options[3];
6867a90b
LLL
156 }
157 }
0da71265
MN
158 }
159
826de46e
LLL
160 h->top_mb_xy = top_xy;
161 h->left_mb_xy[0] = left_xy[0];
162 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 163 if(for_deblock){
717b1733
LM
164 topleft_type = 0;
165 topright_type = 0;
46f2f05f 166 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
46f2f05f
MN
167 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
168 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad
LM
169
170 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
171 int list;
172 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
173 for(i=0; i<16; i++)
174 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
3425501d 175 for(list=0; list<h->list_count; list++){
5d18eaad
LM
176 if(USES_LIST(mb_type,list)){
177 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
178 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
191e8ca7 179 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad
LM
180 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
181 dst[0] = src[0];
182 dst[1] = src[1];
183 dst[2] = src[2];
184 dst[3] = src[3];
185 }
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
188 ref += h->b8_stride;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
191 }else{
192 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
193 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
194 }
195 }
196 }
46f2f05f
MN
197 }else{
198 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
199 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
200 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
201 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
202 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
203 }
0da71265
MN
204
205 if(IS_INTRA(mb_type)){
115329f1
DB
206 h->topleft_samples_available=
207 h->top_samples_available=
0da71265
MN
208 h->left_samples_available= 0xFFFF;
209 h->topright_samples_available= 0xEEEA;
210
211 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
212 h->topleft_samples_available= 0xB3FF;
213 h->top_samples_available= 0x33FF;
214 h->topright_samples_available= 0x26EA;
215 }
216 for(i=0; i<2; i++){
217 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
218 h->topleft_samples_available&= 0xDF5F;
219 h->left_samples_available&= 0x5F5F;
220 }
221 }
115329f1 222
0da71265
MN
223 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
224 h->topleft_samples_available&= 0x7FFF;
115329f1 225
0da71265
MN
226 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
227 h->topright_samples_available&= 0xFBFF;
115329f1 228
0da71265
MN
229 if(IS_INTRA4x4(mb_type)){
230 if(IS_INTRA4x4(top_type)){
231 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
232 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
233 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
234 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
235 }else{
236 int pred;
6fbcaaa0 237 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
0da71265 238 pred= -1;
6fbcaaa0
LLL
239 else{
240 pred= 2;
0da71265
MN
241 }
242 h->intra4x4_pred_mode_cache[4+8*0]=
243 h->intra4x4_pred_mode_cache[5+8*0]=
244 h->intra4x4_pred_mode_cache[6+8*0]=
245 h->intra4x4_pred_mode_cache[7+8*0]= pred;
246 }
247 for(i=0; i<2; i++){
248 if(IS_INTRA4x4(left_type[i])){
249 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
250 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
251 }else{
252 int pred;
6fbcaaa0 253 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
0da71265 254 pred= -1;
6fbcaaa0
LLL
255 else{
256 pred= 2;
0da71265
MN
257 }
258 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
259 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
260 }
261 }
262 }
263 }
115329f1
DB
264
265
0da71265 266/*
115329f1
DB
2670 . T T. T T T T
2681 L . .L . . . .
2692 L . .L . . . .
2703 . T TL . . . .
2714 L . .L . . . .
2725 L . .. . . . .
0da71265 273*/
1412060e 274//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 275 if(top_type){
6867a90b
LLL
276 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
277 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
278 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 279 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 280
6867a90b 281 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 282 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 283
6867a90b 284 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 285 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 286
0da71265 287 }else{
115329f1 288 h->non_zero_count_cache[4+8*0]=
0da71265
MN
289 h->non_zero_count_cache[5+8*0]=
290 h->non_zero_count_cache[6+8*0]=
291 h->non_zero_count_cache[7+8*0]=
115329f1 292
0da71265
MN
293 h->non_zero_count_cache[1+8*0]=
294 h->non_zero_count_cache[2+8*0]=
115329f1 295
0da71265 296 h->non_zero_count_cache[1+8*3]=
3981c385 297 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 298
0da71265 299 }
826de46e 300
6867a90b
LLL
301 for (i=0; i<2; i++) {
302 if(left_type[i]){
303 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
304 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
305 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
306 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 307 }else{
115329f1
DB
308 h->non_zero_count_cache[3+8*1 + 2*8*i]=
309 h->non_zero_count_cache[3+8*2 + 2*8*i]=
310 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 311 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
312 }
313 }
314
315 if( h->pps.cabac ) {
316 // top_cbp
317 if(top_type) {
318 h->top_cbp = h->cbp_table[top_xy];
319 } else if(IS_INTRA(mb_type)) {
320 h->top_cbp = 0x1C0;
321 } else {
322 h->top_cbp = 0;
323 }
324 // left_cbp
325 if (left_type[0]) {
326 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
327 } else if(IS_INTRA(mb_type)) {
328 h->left_cbp = 0x1C0;
329 } else {
330 h->left_cbp = 0;
331 }
332 if (left_type[0]) {
333 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
334 }
335 if (left_type[1]) {
336 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 337 }
0da71265 338 }
6867a90b 339
0da71265 340#if 1
e2e5894a 341 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 342 int list;
3425501d 343 for(list=0; list<h->list_count; list++){
e2e5894a 344 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
345 /*if(!h->mv_cache_clean[list]){
346 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
347 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
348 h->mv_cache_clean[list]= 1;
349 }*/
5ad984c9 350 continue;
0da71265
MN
351 }
352 h->mv_cache_clean[list]= 0;
115329f1 353
53b19144 354 if(USES_LIST(top_type, list)){
0da71265
MN
355 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
356 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
357 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
358 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
359 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
360 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
361 h->ref_cache[list][scan8[0] + 0 - 1*8]=
362 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
363 h->ref_cache[list][scan8[0] + 2 - 1*8]=
364 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
365 }else{
115329f1
DB
366 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
367 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
368 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
369 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
370 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
371 }
372
4672503d
LM
373 for(i=0; i<2; i++){
374 int cache_idx = scan8[0] - 1 + i*2*8;
375 if(USES_LIST(left_type[i], list)){
376 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
377 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
378 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
379 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
380 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
381 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
382 }else{
383 *(uint32_t*)h->mv_cache [list][cache_idx ]=
384 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
385 h->ref_cache[list][cache_idx ]=
386 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
387 }
0da71265
MN
388 }
389
ae08a563 390 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
46f2f05f
MN
391 continue;
392
53b19144 393 if(USES_LIST(topleft_type, list)){
02f7695b
LM
394 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
395 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
396 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
397 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
398 }else{
399 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
400 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
401 }
115329f1 402
53b19144 403 if(USES_LIST(topright_type, list)){
e2e5894a
LM
404 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
405 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
406 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
407 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
408 }else{
409 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
410 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 }
e2e5894a 412
ae08a563 413 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 414 continue;
115329f1
DB
415
416 h->ref_cache[list][scan8[5 ]+1] =
417 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 418 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 419 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
420 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
421 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
422 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 423 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
424 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
425 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
426
427 if( h->pps.cabac ) {
428 /* XXX beurk, Load mvd */
53b19144 429 if(USES_LIST(top_type, list)){
9e528114
LA
430 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
431 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
432 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
433 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
434 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
435 }else{
115329f1
DB
436 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
437 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
438 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
439 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
440 }
53b19144 441 if(USES_LIST(left_type[0], list)){
9e528114
LA
442 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
445 }else{
446 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
447 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
448 }
53b19144 449 if(USES_LIST(left_type[1], list)){
9e528114
LA
450 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
451 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
453 }else{
454 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
456 }
457 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
458 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 459 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
460 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
461 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 462
9f5c1037 463 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
464 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
465
466 if(IS_DIRECT(top_type)){
467 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
468 }else if(IS_8X8(top_type)){
469 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
470 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
471 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
472 }else{
473 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
474 }
115329f1 475
5d18eaad
LM
476 if(IS_DIRECT(left_type[0]))
477 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
478 else if(IS_8X8(left_type[0]))
479 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
480 else
481 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
482
483 if(IS_DIRECT(left_type[1]))
5ad984c9 484 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
485 else if(IS_8X8(left_type[1]))
486 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
487 else
5ad984c9 488 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
489 }
490 }
491
492 if(FRAME_MBAFF){
493#define MAP_MVS\
494 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
495 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
496 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
497 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
498 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
499 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
500 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
501 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
502 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
503 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
504 if(MB_FIELD){
505#define MAP_F2F(idx, mb_type)\
506 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
507 h->ref_cache[list][idx] <<= 1;\
508 h->mv_cache[list][idx][1] /= 2;\
509 h->mvd_cache[list][idx][1] /= 2;\
510 }
511 MAP_MVS
512#undef MAP_F2F
513 }else{
514#define MAP_F2F(idx, mb_type)\
515 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
516 h->ref_cache[list][idx] >>= 1;\
517 h->mv_cache[list][idx][1] <<= 1;\
518 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 519 }
5d18eaad
LM
520 MAP_MVS
521#undef MAP_F2F
5ad984c9 522 }
9e528114 523 }
0da71265 524 }
0da71265
MN
525 }
526#endif
43efd19a
LM
527
528 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
529}
530
531static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 532 const int mb_xy= h->mb_xy;
0da71265
MN
533
534 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
535 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
536 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
537 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
538 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
539 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
540 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
541}
542
543/**
544 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
545 */
546static inline int check_intra4x4_pred_mode(H264Context *h){
547 MpegEncContext * const s = &h->s;
548 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
549 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
550 int i;
115329f1 551
0da71265
MN
552 if(!(h->top_samples_available&0x8000)){
553 for(i=0; i<4; i++){
554 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
555 if(status<0){
9b879566 556 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
557 return -1;
558 } else if(status){
559 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
560 }
561 }
562 }
115329f1 563
0da71265
MN
564 if(!(h->left_samples_available&0x8000)){
565 for(i=0; i<4; i++){
566 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
567 if(status<0){
9b879566 568 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
569 return -1;
570 } else if(status){
571 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
572 }
573 }
574 }
575
576 return 0;
577} //FIXME cleanup like next
578
579/**
580 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
581 */
582static inline int check_intra_pred_mode(H264Context *h, int mode){
583 MpegEncContext * const s = &h->s;
584 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
585 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 586
43ff0714 587 if(mode > 6U) {
5175b937 588 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 589 return -1;
5175b937 590 }
115329f1 591
0da71265
MN
592 if(!(h->top_samples_available&0x8000)){
593 mode= top[ mode ];
594 if(mode<0){
9b879566 595 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
596 return -1;
597 }
598 }
115329f1 599
0da71265
MN
600 if(!(h->left_samples_available&0x8000)){
601 mode= left[ mode ];
602 if(mode<0){
9b879566 603 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 604 return -1;
115329f1 605 }
0da71265
MN
606 }
607
608 return mode;
609}
610
611/**
612 * gets the predicted intra4x4 prediction mode.
613 */
614static inline int pred_intra_mode(H264Context *h, int n){
615 const int index8= scan8[n];
616 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
617 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
618 const int min= FFMIN(left, top);
619
a9c9a240 620 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
621
622 if(min<0) return DC_PRED;
623 else return min;
624}
625
626static inline void write_back_non_zero_count(H264Context *h){
64514ee8 627 const int mb_xy= h->mb_xy;
0da71265 628
6867a90b
LLL
629 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
630 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
631 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 632 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
633 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
634 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
635 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 636
6867a90b 637 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 638 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 639 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 640
6867a90b 641 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 642 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 643 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
5d18eaad
LM
644
645 if(FRAME_MBAFF){
646 // store all luma nnzs, for deblocking
647 int v = 0, i;
648 for(i=0; i<16; i++)
649 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
650 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
651 }
0da71265
MN
652}
653
654/**
1412060e 655 * gets the predicted number of non-zero coefficients.
0da71265
MN
656 * @param n block index
657 */
658static inline int pred_non_zero_count(H264Context *h, int n){
659 const int index8= scan8[n];
660 const int left= h->non_zero_count_cache[index8 - 1];
661 const int top = h->non_zero_count_cache[index8 - 8];
662 int i= left + top;
115329f1 663
0da71265
MN
664 if(i<64) i= (i+1)>>1;
665
a9c9a240 666 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
667
668 return i&31;
669}
670
1924f3ce
MN
671static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
672 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 673 MpegEncContext *s = &h->s;
1924f3ce 674
5d18eaad
LM
675 /* there is no consistent mapping of mvs to neighboring locations that will
676 * make mbaff happy, so we can't move all this logic to fill_caches */
677 if(FRAME_MBAFF){
191e8ca7 678 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
679 const int16_t *mv;
680 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
681 *C = h->mv_cache[list][scan8[0]-2];
682
683 if(!MB_FIELD
684 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
685 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
686 if(IS_INTERLACED(mb_types[topright_xy])){
687#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
688 const int x4 = X4, y4 = Y4;\
689 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 690 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
691 return LIST_NOT_USED;\
692 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
693 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
694 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
695 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
696
697 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
698 }
699 }
700 if(topright_ref == PART_NOT_AVAILABLE
701 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
702 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
703 if(!MB_FIELD
704 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
705 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
706 }
707 if(MB_FIELD
708 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
709 && i >= scan8[0]+8){
1412060e 710 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 711 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
712 }
713 }
714#undef SET_DIAG_MV
715 }
716
1924f3ce
MN
717 if(topright_ref != PART_NOT_AVAILABLE){
718 *C= h->mv_cache[list][ i - 8 + part_width ];
719 return topright_ref;
720 }else{
a9c9a240 721 tprintf(s->avctx, "topright MV not available\n");
95c26348 722
1924f3ce
MN
723 *C= h->mv_cache[list][ i - 8 - 1 ];
724 return h->ref_cache[list][ i - 8 - 1 ];
725 }
726}
727
0da71265
MN
728/**
729 * gets the predicted MV.
730 * @param n the block index
731 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
732 * @param mx the x component of the predicted motion vector
733 * @param my the y component of the predicted motion vector
734 */
735static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
736 const int index8= scan8[n];
737 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
738 const int left_ref= h->ref_cache[list][ index8 - 1 ];
739 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
740 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
741 const int16_t * C;
742 int diagonal_ref, match_count;
743
0da71265 744 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 745
0da71265 746/* mv_cache
115329f1 747 B . . A T T T T
0da71265
MN
748 U . . L . . , .
749 U . . L . . . .
750 U . . L . . , .
751 . . . L . . . .
752*/
1924f3ce
MN
753
754 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
755 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 756 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
757 if(match_count > 1){ //most common
758 *mx= mid_pred(A[0], B[0], C[0]);
759 *my= mid_pred(A[1], B[1], C[1]);
760 }else if(match_count==1){
761 if(left_ref==ref){
762 *mx= A[0];
115329f1 763 *my= A[1];
1924f3ce
MN
764 }else if(top_ref==ref){
765 *mx= B[0];
115329f1 766 *my= B[1];
0da71265 767 }else{
1924f3ce 768 *mx= C[0];
115329f1 769 *my= C[1];
0da71265
MN
770 }
771 }else{
1924f3ce 772 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 773 *mx= A[0];
115329f1 774 *my= A[1];
0da71265 775 }else{
1924f3ce
MN
776 *mx= mid_pred(A[0], B[0], C[0]);
777 *my= mid_pred(A[1], B[1], C[1]);
0da71265 778 }
0da71265 779 }
115329f1 780
a9c9a240 781 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
782}
783
784/**
785 * gets the directionally predicted 16x8 MV.
786 * @param n the block index
787 * @param mx the x component of the predicted motion vector
788 * @param my the y component of the predicted motion vector
789 */
790static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
791 if(n==0){
792 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
793 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
794
a9c9a240 795 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 796
0da71265
MN
797 if(top_ref == ref){
798 *mx= B[0];
799 *my= B[1];
800 return;
801 }
802 }else{
803 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
804 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 805
a9c9a240 806 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
807
808 if(left_ref == ref){
809 *mx= A[0];
810 *my= A[1];
811 return;
812 }
813 }
814
815 //RARE
816 pred_motion(h, n, 4, list, ref, mx, my);
817}
818
819/**
820 * gets the directionally predicted 8x16 MV.
821 * @param n the block index
822 * @param mx the x component of the predicted motion vector
823 * @param my the y component of the predicted motion vector
824 */
825static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
826 if(n==0){
827 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
828 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 829
a9c9a240 830 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
831
832 if(left_ref == ref){
833 *mx= A[0];
834 *my= A[1];
835 return;
836 }
837 }else{
1924f3ce
MN
838 const int16_t * C;
839 int diagonal_ref;
840
841 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 842
a9c9a240 843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 844
115329f1 845 if(diagonal_ref == ref){
0da71265
MN
846 *mx= C[0];
847 *my= C[1];
848 return;
849 }
0da71265
MN
850 }
851
852 //RARE
853 pred_motion(h, n, 2, list, ref, mx, my);
854}
855
856static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
857 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
858 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
859
a9c9a240 860 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
861
862 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
863 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
864 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
115329f1 865
0da71265
MN
866 *mx = *my = 0;
867 return;
868 }
115329f1 869
0da71265
MN
870 pred_motion(h, 0, 4, 0, 0, mx, my);
871
872 return;
873}
874
5ad984c9 875static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
876 MpegEncContext * const s = &h->s;
877 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9
LM
878 const int poc1 = h->ref_list[1][0].poc;
879 int i;
880 for(i=0; i<h->ref_count[0]; i++){
881 int poc0 = h->ref_list[0][i].poc;
f66e4f5f 882 int td = av_clip(poc1 - poc0, -128, 127);
4c0d57bd 883 if(td == 0 || h->ref_list[0][i].long_ref){
5ad984c9
LM
884 h->dist_scale_factor[i] = 256;
885 }else{
f66e4f5f 886 int tb = av_clip(poc - poc0, -128, 127);
c26abfa5 887 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 888 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
5ad984c9
LM
889 }
890 }
5d18eaad
LM
891 if(FRAME_MBAFF){
892 for(i=0; i<h->ref_count[0]; i++){
893 h->dist_scale_factor_field[2*i] =
894 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
895 }
896 }
5ad984c9 897}
2f944356
LM
898static inline void direct_ref_list_init(H264Context * const h){
899 MpegEncContext * const s = &h->s;
900 Picture * const ref1 = &h->ref_list[1][0];
901 Picture * const cur = s->current_picture_ptr;
902 int list, i, j;
2879c75f 903 int sidx= s->picture_structure&1;
45260d4f 904 int ref1sidx= ref1->reference&1;
9701840b 905 if(cur->pict_type == FF_I_TYPE)
2879c75f 906 cur->ref_count[sidx][0] = 0;
9701840b 907 if(cur->pict_type != FF_B_TYPE)
2879c75f 908 cur->ref_count[sidx][1] = 0;
2f944356 909 for(list=0; list<2; list++){
2879c75f 910 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 911 for(j=0; j<h->ref_count[list]; j++)
2879c75f 912 cur->ref_poc[sidx][list][j] = h->ref_list[list][j].poc;
2f944356 913 }
9701840b 914 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356
LM
915 return;
916 for(list=0; list<2; list++){
45260d4f
MN
917 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
918 const int poc = ref1->ref_poc[ref1sidx][list][i];
171c4076 919 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
2f944356
LM
920 for(j=0; j<h->ref_count[list]; j++)
921 if(h->ref_list[list][j].poc == poc){
922 h->map_col_to_list0[list][i] = j;
923 break;
924 }
925 }
926 }
5d18eaad
LM
927 if(FRAME_MBAFF){
928 for(list=0; list<2; list++){
45260d4f 929 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
5d18eaad
LM
930 j = h->map_col_to_list0[list][i];
931 h->map_col_to_list0_field[list][2*i] = 2*j;
932 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
933 }
934 }
935 }
2f944356 936}
5ad984c9
LM
937
938static inline void pred_direct_motion(H264Context * const h, int *mb_type){
939 MpegEncContext * const s = &h->s;
71041842
MN
940 const int fieldoff= (s->picture_structure & h->ref_list[1][0].reference) ? 0 : (3-2*s->picture_structure);
941 const int mb_xy = h->mb_xy + s->mb_stride*fieldoff;
942 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride + 2*h->b8_stride*fieldoff;
943 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h-> b_stride + 4*h-> b_stride*fieldoff;
5ad984c9
LM
944 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
945 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
4866bd2b 946 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
5ad984c9 947 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
2f944356 948 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
5ad984c9 949 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 950 unsigned int sub_mb_type;
5ad984c9
LM
951 int i8, i4;
952
5d18eaad 953#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
5ad984c9
LM
954 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
955 /* FIXME save sub mb types from previous frames (or derive from MVs)
956 * so we know exactly what block size to use */
957 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
3622988f 958 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
5d18eaad 959 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
5ad984c9
LM
960 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
961 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
962 }else{
963 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
3622988f 964 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
5ad984c9
LM
965 }
966 if(!is_b8x8)
967 *mb_type |= MB_TYPE_DIRECT2;
5d18eaad
LM
968 if(MB_FIELD)
969 *mb_type |= MB_TYPE_INTERLACED;
5ad984c9 970
a9c9a240 971 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
115329f1 972
5ad984c9
LM
973 if(h->direct_spatial_mv_pred){
974 int ref[2];
975 int mv[2][2];
976 int list;
977
5d18eaad
LM
978 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
979
5ad984c9
LM
980 /* ref = min(neighbors) */
981 for(list=0; list<2; list++){
982 int refa = h->ref_cache[list][scan8[0] - 1];
983 int refb = h->ref_cache[list][scan8[0] - 8];
984 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 985 if(refc == PART_NOT_AVAILABLE)
5ad984c9 986 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 987 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
988 if(ref[list] < 0)
989 ref[list] = -1;
990 }
991
992 if(ref[0] < 0 && ref[1] < 0){
993 ref[0] = ref[1] = 0;
994 mv[0][0] = mv[0][1] =
995 mv[1][0] = mv[1][1] = 0;
996 }else{
997 for(list=0; list<2; list++){
998 if(ref[list] >= 0)
999 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1000 else
1001 mv[list][0] = mv[list][1] = 0;
1002 }
1003 }
1004
1005 if(ref[1] < 0){
50b3ab0f
LM
1006 if(!is_b8x8)
1007 *mb_type &= ~MB_TYPE_L1;
1008 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1009 }else if(ref[0] < 0){
50b3ab0f
LM
1010 if(!is_b8x8)
1011 *mb_type &= ~MB_TYPE_L0;
1012 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1013 }
1014
50b3ab0f
LM
1015 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1016 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1017 int mb_types_col[2];
1018 int b8_stride = h->b8_stride;
1019 int b4_stride = h->b_stride;
1020
1021 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1022
1023 if(IS_INTERLACED(*mb_type)){
1024 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1025 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1026 if(s->mb_y&1){
1027 l1ref0 -= 2*b8_stride;
1028 l1ref1 -= 2*b8_stride;
1029 l1mv0 -= 4*b4_stride;
1030 l1mv1 -= 4*b4_stride;
1031 }
1032 b8_stride *= 3;
1033 b4_stride *= 6;
1034 }else{
1035 int cur_poc = s->current_picture_ptr->poc;
1036 int *col_poc = h->ref_list[1]->field_poc;
1037 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1038 int dy = 2*col_parity - (s->mb_y&1);
1039 mb_types_col[0] =
1040 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1041 l1ref0 += dy*b8_stride;
1042 l1ref1 += dy*b8_stride;
1043 l1mv0 += 2*dy*b4_stride;
1044 l1mv1 += 2*dy*b4_stride;
1045 b8_stride = 0;
1046 }
1047
1048 for(i8=0; i8<4; i8++){
1049 int x8 = i8&1;
1050 int y8 = i8>>1;
1051 int xy8 = x8+y8*b8_stride;
1052 int xy4 = 3*x8+y8*b4_stride;
1053 int a=0, b=0;
1054
1055 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1056 continue;
1057 h->sub_mb_type[i8] = sub_mb_type;
1058
1059 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1060 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1061 if(!IS_INTRA(mb_types_col[y8])
1062 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1063 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1064 if(ref[0] > 0)
1065 a= pack16to32(mv[0][0],mv[0][1]);
1066 if(ref[1] > 0)
1067 b= pack16to32(mv[1][0],mv[1][1]);
1068 }else{
1069 a= pack16to32(mv[0][0],mv[0][1]);
1070 b= pack16to32(mv[1][0],mv[1][1]);
1071 }
1072 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1073 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1074 }
1075 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1076 int a=0, b=0;
1077
cec93959
LM
1078 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1079 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
115329f1 1080 if(!IS_INTRA(mb_type_col)
c26abfa5
DB
1081 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1082 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1083 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1084 if(ref[0] > 0)
d19f5acb 1085 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1086 if(ref[1] > 0)
d19f5acb 1087 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1088 }else{
d19f5acb
MN
1089 a= pack16to32(mv[0][0],mv[0][1]);
1090 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1091 }
d19f5acb
MN
1092 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1093 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1094 }else{
1095 for(i8=0; i8<4; i8++){
1096 const int x8 = i8&1;
1097 const int y8 = i8>>1;
115329f1 1098
5ad984c9
LM
1099 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1100 continue;
1101 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1102
5ad984c9
LM
1103 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1104 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1105 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1106 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1107
5ad984c9 1108 /* col_zero_flag */
115329f1
DB
1109 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1110 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
bf4e3bd2 1111 && (h->x264_build>33 || !h->x264_build)))){
4866bd2b 1112 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54
LM
1113 if(IS_SUB_8X8(sub_mb_type)){
1114 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
c26abfa5 1115 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1116 if(ref[0] == 0)
1117 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1118 if(ref[1] == 0)
1119 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1120 }
1121 }else
5ad984c9 1122 for(i4=0; i4<4; i4++){
4866bd2b 1123 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
c26abfa5 1124 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1125 if(ref[0] == 0)
1126 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1127 if(ref[1] == 0)
1128 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1129 }
1130 }
1131 }
1132 }
1133 }
1134 }else{ /* direct temporal mv pred */
5d18eaad
LM
1135 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1136 const int *dist_scale_factor = h->dist_scale_factor;
1137
1138 if(FRAME_MBAFF){
1139 if(IS_INTERLACED(*mb_type)){
1140 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1141 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1142 dist_scale_factor = h->dist_scale_factor_field;
1143 }
1144 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1145 /* FIXME assumes direct_8x8_inference == 1 */
1146 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1147 int mb_types_col[2];
1148 int y_shift;
1149
1150 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1151 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1152 | (*mb_type & MB_TYPE_INTERLACED);
1153 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1154
1155 if(IS_INTERLACED(*mb_type)){
1156 /* frame to field scaling */
1157 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1158 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1159 if(s->mb_y&1){
1160 l1ref0 -= 2*h->b8_stride;
1161 l1ref1 -= 2*h->b8_stride;
1162 l1mv0 -= 4*h->b_stride;
1163 l1mv1 -= 4*h->b_stride;
1164 }
1165 y_shift = 0;
1166
1167 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1168 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1169 && !is_b8x8)
1170 *mb_type |= MB_TYPE_16x8;
1171 else
1172 *mb_type |= MB_TYPE_8x8;
1173 }else{
1174 /* field to frame scaling */
1175 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1176 * but in MBAFF, top and bottom POC are equal */
1177 int dy = (s->mb_y&1) ? 1 : 2;
1178 mb_types_col[0] =
1179 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1180 l1ref0 += dy*h->b8_stride;
1181 l1ref1 += dy*h->b8_stride;
1182 l1mv0 += 2*dy*h->b_stride;
1183 l1mv1 += 2*dy*h->b_stride;
1184 y_shift = 2;
1185
1186 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1187 && !is_b8x8)
1188 *mb_type |= MB_TYPE_16x16;
1189 else
1190 *mb_type |= MB_TYPE_8x8;
1191 }
1192
1193 for(i8=0; i8<4; i8++){
1194 const int x8 = i8&1;
1195 const int y8 = i8>>1;
1196 int ref0, scale;
1197 const int16_t (*l1mv)[2]= l1mv0;
1198
1199 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1200 continue;
1201 h->sub_mb_type[i8] = sub_mb_type;
1202
1203 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1204 if(IS_INTRA(mb_types_col[y8])){
1205 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1206 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1207 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1208 continue;
1209 }
1210
1211 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1212 if(ref0 >= 0)
1213 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1214 else{
1215 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1216 l1mv= l1mv1;
1217 }
1218 scale = dist_scale_factor[ref0];
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1220
1221 {
1222 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1223 int my_col = (mv_col[1]<<y_shift)/2;
1224 int mx = (scale * mv_col[0] + 128) >> 8;
1225 int my = (scale * my_col + 128) >> 8;
1226 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1227 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1228 }
1229 }
1230 return;
1231 }
1232 }
1233
1234 /* one-to-one mv scaling */
1235
5ad984c9 1236 if(IS_16X16(*mb_type)){
fda51641
MN
1237 int ref, mv0, mv1;
1238
5ad984c9
LM
1239 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1240 if(IS_INTRA(mb_type_col)){
fda51641 1241 ref=mv0=mv1=0;
5ad984c9 1242 }else{
5d18eaad
LM
1243 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1244 : map_col_to_list0[1][l1ref1[0]];
1245 const int scale = dist_scale_factor[ref0];
8583bef8 1246 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1247 int mv_l0[2];
5d18eaad
LM
1248 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1249 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1250 ref= ref0;
1251 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1252 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1253 }
fda51641
MN
1254 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1255 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1256 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1257 }else{
1258 for(i8=0; i8<4; i8++){
1259 const int x8 = i8&1;
1260 const int y8 = i8>>1;
5d18eaad 1261 int ref0, scale;
bf4e3bd2 1262 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1263
5ad984c9
LM
1264 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1265 continue;
1266 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1267 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1268 if(IS_INTRA(mb_type_col)){
1269 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1270 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1271 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1272 continue;
1273 }
115329f1 1274
5ad984c9 1275 ref0 = l1ref0[x8 + y8*h->b8_stride];
2f944356 1276 if(ref0 >= 0)
5d18eaad 1277 ref0 = map_col_to_list0[0][ref0];
8583bef8 1278 else{
5d18eaad 1279 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
8583bef8
MN
1280 l1mv= l1mv1;
1281 }
5d18eaad 1282 scale = dist_scale_factor[ref0];
115329f1 1283
5ad984c9 1284 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54
LM
1285 if(IS_SUB_8X8(sub_mb_type)){
1286 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
5d18eaad
LM
1287 int mx = (scale * mv_col[0] + 128) >> 8;
1288 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1289 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1290 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1291 }else
5ad984c9 1292 for(i4=0; i4<4; i4++){
8583bef8 1293 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
5ad984c9 1294 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1295 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1296 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1297 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1298 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1299 }
1300 }
1301 }
1302 }
1303}
1304
0da71265
MN
1305static inline void write_back_motion(H264Context *h, int mb_type){
1306 MpegEncContext * const s = &h->s;
0da71265
MN
1307 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1308 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1309 int list;
1310
2ea39252
LM
1311 if(!USES_LIST(mb_type, 0))
1312 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1313
3425501d 1314 for(list=0; list<h->list_count; list++){
0da71265 1315 int y;
53b19144 1316 if(!USES_LIST(mb_type, list))
5ad984c9 1317 continue;
115329f1 1318
0da71265
MN
1319 for(y=0; y<4; y++){
1320 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1321 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1322 }
9e528114 1323 if( h->pps.cabac ) {
e6e77eb6
LM
1324 if(IS_SKIP(mb_type))
1325 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1326 else
9e528114
LA
1327 for(y=0; y<4; y++){
1328 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1329 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1330 }
1331 }
53b19144
LM
1332
1333 {
191e8ca7 1334 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1335 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1336 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1337 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1338 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1339 }
1340 }
115329f1 1341
9f5c1037 1342 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1343 if(IS_8X8(mb_type)){
53b19144
LM
1344 uint8_t *direct_table = &h->direct_table[b8_xy];
1345 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1346 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1347 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1348 }
1349 }
0da71265
MN
1350}
1351
1352/**
1353 * Decodes a network abstraction layer unit.
1354 * @param consumed is the number of bytes used as input
1355 * @param length is the length of the array
3b66c4c5 1356 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1357 * @returns decoded bytes, might be src+1 if no escapes
0da71265 1358 */
30317501 1359static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1360 int i, si, di;
1361 uint8_t *dst;
24456882 1362 int bufidx;
0da71265 1363
bb270c08 1364// src[0]&0x80; //forbidden bit
0da71265
MN
1365 h->nal_ref_idc= src[0]>>5;
1366 h->nal_unit_type= src[0]&0x1F;
1367
1368 src++; length--;
115329f1 1369#if 0
0da71265
MN
1370 for(i=0; i<length; i++)
1371 printf("%2X ", src[i]);
1372#endif
1373 for(i=0; i+1<length; i+=2){
1374 if(src[i]) continue;
1375 if(i>0 && src[i-1]==0) i--;
1376 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1377 if(src[i+2]!=3){
1378 /* startcode, so we must be past the end */
1379 length=i;
1380 }
1381 break;
1382 }
1383 }
1384
1385 if(i>=length-1){ //no escaped 0
1386 *dst_length= length;
1387 *consumed= length+1; //+1 for the header
115329f1 1388 return src;
0da71265
MN
1389 }
1390
24456882
1391 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1392 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1393 dst= h->rbsp_buffer[bufidx];
0da71265 1394
ac658be5
FOL
1395 if (dst == NULL){
1396 return NULL;
1397 }
1398
3b66c4c5 1399//printf("decoding esc\n");
0da71265 1400 si=di=0;
115329f1 1401 while(si<length){
0da71265
MN
1402 //remove escapes (very rare 1:2^22)
1403 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1404 if(src[si+2]==3){ //escape
1405 dst[di++]= 0;
1406 dst[di++]= 0;
1407 si+=3;
c8470cc1 1408 continue;
0da71265
MN
1409 }else //next start code
1410 break;
1411 }
1412
1413 dst[di++]= src[si++];
1414 }
1415
1416 *dst_length= di;
1417 *consumed= si + 1;//+1 for the header
90b5b51e 1418//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1419 return dst;
1420}
1421
0da71265
MN
1422/**
1423 * identifies the exact end of the bitstream
1424 * @return the length of the trailing, or 0 if damaged
1425 */
30317501 1426static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1427 int v= *src;
1428 int r;
1429
a9c9a240 1430 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1431
1432 for(r=1; r<9; r++){
1433 if(v&1) return r;
1434 v>>=1;
1435 }
1436 return 0;
1437}
1438
1439/**
1412060e 1440 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1441 * @param qp quantization parameter
1442 */
239ea04c 1443static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1444#define stride 16
1445 int i;
1446 int temp[16]; //FIXME check if this is a good idea
1447 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1448 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1449
1450//memset(block, 64, 2*256);
1451//return;
1452 for(i=0; i<4; i++){
1453 const int offset= y_offset[i];
1454 const int z0= block[offset+stride*0] + block[offset+stride*4];
1455 const int z1= block[offset+stride*0] - block[offset+stride*4];
1456 const int z2= block[offset+stride*1] - block[offset+stride*5];
1457 const int z3= block[offset+stride*1] + block[offset+stride*5];
1458
1459 temp[4*i+0]= z0+z3;
1460 temp[4*i+1]= z1+z2;
1461 temp[4*i+2]= z1-z2;
1462 temp[4*i+3]= z0-z3;
1463 }
1464
1465 for(i=0; i<4; i++){
1466 const int offset= x_offset[i];
1467 const int z0= temp[4*0+i] + temp[4*2+i];
1468 const int z1= temp[4*0+i] - temp[4*2+i];
1469 const int z2= temp[4*1+i] - temp[4*3+i];
1470 const int z3= temp[4*1+i] + temp[4*3+i];
1471
1412060e 1472 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1473 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1474 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1475 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1476 }
1477}
1478
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; it relies on the stride macro still being defined.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    for(i=0; i<4; i++){
        const DCTELEM * const in= block + y_offset[i];
        const int sum04= in[stride*0] + in[stride*4];
        const int dif04= in[stride*0] - in[stride*4];
        const int dif15= in[stride*1] - in[stride*5];
        const int sum15= in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04 + sum15;
        temp[4*i+1]= dif04 + dif15;
        temp[4*i+2]= dif04 - dif15;
        temp[4*i+3]= sum04 - sum15;
    }

    for(i=0; i<4; i++){
        DCTELEM * const out= block + x_offset[i];
        const int sum02= temp[4*0+i] + temp[4*2+i];
        const int dif02= temp[4*0+i] - temp[4*2+i];
        const int dif13= temp[4*1+i] - temp[4*3+i];
        const int sum13= temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
1518
0da71265
MN
1519#undef xStride
1520#undef stride
1521
239ea04c 1522static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1523 const int stride= 16*2;
1524 const int xStride= 16;
1525 int a,b,c,d,e;
1526
1527 a= block[stride*0 + xStride*0];
1528 b= block[stride*0 + xStride*1];
1529 c= block[stride*1 + xStride*0];
1530 d= block[stride*1 + xStride*1];
1531
1532 e= a-b;
1533 a= a+b;
1534 b= c-d;
1535 c= c+d;
1536
239ea04c
LM
1537 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1538 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1539 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1540 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1541}
1542
#if 0
/**
 * Transforms the 2x2 chroma dc values in place (disabled code).
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    const int v00= block[stride*0 + xStride*0];
    const int v01= block[stride*0 + xStride*1];
    const int v10= block[stride*1 + xStride*0];
    const int v11= block[stride*1 + xStride*1];
    const int top_sum = v00 + v01;
    const int top_diff= v00 - v01;
    const int bot_sum = v10 + v11;
    const int bot_diff= v10 - v11;

    block[stride*0 + xStride*0]= (top_sum  + bot_sum );
    block[stride*0 + xStride*1]= (top_diff + bot_diff);
    block[stride*1 + xStride*0]= (top_sum  - bot_sum );
    block[stride*1 + xStride*1]= (top_diff - bot_diff);
}
#endif
0da71265
MN
1565
1566/**
1567 * gets the chroma qp.
1568 */
4691a77d 1569static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1570 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1571}
1572
2cab6401 1573//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
0afd2a92
DB
1574//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1575static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
0da71265
MN
1576 int i;
1577 const int * const quant_table= quant_coeff[qscale];
1578 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1579 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1580 const unsigned int threshold2= (threshold1<<1);
1581 int last_non_zero;
1582
0afd2a92 1583 if(separate_dc){
0da71265
MN
1584 if(qscale<=18){
1585 //avoid overflows
1586 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1587 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1588 const unsigned int dc_threshold2= (dc_threshold1<<1);
1589
1590 int level= block[0]*quant_coeff[qscale+18][0];
1591 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1592 if(level>0){
1593 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1594 block[0]= level;
1595 }else{
1596 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1597 block[0]= -level;
1598 }
1599// last_non_zero = i;
1600 }else{
1601 block[0]=0;
1602 }
1603 }else{
1604 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1605 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1606 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607
1608 int level= block[0]*quant_table[0];
1609 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 if(level>0){
1611 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1612 block[0]= level;
1613 }else{
1614 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1615 block[0]= -level;
1616 }
1617// last_non_zero = i;
1618 }else{
1619 block[0]=0;
1620 }
1621 }
1622 last_non_zero= 0;
1623 i=1;
1624 }else{
1625 last_non_zero= -1;
1626 i=0;
1627 }
1628
1629 for(; i<16; i++){
1630 const int j= scantable[i];
1631 int level= block[j]*quant_table[j];
1632
1633// if( bias+level >= (1<<(QMAT_SHIFT - 3))
1634// || bias-level >= (1<<(QMAT_SHIFT - 3))){
1635 if(((unsigned)(level+threshold1))>threshold2){
1636 if(level>0){
1637 level= (bias + level)>>QUANT_SHIFT;
1638 block[j]= level;
1639 }else{
1640 level= (bias - level)>>QUANT_SHIFT;
1641 block[j]= -level;
1642 }
1643 last_non_zero = i;
1644 }else{
1645 block[j]=0;
1646 }
1647 }
1648
1649 return last_non_zero;
1650}
1651
0da71265
MN
1652static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1653 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1654 int src_x_offset, int src_y_offset,
1655 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1656 MpegEncContext * const s = &h->s;
1657 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1658 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1659 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1660 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1661 uint8_t * src_cb, * src_cr;
1662 int extra_width= h->emu_edge_width;
1663 int extra_height= h->emu_edge_height;
0da71265
MN
1664 int emu=0;
1665 const int full_mx= mx>>2;
1666 const int full_my= my>>2;
fbd312fd 1667 const int pic_width = 16*s->mb_width;
0d43dd8c 1668 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1669
1412060e 1670 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
171c4076 1671 return;
115329f1 1672
0da71265
MN
1673 if(mx&7) extra_width -= 3;
1674 if(my&7) extra_height -= 3;
115329f1
DB
1675
1676 if( full_mx < 0-extra_width
1677 || full_my < 0-extra_height
1678 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1679 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1680 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1681 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1682 emu=1;
1683 }
115329f1 1684
5d18eaad 1685 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1686 if(!square){
5d18eaad 1687 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1688 }
115329f1 1689
87352549 1690 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1691
0d43dd8c 1692 if(MB_FIELD){
5d18eaad 1693 // chroma offset when predicting from a field of opposite parity
2143b118 1694 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1695 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1696 }
1697 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1698 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1699
0da71265 1700 if(emu){
5d18eaad 1701 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1702 src_cb= s->edge_emu_buffer;
1703 }
5d18eaad 1704 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1705
1706 if(emu){
5d18eaad 1707 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1708 src_cr= s->edge_emu_buffer;
1709 }
5d18eaad 1710 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1711}
1712
9f2d1b4f 1713static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1714 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1715 int x_offset, int y_offset,
1716 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1717 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1718 int list0, int list1){
1719 MpegEncContext * const s = &h->s;
1720 qpel_mc_func *qpix_op= qpix_put;
1721 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1722
5d18eaad
LM
1723 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1724 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1725 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1726 x_offset += 8*s->mb_x;
0d43dd8c 1727 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1728
0da71265 1729 if(list0){
1924f3ce 1730 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1731 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1732 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1733 qpix_op, chroma_op);
1734
1735 qpix_op= qpix_avg;
1736 chroma_op= chroma_avg;
1737 }
1738
1739 if(list1){
1924f3ce 1740 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1741 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1742 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1743 qpix_op, chroma_op);
1744 }
1745}
1746
9f2d1b4f
LM
1747static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1748 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1749 int x_offset, int y_offset,
1750 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1751 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1752 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1753 int list0, int list1){
1754 MpegEncContext * const s = &h->s;
1755
5d18eaad
LM
1756 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1757 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1758 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1759 x_offset += 8*s->mb_x;
0d43dd8c 1760 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1761
9f2d1b4f
LM
1762 if(list0 && list1){
1763 /* don't optimize for luma-only case, since B-frames usually
1764 * use implicit weights => chroma too. */
1765 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1766 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1767 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1768 int refn0 = h->ref_cache[0][ scan8[n] ];
1769 int refn1 = h->ref_cache[1][ scan8[n] ];
1770
1771 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1772 dest_y, dest_cb, dest_cr,
1773 x_offset, y_offset, qpix_put, chroma_put);
1774 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1775 tmp_y, tmp_cb, tmp_cr,
1776 x_offset, y_offset, qpix_put, chroma_put);
1777
1778 if(h->use_weight == 2){
1779 int weight0 = h->implicit_weight[refn0][refn1];
1780 int weight1 = 64 - weight0;
5d18eaad
LM
1781 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1782 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1783 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1784 }else{
5d18eaad 1785 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1786 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1787 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1788 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1789 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1790 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1791 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1792 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1793 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1794 }
1795 }else{
1796 int list = list1 ? 1 : 0;
1797 int refn = h->ref_cache[list][ scan8[n] ];
1798 Picture *ref= &h->ref_list[list][refn];
1799 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1800 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1801 qpix_put, chroma_put);
1802
5d18eaad 1803 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1804 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1805 if(h->use_weight_chroma){
5d18eaad 1806 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1807 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1808 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1809 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1810 }
1811 }
1812}
1813
1814static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1815 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1816 int x_offset, int y_offset,
1817 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1818 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1819 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1820 int list0, int list1){
1821 if((h->use_weight==2 && list0 && list1
1822 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1823 || h->use_weight==1)
1824 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1825 x_offset, y_offset, qpix_put, chroma_put,
1826 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1827 else
1828 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1829 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1830}
1831
513fbd8e
LM
1832static inline void prefetch_motion(H264Context *h, int list){
1833 /* fetch pixels for estimated mv 4 macroblocks ahead
1834 * optimized for 64byte cache lines */
1835 MpegEncContext * const s = &h->s;
1836 const int refn = h->ref_cache[list][scan8[0]];
1837 if(refn >= 0){
1838 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1839 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1840 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1841 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1842 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1843 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1844 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1845 }
1846}
1847
0da71265
MN
1848static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1849 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1850 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1851 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1852 MpegEncContext * const s = &h->s;
64514ee8 1853 const int mb_xy= h->mb_xy;
0da71265 1854 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1855
0da71265 1856 assert(IS_INTER(mb_type));
115329f1 1857
513fbd8e
LM
1858 prefetch_motion(h, 0);
1859
0da71265
MN
1860 if(IS_16X16(mb_type)){
1861 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1862 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1863 &weight_op[0], &weight_avg[0],
0da71265
MN
1864 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1865 }else if(IS_16X8(mb_type)){
1866 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1867 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1868 &weight_op[1], &weight_avg[1],
0da71265
MN
1869 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1870 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1871 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1872 &weight_op[1], &weight_avg[1],
0da71265
MN
1873 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1874 }else if(IS_8X16(mb_type)){
5d18eaad 1875 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1876 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1877 &weight_op[2], &weight_avg[2],
0da71265 1878 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1879 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1880 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1881 &weight_op[2], &weight_avg[2],
0da71265
MN
1882 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1883 }else{
1884 int i;
115329f1 1885
0da71265
MN
1886 assert(IS_8X8(mb_type));
1887
1888 for(i=0; i<4; i++){
1889 const int sub_mb_type= h->sub_mb_type[i];
1890 const int n= 4*i;
1891 int x_offset= (i&1)<<2;
1892 int y_offset= (i&2)<<1;
1893
1894 if(IS_SUB_8X8(sub_mb_type)){
1895 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1896 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1897 &weight_op[3], &weight_avg[3],
0da71265
MN
1898 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1899 }else if(IS_SUB_8X4(sub_mb_type)){
1900 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1901 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1902 &weight_op[4], &weight_avg[4],
0da71265
MN
1903 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1904 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1905 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1906 &weight_op[4], &weight_avg[4],
0da71265
MN
1907 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1908 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1909 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1910 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1911 &weight_op[5], &weight_avg[5],
0da71265 1912 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1913 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1914 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1915 &weight_op[5], &weight_avg[5],
0da71265
MN
1916 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1917 }else{
1918 int j;
1919 assert(IS_SUB_4X4(sub_mb_type));
1920 for(j=0; j<4; j++){
1921 int sub_x_offset= x_offset + 2*(j&1);
1922 int sub_y_offset= y_offset + (j&2);
1923 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1924 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1925 &weight_op[6], &weight_avg[6],
0da71265
MN
1926 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1927 }
1928 }
1929 }
1930 }
513fbd8e
LM
1931
1932 prefetch_motion(h, 1);
0da71265
MN
1933}
1934
98a6fff9 1935static av_cold void decode_init_vlc(void){
0da71265
MN
1936 static int done = 0;
1937
1938 if (!done) {
1939 int i;
1940 done = 1;
1941
115329f1 1942 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1943 &chroma_dc_coeff_token_len [0], 1, 1,
073c2593 1944 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
0da71265
MN
1945
1946 for(i=0; i<4; i++){
115329f1 1947 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1948 &coeff_token_len [i][0], 1, 1,
073c2593 1949 &coeff_token_bits[i][0], 1, 1, 1);
0da71265
MN
1950 }
1951
1952 for(i=0; i<3; i++){
1953 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1954 &chroma_dc_total_zeros_len [i][0], 1, 1,
073c2593 1955 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
0da71265
MN
1956 }
1957 for(i=0; i<15; i++){
115329f1 1958 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1959 &total_zeros_len [i][0], 1, 1,
073c2593 1960 &total_zeros_bits[i][0], 1, 1, 1);
0da71265
MN
1961 }
1962
1963 for(i=0; i<6; i++){
115329f1 1964 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
0da71265 1965 &run_len [i][0], 1, 1,
073c2593 1966 &run_bits[i][0], 1, 1, 1);
0da71265 1967 }
115329f1 1968 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1969 &run_len [6][0], 1, 1,
073c2593 1970 &run_bits[6][0], 1, 1, 1);
0da71265
MN
1971 }
1972}
1973
0da71265 1974static void free_tables(H264Context *h){
7978debd 1975 int i;
afebe2f7 1976 H264Context *hx;
0da71265 1977 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1978 av_freep(&h->chroma_pred_mode_table);
1979 av_freep(&h->cbp_table);
9e528114
LA
1980 av_freep(&h->mvd_table[0]);
1981 av_freep(&h->mvd_table[1]);
5ad984c9 1982 av_freep(&h->direct_table);
0da71265
MN
1983 av_freep(&h->non_zero_count);
1984 av_freep(&h->slice_table_base);
1985 h->slice_table= NULL;
e5017ab8 1986
0da71265
MN
1987 av_freep(&h->mb2b_xy);
1988 av_freep(&h->mb2b8_xy);
9f2d1b4f 1989
7978debd
1990 for(i = 0; i < MAX_SPS_COUNT; i++)
1991 av_freep(h->sps_buffers + i);
1992
1993 for(i = 0; i < MAX_PPS_COUNT; i++)
1994 av_freep(h->pps_buffers + i);
afebe2f7
1995
1996 for(i = 0; i < h->s.avctx->thread_count; i++) {
1997 hx = h->thread_context[i];
1998 if(!hx) continue;
1999 av_freep(&hx->top_borders[1]);
2000 av_freep(&hx->top_borders[0]);
2001 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 2002 }
0da71265
MN
2003}
2004
239ea04c
LM
2005static void init_dequant8_coeff_table(H264Context *h){
2006 int i,q,x;
548a1c8a 2007 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2008 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2009 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2010
2011 for(i=0; i<2; i++ ){
2012 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2013 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2014 break;
2015 }
2016
2017 for(q=0; q<52; q++){
acd8d10f
PI
2018 int shift = ff_div6[q];
2019 int idx = ff_rem6[q];
239ea04c 2020 for(x=0; x<64; x++)
548a1c8a
LM
2021 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2022 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2023 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2024 }
2025 }
2026}
2027
2028static void init_dequant4_coeff_table(H264Context *h){
2029 int i,j,q,x;
ab2e3e2c 2030 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2031 for(i=0; i<6; i++ ){
2032 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2033 for(j=0; j<i; j++){
2034 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2035 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2036 break;
2037 }
2038 }
2039 if(j<i)
2040 continue;
2041
2042 for(q=0; q<52; q++){
acd8d10f
PI
2043 int shift = ff_div6[q] + 2;
2044 int idx = ff_rem6[q];
239ea04c 2045 for(x=0; x<16; x++)
ab2e3e2c
LM
2046 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2047 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2048 h->pps.scaling_matrix4[i][x]) << shift;
2049 }
2050 }
2051}
2052
2053static void init_dequant_tables(H264Context *h){
2054 int i,x;
2055 init_dequant4_coeff_table(h);
2056 if(h->pps.transform_8x8_mode)
2057 init_dequant8_coeff_table(h);
2058 if(h->sps.transform_bypass){
2059 for(i=0; i<6; i++)
2060 for(x=0; x<16; x++)
2061 h->dequant4_coeff[i][0][x] = 1<<6;
2062 if(h->pps.transform_8x8_mode)
2063 for(i=0; i<2; i++)
2064 for(x=0; x<64; x++)
2065 h->dequant8_coeff[i][0][x] = 1<<6;
2066 }
2067}
2068
2069
0da71265
MN
2070/**
2071 * allocates tables.
3b66c4c5 2072 * needs width/height
0da71265
MN
2073 */
2074static int alloc_tables(H264Context *h){
2075 MpegEncContext * const s = &h->s;
7bc9090a 2076 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2077 int x,y;
0da71265
MN
2078
2079 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2080
53c05b1e 2081 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
5d18eaad 2082 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
5d0e4cb8 2083 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2084
7526ade2
MN
2085 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2086 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2087 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2088 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2089
5d18eaad
LM
2090 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2091 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2092
a55f20bd
LM
2093 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2094 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2095 for(y=0; y<s->mb_height; y++){
2096 for(x=0; x<s->mb_width; x++){
7bc9090a 2097 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2098 const int b_xy = 4*x + 4*y*h->b_stride;
2099 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2100
0da71265
MN
2101 h->mb2b_xy [mb_xy]= b_xy;
2102 h->mb2b8_xy[mb_xy]= b8_xy;
2103 }
2104 }
9f2d1b4f 2105
9c6221ae
GV
2106 s->obmc_scratchpad = NULL;
2107
56edbd81
LM
2108 if(!h->dequant4_coeff[0])
2109 init_dequant_tables(h);
2110
0da71265
MN
2111 return 0;
2112fail:
2113 free_tables(h);
2114 return -1;
2115}
2116
afebe2f7
2117/**
2118 * Mimic alloc_tables(), but for every context thread.
2119 */
2120static void clone_tables(H264Context *dst, H264Context *src){
2121 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2122 dst->non_zero_count = src->non_zero_count;
2123 dst->slice_table = src->slice_table;
2124 dst->cbp_table = src->cbp_table;
2125 dst->mb2b_xy = src->mb2b_xy;
2126 dst->mb2b8_xy = src->mb2b8_xy;
2127 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2128 dst->mvd_table[0] = src->mvd_table[0];
2129 dst->mvd_table[1] = src->mvd_table[1];
2130 dst->direct_table = src->direct_table;
2131
afebe2f7
2132 dst->s.obmc_scratchpad = NULL;
2133 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2134}
2135
2136/**
2137 * Init context
2138 * Allocate buffers which are not shared amongst multiple threads.
2139 */
2140static int context_init(H264Context *h){
afebe2f7
2141 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2142 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2143
afebe2f7
2144 return 0;
2145fail:
2146 return -1; // free_tables will clean up for us
2147}
2148
98a6fff9 2149static av_cold void common_init(H264Context *h){
0da71265 2150 MpegEncContext * const s = &h->s;
0da71265
MN
2151
2152 s->width = s->avctx->width;
2153 s->height = s->avctx->height;
2154 s->codec_id= s->avctx->codec->id;
115329f1 2155
c92a30bb 2156 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2157
239ea04c 2158 h->dequant_coeff_pps= -1;
9a41c2c7 2159 s->unrestricted_mv=1;
0da71265 2160 s->decode=1; //FIXME
56edbd81
LM
2161
2162 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2163 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2164}
2165
98a6fff9 2166static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2167 H264Context *h= avctx->priv_data;
2168 MpegEncContext * const s = &h->s;
2169
3edcacde 2170 MPV_decode_defaults(s);
115329f1 2171
0da71265
MN
2172 s->avctx = avctx;
2173 common_init(h);
2174
2175 s->out_format = FMT_H264;
2176 s->workaround_bugs= avctx->workaround_bugs;
2177
2178 // set defaults
0da71265 2179// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2180 s->quarter_sample = 1;
0da71265 2181 s->low_delay= 1;
7a9dba3c
MN
2182
2183 if(avctx->codec_id == CODEC_ID_SVQ3)
2184 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2185 else
1d42f410 2186 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2187
c2212338 2188 decode_init_vlc();
115329f1 2189
26165f99
MR
2190 if(avctx->extradata_size > 0 && avctx->extradata &&
2191 *(char *)avctx->extradata == 1){
4770b1b4
RT
2192 h->is_avc = 1;
2193 h->got_avcC = 0;
26165f99
MR
2194 } else {
2195 h->is_avc = 0;
4770b1b4
RT
2196 }
2197
afebe2f7 2198 h->thread_context[0] = h;
18c7be65 2199 h->outputed_poc = INT_MIN;
0da71265
MN
2200 return 0;
2201}
2202
af8aa846 2203static int frame_start(H264Context *h){
0da71265
MN
2204 MpegEncContext * const s = &h->s;
2205 int i;
2206
af8aa846
MN
2207 if(MPV_frame_start(s, s->avctx) < 0)
2208 return -1;
0da71265 2209 ff_er_frame_start(s);
3a22d7fa
JD
2210 /*
2211 * MPV_frame_start uses pict_type to derive key_frame.
2212 * This is incorrect for H.264; IDR markings must be used.
1412060e 2213 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2214 * See decode_nal_units().
2215 */
2216 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2217
2218 assert(s->linesize && s->uvlinesize);
2219
2220 for(i=0; i<16; i++){
2221 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2222 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2223 }
2224 for(i=0; i<4; i++){
2225 h->block_offset[16+i]=
2226 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2227 h->block_offset[24+16+i]=
2228 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2229 }
2230
934b0821
LM
2231 /* can't be in alloc_tables because linesize isn't known there.
2232 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2233 for(i = 0; i < s->avctx->thread_count; i++)
2234 if(!h->thread_context[i]->s.obmc_scratchpad)
2235 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2236
2237 /* some macroblocks will be accessed before they're available */
afebe2f7 2238 if(FRAME_MBAFF || s->avctx->thread_count > 1)
5d18eaad 2239 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
934b0821 2240
0da71265 2241// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2242
1412060e 2243 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2244 // that if we break out due to an error it can be released automatically
2245 // in the next MPV_frame_start().
2246 // SVQ3 as well as most other codecs have only last/next/current and thus
2247 // get released even with set reference, besides SVQ3 and others do not
2248 // mark frames as reference later "naturally".
2249 if(s->codec_id != CODEC_ID_SVQ3)
2250 s->current_picture_ptr->reference= 0;
357282c6
MN
2251
2252 s->current_picture_ptr->field_poc[0]=
2253 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2254 assert(s->current_picture_ptr->long_ref==0);
357282c6 2255
af8aa846 2256 return 0;
0da71265
MN
2257}
2258
93cc10fa 2259static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2260 MpegEncContext * const s = &h->s;
2261 int i;
115329f1 2262
53c05b1e
MN
2263 src_y -= linesize;
2264 src_cb -= uvlinesize;
2265 src_cr -= uvlinesize;
2266
3b66c4c5 2267 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2268 // and the line above the bottom macroblock
6ba71fc4 2269 h->left_border[0]= h->top_borders[0][s->mb_x][15];
53c05b1e
MN
2270 for(i=1; i<17; i++){
2271 h->left_border[i]= src_y[15+i* linesize];
2272 }
115329f1 2273
6ba71fc4
LLL
2274 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2275 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2276
87352549 2277 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
6ba71fc4
LLL
2278 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2279 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
53c05b1e
MN
2280 for(i=1; i<9; i++){
2281 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2282 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2283 }
6ba71fc4
LLL
2284 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2286 }
2287}
2288
93cc10fa 2289static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2290 MpegEncContext * const s = &h->s;
2291 int temp8, i;
2292 uint64_t temp64;
b69378e2
2293 int deblock_left;
2294 int deblock_top;
2295 int mb_xy;
2296
2297 if(h->deblocking_filter == 2) {
64514ee8 2298 mb_xy = h->mb_xy;
b69378e2
2299 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2300 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2301 } else {
2302 deblock_left = (s->mb_x > 0);
2303 deblock_top = (s->mb_y > 0);
2304 }
53c05b1e
MN
2305
2306 src_y -= linesize + 1;
2307 src_cb -= uvlinesize + 1;
2308 src_cr -= uvlinesize + 1;
2309
2310#define XCHG(a,b,t,xchg)\
2311t= a;\
2312if(xchg)\
2313 a= b;\
2314b= t;
d89dc06a
LM
2315
2316 if(deblock_left){
2317 for(i = !deblock_top; i<17; i++){
2318 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2319 }
2320 }
2321
2322 if(deblock_top){
6ba71fc4
LLL
2323 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2324 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2325 if(s->mb_x+1 < s->mb_width){
43efd19a
LM
2326 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2327 }
53c05b1e 2328 }
53c05b1e 2329
87352549 2330 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a
LM
2331 if(deblock_left){
2332 for(i = !deblock_top; i<9; i++){
2333 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2334 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2335 }
2336 }
2337 if(deblock_top){
6ba71fc4
LLL
2338 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2339 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2340 }
2341 }
2342}
2343
2344static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2345 MpegEncContext * const s = &h->s;
2346 int i;
115329f1 2347
6ba71fc4
LLL
2348 src_y -= 2 * linesize;
2349 src_cb -= 2 * uvlinesize;
2350 src_cr -= 2 * uvlinesize;
2351
3b66c4c5 2352 // There are two lines saved, the line above the the top macroblock of a pair,
6ba71fc4
LLL
2353 // and the line above the bottom macroblock
2354 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2355 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2356 for(i=2; i<34; i++){
2357 h->left_border[i]= src_y[15+i* linesize];
2358 }
115329f1 2359
6ba71fc4
LLL
2360 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2361 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2362 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2363 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2364
87352549 2365 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
6ba71fc4
LLL
2366 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2367 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2368 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2369 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2370 for(i=2; i<18; i++){
2371 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2372 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2373 }
2374 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2375 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2376 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2377 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
2378 }
2379}
2380
2381static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2382 MpegEncContext * const s = &h->s;
2383 int temp8, i;
2384 uint64_t temp64;
2385 int deblock_left = (s->mb_x > 0);
5d18eaad 2386 int deblock_top = (s->mb_y > 1);
6ba71fc4 2387
a9c9a240 2388 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
6ba71fc4
LLL
2389
2390 src_y -= 2 * linesize + 1;
2391 src_cb -= 2 * uvlinesize + 1;
2392 src_cr -= 2 * uvlinesize + 1;
2393
2394#define XCHG(a,b,t,xchg)\
2395t= a;\
2396if(xchg)\
2397 a= b;\
2398b= t;
2399
2400 if(deblock_left){
2401 for(i = (!deblock_top)<<1; i<34; i++){
2402 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2403 }
2404 }
2405
2406 if(deblock_top){
2407 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2408 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2409 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2410 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
5d18eaad
LM
2411 if(s->mb_x+1 < s->mb_width){
2412 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2413 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2414 }
6ba71fc4
LLL
2415 }
2416
87352549 2417 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
6ba71fc4
LLL
2418 if(deblock_left){
2419 for(i = (!deblock_top) << 1; i<18; i++){
2420 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2421 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2422 }
2423 }
2424 if(deblock_top){
2425 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2426 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2427 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2428 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
53c05b1e 2429 }
53c05b1e
MN
2430 }
2431}
2432
/**
 * Reconstruct the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: intra prediction or motion compensation, residual
 * addition, and finally deblocking when h->deblocking_filter is set.
 *
 * @param h      decoder context
 * @param simple when non-zero, the MBAFF/field, intra PCM, gray-only,
 *               encoder and non-H.264 (svq3_*) paths are not taken; every
 *               such path below is guarded by a test on this flag
 */
5a6a6cc7 2433static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2434 MpegEncContext * const s = &h->s;
2435 const int mb_x= s->mb_x;
2436 const int mb_y= s->mb_y;
64514ee8 2437 const int mb_xy= h->mb_xy;
0da71265
MN
2438 const int mb_type= s->current_picture.mb_type[mb_xy];
2439 uint8_t *dest_y, *dest_cb, *dest_cr;
2440 int linesize, uvlinesize /*dct_offset*/;
2441 int i;
6867a90b 2442 int *block_offset = &h->block_offset[0];
6ba71fc4 2443 const unsigned int bottom = mb_y & 1;
bd91fee3 2444 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
36940eca 2445 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2446 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2447
0da71265
MN
/* top-left sample of this macroblock in the luma and the two chroma planes */
2448 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2449 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2450 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2451
a957c27b
LM
2452 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2453 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2454
/* field macroblock: doubled line strides, second half of block_offset, and
 * for odd mb_y the destination is moved up so it starts on the other field's line */
bd91fee3 2455 if (!simple && MB_FIELD) {
5d18eaad
LM
2456 linesize = h->mb_linesize = s->linesize * 2;
2457 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2458 block_offset = &h->block_offset[24];
1412060e 2459 if(mb_y&1){ //FIXME move out of this function?
0da71265 2460 dest_y -= s->linesize*15;
6867a90b
LLL
2461 dest_cb-= s->uvlinesize*7;
2462 dest_cr-= s->uvlinesize*7;
0da71265 2463 }
5d18eaad
LM
2464 if(FRAME_MBAFF) {
2465 int list;
/* remap the cached reference indices to (16+ref)^(mb_y&1) for every list this macroblock uses */
3425501d 2466 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2467 if(!USES_LIST(mb_type, list))
2468 continue;
2469 if(IS_16X16(mb_type)){
2470 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2471 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2472 }else{
2473 for(i=0; i<16; i+=4){
2474 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2475 int ref = h->ref_cache[list][scan8[i]];
2476 if(ref >= 0)
1710856c 2477 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2478 }
2479 }
2480 }
2481 }
0da71265 2482 } else {
5d18eaad
LM
2483 linesize = h->mb_linesize = s->linesize;
2484 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2485// dct_offset = s->linesize * 16;
2486 }
115329f1 2487
ef9d1d15
LM
/* pick the luma residual-add functions: plain pixel add for transform bypass,
 * otherwise the 8x8 or 4x4 IDCT variants depending on the macroblock type */
2488 if(transform_bypass){
2489 idct_dc_add =
2490 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2491 }else if(IS_8x8DCT(mb_type)){
2492 idct_dc_add = s->dsp.h264_idct8_dc_add;
2493 idct_add = s->dsp.h264_idct8_add;
2494 }else{
2495 idct_dc_add = s->dsp.h264_idct_dc_add;
2496 idct_add = s->dsp.h264_idct_add;
2497 }
0da71265 2498
bd91fee3 2499 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
5d18eaad
LM
2500 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2501 int mbt_y = mb_y&~1;
2502 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2503 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2504 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2505 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2506 }
2507
/* intra PCM: samples are copied straight from h->mb into the picture */
bd91fee3 2508 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2509 for (i=0; i<16; i++) {
2510 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2511 }
c1708e8d
MN
2512 for (i=0; i<8; i++) {
2513 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2514 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2515 }
e7e09b49
LLL
2516 } else {
2517 if(IS_INTRA(mb_type)){
/* non-MBAFF case: borders are exchanged here (last argument 1) and again after prediction (last argument 0) */
bd91fee3 2518 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
93cc10fa 2519 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2520
87352549 2521 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2522 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2523 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2524 }
0da71265 2525
e7e09b49 2526 if(IS_INTRA4x4(mb_type)){
bd91fee3 2527 if(simple || !s->encoding){
43efd19a
LM
2528 if(IS_8x8DCT(mb_type)){
2529 for(i=0; i<16; i+=4){
2530 uint8_t * const ptr= dest_y + block_offset[i];
2531 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
ef9d1d15 2532 const int nnz = h->non_zero_count_cache[ scan8[i] ];
c92a30bb 2533 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
4672503d 2534 (h->topright_samples_available<<i)&0x4000, linesize);
ef9d1d15
LM
2535 if(nnz){
2536 if(nnz == 1 && h->mb[i*16])
2537 idct_dc_add(ptr, h->mb + i*16, linesize);
2538 else
2539 idct_add(ptr, h->mb + i*16, linesize);
2540 }
43efd19a
LM
2541 }
2542 }else
e7e09b49 2543 for(i=0; i<16; i++){
6867a90b 2544 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2545 uint8_t *topright;
2546 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
ef9d1d15 2547 int nnz, tr;
e7e09b49
LLL
2548
2549 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2550 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
6867a90b 2551 assert(mb_y || linesize <= block_offset[i]);
/* no top-right samples: replicate the last sample of the row above into all four bytes of tr */
e7e09b49
LLL
2552 if(!topright_avail){
2553 tr= ptr[3 - linesize]*0x01010101;
2554 topright= (uint8_t*) &tr;
115329f1 2555 }else
e7e09b49 2556 topright= ptr + 4 - linesize;
a9799653 2557 }else
e7e09b49
LLL
2558 topright= NULL;
2559
c92a30bb 2560 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
ef9d1d15
LM
2561 nnz = h->non_zero_count_cache[ scan8[i] ];
2562 if(nnz){
bd91fee3 2563 if(is_h264){
ef9d1d15
LM
2564 if(nnz == 1 && h->mb[i*16])
2565 idct_dc_add(ptr, h->mb + i*16, linesize);
2566 else
2567 idct_add(ptr, h->mb + i*16, linesize);
2568 }else
e7e09b49
LLL
2569 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2570 }
8b82a956 2571 }
0da71265 2572 }
e7e09b49 2573 }else{
c92a30bb 2574 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2575 if(is_h264){
36940eca 2576 if(!transform_bypass)
93f0c0a4 2577 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2578 }else
e7e09b49 2579 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2580 }
bd91fee3 2581 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
93cc10fa 2582 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2583 }else if(is_h264){
e7e09b49 2584 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2585 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2586 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2587 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2588 }
e7e09b49
LLL
2589
2590
/* luma residual for everything except intra 4x4, whose residual was added block by block above */
2591 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2592 if(is_h264){
ef9d1d15
LM
2593 if(IS_INTRA16x16(mb_type)){
2594 for(i=0; i<16; i++){
2595 if(h->non_zero_count_cache[ scan8[i] ])
2596 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2597 else if(h->mb[i*16])
2598 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2599 }
2600 }else{
2601 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2602 for(i=0; i<16; i+=di){
2603 int nnz = h->non_zero_count_cache[ scan8[i] ];
2604 if(nnz){
2605 if(nnz==1 && h->mb[i*16])
2606 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2607 else
2608 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2609 }
e7e09b49 2610 }
4704097a 2611 }
e7e09b49
LLL
2612 }else{
2613 for(i=0; i<16; i++){
2614 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2615 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2616 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2617 }
4704097a 2618 }
0da71265
MN
2619 }
2620 }
0da71265 2621
/* chroma residual: blocks 16..23, where bit 2 of the block index selects dest_cb or dest_cr */
87352549 2622 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
ef9d1d15
LM
2623 uint8_t *dest[2] = {dest_cb, dest_cr};
2624 if(transform_bypass){
2625 idct_add = idct_dc_add = s->dsp.add_pixels4;
2626 }else{
2627 idct_add = s->dsp.h264_idct_add;
2628 idct_dc_add = s->dsp.h264_idct_dc_add;
4691a77d
2629 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2630 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
36940eca 2631 }
bd91fee3 2632 if(is_h264){
ef9d1d15
LM
2633 for(i=16; i<16+8; i++){
2634 if(h->non_zero_count_cache[ scan8[i] ])
2635 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2636 else if(h->mb[i*16])
2637 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
4704097a 2638 }
e7e09b49 2639 }else{
ef9d1d15 2640 for(i=16; i<16+8; i++){
e7e09b49 2641 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
ef9d1d15 2642 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
e7e09b49
LLL
2643 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2644 }
4704097a 2645 }
0da71265
MN
2646 }
2647 }
2648 }
53c05b1e 2649 if(h->deblocking_filter) {
bd91fee3 2650 if (!simple && FRAME_MBAFF) {
5d18eaad
LM
2651 //FIXME try deblocking one mb at a time?
2652 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
/* the locals below shadow the outer mb_y/mb_xy and refer to the row above the current one */
6ba71fc4
LLL
2653 const int mb_y = s->mb_y - 1;
2654 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2655 const int mb_xy= mb_x + mb_y*s->mb_stride;
2656 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2657 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
6ba71fc4
LLL
/* the pair is only filtered when called for its bottom macroblock (odd mb_y) */
2658 if (!bottom) return;
2659 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2660 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2661 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2662
5d18eaad
LM
2663 if(IS_INTRA(mb_type_top | mb_type_bottom))
2664 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2665
6ba71fc4 2666 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
5d18eaad 2667 // deblock a pair
115329f1 2668 // top
/* s->mb_y and h->mb_xy are temporarily moved to the top macroblock and restored before the bottom one is filtered */
64514ee8 2669 s->mb_y--; h->mb_xy -= s->mb_stride;
a9c9a240 2670 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3b66c4c5 2671 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
4691a77d
2672 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2673 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
6ba71fc4 2674 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
6ba71fc4 2675 // bottom
64514ee8 2676 s->mb_y++; h->mb_xy += s->mb_stride;
a9c9a240 2677 tprintf(h->s.avctx, "call mbaff filter_mb\n");
3b66c4c5 2678 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
4691a77d
2679 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2680 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
6ba71fc4 2681 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2682 } else {
a9c9a240 2683 tprintf(h->s.avctx, "call filter_mb\n");
93cc10fa 2684 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
3b66c4c5 2685 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
a82688b0
MN
2686 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2687 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
3e20143e 2688 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2689 }
53c05b1e 2690 }
0da71265
MN
2691}
2692
0da71265 2693/**
bd91fee3
AS
2694 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2695 */
2696static void hl_decode_mb_simple(H264Context *h){
2697 hl_decode_mb_internal(h, 1);
2698}
2699
2700/**
2701 * Process a macroblock; this handles edge cases, such as interlacing.
2702 */
2703static void av_noinline hl_decode_mb_complex(H264Context *h){
2704 hl_decode_mb_internal(h, 0);
2705}
2706
2707static void hl_decode_mb(H264Context *h){
2708 MpegEncContext * const s = &h->s;
64514ee8 2709 const int mb_xy= h->mb_xy;
bd91fee3 2710 const int mb_type= s->current_picture.mb_type[mb_xy];
58cc7dd9
AS
2711 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2712 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
bd91fee3 2713
fedec603 2714 if(ENABLE_H264_ENCODER && !s->decode)
bd91fee3
AS
2715 return;
2716
2717 if (is_complex)
2718 hl_decode_mb_complex(h);
2719 else hl_decode_mb_simple(h);
2720}
2721
2143b118 2722static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2723 int i;
2724 for (i = 0; i < 4; ++i) {
2143b118 2725 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2726 pic->data[i] += pic->linesize[i];
2143b118 2727 pic->reference = parity;
11cc1d8c
JD
2728 pic->linesize[i] *= 2;
2729 }
2879c75f 2730 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2731}
2732
2733static int split_field_copy(Picture *dest, Picture *src,
2734 int parity, int id_add){
2735 int match = !!(src->reference & parity);
2736
2737 if (match) {
2738 *dest = *src;
d4f7d838 2739 if(parity != PICT_FRAME){
b3e93fd4
MN
2740 pic_as_field(dest, parity);
2741 dest->pic_id *= 2;
2742 dest->pic_id += id_add;
d4f7d838 2743 }
11cc1d8c
JD
2744 }
2745
2746 return match;
2747}
2748
d4f7d838
MN
2749static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2750 int i[2]={0};
2751 int index=0;
11cc1d8c 2752
d4f7d838
MN
2753 while(i[0]<len || i[1]<len){
2754 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2755 i[0]++;
2756 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2757 i[1]++;
2758 if(i[0] < len){
2759 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2760 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2761 }
2762 if(i[1] < len){
2763 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2764 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2765 }
2766 }
2767
d4f7d838 2768 return index;
11cc1d8c
JD
2769}
2770
d4f7d838
MN
2771static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2772 int i, best_poc;
2773 int out_i= 0;
11cc1d8c 2774
d4f7d838
MN
2775 for(;;){
2776 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2777
d4f7d838
MN
2778 for(i=0; i<len; i++){
2779 const int poc= src[i]->poc;
2780 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2781 best_poc= poc;
2782 sorted[out_i]= src[i];
2783 }
2784 }
2785 if(best_poc == (dir ? INT_MIN : INT_MAX))
2786 break;
2787 limit= sorted[out_i++]->poc - dir;
2788 }
2789 return out_i;
11cc1d8c
JD
2790}
2791
bd91fee3 2792/**
0da71265
MN
2793 * fills the default_ref_list.
2794 */
2795static int fill_default_ref_list(H264Context *h){
2796 MpegEncContext * const s = &h->s;
d4f7d838 2797 int i, len;
115329f1 2798
9f5c1037 2799 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2800 Picture *sorted[32];
2801 int cur_poc, list;
2802 int lens[2];
11cc1d8c 2803
d4f7d838
MN
2804 if(FIELD_PICTURE)
2805 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2806 else
2807 cur_poc= s->current_picture_ptr->poc;
086acdd5 2808
d4f7d838
MN
2809 for(list= 0; list<2; list++){
2810 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2811 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2812 assert(len<=32);
2813 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2814 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2815 assert(len<=32);
086acdd5 2816
d4f7d838
MN
2817 if(len < h->ref_count[list])
2818 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2819 lens[list]= len;
086acdd5
JD
2820 }
2821
d4f7d838
MN
2822 if(lens[0] == lens[1] && lens[1] > 1){
2823 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2824 if(i == lens[0])
2825 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2826 }
086acdd5 2827 }else{
d4f7d838
MN
2828 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2829 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2830 assert(len <= 32);
2831 if(len < h->ref_count[0])
2832 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2833 }
827c91bf
LLL
2834#ifdef TRACE
2835 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2836 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2837 }
9f5c1037 2838 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2839 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2840 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2841 }
2842 }
2843#endif
0da71265
MN
2844 return 0;
2845}
2846
827c91bf
LLL
2847static void print_short_term(H264Context *h);
2848static void print_long_term(H264Context *h);
2849
949da388
JD
2850/**
2851 * Extract structure information about the picture described by pic_num in
2852 * the current decoding context (frame or field). Note that pic_num is
2853 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2854 * @param pic_num picture number for which to extract structure information
2855 * @param structure one of PICT_XXX describing structure of picture
2856 * with pic_num
2857 * @return frame number (short term) or long term index of picture
2858 * described by pic_num
2859 */
2860static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2861 MpegEncContext * const s = &h->s;
2862
2863 *structure = s->picture_structure;
2864 if(FIELD_PICTURE){
2865 if (!(pic_num & 1))
2866 /* opposite field */
2867 *structure ^= PICT_FRAME;
2868 pic_num >>= 1;
2869 }
2870
2871 return pic_num;
2872}
2873
0da71265
MN
2874static int decode_ref_pic_list_reordering(H264Context *h){
2875 MpegEncContext * const s = &h->s;
949da388 2876 int list, index, pic_structure;
115329f1 2877
827c91bf
LLL
2878 print_short_term(h);
2879 print_long_term(h);
115329f1 2880
3425501d 2881 for(list=0; list<h->list_count; list++){
0da71265
MN
2882 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2883
2884 if(get_bits1(&s->gb)){
2885 int pred= h->curr_pic_num;
0da71265
MN
2886
2887 for(index=0; ; index++){
88e7a4d1
MN
2888 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2889 unsigned int pic_id;
0da71265 2890 int i;
2f944356 2891 Picture *ref = NULL;
115329f1
DB
2892
2893 if(reordering_of_pic_nums_idc==3)
0bc42cad 2894 break;
115329f1 2895
0da71265 2896 if(index >= h->ref_count[list]){
9b879566 2897 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2898 return -1;
2899 }
115329f1 2900
0da71265
MN
2901 if(reordering_of_pic_nums_idc<3){
2902 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2903 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2904 int frame_num;
0da71265 2905
03d3cab8 2906 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2907 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2908 return -1;
2909 }
2910
2911 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2912 else pred+= abs_diff_pic_num;
2913 pred &= h->max_pic_num - 1;
115329f1 2914
949da388
JD
2915 frame_num = pic_num_extract(h, pred, &pic_structure);
2916
0d175622
MN
2917 for(i= h->short_ref_count-1; i>=0; i--){
2918 ref = h->short_ref[i];
949da388 2919 assert(ref->reference);
0d175622 2920 assert(!ref->long_ref);
6edac8e1 2921 if(
af8c5e08
MN
2922 ref->frame_num == frame_num &&
2923 (ref->reference & pic_structure)
6edac8e1 2924 )
0da71265
MN
2925 break;
2926 }
0d175622 2927 if(i>=0)
949da388 2928 ref->pic_id= pred;
0da71265 2929 }else{
949da388 2930 int long_idx;
0da71265 2931 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2932
2933 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2934
2935 if(long_idx>31){
88e7a4d1
MN
2936 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2937 return -1;
2938 }
949da388
JD
2939 ref = h->long_ref[long_idx];
2940 assert(!(ref && !ref->reference));
af8c5e08 2941 if(ref && (ref->reference & pic_structure)){
ac658be5 2942 ref->pic_id= pic_id;
ac658be5
FOL
2943 assert(ref->long_ref);
2944 i=0;
2945 }else{
2946 i=-1;
2947 }
0da71265
MN
2948 }
2949
0d315f28 2950 if (i < 0) {
9b879566 2951 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2952 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2953 } else {
2954 for(i=index; i+1<h->ref_count[list]; i++){
2955 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2956 break;
21be92bf
MN
2957 }
2958 for(; i > index; i--){
2959 h->ref_list[list][i]= h->ref_list[list][i-1];
2960 }
0d175622 2961 h->ref_list[list][index]= *ref;
949da388 2962 if (FIELD_PICTURE){
2143b118 2963 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2964 }
0da71265 2965 }
0bc42cad 2966 }else{
9b879566 2967 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2968 return -1;
2969 }
2970 }
2971 }
0da71265 2972 }
3425501d 2973 for(list=0; list<h->list_count; list++){
6ab87211 2974 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2975 if(!h->ref_list[list][index].data[0]){
2976 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2977 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2978 }
6ab87211 2979 }
6ab87211 2980 }
115329f1 2981
9f5c1037 2982 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
5ad984c9 2983 direct_dist_scale_factor(h);
2f944356 2984 direct_ref_list_init(h);
115329f1 2985 return 0;
0da71265
MN
2986}
2987
91c58c94 2988static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2989 int list, i, j;
3425501d 2990 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2991 for(i=0; i<h->ref_count[list]; i++){
2992 Picture *frame = &h->ref_list[list][i];
2993 Picture *field = &h->ref_list[list][16+2*i];
2994 field[0] = *frame;
2995 for(j=0; j<3; j++)
2996 field[0].linesize[j] <<= 1;
2143b118 2997 field[0].reference = PICT_TOP_FIELD;
5d18eaad
LM
2998 field[1] = field[0];
2999 for(j=0; j<3; j++)
3000 field[1].data[j] += frame->linesize[j];
2143b118 3001 field[1].reference = PICT_BOTTOM_FIELD;
5d18eaad
LM
3002
3003 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3004 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3005 for(j=0; j<2; j++){
3006 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3007 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3008 }
3009 }
3010 }
3011 for(j=0; j<h->ref_count[1]; j++){
3012 for(i=0; i<h->ref_count[0]; i++)
3013 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3014 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3015 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
3016 }
3017}
3018
0da71265
MN
3019static int pred_weight_table(H264Context *h){
3020 MpegEncContext * const s = &h->s;
3021 int list, i;
9f2d1b4f 3022 int luma_def, chroma_def;
115329f1 3023
9f2d1b4f
LM
3024 h->use_weight= 0;
3025 h->use_weight_chroma= 0;
0da71265
MN
3026 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3027 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
3028 luma_def = 1<<h->luma_log2_weight_denom;
3029 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
3030
3031 for(list=0; list<2; list++){
3032 for(i=0; i<h->ref_count[list]; i++){
3033 int luma_weight_flag, chroma_weight_flag;
115329f1 3034
0da71265
MN
3035 luma_weight_flag= get_bits1(&s->gb);
3036 if(luma_weight_flag){
3037 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3038 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f
LM
3039 if( h->luma_weight[list][i] != luma_def
3040 || h->luma_offset[list][i] != 0)
3041 h->use_weight= 1;
3042 }else{
3043 h->luma_weight[list][i]= luma_def;
3044 h->luma_offset[list][i]= 0;
0da71265
MN
3045 }
3046
0af6967e 3047 if(CHROMA){
fef744d4
MN
3048 chroma_weight_flag= get_bits1(&s->gb);
3049 if(chroma_weight_flag){
3050 int j;
3051 for(j=0; j<2; j++){
3052 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3053 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3054 if( h->chroma_weight[list][i][j] != chroma_def
3055 || h->chroma_offset[list][i][j] != 0)
3056 h->use_weight_chroma= 1;
3057 }
3058 }else{
3059 int j;
3060 for(j=0; j<2; j++){
3061 h->chroma_weight[list][i][j]= chroma_def;
3062 h->chroma_offset[list][i][j]= 0;
3063 }
0da71265
MN
3064 }
3065 }
3066 }
9f5c1037 3067 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 3068 }
9f2d1b4f 3069 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3070 return 0;
3071}
3072
9f2d1b4f
LM
3073static void implicit_weight_table(H264Context *h){
3074 MpegEncContext * const s = &h->s;
9f2d1b4f
LM
3075 int ref0, ref1;
3076 int cur_poc = s->current_picture_ptr->poc;
3077
3078 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3079 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3080 h->use_weight= 0;
3081 h->use_weight_chroma= 0;
3082 return;
3083 }
3084
3085 h->use_weight= 2;
3086 h->use_weight_chroma= 2;
3087 h->luma_log2_weight_denom= 5;
3088 h->chroma_log2_weight_denom= 5;
3089
9f2d1b4f
LM
3090 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3091 int poc0 = h->ref_list[0][ref0].poc;
3092 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3093 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3094 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3095 if(td){
f66e4f5f 3096 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3097 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3098 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3099 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3100 h->implicit_weight[ref0][ref1] = 32;
3101 else
3102 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3103 }else
3104 h->implicit_weight[ref0][ref1] = 32;
3105 }
3106 }
3107}
3108
8fd57a66
JD
3109/**
3110 * Mark a picture as no longer needed for reference. The refmask
3111 * argument allows unreferencing of individual fields or the whole frame.
3112 * If the picture becomes entirely unreferenced, but is being held for
3113 * display purposes, it is marked as such.
3114 * @param refmask mask of fields to unreference; the mask is bitwise
3115 * anded with the reference marking of pic
3116 * @return non-zero if pic becomes entirely unreferenced (except possibly
3117 * for display purposes) zero if one of the fields remains in
3118 * reference
3119 */
3120static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3121 int i;
8fd57a66
JD
3122 if (pic->reference &= refmask) {
3123 return 0;
3124 } else {
79f4494a
MN
3125 for(i = 0; h->delayed_pic[i]; i++)
3126 if(pic == h->delayed_pic[i]){
3127 pic->reference=DELAYED_PIC_REF;
3128 break;
3129 }
8fd57a66
JD
3130 return 1;
3131 }
4e4d983e
LM
3132}
3133
0da71265 3134/**
5175b937 3135 * instantaneous decoder refresh.
0da71265
MN
3136 */
3137static void idr(H264Context *h){
4e4d983e 3138 int i;
0da71265 3139
dc032f33 3140 for(i=0; i<16; i++){
9c0e4624 3141 remove_long(h, i, 0);
0da71265 3142 }
849b9cef 3143 assert(h->long_ref_count==0);
0da71265
MN
3144
3145 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3146 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3147 h->short_ref[i]= NULL;
3148 }
3149 h->short_ref_count=0;
a149c1a5 3150 h->prev_frame_num= 0;
80f8e035
MN
3151 h->prev_frame_num_offset= 0;
3152 h->prev_poc_msb=
3153 h->prev_poc_lsb= 0;
0da71265
MN
3154}
3155
7c33ad19
LM
3156/* forget old pics after a seek */
3157static void flush_dpb(AVCodecContext *avctx){
3158 H264Context *h= avctx->priv_data;
3159 int i;
64b9d48f 3160 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3161 if(h->delayed_pic[i])
3162 h->delayed_pic[i]->reference= 0;
7c33ad19 3163 h->delayed_pic[i]= NULL;
285b570f 3164 }
df8a7dff 3165 h->outputed_poc= INT_MIN;
7c33ad19 3166 idr(h);
ca159196
MR
3167 if(h->s.current_picture_ptr)
3168 h->s.current_picture_ptr->reference= 0;
12d96de3 3169 h->s.first_field= 0;
e240f898 3170 ff_mpeg_flush(avctx);
7c33ad19
LM
3171}
3172
0da71265 3173/**
47e112f8
JD
3174 * Find a Picture in the short term reference list by frame number.
3175 * @param frame_num frame number to search for
3176 * @param idx the index into h->short_ref where returned picture is found
3177 * undefined if no picture found.
3178 * @return pointer to the found picture, or NULL if no pic with the provided
3179 * frame number is found
0da71265 3180 */
47e112f8 3181static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce 3182 MpegEncContext * const s = &h->s;
0da71265 3183 int i;
115329f1 3184
0da71265
MN
3185 for(i=0; i<h->short_ref_count; i++){
3186 Picture *pic= h->short_ref[i];
1924f3ce 3187 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3188 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
47e112f8
JD
3189 if(pic->frame_num == frame_num) {
3190 *idx = i;
0da71265
MN
3191 return pic;
3192 }
3193 }
3194 return NULL;
3195}
3196
3197/**
47e112f8
JD
3198 * Remove a picture from the short term reference list by its index in
3199 * that list. This does no checking on the provided index; it is assumed
3200 * to be valid. Other list entries are shifted down.
3201 * @param i index into h->short_ref of picture to remove.
3202 */
3203static void remove_short_at_index(H264Context *h, int i){
e1f15d38 3204 assert(i >= 0 && i < h->short_ref_count);
47e112f8
JD
3205 h->short_ref[i]= NULL;
3206 if (--h->short_ref_count)
3207 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3208}
3209
3210/**
3211 *
3212 * @return the removed picture or NULL if an error occurs
3213 */
d9e32422 3214static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
47e112f8
JD
3215 MpegEncContext * const s = &h->s;
3216 Picture *pic;
3217 int i;
3218
3219 if(s->avctx->debug&FF_DEBUG_MMCO)
3220 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3221
3222 pic = find_short(h, frame_num, &i);
d9e32422
MN
3223 if (pic){
3224 if(unreference_pic(h, pic, ref_mask))
47e112f8 3225 remove_short_at_index(h, i);
d9e32422 3226 }
47e112f8
JD
3227
3228 return pic;
3229}
3230
3231/**
24231e4c 3232 * Remove a picture from the long term reference list by its index in
1cea5d0d 3233 * that list.
3b66c4c5 3234 * @return the removed picture or NULL if an error occurs
0da71265 3235 */
9c0e4624 3236static Picture * remove_long(H264Context *h, int i, int ref_mask){
0da71265
MN
3237