using isnan instead of d==d, to signal correctly parsed option
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265
MN
22/**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265
MN
34#include "golomb.h"
35
e5017ab8
LA
36#include "cabac.h"
37
2848ce84 38//#undef NDEBUG
0da71265
MN
39#include <assert.h>
40
0da71265
MN
41static VLC coeff_token_vlc[4];
42static VLC chroma_dc_coeff_token_vlc;
43
44static VLC total_zeros_vlc[15];
45static VLC chroma_dc_total_zeros_vlc[3];
46
47static VLC run_vlc[6];
48static VLC run7_vlc;
49
8b82a956
MN
50static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
51static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 52static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 53static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
8b82a956 54
/**
 * Packs two 16 bit values into one 32 bit word so that, when the word is
 * stored to memory, a occupies the first two bytes and b the last two
 * (the halves are swapped when WORDS_BIGENDIAN is defined).
 * The shift is done on unsigned values: left shifting a negative int
 * (for example a reference index of -1) is undefined behaviour in C.
 * @param a value for the first 16 bits in memory order
 * @param b value for the second 16 bits in memory order
 * @return the packed 32 bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return ((unsigned)b&0xFFFF) + ((unsigned)a<<16);
#else
    return ((unsigned)a&0xFFFF) + ((unsigned)b<<16);
#endif
}
62
acd8d10f
PI
/** Lookup table: ff_rem6[i] == i % 6 for i in 0..51. */
const uint8_t ff_rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
66
/** Lookup table: ff_div6[i] == i / 6 for i in 0..51. */
const uint8_t ff_div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
70
71
0da71265
MN
/**
 * fill a rectangle.
 * w and stride are given in units of size bytes and are converted to bytes
 * internally. The resulting row width must be 2, 4, 8 or 16 bytes.
 * @param vp top left corner of the rectangle; must be aligned to the row
 *           width in bytes, or to STRIDE_ALIGN if that is smaller
 * @param w width of the rectangle, should be a constant
 * @param h height of the rectangle (1, 2 or 4), should be a constant
 * @param stride distance between the starts of two rows; in bytes it must be
 *               a multiple of the row width
 * @param val value to store; replicated into every byte when size is 1
 * @param size the size of val (1 or 4), should be a constant
 */
static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);
    assert(w<=4);

    w      *= size;
    stride *= size;

    /* uintptr_t instead of long: long is narrower than a pointer on LLP64 targets */
    assert((((uintptr_t)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);
    if(w==2){
        const uint16_t v= size==4 ? val : val*0x0101;
        *(uint16_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint16_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint16_t*)(p + 2*stride)= v;
        *(uint16_t*)(p + 3*stride)= v;
    }else if(w==4){
        const uint32_t v= size==4 ? val : val*0x01010101;
        *(uint32_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint32_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint32_t*)(p + 2*stride)= v;
        *(uint32_t*)(p + 3*stride)= v;
    }else if(w==8){
    //gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint64_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 2*stride)= v;
        *(uint64_t*)(p + 3*stride)= v;
    }else if(w==16){
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0+0*stride)= v;
        *(uint64_t*)(p + 8+0*stride)= v;
        /* a single row is handled like for the other widths instead of
         * silently writing four rows when asserts are compiled out */
        if(h==1) return;
        *(uint64_t*)(p + 0+1*stride)= v;
        *(uint64_t*)(p + 8+1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 0+2*stride)= v;
        *(uint64_t*)(p + 8+2*stride)= v;
        *(uint64_t*)(p + 0+3*stride)= v;
        *(uint64_t*)(p + 8+3*stride)= v;
#else
        *(uint32_t*)(p + 0+0*stride)= val;
        *(uint32_t*)(p + 4+0*stride)= val;
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)= val;
        *(uint32_t*)(p + 4+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)= val;
        *(uint32_t*)(p + 4+2*stride)= val;
        *(uint32_t*)(p + 0+3*stride)= val;
        *(uint32_t*)(p + 4+3*stride)= val;
    }else if(w==16){
        *(uint32_t*)(p + 0+0*stride)= val;
        *(uint32_t*)(p + 4+0*stride)= val;
        *(uint32_t*)(p + 8+0*stride)= val;
        *(uint32_t*)(p +12+0*stride)= val;
        /* a single row is handled like for the other widths instead of
         * silently writing four rows when asserts are compiled out */
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)= val;
        *(uint32_t*)(p + 4+1*stride)= val;
        *(uint32_t*)(p + 8+1*stride)= val;
        *(uint32_t*)(p +12+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)= val;
        *(uint32_t*)(p + 4+2*stride)= val;
        *(uint32_t*)(p + 8+2*stride)= val;
        *(uint32_t*)(p +12+2*stride)= val;
        *(uint32_t*)(p + 0+3*stride)= val;
        *(uint32_t*)(p + 4+3*stride)= val;
        *(uint32_t*)(p + 8+3*stride)= val;
        *(uint32_t*)(p +12+3*stride)= val;
#endif
    }else
        assert(0);
    /* every early return above handled h==1 or h==2, so only h==4 may get here */
    assert(h==4);
}
159
/**
 * Fills the neighbour caches of h for the macroblock at (s->mb_x, s->mb_y).
 * Works out which macroblocks act as top, top-left, top-right and left
 * neighbours (with extra rules when FRAME_MBAFF is set), stores the top/left
 * positions in h->top_mb_xy and h->left_mb_xy[], and then loads from those
 * neighbours: the intra sample availability masks and intra4x4 prediction
 * modes (intra macroblocks only), the non-zero coefficient counts, the CABAC
 * cbp context (top_cbp/left_cbp) and, for inter/direct macroblocks, the
 * motion vector, reference index, mvd and direct caches.
 * Finally sets h->neighbor_transform_size.
 * @param mb_type     type of the current macroblock
 * @param for_deblock non-zero when called for the deblocking filter: the
 *                    function may return early without doing anything, the
 *                    top-left/top-right types are forced to 0, and a neighbour
 *                    counts as usable when its slice_table entry is < 255
 *                    instead of having to match h->slice_num
 */
70abb407 160static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 161 MpegEncContext * const s = &h->s;
7bc9090a 162 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265
MN
163 int topleft_xy, top_xy, topright_xy, left_xy[2];
164 int topleft_type, top_type, topright_type, left_type[2];
6867a90b 165 int left_block[8];
0da71265
MN
166 int i;
167
717b1733
LM
168 //FIXME deblocking could skip the intra and nnz parts.
169 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
e2e5894a
LM
170 return;
171
115329f1
DB
172 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
173
6867a90b
LLL
/* start from the plain (non-MBAFF) neighbour positions and left block mapping */
174 top_xy = mb_xy - s->mb_stride;
175 topleft_xy = top_xy - 1;
176 topright_xy= top_xy + 1;
177 left_xy[1] = left_xy[0] = mb_xy-1;
178 left_block[0]= 0;
179 left_block[1]= 1;
180 left_block[2]= 2;
181 left_block[3]= 3;
182 left_block[4]= 7;
183 left_block[5]= 10;
184 left_block[6]= 8;
185 left_block[7]= 11;
5d18eaad 186 if(FRAME_MBAFF){
6867a90b
LLL
187 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
188 const int top_pair_xy = pair_xy - s->mb_stride;
189 const int topleft_pair_xy = top_pair_xy - 1;
190 const int topright_pair_xy = top_pair_xy + 1;
191 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
192 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
193 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
194 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
195 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
196 const int bottom = (s->mb_y & 1);
a9c9a240 197 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
6867a90b
LLL
198 if (bottom
199 ? !curr_mb_frame_flag // bottom macroblock
200 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
201 ) {
202 top_xy -= s->mb_stride;
203 }
204 if (bottom
205 ? !curr_mb_frame_flag // bottom macroblock
206 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
207 ) {
208 topleft_xy -= s->mb_stride;
209 }
210 if (bottom
211 ? !curr_mb_frame_flag // bottom macroblock
212 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
213 ) {
214 topright_xy -= s->mb_stride;
215 }
216 if (left_mb_frame_flag != curr_mb_frame_flag) {
217 left_xy[1] = left_xy[0] = pair_xy - 1;
218 if (curr_mb_frame_flag) {
219 if (bottom) {
220 left_block[0]= 2;
221 left_block[1]= 2;
222 left_block[2]= 3;
223 left_block[3]= 3;
224 left_block[4]= 8;
225 left_block[5]= 11;
226 left_block[6]= 8;
227 left_block[7]= 11;
228 } else {
229 left_block[0]= 0;
230 left_block[1]= 0;
231 left_block[2]= 1;
232 left_block[3]= 1;
233 left_block[4]= 7;
234 left_block[5]= 10;
235 left_block[6]= 7;
236 left_block[7]= 10;
237 }
238 } else {
239 left_xy[1] += s->mb_stride;
240 //left_block[0]= 0;
241 left_block[1]= 2;
242 left_block[2]= 0;
243 left_block[3]= 2;
244 //left_block[4]= 7;
245 left_block[5]= 10;
246 left_block[6]= 7;
247 left_block[7]= 10;
248 }
249 }
0da71265
MN
250 }
251
826de46e
LLL
/* publish the selected top/left neighbour positions in the context */
252 h->top_mb_xy = top_xy;
253 h->left_mb_xy[0] = left_xy[0];
254 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 255 if(for_deblock){
717b1733
LM
256 topleft_type = 0;
257 topright_type = 0;
46f2f05f 258 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
46f2f05f
MN
259 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
260 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad
LM
261
262 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
263 int list;
264 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
265 for(i=0; i<16; i++)
266 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
3425501d 267 for(list=0; list<h->list_count; list++){
5d18eaad
LM
268 if(USES_LIST(mb_type,list)){
269 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
270 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
191e8ca7 271 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad
LM
272 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
273 dst[0] = src[0];
274 dst[1] = src[1];
275 dst[2] = src[2];
276 dst[3] = src[3];
277 }
278 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
279 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
280 ref += h->b8_stride;
281 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
282 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
283 }else{
284 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
285 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
286 }
287 }
288 }
46f2f05f
MN
289 }else{
290 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
291 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
292 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
293 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
294 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
295 }
0da71265
MN
296
/* intra macroblocks: clear bits in the *_samples_available masks for neighbours that are missing, or that are not intra while constrained_intra_pred is set */
297 if(IS_INTRA(mb_type)){
115329f1
DB
298 h->topleft_samples_available=
299 h->top_samples_available=
0da71265
MN
300 h->left_samples_available= 0xFFFF;
301 h->topright_samples_available= 0xEEEA;
302
303 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
304 h->topleft_samples_available= 0xB3FF;
305 h->top_samples_available= 0x33FF;
306 h->topright_samples_available= 0x26EA;
307 }
308 for(i=0; i<2; i++){
309 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
310 h->topleft_samples_available&= 0xDF5F;
311 h->left_samples_available&= 0x5F5F;
312 }
313 }
115329f1 314
0da71265
MN
315 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
316 h->topleft_samples_available&= 0x7FFF;
115329f1 317
0da71265
MN
318 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
319 h->topright_samples_available&= 0xFBFF;
115329f1 320
0da71265
MN
321 if(IS_INTRA4x4(mb_type)){
322 if(IS_INTRA4x4(top_type)){
323 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
324 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
325 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
326 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
327 }else{
328 int pred;
6fbcaaa0 329 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
0da71265 330 pred= -1;
6fbcaaa0
LLL
331 else{
332 pred= 2;
0da71265
MN
333 }
334 h->intra4x4_pred_mode_cache[4+8*0]=
335 h->intra4x4_pred_mode_cache[5+8*0]=
336 h->intra4x4_pred_mode_cache[6+8*0]=
337 h->intra4x4_pred_mode_cache[7+8*0]= pred;
338 }
339 for(i=0; i<2; i++){
340 if(IS_INTRA4x4(left_type[i])){
341 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
342 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
343 }else{
344 int pred;
6fbcaaa0 345 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
0da71265 346 pred= -1;
6fbcaaa0
LLL
347 else{
348 pred= 2;
0da71265
MN
349 }
350 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
351 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
352 }
353 }
354 }
355 }
115329f1
DB
356
357
0da71265 358/*
115329f1
DB
3590 . T T. T T T T
3601 L . .L . . . .
3612 L . .L . . . .
3623 . T TL . . . .
3634 L . .L . . . .
3645 L . .. . . . .
0da71265
MN
365*/
366//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
367 if(top_type){
6867a90b
LLL
368 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
369 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
370 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 371 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 372
6867a90b 373 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 374 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 375
6867a90b 376 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 377 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 378
0da71265 379 }else{
115329f1 380 h->non_zero_count_cache[4+8*0]=
0da71265
MN
381 h->non_zero_count_cache[5+8*0]=
382 h->non_zero_count_cache[6+8*0]=
383 h->non_zero_count_cache[7+8*0]=
115329f1 384
0da71265
MN
385 h->non_zero_count_cache[1+8*0]=
386 h->non_zero_count_cache[2+8*0]=
115329f1 387
0da71265 388 h->non_zero_count_cache[1+8*3]=
3981c385 389 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 390
0da71265 391 }
826de46e 392
6867a90b
LLL
393 for (i=0; i<2; i++) {
394 if(left_type[i]){
395 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
396 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
397 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
398 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 399 }else{
115329f1
DB
400 h->non_zero_count_cache[3+8*1 + 2*8*i]=
401 h->non_zero_count_cache[3+8*2 + 2*8*i]=
402 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 403 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
404 }
405 }
406
407 if( h->pps.cabac ) {
408 // top_cbp
409 if(top_type) {
410 h->top_cbp = h->cbp_table[top_xy];
411 } else if(IS_INTRA(mb_type)) {
412 h->top_cbp = 0x1C0;
413 } else {
414 h->top_cbp = 0;
415 }
416 // left_cbp
417 if (left_type[0]) {
418 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
419 } else if(IS_INTRA(mb_type)) {
420 h->left_cbp = 0x1C0;
421 } else {
422 h->left_cbp = 0;
423 }
424 if (left_type[0]) {
425 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
426 }
427 if (left_type[1]) {
428 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 429 }
0da71265 430 }
6867a90b 431
0da71265 432#if 1
e2e5894a 433 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 434 int list;
3425501d 435 for(list=0; list<h->list_count; list++){
e2e5894a 436 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
437 /*if(!h->mv_cache_clean[list]){
438 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
439 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
440 h->mv_cache_clean[list]= 1;
441 }*/
5ad984c9 442 continue;
0da71265
MN
443 }
444 h->mv_cache_clean[list]= 0;
115329f1 445
53b19144 446 if(USES_LIST(top_type, list)){
0da71265
MN
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
449 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
450 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
451 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
452 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
453 h->ref_cache[list][scan8[0] + 0 - 1*8]=
454 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
455 h->ref_cache[list][scan8[0] + 2 - 1*8]=
456 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
457 }else{
115329f1
DB
458 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
459 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
460 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
461 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
462 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
463 }
464
4672503d
LM
465 for(i=0; i<2; i++){
466 int cache_idx = scan8[0] - 1 + i*2*8;
467 if(USES_LIST(left_type[i], list)){
468 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
469 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
470 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
471 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
472 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
473 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
474 }else{
475 *(uint32_t*)h->mv_cache [list][cache_idx ]=
476 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
477 h->ref_cache[list][cache_idx ]=
478 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
479 }
0da71265
MN
480 }
481
ae08a563 482 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
46f2f05f
MN
483 continue;
484
53b19144 485 if(USES_LIST(topleft_type, list)){
e2e5894a
LM
486 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
487 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
488 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
489 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
490 }else{
491 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
492 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
493 }
115329f1 494
53b19144 495 if(USES_LIST(topright_type, list)){
e2e5894a
LM
496 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
497 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
498 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
499 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
500 }else{
501 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
502 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
503 }
e2e5894a 504
ae08a563 505 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 506 continue;
115329f1
DB
507
508 h->ref_cache[list][scan8[5 ]+1] =
509 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 510 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 511 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
512 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
513 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
514 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 515 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
516 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
517 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
518
519 if( h->pps.cabac ) {
520 /* XXX beurk, Load mvd */
53b19144 521 if(USES_LIST(top_type, list)){
9e528114
LA
522 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
523 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
524 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
525 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
526 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
527 }else{
115329f1
DB
528 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
529 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
530 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
531 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
532 }
53b19144 533 if(USES_LIST(left_type[0], list)){
9e528114
LA
534 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
535 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
536 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
537 }else{
538 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
539 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
540 }
53b19144 541 if(USES_LIST(left_type[1], list)){
9e528114
LA
542 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
543 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
544 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
545 }else{
546 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
547 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
548 }
549 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
550 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 551 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
552 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
553 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9
LM
554
555 if(h->slice_type == B_TYPE){
556 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
557
558 if(IS_DIRECT(top_type)){
559 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
560 }else if(IS_8X8(top_type)){
561 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
562 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
563 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
564 }else{
565 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
566 }
115329f1 567
5d18eaad
LM
568 if(IS_DIRECT(left_type[0]))
569 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
570 else if(IS_8X8(left_type[0]))
571 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
572 else
573 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
574
575 if(IS_DIRECT(left_type[1]))
5ad984c9 576 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
577 else if(IS_8X8(left_type[1]))
578 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
579 else
5ad984c9 580 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
581 }
582 }
583
584 if(FRAME_MBAFF){
585#define MAP_MVS\
586 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
587 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
588 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
589 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
590 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
591 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
592 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
593 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
594 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
595 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
596 if(MB_FIELD){
597#define MAP_F2F(idx, mb_type)\
598 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
599 h->ref_cache[list][idx] <<= 1;\
600 h->mv_cache[list][idx][1] /= 2;\
601 h->mvd_cache[list][idx][1] /= 2;\
602 }
603 MAP_MVS
604#undef MAP_F2F
605 }else{
606#define MAP_F2F(idx, mb_type)\
607 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
608 h->ref_cache[list][idx] >>= 1;\
609 h->mv_cache[list][idx][1] <<= 1;\
610 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 611 }
5d18eaad
LM
612 MAP_MVS
613#undef MAP_F2F
5ad984c9 614 }
9e528114 615 }
0da71265 616 }
0da71265
MN
617 }
618#endif
43efd19a
LM
619
/* number of top/left neighbours (0..2) for which IS_8x8DCT() is true */
620 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
621}
622
623static inline void write_back_intra_pred_mode(H264Context *h){
624 MpegEncContext * const s = &h->s;
7bc9090a 625 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265
MN
626
627 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
628 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
629 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
630 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
631 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
632 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
633 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
634}
635
636/**
637 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
638 */
639static inline int check_intra4x4_pred_mode(H264Context *h){
640 MpegEncContext * const s = &h->s;
641 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
642 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
643 int i;
115329f1 644
0da71265
MN
645 if(!(h->top_samples_available&0x8000)){
646 for(i=0; i<4; i++){
647 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
648 if(status<0){
9b879566 649 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
650 return -1;
651 } else if(status){
652 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
653 }
654 }
655 }
115329f1 656
0da71265
MN
657 if(!(h->left_samples_available&0x8000)){
658 for(i=0; i<4; i++){
659 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
660 if(status<0){
9b879566 661 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
662 return -1;
663 } else if(status){
664 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
665 }
666 }
667 }
668
669 return 0;
670} //FIXME cleanup like next
671
672/**
673 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
674 */
675static inline int check_intra_pred_mode(H264Context *h, int mode){
676 MpegEncContext * const s = &h->s;
677 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
678 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 679
43ff0714 680 if(mode > 6U) {
5175b937 681 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 682 return -1;
5175b937 683 }
115329f1 684
0da71265
MN
685 if(!(h->top_samples_available&0x8000)){
686 mode= top[ mode ];
687 if(mode<0){
9b879566 688 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
689 return -1;
690 }
691 }
115329f1 692
0da71265
MN
693 if(!(h->left_samples_available&0x8000)){
694 mode= left[ mode ];
695 if(mode<0){
9b879566 696 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 697 return -1;
115329f1 698 }
0da71265
MN
699 }
700
701 return mode;
702}
703
704/**
705 * gets the predicted intra4x4 prediction mode.
706 */
707static inline int pred_intra_mode(H264Context *h, int n){
708 const int index8= scan8[n];
709 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
710 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
711 const int min= FFMIN(left, top);
712
a9c9a240 713 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
714
715 if(min<0) return DC_PRED;
716 else return min;
717}
718
719static inline void write_back_non_zero_count(H264Context *h){
720 MpegEncContext * const s = &h->s;
7bc9090a 721 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265 722
6867a90b
LLL
723 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
724 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
725 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 726 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
727 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
728 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
729 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 730
6867a90b 731 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 732 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 733 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 734
6867a90b 735 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 736 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 737 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
5d18eaad
LM
738
739 if(FRAME_MBAFF){
740 // store all luma nnzs, for deblocking
741 int v = 0, i;
742 for(i=0; i<16; i++)
743 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
744 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
745 }
0da71265
MN
746}
747
748/**
749 * gets the predicted number of non zero coefficients.
750 * @param n block index
751 */
752static inline int pred_non_zero_count(H264Context *h, int n){
753 const int index8= scan8[n];
754 const int left= h->non_zero_count_cache[index8 - 1];
755 const int top = h->non_zero_count_cache[index8 - 8];
756 int i= left + top;
115329f1 757
0da71265
MN
758 if(i<64) i= (i+1)>>1;
759
a9c9a240 760 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
761
762 return i&31;
763}
764
1924f3ce
MN
/**
 * Fetches the diagonal neighbour (top-right, or top-left as fallback) of a partition.
 * @param C set to point at the selected motion vector inside h->mv_cache
 * @param i cache index of the partition's first block
 * @param list reference list to read from
 * @param part_width partition width, in cache cells
 * @return the reference index that belongs to *C
 */
765static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
766 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 767 MpegEncContext *s = &h->s;
1924f3ce 768
5d18eaad
LM
769 /* there is no consistent mapping of mvs to neighboring locations that will
770 * make mbaff happy, so we can't move all this logic to fill_caches */
771 if(FRAME_MBAFF){
191e8ca7 772 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
773 const int16_t *mv;
/* cache slot scan8[0]-2 is used as scratch space for the rescaled mv; clear it and point *C at it */
774 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
775 *C = h->mv_cache[list][scan8[0]-2];
776
777 if(!MB_FIELD
778 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
779 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
780 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV reads the mv at 4x4 position (X4,Y4) of the current picture, stores it in the
 * scratch slot with MV_OP applied to its y component, and returns the matching ref index with
 * REF_OP applied. Note that it expands to return statements, so it leaves this function. */
781#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
782 const int x4 = X4, y4 = Y4;\
783 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
784 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
785 return LIST_NOT_USED;\
786 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
787 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
788 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
789 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
790
791 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
792 }
793 }
794 if(topright_ref == PART_NOT_AVAILABLE
795 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
796 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
797 if(!MB_FIELD
798 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
799 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
800 }
801 if(MB_FIELD
802 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
803 && i >= scan8[0]+8){
804 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
805 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
806 }
807 }
808#undef SET_DIAG_MV
809 }
810
1924f3ce
MN
/* regular case: use the top-right cache entry if it exists, otherwise the top-left one */
811 if(topright_ref != PART_NOT_AVAILABLE){
812 *C= h->mv_cache[list][ i - 8 + part_width ];
813 return topright_ref;
814 }else{
a9c9a240 815 tprintf(s->avctx, "topright MV not available\n");
95c26348 816
1924f3ce
MN
817 *C= h->mv_cache[list][ i - 8 - 1 ];
818 return h->ref_cache[list][ i - 8 - 1 ];
819 }
820}
821
0da71265
MN
822/**
823 * gets the predicted MV.
824 * @param n the block index
825 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
826 * @param mx the x component of the predicted motion vector
827 * @param my the y component of the predicted motion vector
828 */
829static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
830 const int index8= scan8[n];
831 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
832 const int left_ref= h->ref_cache[list][ index8 - 1 ];
833 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
834 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
835 const int16_t * C;
836 int diagonal_ref, match_count;
837
0da71265 838 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 839
0da71265 840/* mv_cache
115329f1 841 B . . A T T T T
0da71265
MN
842 U . . L . . , .
843 U . . L . . . .
844 U . . L . . , .
845 . . . L . . . .
846*/
1924f3ce
MN
847
848 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
849 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 850 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
851 if(match_count > 1){ //most common
852 *mx= mid_pred(A[0], B[0], C[0]);
853 *my= mid_pred(A[1], B[1], C[1]);
854 }else if(match_count==1){
855 if(left_ref==ref){
856 *mx= A[0];
115329f1 857 *my= A[1];
1924f3ce
MN
858 }else if(top_ref==ref){
859 *mx= B[0];
115329f1 860 *my= B[1];
0da71265 861 }else{
1924f3ce 862 *mx= C[0];
115329f1 863 *my= C[1];
0da71265
MN
864 }
865 }else{
1924f3ce 866 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 867 *mx= A[0];
115329f1 868 *my= A[1];
0da71265 869 }else{
1924f3ce
MN
870 *mx= mid_pred(A[0], B[0], C[0]);
871 *my= mid_pred(A[1], B[1], C[1]);
0da71265 872 }
0da71265 873 }
115329f1 874
a9c9a240 875 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
876}
877
878/**
879 * gets the directionally predicted 16x8 MV.
880 * @param n the block index
881 * @param mx the x component of the predicted motion vector
882 * @param my the y component of the predicted motion vector
883 */
884static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
885 if(n==0){
886 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
887 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
888
a9c9a240 889 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 890
0da71265
MN
891 if(top_ref == ref){
892 *mx= B[0];
893 *my= B[1];
894 return;
895 }
896 }else{
897 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
898 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 899
a9c9a240 900 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
901
902 if(left_ref == ref){
903 *mx= A[0];
904 *my= A[1];
905 return;
906 }
907 }
908
909 //RARE
910 pred_motion(h, n, 4, list, ref, mx, my);
911}
912
913/**
914 * gets the directionally predicted 8x16 MV.
915 * @param n the block index
916 * @param mx the x component of the predicted motion vector
917 * @param my the y component of the predicted motion vector
918 */
919static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
920 if(n==0){
921 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
922 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 923
a9c9a240 924 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
925
926 if(left_ref == ref){
927 *mx= A[0];
928 *my= A[1];
929 return;
930 }
931 }else{
1924f3ce
MN
932 const int16_t * C;
933 int diagonal_ref;
934
935 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 936
a9c9a240 937 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 938
115329f1 939 if(diagonal_ref == ref){
0da71265
MN
940 *mx= C[0];
941 *my= C[1];
942 return;
943 }
0da71265
MN
944 }
945
946 //RARE
947 pred_motion(h, n, 2, list, ref, mx, my);
948}
949
950static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
951 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
952 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
953
a9c9a240 954 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
955
956 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
957 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
958 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
115329f1 959
0da71265
MN
960 *mx = *my = 0;
961 return;
962 }
115329f1 963
0da71265
MN
964 pred_motion(h, 0, 4, 0, 0, mx, my);
965
966 return;
967}
968
5ad984c9
LM
969static inline void direct_dist_scale_factor(H264Context * const h){
970 const int poc = h->s.current_picture_ptr->poc;
971 const int poc1 = h->ref_list[1][0].poc;
972 int i;
973 for(i=0; i<h->ref_count[0]; i++){
974 int poc0 = h->ref_list[0][i].poc;
f66e4f5f 975 int td = av_clip(poc1 - poc0, -128, 127);
5ad984c9
LM
976 if(td == 0 /* FIXME || pic0 is a long-term ref */){
977 h->dist_scale_factor[i] = 256;
978 }else{
f66e4f5f 979 int tb = av_clip(poc - poc0, -128, 127);
c26abfa5 980 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 981 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
5ad984c9
LM
982 }
983 }
5d18eaad
LM
984 if(FRAME_MBAFF){
985 for(i=0; i<h->ref_count[0]; i++){
986 h->dist_scale_factor_field[2*i] =
987 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
988 }
989 }
5ad984c9 990}
2f944356
LM
991static inline void direct_ref_list_init(H264Context * const h){
992 MpegEncContext * const s = &h->s;
993 Picture * const ref1 = &h->ref_list[1][0];
994 Picture * const cur = s->current_picture_ptr;
995 int list, i, j;
996 if(cur->pict_type == I_TYPE)
997 cur->ref_count[0] = 0;
998 if(cur->pict_type != B_TYPE)
999 cur->ref_count[1] = 0;
1000 for(list=0; list<2; list++){
1001 cur->ref_count[list] = h->ref_count[list];
1002 for(j=0; j<h->ref_count[list]; j++)
1003 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1004 }
1005 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1006 return;
1007 for(list=0; list<2; list++){
1008 for(i=0; i<ref1->ref_count[list]; i++){
1009 const int poc = ref1->ref_poc[list][i];
171c4076 1010 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
2f944356
LM
1011 for(j=0; j<h->ref_count[list]; j++)
1012 if(h->ref_list[list][j].poc == poc){
1013 h->map_col_to_list0[list][i] = j;
1014 break;
1015 }
1016 }
1017 }
5d18eaad
LM
1018 if(FRAME_MBAFF){
1019 for(list=0; list<2; list++){
1020 for(i=0; i<ref1->ref_count[list]; i++){
1021 j = h->map_col_to_list0[list][i];
1022 h->map_col_to_list0_field[list][2*i] = 2*j;
1023 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
1024 }
1025 }
1026 }
2f944356 1027}
5ad984c9
LM
1028
/**
 * Derives the motion vectors and reference indices of a direct predicted
 * macroblock (or of its direct 8x8 sub-blocks) and stores them in
 * h->mv_cache / h->ref_cache.
 * Spatial prediction is used when h->direct_spatial_mv_pred is set, otherwise
 * temporal prediction from the co-located data of h->ref_list[1][0].
 * @param mb_type in: type of the current macroblock; out: the type with the
 *                partitioning and list usage selected here
 */
1029static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1030 MpegEncContext * const s = &h->s;
1031 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1032 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1033 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1034 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1035 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
4866bd2b 1036 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
5ad984c9 1037 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
2f944356 1038 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
5ad984c9 1039 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 1040 unsigned int sub_mb_type;
5ad984c9
LM
1041 int i8, i4;
1042
5d18eaad 1043#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
5ad984c9
LM
/* choose the partitioning from the co-located macroblock type */
1044 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1045 /* FIXME save sub mb types from previous frames (or derive from MVs)
1046 * so we know exactly what block size to use */
1047 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
3622988f 1048 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
5d18eaad 1049 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
5ad984c9
LM
1050 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1051 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1052 }else{
1053 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
3622988f 1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
5ad984c9
LM
1055 }
1056 if(!is_b8x8)
1057 *mb_type |= MB_TYPE_DIRECT2;
5d18eaad
LM
1058 if(MB_FIELD)
1059 *mb_type |= MB_TYPE_INTERLACED;
5ad984c9 1060
a9c9a240 1061 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
115329f1 1062
5ad984c9
LM
/* spatial direct prediction: refs and mvs come from the neighbours of the current macroblock */
1063 if(h->direct_spatial_mv_pred){
1064 int ref[2];
1065 int mv[2][2];
1066 int list;
1067
5d18eaad
LM
1068 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1069
5ad984c9
LM
1070 /* ref = min(neighbors) */
1071 for(list=0; list<2; list++){
1072 int refa = h->ref_cache[list][scan8[0] - 1];
1073 int refb = h->ref_cache[list][scan8[0] - 8];
1074 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* NOTE(review): -2 presumably is PART_NOT_AVAILABLE; the top-left entry then replaces the top-right one - confirm */
1075 if(refc == -2)
1076 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1077 ref[list] = refa;
1078 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1079 ref[list] = refb;
1080 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1081 ref[list] = refc;
1082 if(ref[list] < 0)
1083 ref[list] = -1;
1084 }
1085
1086 if(ref[0] < 0 && ref[1] < 0){
1087 ref[0] = ref[1] = 0;
1088 mv[0][0] = mv[0][1] =
1089 mv[1][0] = mv[1][1] = 0;
1090 }else{
1091 for(list=0; list<2; list++){
1092 if(ref[list] >= 0)
1093 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1094 else
1095 mv[list][0] = mv[list][1] = 0;
1096 }
1097 }
1098
/* drop the list that has no usable reference from the macroblock and sub-macroblock types */
1099 if(ref[1] < 0){
1100 *mb_type &= ~MB_TYPE_P0L1;
1101 sub_mb_type &= ~MB_TYPE_P0L1;
1102 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_P0L0;
1104 sub_mb_type &= ~MB_TYPE_P0L0;
1105 }
1106
1107 if(IS_16X16(*mb_type)){
d19f5acb
MN
1108 int a=0, b=0;
1109
cec93959
LM
1110 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
115329f1 1112 if(!IS_INTRA(mb_type_col)
c26abfa5
DB
1113 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1114 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1115 && (h->x264_build>33 || !h->x264_build)))){
/* co-located block is (nearly) static: only lists with ref > 0 keep their predicted mv, the others stay 0 */
5ad984c9 1116 if(ref[0] > 0)
d19f5acb 1117 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1118 if(ref[1] > 0)
d19f5acb 1119 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1120 }else{
d19f5acb
MN
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1123 }
d19f5acb
MN
1124 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1126 }else{
1127 for(i8=0; i8<4; i8++){
1128 const int x8 = i8&1;
1129 const int y8 = i8>>1;
115329f1 1130
5ad984c9
LM
1131 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1132 continue;
1133 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1134
5ad984c9
LM
1135 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1137 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1138 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1139
5ad984c9 1140 /* col_zero_flag */
115329f1
DB
1141 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1142 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
bf4e3bd2 1143 && (h->x264_build>33 || !h->x264_build)))){
4866bd2b 1144 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54
LM
1145 if(IS_SUB_8X8(sub_mb_type)){
1146 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
c26abfa5 1147 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1148 if(ref[0] == 0)
1149 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1150 if(ref[1] == 0)
1151 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1152 }
1153 }else
5ad984c9 1154 for(i4=0; i4<4; i4++){
4866bd2b 1155 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
c26abfa5 1156 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1157 if(ref[0] == 0)
1158 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1159 if(ref[1] == 0)
1160 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1161 }
1162 }
1163 }
1164 }
1165 }
1166 }else{ /* direct temporal mv pred */
5d18eaad
LM
1167 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1168 const int *dist_scale_factor = h->dist_scale_factor;
1169
1170 if(FRAME_MBAFF){
/* interlaced macroblocks use the field variants of the reference map and scale factors */
1171 if(IS_INTERLACED(*mb_type)){
1172 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1173 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1174 dist_scale_factor = h->dist_scale_factor_field;
1175 }
1176 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1177 /* FIXME assumes direct_8x8_inference == 1 */
1178 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1179 int mb_types_col[2];
1180 int y_shift;
1181
1182 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1183 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1184 | (*mb_type & MB_TYPE_INTERLACED);
1185 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1186
1187 if(IS_INTERLACED(*mb_type)){
1188 /* frame to field scaling */
1189 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1190 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1191 if(s->mb_y&1){
1192 l1ref0 -= 2*h->b8_stride;
1193 l1ref1 -= 2*h->b8_stride;
1194 l1mv0 -= 4*h->b_stride;
1195 l1mv1 -= 4*h->b_stride;
1196 }
1197 y_shift = 0;
1198
1199 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1200 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1201 && !is_b8x8)
1202 *mb_type |= MB_TYPE_16x8;
1203 else
1204 *mb_type |= MB_TYPE_8x8;
1205 }else{
1206 /* field to frame scaling */
1207 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1208 * but in MBAFF, top and bottom POC are equal */
1209 int dy = (s->mb_y&1) ? 1 : 2;
1210 mb_types_col[0] =
1211 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1212 l1ref0 += dy*h->b8_stride;
1213 l1ref1 += dy*h->b8_stride;
1214 l1mv0 += 2*dy*h->b_stride;
1215 l1mv1 += 2*dy*h->b_stride;
1216 y_shift = 2;
1217
1218 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1219 && !is_b8x8)
1220 *mb_type |= MB_TYPE_16x16;
1221 else
1222 *mb_type |= MB_TYPE_8x8;
1223 }
1224
1225 for(i8=0; i8<4; i8++){
1226 const int x8 = i8&1;
1227 const int y8 = i8>>1;
1228 int ref0, scale;
1229 const int16_t (*l1mv)[2]= l1mv0;
1230
1231 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1232 continue;
1233 h->sub_mb_type[i8] = sub_mb_type;
1234
1235 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1236 if(IS_INTRA(mb_types_col[y8])){
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1238 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1239 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1240 continue;
1241 }
1242
1243 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1244 if(ref0 >= 0)
1245 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1246 else{
1247 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1248 l1mv= l1mv1;
1249 }
1250 scale = dist_scale_factor[ref0];
1251 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1252
1253 {
1254 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1255 int my_col = (mv_col[1]<<y_shift)/2;
1256 int mx = (scale * mv_col[0] + 128) >> 8;
1257 int my = (scale * my_col + 128) >> 8;
1258 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1259 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1260 }
1261 }
1262 return;
1263 }
1264 }
1265
1266 /* one-to-one mv scaling */
1267
5ad984c9 1268 if(IS_16X16(*mb_type)){
fda51641
MN
1269 int ref, mv0, mv1;
1270
5ad984c9
LM
1271 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1272 if(IS_INTRA(mb_type_col)){
fda51641 1273 ref=mv0=mv1=0;
5ad984c9 1274 }else{
5d18eaad
LM
1275 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1276 : map_col_to_list0[1][l1ref1[0]];
1277 const int scale = dist_scale_factor[ref0];
8583bef8 1278 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1279 int mv_l0[2];
5d18eaad
LM
/* list 0 mv is the co-located mv scaled by scale/256 (rounded); list 1 mv is that minus the co-located mv */
1280 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1281 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1282 ref= ref0;
1283 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1284 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1285 }
fda51641
MN
1286 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1289 }else{
1290 for(i8=0; i8<4; i8++){
1291 const int x8 = i8&1;
1292 const int y8 = i8>>1;
5d18eaad 1293 int ref0, scale;
bf4e3bd2 1294 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1295
5ad984c9
LM
1296 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1297 continue;
1298 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1299 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1300 if(IS_INTRA(mb_type_col)){
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1302 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1303 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1304 continue;
1305 }
115329f1 1306
5ad984c9 1307 ref0 = l1ref0[x8 + y8*h->b8_stride];
2f944356 1308 if(ref0 >= 0)
5d18eaad 1309 ref0 = map_col_to_list0[0][ref0];
8583bef8 1310 else{
5d18eaad 1311 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
8583bef8
MN
1312 l1mv= l1mv1;
1313 }
5d18eaad 1314 scale = dist_scale_factor[ref0];
115329f1 1315
5ad984c9 1316 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54
LM
1317 if(IS_SUB_8X8(sub_mb_type)){
1318 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
5d18eaad
LM
1319 int mx = (scale * mv_col[0] + 128) >> 8;
1320 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1321 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1322 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1323 }else
5ad984c9 1324 for(i4=0; i4<4; i4++){
8583bef8 1325 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
5ad984c9 1326 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1327 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1328 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1329 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1330 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1331 }
1332 }
1333 }
1334 }
1335}
1336
0da71265
MN
1337static inline void write_back_motion(H264Context *h, int mb_type){
1338 MpegEncContext * const s = &h->s;
0da71265
MN
1339 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1340 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1341 int list;
1342
2ea39252
LM
1343 if(!USES_LIST(mb_type, 0))
1344 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1345
3425501d 1346 for(list=0; list<h->list_count; list++){
0da71265 1347 int y;
53b19144 1348 if(!USES_LIST(mb_type, list))
5ad984c9 1349 continue;
115329f1 1350
0da71265
MN
1351 for(y=0; y<4; y++){
1352 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1353 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1354 }
9e528114 1355 if( h->pps.cabac ) {
e6e77eb6
LM
1356 if(IS_SKIP(mb_type))
1357 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1358 else
9e528114
LA
1359 for(y=0; y<4; y++){
1360 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1361 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1362 }
1363 }
53b19144
LM
1364
1365 {
191e8ca7 1366 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1367 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1368 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1369 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1370 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1371 }
1372 }
115329f1 1373
5ad984c9
LM
1374 if(h->slice_type == B_TYPE && h->pps.cabac){
1375 if(IS_8X8(mb_type)){
53b19144
LM
1376 uint8_t *direct_table = &h->direct_table[b8_xy];
1377 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1378 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1379 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1380 }
1381 }
0da71265
MN
1382}
1383
1384/**
1385 * Decodes a network abstraction layer unit.
1386 * @param consumed is the number of bytes used as input
1387 * @param length is the length of the array
3b66c4c5 1388 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1389 * @returns decoded bytes, might be src+1 if no escapes
0da71265
MN
1390 */
1391static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1392 int i, si, di;
1393 uint8_t *dst;
24456882 1394 int bufidx;
0da71265 1395
bb270c08 1396// src[0]&0x80; //forbidden bit
0da71265
MN
1397 h->nal_ref_idc= src[0]>>5;
1398 h->nal_unit_type= src[0]&0x1F;
1399
1400 src++; length--;
115329f1 1401#if 0
0da71265
MN
1402 for(i=0; i<length; i++)
1403 printf("%2X ", src[i]);
1404#endif
1405 for(i=0; i+1<length; i+=2){
1406 if(src[i]) continue;
1407 if(i>0 && src[i-1]==0) i--;
1408 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1409 if(src[i+2]!=3){
1410 /* startcode, so we must be past the end */
1411 length=i;
1412 }
1413 break;
1414 }
1415 }
1416
1417 if(i>=length-1){ //no escaped 0
1418 *dst_length= length;
1419 *consumed= length+1; //+1 for the header
115329f1 1420 return src;
0da71265
MN
1421 }
1422
24456882
1423 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1424 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1425 dst= h->rbsp_buffer[bufidx];
0da71265 1426
ac658be5
FOL
1427 if (dst == NULL){
1428 return NULL;
1429 }
1430
3b66c4c5 1431//printf("decoding esc\n");
0da71265 1432 si=di=0;
115329f1 1433 while(si<length){
0da71265
MN
1434 //remove escapes (very rare 1:2^22)
1435 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1436 if(src[si+2]==3){ //escape
1437 dst[di++]= 0;
1438 dst[di++]= 0;
1439 si+=3;
c8470cc1 1440 continue;
0da71265
MN
1441 }else //next start code
1442 break;
1443 }
1444
1445 dst[di++]= src[si++];
1446 }
1447
1448 *dst_length= di;
1449 *consumed= si + 1;//+1 for the header
90b5b51e 1450//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1451 return dst;
1452}
1453
0da71265
MN
1454/**
1455 * identifies the exact end of the bitstream
1456 * @return the length of the trailing, or 0 if damaged
1457 */
a9c9a240 1458static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
0da71265
MN
1459 int v= *src;
1460 int r;
1461
a9c9a240 1462 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1463
1464 for(r=1; r<9; r++){
1465 if(v&1) return r;
1466 v>>=1;
1467 }
1468 return 0;
1469}
1470
1471/**
1472 * idct tranforms the 16 dc values and dequantize them.
1473 * @param qp quantization parameter
1474 */
239ea04c 1475static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1476#define stride 16
1477 int i;
1478 int temp[16]; //FIXME check if this is a good idea
1479 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1480 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1481
1482//memset(block, 64, 2*256);
1483//return;
1484 for(i=0; i<4; i++){
1485 const int offset= y_offset[i];
1486 const int z0= block[offset+stride*0] + block[offset+stride*4];
1487 const int z1= block[offset+stride*0] - block[offset+stride*4];
1488 const int z2= block[offset+stride*1] - block[offset+stride*5];
1489 const int z3= block[offset+stride*1] + block[offset+stride*5];
1490
1491 temp[4*i+0]= z0+z3;
1492 temp[4*i+1]= z1+z2;
1493 temp[4*i+2]= z1-z2;
1494 temp[4*i+3]= z0-z3;
1495 }
1496
1497 for(i=0; i<4; i++){
1498 const int offset= x_offset[i];
1499 const int z0= temp[4*0+i] + temp[4*2+i];
1500 const int z1= temp[4*0+i] - temp[4*2+i];
1501 const int z2= temp[4*1+i] - temp[4*3+i];
1502 const int z3= temp[4*1+i] + temp[4*3+i];
1503
239ea04c
LM
1504 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1505 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1506 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1507 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1508 }
1509}
1510
e5017ab8 1511#if 0
/* Compiled out. The code below relies on the macro `stride`, which is defined
 * inside h264_luma_dc_dequant_idct_c() and still in effect at this point. */
0da71265
MN
1512/**
1513 * DCT transforms the 16 DC values in place; each result is halved.
1514 * @param qp quantization parameter ??? FIXME (the parameter is commented out)
1515 */
1516static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1517// const int qmul= dequant_coeff[qp][0];
1518 int i;
1519 int temp[16]; //FIXME check if this is a good idea
1520 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1521 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1522
/* first pass: butterflies over the four values selected by each y_offset */
1523 for(i=0; i<4; i++){
1524 const int offset= y_offset[i];
1525 const int z0= block[offset+stride*0] + block[offset+stride*4];
1526 const int z1= block[offset+stride*0] - block[offset+stride*4];
1527 const int z2= block[offset+stride*1] - block[offset+stride*5];
1528 const int z3= block[offset+stride*1] + block[offset+stride*5];
1529
1530 temp[4*i+0]= z0+z3;
1531 temp[4*i+1]= z1+z2;
1532 temp[4*i+2]= z1-z2;
1533 temp[4*i+3]= z0-z3;
1534 }
1535
/* second pass: butterflies across the temporaries, results shifted right by one */
1536 for(i=0; i<4; i++){
1537 const int offset= x_offset[i];
1538 const int z0= temp[4*0+i] + temp[4*2+i];
1539 const int z1= temp[4*0+i] - temp[4*2+i];
1540 const int z2= temp[4*1+i] - temp[4*3+i];
1541 const int z3= temp[4*1+i] + temp[4*3+i];
1542
1543 block[stride*0 +offset]= (z0 + z3)>>1;
1544 block[stride*2 +offset]= (z1 + z2)>>1;
1545 block[stride*8 +offset]= (z1 - z2)>>1;
1546 block[stride*10+offset]= (z0 - z3)>>1;
1547 }
1548}
e5017ab8
LA
1549#endif
1550
0da71265
MN
1551#undef xStride
1552#undef stride
1553
239ea04c 1554static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1555 const int stride= 16*2;
1556 const int xStride= 16;
1557 int a,b,c,d,e;
1558
1559 a= block[stride*0 + xStride*0];
1560 b= block[stride*0 + xStride*1];
1561 c= block[stride*1 + xStride*0];
1562 d= block[stride*1 + xStride*1];
1563
1564 e= a-b;
1565 a= a+b;
1566 b= c-d;
1567 c= c+d;
1568
239ea04c
LM
1569 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1570 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1571 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1572 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1573}
1574
e5017ab8 1575#if 0
/* Compiled out: 2x2 transform of the four chroma DC values at offsets 0, 16, 32 and 48,
 * done in place and without any scaling. */
0da71265
MN
1576static void chroma_dc_dct_c(DCTELEM *block){
1577 const int stride= 16*2;
1578 const int xStride= 16;
1579 int a,b,c,d,e;
1580
1581 a= block[stride*0 + xStride*0];
1582 b= block[stride*0 + xStride*1];
1583 c= block[stride*1 + xStride*0];
1584 d= block[stride*1 + xStride*1];
1585
/* sums and differences of each row; a, b and c are reused for the results */
1586 e= a-b;
1587 a= a+b;
1588 b= c-d;
1589 c= c+d;
1590
1591 block[stride*0 + xStride*0]= (a+c);
1592 block[stride*0 + xStride*1]= (e+b);
1593 block[stride*1 + xStride*0]= (a-c);
1594 block[stride*1 + xStride*1]= (e-b);
1595}
e5017ab8 1596#endif
0da71265
MN
1597
1598/**
1599 * gets the chroma qp.
1600 */
4691a77d
1601static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1602 return h->pps.chroma_qp_table[t][qscale & 0xff];
0da71265
MN
1603}
1604
755bfeab 1605//FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
0afd2a92
DB
1606//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1607static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
0da71265
MN
1608 int i;
1609 const int * const quant_table= quant_coeff[qscale];
1610 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1611 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1612 const unsigned int threshold2= (threshold1<<1);
1613 int last_non_zero;
1614
0afd2a92 1615 if(separate_dc){
0da71265
MN
1616 if(qscale<=18){
1617 //avoid overflows
1618 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1619 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1620 const unsigned int dc_threshold2= (dc_threshold1<<1);
1621
1622 int level= block[0]*quant_coeff[qscale+18][0];
1623 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1624 if(level>0){
1625 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1626 block[0]= level;
1627 }else{
1628 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1629 block[0]= -level;
1630 }
1631// last_non_zero = i;
1632 }else{
1633 block[0]=0;
1634 }
1635 }else{
1636 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1637 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1638 const unsigned int dc_threshold2= (dc_threshold1<<1);
1639
1640 int level= block[0]*quant_table[0];
1641 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1642 if(level>0){
1643 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1644 block[0]= level;
1645 }else{
1646 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1647 block[0]= -level;
1648 }
1649// last_non_zero = i;
1650 }else{
1651 block[0]=0;
1652 }
1653 }
1654 last_non_zero= 0;
1655 i=1;
1656 }else{
1657 last_non_zero= -1;
1658 i=0;
1659 }
1660
1661 for(; i<16; i++){
1662 const int j= scantable[i];
1663 int level= block[j]*quant_table[j];
1664
1665// if( bias+level >= (1<<(QMAT_SHIFT - 3))
1666// || bias-level >= (1<<(QMAT_SHIFT - 3))){
1667 if(((unsigned)(level+threshold1))>threshold2){
1668 if(level>0){
1669 level= (bias + level)>>QUANT_SHIFT;
1670 block[j]= level;
1671 }else{
1672 level= (bias - level)>>QUANT_SHIFT;
1673 block[j]= -level;
1674 }
1675 last_non_zero = i;
1676 }else{
1677 block[j]=0;
1678 }
1679 }
1680
1681 return last_non_zero;
1682}
1683
0da71265
MN
1684static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1685 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1686 int src_x_offset, int src_y_offset,
1687 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1688 MpegEncContext * const s = &h->s;
1689 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1690 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1691 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1692 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1693 uint8_t * src_cb, * src_cr;
1694 int extra_width= h->emu_edge_width;
1695 int extra_height= h->emu_edge_height;
0da71265
MN
1696 int emu=0;
1697 const int full_mx= mx>>2;
1698 const int full_my= my>>2;
fbd312fd 1699 const int pic_width = 16*s->mb_width;
5d18eaad 1700 const int pic_height = 16*s->mb_height >> MB_MBAFF;
115329f1 1701
2f29af39 1702 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
171c4076 1703 return;
115329f1 1704
0da71265
MN
1705 if(mx&7) extra_width -= 3;
1706 if(my&7) extra_height -= 3;
115329f1
DB
1707
1708 if( full_mx < 0-extra_width
1709 || full_my < 0-extra_height
1710 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1711 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1712 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1713 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1714 emu=1;
1715 }
115329f1 1716
5d18eaad 1717 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1718 if(!square){
5d18eaad 1719 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1720 }
115329f1 1721
87352549 1722 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1723
5d18eaad
LM
1724 if(MB_MBAFF){
1725 // chroma offset when predicting from a field of opposite parity
1726 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
1727 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1728 }
1729 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1730 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1731
0da71265 1732 if(emu){
5d18eaad 1733 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1734 src_cb= s->edge_emu_buffer;
1735 }
5d18eaad 1736 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1737
1738 if(emu){
5d18eaad 1739 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1740 src_cr= s->edge_emu_buffer;
1741 }
5d18eaad 1742 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1743}
1744
9f2d1b4f 1745static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1747 int x_offset, int y_offset,
1748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1749 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1750 int list0, int list1){
1751 MpegEncContext * const s = &h->s;
1752 qpel_mc_func *qpix_op= qpix_put;
1753 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1754
5d18eaad
LM
1755 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1756 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1757 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1758 x_offset += 8*s->mb_x;
5d18eaad 1759 y_offset += 8*(s->mb_y >> MB_MBAFF);
115329f1 1760
0da71265 1761 if(list0){
1924f3ce 1762 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1763 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1764 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1765 qpix_op, chroma_op);
1766
1767 qpix_op= qpix_avg;
1768 chroma_op= chroma_avg;
1769 }
1770
1771 if(list1){
1924f3ce 1772 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1773 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1774 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1775 qpix_op, chroma_op);
1776 }
1777}
1778
9f2d1b4f
LM
1779static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1780 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1781 int x_offset, int y_offset,
1782 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1783 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1784 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1785 int list0, int list1){
1786 MpegEncContext * const s = &h->s;
1787
5d18eaad
LM
1788 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1789 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1790 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1791 x_offset += 8*s->mb_x;
5d18eaad 1792 y_offset += 8*(s->mb_y >> MB_MBAFF);
115329f1 1793
9f2d1b4f
LM
1794 if(list0 && list1){
1795 /* don't optimize for luma-only case, since B-frames usually
1796 * use implicit weights => chroma too. */
1797 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1798 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1799 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1800 int refn0 = h->ref_cache[0][ scan8[n] ];
1801 int refn1 = h->ref_cache[1][ scan8[n] ];
1802
1803 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1804 dest_y, dest_cb, dest_cr,
1805 x_offset, y_offset, qpix_put, chroma_put);
1806 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1807 tmp_y, tmp_cb, tmp_cr,
1808 x_offset, y_offset, qpix_put, chroma_put);
1809
1810 if(h->use_weight == 2){
1811 int weight0 = h->implicit_weight[refn0][refn1];
1812 int weight1 = 64 - weight0;
5d18eaad
LM
1813 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1814 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1815 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1816 }else{
5d18eaad 1817 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1818 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1819 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1820 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1821 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1822 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1823 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1824 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1825 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1826 }
1827 }else{
1828 int list = list1 ? 1 : 0;
1829 int refn = h->ref_cache[list][ scan8[n] ];
1830 Picture *ref= &h->ref_list[list][refn];
1831 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1832 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1833 qpix_put, chroma_put);
1834
5d18eaad 1835 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1836 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1837 if(h->use_weight_chroma){
5d18eaad 1838 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1839 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1840 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1841 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1842 }
1843 }
1844}
1845
1846static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1847 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1848 int x_offset, int y_offset,
1849 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1850 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1851 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1852 int list0, int list1){
1853 if((h->use_weight==2 && list0 && list1
1854 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1855 || h->use_weight==1)
1856 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1857 x_offset, y_offset, qpix_put, chroma_put,
1858 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1859 else
1860 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1861 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1862}
1863
513fbd8e
LM
1864static inline void prefetch_motion(H264Context *h, int list){
1865 /* fetch pixels for estimated mv 4 macroblocks ahead
1866 * optimized for 64byte cache lines */
1867 MpegEncContext * const s = &h->s;
1868 const int refn = h->ref_cache[list][scan8[0]];
1869 if(refn >= 0){
1870 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1871 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1872 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1873 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1874 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1875 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1876 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1877 }
1878}
1879
0da71265
MN
1880static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1881 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1882 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1883 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1884 MpegEncContext * const s = &h->s;
7bc9090a 1885 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265 1886 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1887
0da71265 1888 assert(IS_INTER(mb_type));
115329f1 1889
513fbd8e
LM
1890 prefetch_motion(h, 0);
1891
0da71265
MN
1892 if(IS_16X16(mb_type)){
1893 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1894 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1895 &weight_op[0], &weight_avg[0],
0da71265
MN
1896 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1897 }else if(IS_16X8(mb_type)){
1898 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1899 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1900 &weight_op[1], &weight_avg[1],
0da71265
MN
1901 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1902 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1903 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1904 &weight_op[1], &weight_avg[1],
0da71265
MN
1905 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1906 }else if(IS_8X16(mb_type)){
5d18eaad 1907 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1908 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1909 &weight_op[2], &weight_avg[2],
0da71265 1910 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1911 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1912 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1913 &weight_op[2], &weight_avg[2],
0da71265
MN
1914 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1915 }else{
1916 int i;
115329f1 1917
0da71265
MN
1918 assert(IS_8X8(mb_type));
1919
1920 for(i=0; i<4; i++){
1921 const int sub_mb_type= h->sub_mb_type[i];
1922 const int n= 4*i;
1923 int x_offset= (i&1)<<2;
1924 int y_offset= (i&2)<<1;
1925
1926 if(IS_SUB_8X8(sub_mb_type)){
1927 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1928 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1929 &weight_op[3], &weight_avg[3],
0da71265
MN
1930 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1931 }else if(IS_SUB_8X4(sub_mb_type)){
1932 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1933 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1934 &weight_op[4], &weight_avg[4],
0da71265
MN
1935 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1937 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1938 &weight_op[4], &weight_avg[4],
0da71265
MN
1939 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1940 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1941 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1942 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1943 &weight_op[5], &weight_avg[5],
0da71265 1944 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1945 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1946 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1947 &weight_op[5], &weight_avg[5],
0da71265
MN
1948 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949 }else{
1950 int j;
1951 assert(IS_SUB_4X4(sub_mb_type));
1952 for(j=0; j<4; j++){
1953 int sub_x_offset= x_offset + 2*(j&1);
1954 int sub_y_offset= y_offset + (j&2);
1955 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1956 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1957 &weight_op[6], &weight_avg[6],
0da71265
MN
1958 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1959 }
1960 }
1961 }
1962 }
513fbd8e
LM
1963
1964 prefetch_motion(h, 1);
0da71265
MN
1965}
1966
2b100ab2 1967static void decode_init_vlc(void){
0da71265
MN
1968 static int done = 0;
1969
1970 if (!done) {
1971 int i;
1972 done = 1;
1973
115329f1 1974 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1975 &chroma_dc_coeff_token_len [0], 1, 1,
073c2593 1976 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
0da71265
MN
1977
1978 for(i=0; i<4; i++){
115329f1 1979 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1980 &coeff_token_len [i][0], 1, 1,
073c2593 1981 &coeff_token_bits[i][0], 1, 1, 1);
0da71265
MN
1982 }
1983
1984 for(i=0; i<3; i++){
1985 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1986 &chroma_dc_total_zeros_len [i][0], 1, 1,
073c2593 1987 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
0da71265
MN
1988 }
1989 for(i=0; i<15; i++){
115329f1 1990 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1991 &total_zeros_len [i][0], 1, 1,
073c2593 1992 &total_zeros_bits[i][0], 1, 1, 1);
0da71265
MN
1993 }
1994
1995 for(i=0; i<6; i++){
115329f1 1996 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
0da71265 1997 &run_len [i][0], 1, 1,
073c2593 1998 &run_bits[i][0], 1, 1, 1);
0da71265 1999 }
115329f1 2000 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 2001 &run_len [6][0], 1, 1,
073c2593 2002 &run_bits[6][0], 1, 1, 1);
0da71265
MN
2003 }
2004}
2005
0da71265 2006static void free_tables(H264Context *h){
7978debd 2007 int i;
0da71265 2008 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
2009 av_freep(&h->chroma_pred_mode_table);
2010 av_freep(&h->cbp_table);
9e528114
LA
2011 av_freep(&h->mvd_table[0]);
2012 av_freep(&h->mvd_table[1]);
5ad984c9 2013 av_freep(&h->direct_table);
0da71265
MN
2014 av_freep(&h->non_zero_count);
2015 av_freep(&h->slice_table_base);
6ba71fc4
LLL
2016 av_freep(&h->top_borders[1]);
2017 av_freep(&h->top_borders[0]);
0da71265 2018 h->slice_table= NULL;
e5017ab8 2019
0da71265
MN
2020 av_freep(&h->mb2b_xy);
2021 av_freep(&h->mb2b8_xy);
9f2d1b4f
LM
2022
2023 av_freep(&h->s.obmc_scratchpad);
7978debd
2024
2025 for(i = 0; i < MAX_SPS_COUNT; i++)
2026 av_freep(h->sps_buffers + i);
2027
2028 for(i = 0; i < MAX_PPS_COUNT; i++)
2029 av_freep(h->pps_buffers + i);
0da71265
MN
2030}
2031
239ea04c
LM
2032static void init_dequant8_coeff_table(H264Context *h){
2033 int i,q,x;
548a1c8a 2034 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2035 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2036 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2037
2038 for(i=0; i<2; i++ ){
2039 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2040 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2041 break;
2042 }
2043
2044 for(q=0; q<52; q++){
acd8d10f
PI
2045 int shift = ff_div6[q];
2046 int idx = ff_rem6[q];
239ea04c 2047 for(x=0; x<64; x++)
548a1c8a
LM
2048 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2049 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2050 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2051 }
2052 }
2053}
2054
2055static void init_dequant4_coeff_table(H264Context *h){
2056 int i,j,q,x;
ab2e3e2c 2057 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2058 for(i=0; i<6; i++ ){
2059 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2060 for(j=0; j<i; j++){
2061 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2062 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2063 break;
2064 }
2065 }
2066 if(j<i)
2067 continue;
2068
2069 for(q=0; q<52; q++){
acd8d10f
PI
2070 int shift = ff_div6[q] + 2;
2071 int idx = ff_rem6[q];
239ea04c 2072 for(x=0; x<16; x++)
ab2e3e2c
LM
2073 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2074 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2075 h->pps.scaling_matrix4[i][x]) << shift;
2076 }
2077 }
2078}
2079
2080static void init_dequant_tables(H264Context *h){
2081 int i,x;
2082 init_dequant4_coeff_table(h);
2083 if(h->pps.transform_8x8_mode)
2084 init_dequant8_coeff_table(h);
2085 if(h->sps.transform_bypass){
2086 for(i=0; i<6; i++)
2087 for(x=0; x<16; x++)
2088 h->dequant4_coeff[i][0][x] = 1<<6;
2089 if(h->pps.transform_8x8_mode)
2090 for(i=0; i<2; i++)
2091 for(x=0; x<64; x++)
2092 h->dequant8_coeff[i][0][x] = 1<<6;
2093 }
2094}
2095
2096
0da71265
MN
2097/**
2098 * allocates tables.
3b66c4c5 2099 * needs width/height
0da71265
MN
2100 */
2101static int alloc_tables(H264Context *h){
2102 MpegEncContext * const s = &h->s;
7bc9090a 2103 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2104 int x,y;
0da71265
MN
2105
2106 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2107
53c05b1e 2108 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
5d18eaad 2109 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
6ba71fc4
LLL
2110 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2111 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
5d0e4cb8 2112 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2113
e5017ab8
LA
2114 if( h->pps.cabac ) {
2115 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
9e528114
LA
2116 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2117 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
5ad984c9 2118 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8
LA
2119 }
2120
5d18eaad
LM
2121 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2122 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2123
a55f20bd
LM
2124 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2125 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2126 for(y=0; y<s->mb_height; y++){
2127 for(x=0; x<s->mb_width; x++){
7bc9090a 2128 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2129 const int b_xy = 4*x + 4*y*h->b_stride;
2130 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2131
0da71265
MN
2132 h->mb2b_xy [mb_xy]= b_xy;
2133 h->mb2b8_xy[mb_xy]= b8_xy;
2134 }
2135 }
9f2d1b4f 2136
9c6221ae
GV
2137 s->obmc_scratchpad = NULL;
2138
56edbd81
LM
2139 if(!h->dequant4_coeff[0])
2140 init_dequant_tables(h);
2141
0da71265
MN
2142 return 0;
2143fail:
2144 free_tables(h);
2145 return -1;
2146}
2147
2148static void common_init(H264Context *h){
2149 MpegEncContext * const s = &h->s;
0da71265
MN
2150
2151 s->width = s->avctx->width;
2152 s->height = s->avctx->height;
2153 s->codec_id= s->avctx->codec->id;
115329f1 2154
c92a30bb 2155 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2156
239ea04c 2157 h->dequant_coeff_pps= -1;
9a41c2c7 2158 s->unrestricted_mv=1;
0da71265 2159 s->decode=1; //FIXME
56edbd81
LM
2160
2161 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2162 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2163}
2164
2165static int decode_init(AVCodecContext *avctx){
2166 H264Context *h= avctx->priv_data;
2167 MpegEncContext * const s = &h->s;
2168
3edcacde 2169 MPV_decode_defaults(s);
115329f1 2170
0da71265
MN
2171 s->avctx = avctx;
2172 common_init(h);
2173
2174 s->out_format = FMT_H264;
2175 s->workaround_bugs= avctx->workaround_bugs;
2176
2177 // set defaults
0da71265 2178// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2179 s->quarter_sample = 1;
0da71265
MN
2180 s->low_delay= 1;
2181 avctx->pix_fmt= PIX_FMT_YUV420P;
2182
c2212338 2183 decode_init_vlc();
115329f1 2184
26165f99
MR
2185 if(avctx->extradata_size > 0 && avctx->extradata &&
2186 *(char *)avctx->extradata == 1){
4770b1b4
RT
2187 h->is_avc = 1;
2188 h->got_avcC = 0;
26165f99
MR
2189 } else {
2190 h->is_avc = 0;
4770b1b4
RT
2191 }
2192
0da71265
MN
2193 return 0;
2194}
2195
af8aa846 2196static int frame_start(H264Context *h){
0da71265
MN
2197 MpegEncContext * const s = &h->s;
2198 int i;
2199
af8aa846
MN
2200 if(MPV_frame_start(s, s->avctx) < 0)
2201 return -1;
0da71265 2202 ff_er_frame_start(s);
0da71265
MN
2203
2204 assert(s->linesize && s->uvlinesize);
2205
2206 for(i=0; i<16; i++){
2207 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2208 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2209 }
2210 for(i=0; i<4; i++){
2211 h->block_offset[16+i]=
2212 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2213 h->block_offset[24+16+i]=
2214 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2215 }
2216
934b0821
LM
2217 /* can't be in alloc_tables because linesize isn't known there.
2218 * FIXME: redo bipred weight to not require extra buffer? */
2219 if(!s->obmc_scratchpad)
5d18eaad
LM
2220 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2221
2222 /* some macroblocks will be accessed before they're available */
2223 if(FRAME_MBAFF)
2224 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
934b0821 2225
0da71265 2226// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
af8aa846 2227 return 0;
0da71265
MN
2228}
2229
93cc10fa 2230static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2231 MpegEncContext * const s = &h->s;
2232 int i;
115329f1 2233
53c05b1e
MN
2234 src_y -= linesize;
2235 src_cb -= uvlinesize;
2236 src_cr -= uvlinesize;
2237
3b66c4c5 2238 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2239 // and the line above the bottom macroblock
6ba71fc4 2240 h->left_border[0]= h->top_borders[0][s->mb_x][15];
53c05b1e
MN
2241 for(i=1; i<17; i++){
2242 h->left_border[i]= src_y[15+i* linesize];
2243 }
115329f1 2244
6ba71fc4
LLL
2245 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2246 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2247
87352549 2248 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
6ba71fc4
LLL
2249 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2250 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
53c05b1e
MN
2251 for(i=1; i<9; i++){
2252 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2253 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2254 }
6ba71fc4
LLL
2255 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2256 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2257 }
2258}
2259
93cc10fa 2260static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2261 MpegEncContext * const s = &h->s;
2262 int temp8, i;
2263 uint64_t temp64;
b69378e2
2264 int deblock_left;
2265 int deblock_top;
2266 int mb_xy;
2267
2268 if(h->deblocking_filter == 2) {
2269 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2270 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2271 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2272 } else {
2273 deblock_left = (s->mb_x > 0);
2274 deblock_top = (s->mb_y > 0);
2275 }
53c05b1e
MN
2276
2277 src_y -= linesize + 1;
2278 src_cb -= uvlinesize + 1;
2279 src_cr -= uvlinesize + 1;
2280
2281#define XCHG(a,b,t,xchg)\
2282t= a;\
2283if(xchg)\
2284 a= b;\
2285b= t;
d89dc06a
LM
2286
2287 if(deblock_left){
2288 for(i = !deblock_top; i<17; i++){
2289 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2290 }
2291 }
2292
2293 if(deblock_top){
6ba71fc4
LLL
2294 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2295 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2296 if(s->mb_x+1 < s->mb_width){
43efd19a
LM
2297 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2298 }
53c05b1e 2299 }
53c05b1e 2300
87352549 2301 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a
LM
2302 if(deblock_left){
2303 for(i = !deblock_top; i<9; i++){
2304 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2305 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2306 }
2307 }
2308 if(deblock_top){
6ba71fc4
LLL
2309 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2310 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2311 }
2312 }
2313}
2314
2315static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2316 MpegEncContext * const s = &h->s;
2317 int i;
115329f1 2318
6ba71fc4
LLL
2319 src_y -= 2 * linesize;
2320 src_cb -= 2 * uvlinesize;
2321 src_cr -= 2 * uvlinesize;
2322
3b66c4c5 2323 // There are two lines saved, the line above the the top macroblock of a pair,
6ba71fc4
LLL
2324 // and the line above the bottom macroblock
2325 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2326 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2327 for(i=2; i<34; i++){
2328 h->left_border[i]= src_y[15+i* linesize];
2329 }
115329f1 2330
6ba71fc4
LLL
2331 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2332 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2333 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2334 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2335
87352549 2336 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
6ba71fc4
LLL
2337 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2338 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2339 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2340 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2341 for(i=2; i<18; i++){
2342 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2343 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2344 }
2345 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2346 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2347 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2348 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
2349 }
2350}
2351
/**
 * Exchange or restore the saved border samples around a macroblock pair.
 * The plane pointers are first moved back by two rows and one column; the
 * column left of the pair is then traded with h->left_border and the two rows
 * above it with h->top_borders[0..1][mb_x]. The left part is skipped when
 * s->mb_x is 0 and the top part when s->mb_y is not greater than 1.
 * @param xchg forwarded to XCHG where the call passes it on: non-zero swaps
 *             buffer and picture, zero only writes the buffer into the picture
 */
2352static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2353 MpegEncContext * const s = &h->s;
2354 int temp8, i;
2355 uint64_t temp64;
2356 int deblock_left = (s->mb_x > 0);
5d18eaad 2357 int deblock_top = (s->mb_y > 1);
6ba71fc4 2358
a9c9a240 2359 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
6ba71fc4
LLL
2360
/* step back to the sample one column left of and two rows above the pair */
2361 src_y -= 2 * linesize + 1;
2362 src_cb -= 2 * uvlinesize + 1;
2363 src_cr -= 2 * uvlinesize + 1;
2364
/* XCHG(a,b,t,xchg): b always receives the old a; a receives b only if xchg is set */
2365#define XCHG(a,b,t,xchg)\
2366t= a;\
2367if(xchg)\
2368 a= b;\
2369b= t;
2370
/* left luma column: 34 rows, the first two are skipped when there is no top border */
2371 if(deblock_left){
2372 for(i = (!deblock_top)<<1; i<34; i++){
2373 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2374 }
2375 }
2376
/* two luma rows above the pair, handled 8 bytes at a time */
2377 if(deblock_top){
2378 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2379 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2380 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2381 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
5d18eaad
LM
/* first 8 bytes of the next macroblock's top border, unless this is the last column */
2382 if(s->mb_x+1 < s->mb_width){
2383 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2384 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2385 }
6ba71fc4
LLL
2386 }
2387
/* chroma borders are skipped when gray-only decoding is enabled and requested */
87352549 2388 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
6ba71fc4
LLL
2389 if(deblock_left){
/* cb is stored at left_border[34..], cr at left_border[34+18..] */
2390 for(i = (!deblock_top) << 1; i<18; i++){
2391 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2392 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2393 }
2394 }
2395 if(deblock_top){
/* cb occupies bytes 16..23 and cr bytes 24..31 of each top border entry */
2396 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2397 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2398 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2399 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
53c05b1e 2400 }
53c05b1e
MN
2401 }
2402}
2403
/**
 * Reconstruct the macroblock at (s->mb_x, s->mb_y) into s->current_picture:
 * intra prediction or motion compensation (hl_motion), addition of the residual
 * in h->mb through the selected idct_add/idct_dc_add routines and, when
 * h->deblocking_filter is set, border backup plus the filter_mb calls.
 * @param simple when non-zero the field/MBAFF, intra PCM, gray and non-H.264
 *        (svq3_*) branches below are not taken
 */
5a6a6cc7 2404static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2405 MpegEncContext * const s = &h->s;
2406 const int mb_x= s->mb_x;
2407 const int mb_y= s->mb_y;
7bc9090a 2408 const int mb_xy= mb_x + mb_y*s->mb_stride;
0da71265
MN
2409 const int mb_type= s->current_picture.mb_type[mb_xy];
2410 uint8_t *dest_y, *dest_cb, *dest_cr;
2411 int linesize, uvlinesize /*dct_offset*/;
2412 int i;
6867a90b 2413 int *block_offset = &h->block_offset[0];
6ba71fc4 2414 const unsigned int bottom = mb_y & 1;
bd91fee3 2415 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
36940eca 2416 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2417 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2418
0da71265
MN
/* destination of this macroblock in the luma and the two chroma planes */
2419 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2420 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2421 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2422
a957c27b
LM
2423 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2424 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2425
/* field macroblock: doubled strides and the second half of block_offset */
bd91fee3 2426 if (!simple && MB_FIELD) {
5d18eaad
LM
2427 linesize = h->mb_linesize = s->linesize * 2;
2428 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2429 block_offset = &h->block_offset[24];
0da71265
MN
/* odd mb_y: move the destinations up by 15 luma / 7 chroma lines */
2430 if(mb_y&1){ //FIXME move out of this func?
2431 dest_y -= s->linesize*15;
6867a90b
LLL
2432 dest_cb-= s->uvlinesize*7;
2433 dest_cr-= s->uvlinesize*7;
0da71265 2434 }
5d18eaad
LM
/* MBAFF: rewrite the cached reference indices to (16+ref)^(mb_y&1) */
2435 if(FRAME_MBAFF) {
2436 int list;
3425501d 2437 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2438 if(!USES_LIST(mb_type, list))
2439 continue;
2440 if(IS_16X16(mb_type)){
2441 int8_t *ref = &h->ref_cache[list][scan8[0]];
2442 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2443 }else{
2444 for(i=0; i<16; i+=4){
2445 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2446 int ref = h->ref_cache[list][scan8[i]];
2447 if(ref >= 0)
2448 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
2449 }
2450 }
2451 }
2452 }
0da71265 2453 } else {
5d18eaad
LM
2454 linesize = h->mb_linesize = s->linesize;
2455 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2456// dct_offset = s->linesize * 16;
2457 }
115329f1 2458
ef9d1d15
LM
/* select the luma residual routines: plain pixel add for transform bypass,
   otherwise the 8x8 or 4x4 IDCT depending on the macroblock's transform size */
2459 if(transform_bypass){
2460 idct_dc_add =
2461 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2462 }else if(IS_8x8DCT(mb_type)){
2463 idct_dc_add = s->dsp.h264_idct8_dc_add;
2464 idct_add = s->dsp.h264_idct8_add;
2465 }else{
2466 idct_dc_add = s->dsp.h264_idct_dc_add;
2467 idct_add = s->dsp.h264_idct_add;
2468 }
0da71265 2469
/* MBAFF with deblocking: for an intra macroblock, exchange the pair's border once
   (for the top macroblock, or for the bottom one when the macroblock above is not intra) */
bd91fee3 2470 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
5d18eaad
LM
2471 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2472 int mbt_y = mb_y&~1;
2473 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2474 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2475 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2476 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2477 }
2478
bd91fee3 2479 if (!simple && IS_INTRA_PCM(mb_type)) {
6fbcaaa0
LLL
2480 unsigned int x, y;
2481
2482 // The pixels are stored in h->mb array in the same order as levels,
2483 // copy them in output in the correct order.
2484 for(i=0; i<16; i++) {
2485 for (y=0; y<4; y++) {
2486 for (x=0; x<4; x++) {
6867a90b 2487 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
6fbcaaa0
LLL
2488 }
2489 }
2490 }
2491 for(i=16; i<16+4; i++) {
2492 for (y=0; y<4; y++) {
2493 for (x=0; x<4; x++) {
6867a90b 2494 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
6fbcaaa0
LLL
2495 }
2496 }
2497 }
2498 for(i=20; i<20+4; i++) {
2499 for (y=0; y<4; y++) {
2500 for (x=0; x<4; x++) {
6867a90b 2501 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
6fbcaaa0
LLL
2502 }
2503 }
2504 }
e7e09b49
LLL
2505 } else {
2506 if(IS_INTRA(mb_type)){
/* non-MBAFF deblocking: exchange the macroblock border before intra prediction;
   the matching call with 0 follows once the luma prediction is done */
bd91fee3 2507 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
93cc10fa 2508 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2509
/* chroma intra prediction for both planes */
87352549 2510 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2511 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2512 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2513 }
0da71265 2514
e7e09b49 2515 if(IS_INTRA4x4(mb_type)){
bd91fee3 2516 if(simple || !s->encoding){
43efd19a
LM
/* 8x8 transform: predict each 8x8 block, then add its residual right away */
2517 if(IS_8x8DCT(mb_type)){
2518 for(i=0; i<16; i+=4){
2519 uint8_t * const ptr= dest_y + block_offset[i];
2520 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
ef9d1d15 2521 const int nnz = h->non_zero_count_cache[ scan8[i] ];
c92a30bb 2522 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
4672503d 2523 (h->topright_samples_available<<i)&0x4000, linesize);
ef9d1d15
LM
2524 if(nnz){
2525 if(nnz == 1 && h->mb[i*16])
2526 idct_dc_add(ptr, h->mb + i*16, linesize);
2527 else
2528 idct_add(ptr, h->mb + i*16, linesize);
2529 }
43efd19a
LM
2530 }
2531 }else
/* 4x4 transform: predict and reconstruct the sixteen 4x4 blocks one by one */
e7e09b49 2532 for(i=0; i<16; i++){
6867a90b 2533 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2534 uint8_t *topright;
2535 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
ef9d1d15 2536 int nnz, tr;
e7e09b49
LLL
2537
/* these two modes read top-right samples; when they are unavailable the
   sample at ptr[3 - linesize] is replicated four times into tr instead */
2538 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2539 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
6867a90b 2540 assert(mb_y || linesize <= block_offset[i]);
e7e09b49
LLL
2541 if(!topright_avail){
2542 tr= ptr[3 - linesize]*0x01010101;
2543 topright= (uint8_t*) &tr;
115329f1 2544 }else
e7e09b49 2545 topright= ptr + 4 - linesize;
a9799653 2546 }else
e7e09b49
LLL
2547 topright= NULL;
2548
c92a30bb 2549 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
ef9d1d15
LM
2550 nnz = h->non_zero_count_cache[ scan8[i] ];
2551 if(nnz){
bd91fee3 2552 if(is_h264){
ef9d1d15
LM
2553 if(nnz == 1 && h->mb[i*16])
2554 idct_dc_add(ptr, h->mb + i*16, linesize);
2555 else
2556 idct_add(ptr, h->mb + i*16, linesize);
2557 }else
e7e09b49
LLL
2558 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2559 }
8b82a956 2560 }
0da71265 2561 }
e7e09b49 2562 }else{
/* intra 16x16: whole-macroblock prediction, then the luma DC dequant/transform */
c92a30bb 2563 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2564 if(is_h264){
36940eca 2565 if(!transform_bypass)
239ea04c 2566 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
36940eca 2567 }else
e7e09b49 2568 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2569 }
bd91fee3 2570 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
93cc10fa 2571 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* non-intra macroblock: motion compensation (only called on the H.264 path) */
bd91fee3 2572 }else if(is_h264){
e7e09b49 2573 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2574 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2575 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2576 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2577 }
e7e09b49
LLL
2578
2579
/* luma residual for everything except intra 4x4, which was handled above */
2580 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2581 if(is_h264){
ef9d1d15
LM
2582 if(IS_INTRA16x16(mb_type)){
2583 for(i=0; i<16; i++){
2584 if(h->non_zero_count_cache[ scan8[i] ])
2585 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2586 else if(h->mb[i*16])
2587 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2588 }
2589 }else{
/* step over four 4x4 indices at a time when the 8x8 transform is used */
2590 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2591 for(i=0; i<16; i+=di){
2592 int nnz = h->non_zero_count_cache[ scan8[i] ];
2593 if(nnz){
2594 if(nnz==1 && h->mb[i*16])
2595 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2596 else
2597 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2598 }
e7e09b49 2599 }
4704097a 2600 }
e7e09b49
LLL
2601 }else{
2602 for(i=0; i<16; i++){
2603 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2604 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2605 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2606 }
4704097a 2607 }
0da71265
MN
2608 }
2609 }
0da71265 2610
/* chroma residual: blocks 16..19 go to dest_cb, 20..23 to dest_cr (selected by (i&4)>>2) */
87352549 2611 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
ef9d1d15
LM
2612 uint8_t *dest[2] = {dest_cb, dest_cr};
2613 if(transform_bypass){
2614 idct_add = idct_dc_add = s->dsp.add_pixels4;
2615 }else{
2616 idct_add = s->dsp.h264_idct_add;
2617 idct_dc_add = s->dsp.h264_idct_dc_add;
4691a77d
2618 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2619 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
36940eca 2620 }
bd91fee3 2621 if(is_h264){
ef9d1d15
LM
2622 for(i=16; i<16+8; i++){
2623 if(h->non_zero_count_cache[ scan8[i] ])
2624 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2625 else if(h->mb[i*16])
2626 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
4704097a 2627 }
e7e09b49 2628 }else{
ef9d1d15 2629 for(i=16; i<16+8; i++){
e7e09b49 2630 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
ef9d1d15 2631 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
e7e09b49
LLL
2632 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2633 }
4704097a 2634 }
0da71265
MN
2635 }
2636 }
2637 }
53c05b1e 2638 if(h->deblocking_filter) {
bd91fee3 2639 if (!simple && FRAME_MBAFF) {
5d18eaad
LM
2640 //FIXME try deblocking one mb at a time?
2641 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
6ba71fc4
LLL
/* the locals below shadow the outer mb_y/mb_xy and refer to the row above s->mb_y */
2642 const int mb_y = s->mb_y - 1;
2643 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2644 const int mb_xy= mb_x + mb_y*s->mb_stride;
2645 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2646 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
6ba71fc4
LLL
/* the pair is only filtered once its bottom macroblock has been reconstructed */
2647 if (!bottom) return;
2648 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2649 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2650 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2651
5d18eaad
LM
2652 if(IS_INTRA(mb_type_top | mb_type_bottom))
2653 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2654
6ba71fc4 2655 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
5d18eaad 2656 // deblock a pair
115329f1 2657 // top
/* s->mb_y is temporarily decremented for the top macroblock and restored before the bottom one */
6ba71fc4 2658 s->mb_y--;
a9c9a240 2659 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3b66c4c5 2660 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
4691a77d
2661 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2662 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
6ba71fc4 2663 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
6ba71fc4
LLL
2664 // bottom
2665 s->mb_y++;
a9c9a240 2666 tprintf(h->s.avctx, "call mbaff filter_mb\n");
3b66c4c5 2667 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
4691a77d
2668 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2669 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
6ba71fc4 2670 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2671 } else {
/* non-MBAFF: save the border, refill the caches and filter this macroblock */
a9c9a240 2672 tprintf(h->s.avctx, "call filter_mb\n");
93cc10fa 2673 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
3b66c4c5 2674 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3e20143e 2675 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2676 }
53c05b1e 2677 }
0da71265
MN
2678}
2679
0da71265 2680/**
bd91fee3
AS
2681 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2682 */
2683static void hl_decode_mb_simple(H264Context *h){
/* simple=1 is a constant argument to the av_always_inline worker */
2684 hl_decode_mb_internal(h, 1);
2685}
2686
2687/**
2688 * Process a macroblock; this handles edge cases, such as interlacing.
2689 */
2690static void av_noinline hl_decode_mb_complex(H264Context *h){
/* simple=0 keeps every branch of the worker; this wrapper itself is marked av_noinline */
2691 hl_decode_mb_internal(h, 0);
2692}
2693
2694static void hl_decode_mb(H264Context *h){
2695 MpegEncContext * const s = &h->s;
2696 const int mb_x= s->mb_x;
2697 const int mb_y= s->mb_y;
2698 const int mb_xy= mb_x + mb_y*s->mb_stride;
2699 const int mb_type= s->current_picture.mb_type[mb_xy];
87352549 2700 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
bd91fee3
AS
2701
2702 if(!s->decode)
2703 return;
2704
2705 if (is_complex)
2706 hl_decode_mb_complex(h);
2707 else hl_decode_mb_simple(h);
2708}
2709
2710/**
0da71265
MN
2711 * fills the default_ref_list.
2712 */
2713static int fill_default_ref_list(H264Context *h){
2714 MpegEncContext * const s = &h->s;
2715 int i;
827c91bf 2716 int smallest_poc_greater_than_current = -1;
17107065 2717 Picture sorted_short_ref[32];
115329f1 2718
0da71265
MN
2719 if(h->slice_type==B_TYPE){
2720 int out_i;
29860cc8 2721 int limit= INT_MIN;
0da71265 2722
827c91bf 2723 /* sort frame according to poc in B slice */
0da71265 2724 for(out_i=0; out_i<h->short_ref_count; out_i++){
29860cc8 2725 int best_i=INT_MIN;
792bb815 2726 int best_poc=INT_MAX;
0da71265
MN
2727
2728 for(i=0; i<h->short_ref_count; i++){
2729 const int poc= h->short_ref[i]->poc;
2730 if(poc > limit && poc < best_poc){
2731 best_poc= poc;
2732 best_i= i;
2733 }
2734 }
115329f1 2735
29860cc8 2736 assert(best_i != INT_MIN);
115329f1 2737
0da71265
MN
2738 limit= best_poc;
2739 sorted_short_ref[out_i]= *h->short_ref[best_i];
a9c9a240 2740 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
827c91bf
LLL
2741 if (-1 == smallest_poc_greater_than_current) {
2742 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2743 smallest_poc_greater_than_current = out_i;
2744 }
2745 }
0da71265
MN
2746 }
2747 }
2748
2749 if(s->picture_structure == PICT_FRAME){
2750 if(h->slice_type==B_TYPE){
0da71265 2751 int list;
a9c9a240 2752 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
0da71265 2753
827c91bf 2754 // find the largest poc
0da71265 2755 for(list=0; list<2; list++){
827c91bf 2756 int index = 0;
3b33943e
MN
2757 int j= -99;
2758 int step= list ? -1 : 1;
827c91bf 2759
3b33943e
MN
2760 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2761 while(j<0 || j>= h->short_ref_count){
6ab87211
MN
2762 if(j != -99 && step == (list ? -1 : 1))
2763 return -1;
3b33943e
MN
2764 step = -step;
2765 j= smallest_poc_greater_than_current + (step>>1);
0da71265 2766 }
3b33943e
MN
2767 if(sorted_short_ref[j].reference != 3) continue;
2768 h->default_ref_list[list][index ]= sorted_short_ref[j];
2769 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
0da71265
MN
2770 }
2771
827c91bf
LLL
2772 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2773 if(h->long_ref[i] == NULL) continue;
0da71265
MN
2774 if(h->long_ref[i]->reference != 3) continue;
2775
2776 h->default_ref_list[ list ][index ]= *h->long_ref[i];
2777 h->default_ref_list[ list ][index++].pic_id= i;;
2778 }
115329f1 2779
3b33943e 2780 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
827c91bf
LLL
2781 // swap the two first elements of L1 when
2782 // L0 and L1 are identical
0da71265
MN
2783 Picture temp= h->default_ref_list[1][0];
2784 h->default_ref_list[1][0] = h->default_ref_list[1][1];
5cb46bc7 2785 h->default_ref_list[1][1] = temp;
0da71265
MN
2786 }
2787
2788 if(index < h->ref_count[ list ])
2789 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
2790 }
2791 }else{
2792 int index=0;
2f944356 2793 for(i=0; i<h->short_ref_count; i++){
0da71265
MN
2794 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
2795 h->default_ref_list[0][index ]= *h->short_ref[i];
2796 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2797 }
2f944356 2798 for(i = 0; i < 16; i++){
827c91bf 2799 if(h->long_ref[i] == NULL) continue;
0da71265
MN
2800 if(h->long_ref[i]->reference != 3) continue;
2801 h->default_ref_list[0][index ]= *h->long_ref[i];
2802 h->default_ref_list[0][index++].pic_id= i;;
2803 }
2804 if(index < h->ref_count[0])
2805 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2806 }
2807 }else{ //FIELD
2808 if(h->slice_type==B_TYPE){
2809 }else{
2810 //FIXME second field balh
2811 }
2812 }
827c91bf
LLL
2813#ifdef TRACE
2814 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2815 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf
LLL
2816 }
2817 if(h->slice_type==B_TYPE){
2818 for (i=0; i<h->ref_count[1]; i++) {
a9c9a240 2819 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf
LLL
2820 }
2821 }
2822#endif
0da71265
MN
2823 return 0;
2824}
2825
827c91bf
LLL
2826static void print_short_term(H264Context *h);
2827static void print_long_term(H264Context *h);
2828
0da71265
MN
/**
 * Parse the reference picture list reordering syntax and build h->ref_list.
 * Every list starts as a copy of h->default_ref_list; if the flag bit is set,
 * reordering commands are read until reordering_of_pic_nums_idc is 3 and each
 * selected picture is moved to the current index of the list. Entries that end
 * up without data[0] are replaced by s->current_picture.
 * @return 0 on success (also for I/SI slices, which are skipped),
 *         -1 on a bitstream value that is out of range
 */
2829static int decode_ref_pic_list_reordering(H264Context *h){
2830 MpegEncContext * const s = &h->s;
6ab87211 2831 int list, index;
115329f1 2832
827c91bf
LLL
2833 print_short_term(h);
2834 print_long_term(h);
3b66c4c5 2835 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
115329f1 2836
3425501d 2837 for(list=0; list<h->list_count; list++){
0da71265
MN
2838 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2839
/* one flag bit per list announces whether reordering commands follow */
2840 if(get_bits1(&s->gb)){
/* running prediction of the picture number, starts at the current one */
2841 int pred= h->curr_pic_num;
0da71265
MN
2842
2843 for(index=0; ; index++){
88e7a4d1
MN
2844 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2845 unsigned int pic_id;
0da71265 2846 int i;
2f944356 2847 Picture *ref = NULL;
115329f1
DB
2848
/* idc 3 terminates the command list */
2849 if(reordering_of_pic_nums_idc==3)
0bc42cad 2850 break;
115329f1 2851
0da71265 2852 if(index >= h->ref_count[list]){
9b879566 2853 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2854 return -1;
2855 }
115329f1 2856
0da71265
MN
2857 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short term picture, addressed by a difference subtracted from / added to pred */
2858 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2859 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
0da71265
MN
2860
2861 if(abs_diff_pic_num >= h->max_pic_num){
9b879566 2862 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2863 return -1;
2864 }
2865
2866 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2867 else pred+= abs_diff_pic_num;
/* wrap around; the mask form relies on max_pic_num being a power of two — TODO confirm */
2868 pred &= h->max_pic_num - 1;
115329f1 2869
0d175622
MN
/* search the short term list from its end; i stays >= 0 only when a match was found */
2870 for(i= h->short_ref_count-1; i>=0; i--){
2871 ref = h->short_ref[i];
2872 assert(ref->reference == 3);
2873 assert(!ref->long_ref);
2874 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
0da71265
MN
2875 break;
2876 }
0d175622
MN
2877 if(i>=0)
2878 ref->pic_id= ref->frame_num;
0da71265
MN
2879 }else{
/* idc 2: long term picture, addressed directly by its index */
2880 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
88e7a4d1
MN
2881 if(pic_id>31){
2882 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2883 return -1;
2884 }
0d175622 2885 ref = h->long_ref[pic_id];
ac658be5
FOL
/* i is used as a found (0) / missing (-1) marker for the check below */
2886 if(ref){
2887 ref->pic_id= pic_id;
2888 assert(ref->reference == 3);
2889 assert(ref->long_ref);
2890 i=0;
2891 }else{
2892 i=-1;
2893 }
0da71265
MN
2894 }
2895
0d315f28 2896 if (i < 0) {
9b879566 2897 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2898 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2899 } else {
/* find where the picture currently sits (or stop at the last entry), shift the
   entries in between down by one and place the picture at position index */
2900 for(i=index; i+1<h->ref_count[list]; i++){
2901 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2902 break;
21be92bf
MN
2903 }
2904 for(; i > index; i--){
2905 h->ref_list[list][i]= h->ref_list[list][i-1];
2906 }
0d175622 2907 h->ref_list[list][index]= *ref;
0da71265 2908 }
0bc42cad 2909 }else{
9b879566 2910 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2911 return -1;
2912 }
2913 }
2914 }
0da71265 2915 }
/* entries without picture data are replaced by the current picture */
3425501d 2916 for(list=0; list<h->list_count; list++){
6ab87211
MN
2917 for(index= 0; index < h->ref_count[list]; index++){
2918 if(!h->ref_list[list][index].data[0])
2919 h->ref_list[list][index]= s->current_picture;
2920 }
6ab87211 2921 }
115329f1 2922
5ad984c9
LM
/* the distance scale factors are only computed for B slices without spatial direct prediction */
2923 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
2924 direct_dist_scale_factor(h);
2f944356 2925 direct_ref_list_init(h);
115329f1 2926 return 0;
0da71265
MN
2927}
2928
91c58c94 2929static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2930 int list, i, j;
3425501d 2931 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2932 for(i=0; i<h->ref_count[list]; i++){
2933 Picture *frame = &h->ref_list[list][i];
2934 Picture *field = &h->ref_list[list][16+2*i];
2935 field[0] = *frame;
2936 for(j=0; j<3; j++)
2937 field[0].linesize[j] <<= 1;
2938 field[1] = field[0];
2939 for(j=0; j<3; j++)
2940 field[1].data[j] += frame->linesize[j];
2941
2942 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2943 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2944 for(j=0; j<2; j++){
2945 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2946 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2947 }
2948 }
2949 }
2950 for(j=0; j<h->ref_count[1]; j++){
2951 for(i=0; i<h->ref_count[0]; i++)
2952 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2953 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2954 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2955 }
2956}
2957
0da71265
MN
2958static int pred_weight_table(H264Context *h){
2959 MpegEncContext * const s = &h->s;
2960 int list, i;
9f2d1b4f 2961 int luma_def, chroma_def;
115329f1 2962
9f2d1b4f
LM
2963 h->use_weight= 0;
2964 h->use_weight_chroma= 0;
0da71265
MN
2965 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2966 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2967 luma_def = 1<<h->luma_log2_weight_denom;
2968 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2969
2970 for(list=0; list<2; list++){
2971 for(i=0; i<h->ref_count[list]; i++){
2972 int luma_weight_flag, chroma_weight_flag;
115329f1 2973
0da71265
MN
2974 luma_weight_flag= get_bits1(&s->gb);
2975 if(luma_weight_flag){
2976 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2977 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f
LM
2978 if( h->luma_weight[list][i] != luma_def
2979 || h->luma_offset[list][i] != 0)
2980 h->use_weight= 1;
2981 }else{
2982 h->luma_weight[list][i]= luma_def;
2983 h->luma_offset[list][i]= 0;
0da71265
MN
2984 }
2985
2986 chroma_weight_flag= get_bits1(&s->gb);
2987 if(chroma_weight_flag){
2988 int j;
2989 for(j=0; j<2; j++){
2990 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2991 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
9f2d1b4f
LM
2992 if( h->chroma_weight[list][i][j] != chroma_def
2993 || h->chroma_offset[list][i][j] != 0)
2994 h->use_weight_chroma= 1;
2995 }
2996 }else{
2997 int j;
2998 for(j=0; j<2; j++){
2999 h->chroma_weight[list][i][j]= chroma_def;
3000 h->chroma_offset[list][i][j]= 0;
0da71265
MN
3001 }
3002 }
3003 }
3004 if(h->slice_type != B_TYPE) break;
3005 }
9f2d1b4f 3006 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3007 return 0;
3008}
3009
9f2d1b4f
LM
3010static void implicit_weight_table(H264Context *h){
3011 MpegEncContext * const s = &h->s;
9f2d1b4f
LM
3012 int ref0, ref1;
3013 int cur_poc = s->current_picture_ptr->poc;
3014
3015 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3016 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3017 h->use_weight= 0;
3018 h->use_weight_chroma= 0;
3019 return;
3020 }
3021
3022 h->use_weight= 2;
3023 h->use_weight_chroma= 2;
3024 h->luma_log2_weight_denom= 5;
3025 h->chroma_log2_weight_denom= 5;
3026
9f2d1b4f
LM
3027 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3028 int poc0 = h->ref_list[0][ref0].poc;
3029 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3030 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3031 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3032 if(td){
f66e4f5f 3033 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3034 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3035 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3036 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3037 h->implicit_weight[ref0][ref1] = 32;
3038 else
3039 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3040 }else
3041 h->implicit_weight[ref0][ref1] = 32;
3042 }
3043 }
3044}
3045
4e4d983e
LM
3046static inline void unreference_pic(H264Context *h, Picture *pic){
3047 int i;
3048 pic->reference=0;
3049 if(pic == h->delayed_output_pic)
3050 pic->reference=1;
3051 else{
3052 for(i = 0; h->delayed_pic[i]; i++)
3053 if(pic == h->delayed_pic[i]){
3054 pic->reference=1;
3055 break;
3056 }
3057 }
3058}
3059
0da71265 3060/**
5175b937 3061 * instantaneous decoder refresh.
0da71265
MN
3062 */
3063static void idr(H264Context *h){
4e4d983e 3064 int i;
0da71265 3065
dc032f33
LLL
3066 for(i=0; i<16; i++){
3067 if (h->long_ref[i] != NULL) {
4e4d983e 3068 unreference_pic(h, h->long_ref[i]);
dc032f33
LLL
3069 h->long_ref[i]= NULL;
3070 }
0da71265
MN
3071 }
3072 h->long_ref_count=0;
3073
3074 for(i=0; i<h->short_ref_count; i++){
4e4d983e 3075 unreference_pic(h, h->short_ref[i]);
0da71265
MN
3076 h->short_ref[i]= NULL;
3077 }
3078 h->short_ref_count=0;
3079}
3080
7c33ad19
LM
3081/* forget old pics after a seek */
3082static void flush_dpb(AVCodecContext *avctx){
3083 H264Context *h= avctx->priv_data;
3084 int i;
285b570f
LM
3085 for(i=0; i<16; i++) {
3086 if(h->delayed_pic[i])
3087 h->delayed_pic[i]->reference= 0;
7c33ad19 3088 h->delayed_pic[i]= NULL;
285b570f
LM
3089 }
3090 if(h->delayed_output_pic)
3091 h->delayed_output_pic->reference= 0;
7c33ad19
LM
3092 h->delayed_output_pic= NULL;
3093 idr(h);
ca159196
MR
3094 if(h->s.current_picture_ptr)
3095 h->s.current_picture_ptr->reference= 0;
7c33ad19
LM
3096}
3097
0da71265
MN
3098/**
3099 *
3b66c4c5 3100 * @return the removed picture or NULL if an error occurs
0da71265
MN
3101 */
3102static Picture * remove_short(H264Context *h, int frame_num){
1924f3ce 3103 MpegEncContext * const s = &h->s;
0da71265 3104 int i;
115329f1 3105
1924f3ce 3106 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3107 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
115329f1 3108
0da71265
MN
3109 for(i=0; i<h->short_ref_count; i++){
3110 Picture *pic= h->short_ref[i];
1924f3ce 3111 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3112 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
0da71265
MN
3113 if(pic->frame_num == frame_num){
3114 h->short_ref[i]= NULL;
3115 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3116 h->short_ref_count--;
3117 return pic;
3118 }
3119 }
3120 return NULL;
3121}
3122
3123/**
3124 *
3b66c4c5 3125 * @return the removed picture or NULL if an error occurs
0da71265
MN
3126 */
3127static Picture * remove_long(H264Context *h, int i){
3128 Picture *pic;
3129
0da71265 3130 pic= h->long_ref[i];
0da71265 3131 h->long_ref[i]= NULL;
827c91bf 3132 if(pic) h->long_ref_count--;
0da71265
MN
3133
3134 return pic;
3135}
3136
3137/**
827c91bf
LLL
3138 * print short term list
3139 */
3140static void print_short_term(H264Context *h) {
3141 uint32_t i;
3142 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3143 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3144 for(i=0; i<h->short_ref_count; i++){
3145 Picture *pic= h->short_ref[i];
3146 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3147 }
3148 }
3149}
3150
3151/**
3152 * print long term list
3153 */
3154static void print_long_term(H264Context *h) {
3155 uint32_t i;
3156 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3157 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3158 for(i = 0; i < 16; i++){
3159 Picture *pic= h->long_ref[i];
3160 if (pic) {
3161 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3162 }
3163 }
3164 }
3165}
3166
3167/**
0da71265
MN
3168 * Executes the reference picture marking (memory management control operations).
3169 */
3170static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3171 MpegEncContext * const s = &h->s;
827c91bf 3172 int i, j;
0da71265
MN
3173 int current_is_long=0;
3174 Picture *pic;
115329f1 3175
0da71265 3176 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
9b879566 3177 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
115329f1 3178
0da71265
MN
3179 for(i=0; i<mmco_count; i++){
3180 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3181 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
0da71265
MN
3182
3183 switch(mmco[i].opcode){
3184 case MMCO_SHORT2UNUSED:
3185 pic= remove_short(h, mmco[i].short_frame_num);
806bb93f
MN
3186 if(pic)
3187 unreference_pic(h, pic);
3188 else if(s->avctx->debug&FF_DEBUG_MMCO)
3189 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
0da71265
MN
3190 break;
3191 case MMCO_SHORT2LONG:
3192 pic= remove_long(h, mmco[i].long_index);
4e4d983e 3193 if(pic) unreference_pic(h, pic);
115329f1 3194
0da71265 3195 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
ac658be5
FOL
3196 if (h->long_ref[ mmco[i].long_index ]){
3197 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3198 h->long_ref_count++;
3199 }
0da71265
MN
3200 break;
3201 case MMCO_LONG2UNUSED:
3202 pic= remove_long(h, mmco[i].long_index);
806bb93f
MN
3203 if(pic)
3204 unreference_pic(h, pic);
3205 else if(s->avctx->debug&FF_DEBUG_MMCO)
3206 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
0da71265
MN
3207 break;
3208 case MMCO_LONG:
3209 pic= remove_long(h, mmco[i].long_index);
4e4d983e 3210 if(pic) unreference_pic(h, pic);
115329f1 3211
0da71265
MN
3212 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3213 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3214 h->long_ref_count++;
115329f1 3215
0da71265
MN
3216 current_is_long=1;
3217 break;
3218 case MMCO_SET_MAX_LONG:
3219 assert(mmco[i].long_index <= 16);
827c91bf
LLL
3220 // just remove the long term which index is greater than new max
3221 for(j = mmco[i].long_index; j<16; j++){
3222 pic = remove_long(h, j);
4e4d983e 3223 if (pic) unreference_pic(h, pic);
0da71265
MN
3224 }
3225 break;
3226 case MMCO_RESET:
3227 while(h->short_ref_count){
3228 pic= remove_short(h, h->short_ref[0]->frame_num);
ac658be5 3229 if(pic) unreference_pic(h, pic);
0da71265 3230 }
827c91bf
LLL
3231 for(j = 0; j < 16; j++) {
3232 pic= remove_long(h, j);
4e4d983e 3233 if(pic) unreference_pic(h, pic);
0da71265
MN
3234 }
3235 break;
3236 default: assert(0);
3237 }
3238 }
115329f1 3239
0da71265
MN
3240 if(!current_is_long){
3241 pic= remove_short(h, s->current_picture_ptr->frame_num);
3242 if(pic){
4e4d983e 3243 unreference_pic(h, pic);
9b879566 3244 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
0da71265 3245 }
115329f1 3246
0da71265 3247 if(h->short_ref_count)
1924f3ce
MN
3248 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3249
3250 h->short_ref[0]= s->current_picture_ptr;
0da71265
MN
3251 h->short_ref[0]->long_ref=0;
3252 h->short_ref_count++;
3253 }
115329f1 3254
827c91bf
LLL
3255 print_short_term(h);
3256 print_long_term(h);
115329f1 3257 return 0;
0da71265
MN
3258}
3259
995a30c0 3260static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
0da71265
MN
3261 MpegEncContext * const s = &h->s;
3262 int i;
115329f1 3263
0da71265 3264 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
995a30c0
3265 s->broken_link= get_bits1(gb) -1;
3266 h->mmco[0].long_index= get_bits1(gb) - 1; // current_long_term_idx
0da71265
MN
3267 if(h->mmco[0].long_index == -1)
3268 h->mmco_index= 0;
3269 else{
3270 h->mmco[0].opcode= MMCO_LONG;
3271 h->mmco_index= 1;
115329f1 3272 }
0da71265 3273 }else{
995a30c0 3274 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
115329f1 3275 for(i= 0; i<MAX_MMCO_COUNT; i++) {
995a30c0 3276 MMCOOpcode opcode= get_ue_golomb(gb);
0da71265
MN
3277
3278 h->mmco[i].opcode= opcode;
3279 if(opcode==MMCO_SHO