More approved hunks for VAAPI & our new and cleaner hwaccel API.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
26b4fe82 34#include "h264_parser.h"
0da71265 35#include "golomb.h"
199436b9 36#include "mathops.h"
626464fb 37#include "rectangle.h"
369122dd 38#include "vdpau_internal.h"
0da71265 39
e5017ab8 40#include "cabac.h"
b250f9c6 41#if ARCH_X86
a6493a8f 42#include "x86/h264_i386.h"
52cb7981 43#endif
e5017ab8 44
2848ce84 45//#undef NDEBUG
0da71265
MN
46#include <assert.h>
47
2ddcf84b
JD
48/**
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
51 */
52#define DELAYED_PIC_REF 4
53
/*
 * Static storage for the VLC decoders used by this file.
 * Each VLC object is paired with a VLC_TYPE array and a *_size constant
 * whose value matches the first dimension of that array; for coeff_token
 * the four per-table sizes (520, 332, 280, 256) sum to the length of the
 * single shared array.
 * NOTE(review): the code that attaches these arrays to the VLC objects is
 * not visible in this part of the file -- confirm against the init code.
 */
0da71265 54static VLC coeff_token_vlc[4];
910e3668
AC
55static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
57
0da71265 58static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
59static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
61
62static VLC total_zeros_vlc[15];
910e3668
AC
63static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64static const int total_zeros_vlc_tables_size = 512;
65
0da71265 66static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
67static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
69
70static VLC run_vlc[6];
910e3668
AC
71static VLC_TYPE run_vlc_tables[6][8][2];
72static const int run_vlc_tables_size = 8;
73
0da71265 74static VLC run7_vlc;
910e3668
AC
75static VLC_TYPE run7_vlc_table[96][2];
76static const int run7_vlc_table_size = 96;
0da71265 77
/* Forward declarations of helpers defined elsewhere in this file. */
8b82a956
MN
78static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 80static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 81static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 82static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 83
849f1035 84static av_always_inline uint32_t pack16to32(int a, int b){
377ec888
MN
85#ifdef WORDS_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
87#else
88 return (a&0xFFFF) + (b<<16);
89#endif
90}
91
/* rem6[i] == i % 6 for i in 0..51 (lookup table replacing the modulo). */
d9ec210b 92static const uint8_t rem6[52]={
acd8d10f
PI
930, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
94};
95
/* div6[i] == i / 6 for i in 0..51 (lookup table replacing the division). */
d9ec210b 96static const uint8_t div6[52]={
acd8d10f
PI
970, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
98};
99
/*
 * Block indices to read from the left neighbour(s) in fill_caches().
 * Row 0 is the default; fill_caches() switches to row 3 or row 2/1 when the
 * field/frame mode of the left macroblock pair differs from the current one.
 * Entries 0..3 select the intra4x4 mode / non-zero count / motion vector rows,
 * entries 4..7 are used only for the non-zero count cache columns at
 * offsets 0+8*1.. and 0+8*4.. .
 */
89818988 100static const uint8_t left_block_options[4][8]={
143d7f14
PK
101 {0,1,2,3,7,10,8,11},
102 {2,2,3,3,8,11,8,11},
103 {0,0,1,1,7,10,7,10},
104 {0,2,0,2,7,10,7,10}
105};
acd8d10f 106
/* NOTE(review): presumably a precomputed CAVLC level lookup indexed by
 * LEVEL_TAB_BITS bits of bitstream; its initialisation is not visible here. */
8140955d
MN
107#define LEVEL_TAB_BITS 8
108static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
109
/**
 * Loads information about the neighbours of the current macroblock
 * (h->mb_xy) into the per-macroblock caches of the context.
 *
 * Steps performed, in order:
 *  - computes the indices of the top, top-left, top-right and the two left
 *    neighbours, adjusting them for MBAFF field/frame pairs, and stores
 *    h->top_mb_xy and h->left_mb_xy[];
 *  - derives the neighbour macroblock types (0 means "treat as absent");
 *  - when not deblocking and the macroblock is intra: sets the
 *    *_samples_available masks and, for intra4x4, the neighbouring entries
 *    of intra4x4_pred_mode_cache;
 *  - fills the neighbouring entries of non_zero_count_cache and, with CABAC,
 *    h->top_cbp / h->left_cbp;
 *  - for inter or direct macroblocks: fills the neighbouring entries of
 *    mv_cache / ref_cache and, with CABAC, mvd_cache and direct_cache,
 *    rescaling field<->frame values in MBAFF frames;
 *  - sets h->neighbor_transform_size.
 *
 * @param mb_type     type of the current macroblock
 * @param for_deblock non-zero when called for the loop filter; in that case
 *                    the function returns early if there is a single slice
 *                    or the top neighbour is in the same slice (and the frame
 *                    is not MBAFF), and skips the intra and diagonal parts
 */
70abb407 110static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 111 MpegEncContext * const s = &h->s;
64514ee8 112 const int mb_xy= h->mb_xy;
0da71265
MN
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
89818988 115 const uint8_t * left_block;
02f7695b 116 int topleft_partition= -1;
0da71265
MN
117 int i;
118
36e097bc
JD
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
120
717b1733 121 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
123 return;
124
2cab6401
DB
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 127
6867a90b
LLL
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 131 left_block = left_block_options[0];
5d18eaad 132 if(FRAME_MBAFF){
6867a90b
LLL
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
6f3c50f2
MN
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
6867a90b 142 const int bottom = (s->mb_y & 1);
6f3c50f2 143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
60c6ba7a 144
6f3c50f2 145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
6867a90b
LLL
146 top_xy -= s->mb_stride;
147 }
6f3c50f2 148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
6867a90b 149 topleft_xy -= s->mb_stride;
6f3c50f2 150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
02f7695b 151 topleft_xy += s->mb_stride;
1412060e 152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 153 topleft_partition = 0;
6867a90b 154 }
6f3c50f2 155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
6867a90b
LLL
156 topright_xy -= s->mb_stride;
157 }
6f3c50f2 158 if (left_mb_field_flag != curr_mb_field_flag) {
6867a90b 159 left_xy[1] = left_xy[0] = pair_xy - 1;
6f3c50f2
MN
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
163 } else {
03a035e0 164 left_block= left_block_options[2 - bottom];
6867a90b
LLL
165 }
166 }
0da71265
MN
167 }
168
826de46e
LLL
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
/* Deblocking accepts any neighbour whose slice_table entry is < 0xFFFF;
 * decoding (the else branch) only accepts neighbours of the current slice. */
6ba71fc4 172 if(for_deblock){
717b1733
LM
173 topleft_type = 0;
174 topright_type = 0;
b735aeea
MN
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 178
e248cb60 179 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 180 int list;
3425501d 181 for(list=0; list<h->list_count; list++){
e248cb60
MN
182 //These values were changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 185 if(USES_LIST(mb_type,list)){
191e8ca7 186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
189 ref += h->b8_stride;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
192 }
193 }
194 }
46f2f05f
MN
195 }else{
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
201
202 if(IS_INTRA(mb_type)){
faa7e394 203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
204 h->topleft_samples_available=
205 h->top_samples_available=
0da71265
MN
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
208
faa7e394 209 if(!(top_type & type_mask)){
0da71265
MN
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
213 }
d1d10e91
MN
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
faa7e394 216 if(!(left_type[0] & type_mask)){
d1d10e91
MN
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
219 }
faa7e394 220 if(!(left_type[1] & type_mask)){
d1d10e91
MN
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
223 }
224 }else{
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
faa7e394 228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
231 }
232 }
233 }else{
faa7e394 234 if(!(left_type[0] & type_mask)){
0da71265
MN
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
237 }
238 }
115329f1 239
faa7e394 240 if(!(topleft_type & type_mask))
0da71265 241 h->topleft_samples_available&= 0x7FFF;
115329f1 242
faa7e394 243 if(!(topright_type & type_mask))
0da71265 244 h->topright_samples_available&= 0xFBFF;
115329f1 245
0da71265
MN
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 }else{
253 int pred;
faa7e394 254 if(!(top_type & type_mask))
0da71265 255 pred= -1;
6fbcaaa0
LLL
256 else{
257 pred= 2;
0da71265
MN
258 }
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
263 }
264 for(i=0; i<2; i++){
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 }else{
269 int pred;
faa7e394 270 if(!(left_type[i] & type_mask))
0da71265 271 pred= -1;
6fbcaaa0
LLL
272 else{
273 pred= 2;
0da71265
MN
274 }
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
277 }
278 }
279 }
280 }
29671011 281 }
115329f1
DB
282
283
0da71265 284/*
115329f1
DB
2850 . T T. T T T T
2861 L . .L . . . .
2872 L . .L . . . .
2883 . T TL . . . .
2894 L . .L . . . .
2905 L . .. . . . .
0da71265 291*/
1412060e 292//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 293 if(top_type){
6867a90b
LLL
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 298
6867a90b 299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 301
6867a90b 302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 304
0da71265 305 }else{
115329f1 306 h->non_zero_count_cache[4+8*0]=
0da71265
MN
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
115329f1 310
0da71265
MN
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
115329f1 313
0da71265 314 h->non_zero_count_cache[1+8*3]=
3981c385 315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 316
0da71265 317 }
826de46e 318
6867a90b
LLL
319 for (i=0; i<2; i++) {
320 if(left_type[i]){
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 325 }else{
115329f1
DB
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
330 }
331 }
332
333 if( h->pps.cabac ) {
334 // top_cbp
335 if(top_type) {
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
338 h->top_cbp = 0x1C0;
339 } else {
340 h->top_cbp = 0;
341 }
342 // left_cbp
343 if (left_type[0]) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
346 h->left_cbp = 0x1C0;
347 } else {
348 h->left_cbp = 0;
349 }
350 if (left_type[0]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
352 }
353 if (left_type[1]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 355 }
0da71265 356 }
6867a90b 357
0da71265 358#if 1
e2e5894a 359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 360 int list;
3425501d 361 for(list=0; list<h->list_count; list++){
e2e5894a 362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
367 }*/
5ad984c9 368 continue;
0da71265
MN
369 }
370 h->mv_cache_clean[list]= 0;
115329f1 371
53b19144 372 if(USES_LIST(top_type, list)){
0da71265
MN
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 }else{
115329f1
DB
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
389 }
390
4672503d
LM
391 for(i=0; i<2; i++){
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 }else{
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
405 }
0da71265
MN
406 }
407
0281d325 408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
409 continue;
410
53b19144 411 if(USES_LIST(topleft_type, list)){
02f7695b
LM
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 }else{
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
419 }
115329f1 420
53b19144 421 if(USES_LIST(topright_type, list)){
e2e5894a
LM
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 }else{
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
429 }
e2e5894a 430
ae08a563 431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 432 continue;
115329f1
DB
433
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 437 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
444
445 if( h->pps.cabac ) {
446 /* XXX beurk, Load mvd */
53b19144 447 if(USES_LIST(top_type, list)){
9e528114
LA
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 }else{
115329f1
DB
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
458 }
53b19144 459 if(USES_LIST(left_type[0], list)){
9e528114
LA
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 }else{
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
466 }
53b19144 467 if(USES_LIST(left_type[1], list)){
9e528114
LA
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 }else{
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
474 }
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 480
9f5c1037 481 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
483
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 }else{
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
492 }
115329f1 493
5d18eaad
LM
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 else
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
500
501 if(IS_DIRECT(left_type[1]))
5ad984c9 502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 else
5ad984c9 506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
507 }
508 }
509
510 if(FRAME_MBAFF){
511#define MAP_MVS\
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 if(MB_FIELD){
523#define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
528 }
529 MAP_MVS
530#undef MAP_F2F
531 }else{
532#define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 537 }
5d18eaad
LM
538 MAP_MVS
539#undef MAP_F2F
5ad984c9 540 }
9e528114 541 }
0da71265 542 }
0da71265
MN
543 }
544#endif
43efd19a
LM
545
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
547}
548
549static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 550 const int mb_xy= h->mb_xy;
0da71265
MN
551
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
559}
560
561/**
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
563 */
564static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
568 int i;
115329f1 569
0da71265
MN
570 if(!(h->top_samples_available&0x8000)){
571 for(i=0; i<4; i++){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 if(status<0){
9b879566 574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
575 return -1;
576 } else if(status){
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
578 }
579 }
580 }
115329f1 581
d1d10e91
MN
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 584 for(i=0; i<4; i++){
d1d10e91 585 if(!(h->left_samples_available&mask[i])){
26695973
MN
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 if(status<0){
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 return -1;
590 } else if(status){
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
592 }
d1d10e91 593 }
0da71265
MN
594 }
595 }
596
597 return 0;
598} //FIXME cleanup like next
599
600/**
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
602 */
603static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 607
43ff0714 608 if(mode > 6U) {
5175b937 609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 610 return -1;
5175b937 611 }
115329f1 612
0da71265
MN
613 if(!(h->top_samples_available&0x8000)){
614 mode= top[ mode ];
615 if(mode<0){
9b879566 616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
617 return -1;
618 }
619 }
115329f1 620
d1d10e91 621 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 622 mode= left[ mode ];
d1d10e91
MN
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
625 }
0da71265 626 if(mode<0){
9b879566 627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 628 return -1;
115329f1 629 }
0da71265
MN
630 }
631
632 return mode;
633}
634
635/**
636 * gets the predicted intra4x4 prediction mode.
637 */
638static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
643
a9c9a240 644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
645
646 if(min<0) return DC_PRED;
647 else return min;
648}
649
650static inline void write_back_non_zero_count(H264Context *h){
64514ee8 651 const int mb_xy= h->mb_xy;
0da71265 652
6867a90b
LLL
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 660
6867a90b 661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 664
6867a90b 665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
668}
669
670/**
1412060e 671 * gets the predicted number of non-zero coefficients.
0da71265
MN
672 * @param n block index
673 */
674static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
678 int i= left + top;
115329f1 679
0da71265
MN
680 if(i<64) i= (i+1)>>1;
681
a9c9a240 682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
683
684 return i&31;
685}
686
1924f3ce
MN
/**
 * Fetches the diagonal neighbour used for motion vector prediction.
 *
 * In the common case the top-right cache entry (i - 8 + part_width) is used
 * when its reference is not PART_NOT_AVAILABLE, otherwise the top-left entry
 * (i - 8 - 1). In MBAFF frames some field/frame combinations read the
 * neighbour directly from current_picture_ptr instead, scale the vertical
 * component and the reference index, and store the vector in the scratch
 * cache slot scan8[0]-2; those paths return from inside SET_DIAG_MV.
 *
 * @param C          receives a pointer to the selected motion vector
 * @param i          cache index (scan8 position) of the current block
 * @param list       reference list
 * @param part_width partition width in units of 4 pixels
 * @return the reference index of the selected neighbour (may be
 *         LIST_NOT_USED or PART_NOT_AVAILABLE)
 */
687static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 689 MpegEncContext *s = &h->s;
1924f3ce 690
5d18eaad
LM
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
693 if(FRAME_MBAFF){
191e8ca7 694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
695 const int16_t *mv;
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
698
699 if(!MB_FIELD
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
703#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 706 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
712
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
714 }
715 }
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 if(!MB_FIELD
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
722 }
723 if(MB_FIELD
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 && i >= scan8[0]+8){
1412060e 726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
728 }
729 }
730#undef SET_DIAG_MV
731 }
732
1924f3ce
MN
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
735 return topright_ref;
736 }else{
a9c9a240 737 tprintf(s->avctx, "topright MV not available\n");
95c26348 738
1924f3ce
MN
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
741 }
742}
743
0da71265
MN
744/**
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
750 */
751static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
757 const int16_t * C;
758 int diagonal_ref, match_count;
759
0da71265 760 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 761
0da71265 762/* mv_cache
115329f1 763 B . . A T T T T
0da71265
MN
764 U . . L . . , .
765 U . . L . . . .
766 U . . L . . , .
767 . . . L . . . .
768*/
1924f3ce
MN
769
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
777 if(left_ref==ref){
778 *mx= A[0];
115329f1 779 *my= A[1];
1924f3ce
MN
780 }else if(top_ref==ref){
781 *mx= B[0];
115329f1 782 *my= B[1];
0da71265 783 }else{
1924f3ce 784 *mx= C[0];
115329f1 785 *my= C[1];
0da71265
MN
786 }
787 }else{
1924f3ce 788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 789 *mx= A[0];
115329f1 790 *my= A[1];
0da71265 791 }else{
1924f3ce
MN
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
0da71265 794 }
0da71265 795 }
115329f1 796
a9c9a240 797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
798}
799
800/**
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
805 */
806static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
807 if(n==0){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810
a9c9a240 811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 812
0da71265
MN
813 if(top_ref == ref){
814 *mx= B[0];
815 *my= B[1];
816 return;
817 }
818 }else{
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 821
a9c9a240 822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
823
824 if(left_ref == ref){
825 *mx= A[0];
826 *my= A[1];
827 return;
828 }
829 }
830
831 //RARE
832 pred_motion(h, n, 4, list, ref, mx, my);
833}
834
835/**
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
840 */
841static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
842 if(n==0){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 845
a9c9a240 846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
847
848 if(left_ref == ref){
849 *mx= A[0];
850 *my= A[1];
851 return;
852 }
853 }else{
1924f3ce
MN
854 const int16_t * C;
855 int diagonal_ref;
856
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 858
a9c9a240 859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 860
115329f1 861 if(diagonal_ref == ref){
0da71265
MN
862 *mx= C[0];
863 *my= C[1];
864 return;
865 }
0da71265
MN
866 }
867
868 //RARE
869 pred_motion(h, n, 2, list, ref, mx, my);
870}
871
872static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875
a9c9a240 876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
877
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
62ea19c0
MN
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
115329f1 881
0da71265
MN
882 *mx = *my = 0;
883 return;
884 }
115329f1 885
0da71265
MN
886 pred_motion(h, 0, 4, 0, 0, mx, my);
887
888 return;
889}
890
8b1fd554
MN
891static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
895 return 256;
896 }else{
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
900 }
901}
902
5ad984c9 903static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 906 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
907 int i, field;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 913 }
8b1fd554
MN
914
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 917 }
5ad984c9 918}
f4d3382d
MN
919
920static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
927
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
930
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
934
935 if (!interl)
936 poc |= 3;
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
939
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 if(rfield == field)
945 map[list][old_ref] = cur_ref;
946 break;
947 }
948 }
949 }
950 }
951}
952
2f944356
LM
953static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
bbc78fb4 957 int list, j, field;
f4d3382d
MN
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
aa617518 960
2f944356 961 for(list=0; list<2; list++){
2879c75f 962 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 963 for(j=0; j<h->ref_count[list]; j++)
42de393d 964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 965 }
aa617518 966
7762cc3d 967 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 970 }
aa617518 971
48e025e5 972 cur->mbaff= FRAME_MBAFF;
aa617518 973
9701840b 974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 975 return;
aa617518 976
2f944356 977 for(list=0; list<2; list++){
f4d3382d
MN
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
981 }
982}
5ad984c9
LM
983
984static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
d00eac6c
MN
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
989 int mb_type_col[2];
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
5ad984c9 992 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 993 unsigned int sub_mb_type;
5ad984c9
LM
994 int i8, i4;
995
5d18eaad 996#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
997
998 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 999 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
1000 int cur_poc = s->current_picture_ptr->poc;
1001 int *col_poc = h->ref_list[1]->field_poc;
1002 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1003 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1004 b8_stride = 0;
60c9b24d 1005 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1006 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1007 mb_xy += s->mb_stride*fieldoff;
1008 }
1009 goto single_col;
1010 }else{ // AFL/AFR/FR/FL -> AFR/FR
1011 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1012 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1013 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1014 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1015 b8_stride *= 3;
1016 b4_stride *= 6;
1017 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1018 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1019 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1020 && !is_b8x8){
1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1023 }else{
1024 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1025 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 }
1027 }else{ // AFR/FR -> AFR/FR
1028single_col:
1029 mb_type_col[0] =
1030 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1031 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1032 /* FIXME save sub mb types from previous frames (or derive from MVs)
1033 * so we know exactly what block size to use */
1034 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1035 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1036 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1039 }else{
1040 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }
d00eac6c 1043 }
5ad984c9 1044 }
5ad984c9 1045
7d54ecc9
MN
1046 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1047 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1048 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1049 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1050 if(!b8_stride){
1051 if(s->mb_y&1){
1052 l1ref0 += h->b8_stride;
1053 l1ref1 += h->b8_stride;
1054 l1mv0 += 2*b4_stride;
1055 l1mv1 += 2*b4_stride;
1056 }
d00eac6c 1057 }
115329f1 1058
5ad984c9
LM
1059 if(h->direct_spatial_mv_pred){
1060 int ref[2];
1061 int mv[2][2];
1062 int list;
1063
5d18eaad
LM
1064 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1065
5ad984c9
LM
1066 /* ref = min(neighbors) */
1067 for(list=0; list<2; list++){
1068 int refa = h->ref_cache[list][scan8[0] - 1];
1069 int refb = h->ref_cache[list][scan8[0] - 8];
1070 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1071 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1072 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1073 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1074 if(ref[list] < 0)
1075 ref[list] = -1;
1076 }
1077
1078 if(ref[0] < 0 && ref[1] < 0){
1079 ref[0] = ref[1] = 0;
1080 mv[0][0] = mv[0][1] =
1081 mv[1][0] = mv[1][1] = 0;
1082 }else{
1083 for(list=0; list<2; list++){
1084 if(ref[list] >= 0)
1085 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1086 else
1087 mv[list][0] = mv[list][1] = 0;
1088 }
1089 }
1090
1091 if(ref[1] < 0){
50b3ab0f
LM
1092 if(!is_b8x8)
1093 *mb_type &= ~MB_TYPE_L1;
1094 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1095 }else if(ref[0] < 0){
50b3ab0f
LM
1096 if(!is_b8x8)
1097 *mb_type &= ~MB_TYPE_L0;
1098 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1099 }
1100
d00eac6c 1101 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1102 for(i8=0; i8<4; i8++){
1103 int x8 = i8&1;
1104 int y8 = i8>>1;
1105 int xy8 = x8+y8*b8_stride;
1106 int xy4 = 3*x8+y8*b4_stride;
1107 int a=0, b=0;
1108
1109 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1110 continue;
1111 h->sub_mb_type[i8] = sub_mb_type;
1112
1113 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1114 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1115 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1116 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1117 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1118 if(ref[0] > 0)
1119 a= pack16to32(mv[0][0],mv[0][1]);
1120 if(ref[1] > 0)
1121 b= pack16to32(mv[1][0],mv[1][1]);
1122 }else{
1123 a= pack16to32(mv[0][0],mv[0][1]);
1124 b= pack16to32(mv[1][0],mv[1][1]);
1125 }
1126 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1127 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1128 }
1129 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1130 int a=0, b=0;
1131
cec93959
LM
1132 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1133 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1134 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1135 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1136 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1137 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1138 if(ref[0] > 0)
d19f5acb 1139 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1140 if(ref[1] > 0)
d19f5acb 1141 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1142 }else{
d19f5acb
MN
1143 a= pack16to32(mv[0][0],mv[0][1]);
1144 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1145 }
d19f5acb
MN
1146 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1147 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1148 }else{
1149 for(i8=0; i8<4; i8++){
1150 const int x8 = i8&1;
1151 const int y8 = i8>>1;
115329f1 1152
5ad984c9
LM
1153 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1154 continue;
1155 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1156
5ad984c9
LM
1157 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1158 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1159 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1160 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1161
5ad984c9 1162 /* col_zero_flag */
2ccd25d0
MN
1163 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1164 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1165 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1166 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1167 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1168 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1169 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1170 if(ref[0] == 0)
1171 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1172 if(ref[1] == 0)
1173 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 }
1175 }else
5ad984c9 1176 for(i4=0; i4<4; i4++){
2ccd25d0 1177 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1178 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1179 if(ref[0] == 0)
1180 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1181 if(ref[1] == 0)
1182 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1183 }
1184 }
1185 }
1186 }
1187 }
1188 }else{ /* direct temporal mv pred */
5d18eaad
LM
1189 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1190 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1191 int ref_offset= 0;
5d18eaad 1192
cc615d2c 1193 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1194 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1195 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1196 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1197 }
48e025e5 1198 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1199 ref_offset += 16;
48e025e5 1200
cc615d2c
MN
1201 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1202 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1203 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1204
cc615d2c
MN
1205 for(i8=0; i8<4; i8++){
1206 const int x8 = i8&1;
1207 const int y8 = i8>>1;
1208 int ref0, scale;
1209 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1210
cc615d2c
MN
1211 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1212 continue;
1213 h->sub_mb_type[i8] = sub_mb_type;
1214
1215 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1216 if(IS_INTRA(mb_type_col[y8])){
1217 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1219 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1220 continue;
1221 }
1222
1223 ref0 = l1ref0[x8 + y8*b8_stride];
1224 if(ref0 >= 0)
f4d3382d 1225 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1226 else{
f4d3382d 1227 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1228 l1mv= l1mv1;
1229 }
1230 scale = dist_scale_factor[ref0];
1231 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1232
1233 {
1234 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1235 int my_col = (mv_col[1]<<y_shift)/2;
1236 int mx = (scale * mv_col[0] + 128) >> 8;
1237 int my = (scale * my_col + 128) >> 8;
1238 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1239 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1240 }
5d18eaad 1241 }
cc615d2c
MN
1242 return;
1243 }
5d18eaad
LM
1244
1245 /* one-to-one mv scaling */
1246
5ad984c9 1247 if(IS_16X16(*mb_type)){
fda51641
MN
1248 int ref, mv0, mv1;
1249
5ad984c9 1250 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1251 if(IS_INTRA(mb_type_col[0])){
fda51641 1252 ref=mv0=mv1=0;
5ad984c9 1253 }else{
f4d3382d
MN
1254 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1255 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1256 const int scale = dist_scale_factor[ref0];
8583bef8 1257 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1258 int mv_l0[2];
5d18eaad
LM
1259 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1260 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1261 ref= ref0;
1262 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1263 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1264 }
fda51641
MN
1265 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1266 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1267 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1268 }else{
1269 for(i8=0; i8<4; i8++){
1270 const int x8 = i8&1;
1271 const int y8 = i8>>1;
5d18eaad 1272 int ref0, scale;
bf4e3bd2 1273 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1274
5ad984c9
LM
1275 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1276 continue;
1277 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1278 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1279 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1280 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1281 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1282 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1283 continue;
1284 }
115329f1 1285
f4d3382d 1286 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1287 if(ref0 >= 0)
5d18eaad 1288 ref0 = map_col_to_list0[0][ref0];
8583bef8 1289 else{
f4d3382d 1290 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1291 l1mv= l1mv1;
1292 }
5d18eaad 1293 scale = dist_scale_factor[ref0];
115329f1 1294
5ad984c9 1295 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1296 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1297 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1298 int mx = (scale * mv_col[0] + 128) >> 8;
1299 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1300 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1301 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1302 }else
5ad984c9 1303 for(i4=0; i4<4; i4++){
2ccd25d0 1304 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1305 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1306 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1307 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1308 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1309 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1310 }
1311 }
1312 }
1313 }
1314}
1315
0da71265
MN
1316static inline void write_back_motion(H264Context *h, int mb_type){
1317 MpegEncContext * const s = &h->s;
0da71265
MN
1318 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1319 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1320 int list;
1321
2ea39252
LM
1322 if(!USES_LIST(mb_type, 0))
1323 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1324
3425501d 1325 for(list=0; list<h->list_count; list++){
0da71265 1326 int y;
53b19144 1327 if(!USES_LIST(mb_type, list))
5ad984c9 1328 continue;
115329f1 1329
0da71265
MN
1330 for(y=0; y<4; y++){
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1332 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1333 }
9e528114 1334 if( h->pps.cabac ) {
e6e77eb6
LM
1335 if(IS_SKIP(mb_type))
1336 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1337 else
9e528114
LA
1338 for(y=0; y<4; y++){
1339 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1340 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1341 }
1342 }
53b19144
LM
1343
1344 {
191e8ca7 1345 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1346 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1347 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1348 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1349 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1350 }
1351 }
115329f1 1352
9f5c1037 1353 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1354 if(IS_8X8(mb_type)){
53b19144
LM
1355 uint8_t *direct_table = &h->direct_table[b8_xy];
1356 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1357 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1358 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1359 }
1360 }
0da71265
MN
1361}
1362
1790a5e9 1363const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1364 int i, si, di;
1365 uint8_t *dst;
24456882 1366 int bufidx;
0da71265 1367
bb270c08 1368// src[0]&0x80; //forbidden bit
0da71265
MN
1369 h->nal_ref_idc= src[0]>>5;
1370 h->nal_unit_type= src[0]&0x1F;
1371
1372 src++; length--;
115329f1 1373#if 0
0da71265
MN
1374 for(i=0; i<length; i++)
1375 printf("%2X ", src[i]);
1376#endif
e08715d3 1377
b250f9c6
AJ
1378#if HAVE_FAST_UNALIGNED
1379# if HAVE_FAST_64BIT
e08715d3
MN
1380# define RS 7
1381 for(i=0; i+1<length; i+=9){
3878be31 1382 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
1383# else
1384# define RS 3
1385 for(i=0; i+1<length; i+=5){
3878be31 1386 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
1387# endif
1388 continue;
1389 if(i>0 && !src[i]) i--;
1390 while(src[i]) i++;
1391#else
1392# define RS 0
0da71265
MN
1393 for(i=0; i+1<length; i+=2){
1394 if(src[i]) continue;
1395 if(i>0 && src[i-1]==0) i--;
e08715d3 1396#endif
0da71265
MN
1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1398 if(src[i+2]!=3){
1399 /* startcode, so we must be past the end */
1400 length=i;
1401 }
1402 break;
1403 }
abb27cfb 1404 i-= RS;
0da71265
MN
1405 }
1406
1407 if(i>=length-1){ //no escaped 0
1408 *dst_length= length;
1409 *consumed= length+1; //+1 for the header
115329f1 1410 return src;
0da71265
MN
1411 }
1412
24456882 1413 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
d4369630 1414 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 1415 dst= h->rbsp_buffer[bufidx];
0da71265 1416
ac658be5
FOL
1417 if (dst == NULL){
1418 return NULL;
1419 }
1420
3b66c4c5 1421//printf("decoding esc\n");
593af7cd
MN
1422 memcpy(dst, src, i);
1423 si=di=i;
1424 while(si+2<length){
0da71265 1425 //remove escapes (very rare 1:2^22)
593af7cd
MN
1426 if(src[si+2]>3){
1427 dst[di++]= src[si++];
1428 dst[di++]= src[si++];
1429 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
1430 if(src[si+2]==3){ //escape
1431 dst[di++]= 0;
1432 dst[di++]= 0;
1433 si+=3;
c8470cc1 1434 continue;
0da71265 1435 }else //next start code
593af7cd 1436 goto nsc;
0da71265
MN
1437 }
1438
1439 dst[di++]= src[si++];
1440 }
593af7cd
MN
1441 while(si<length)
1442 dst[di++]= src[si++];
1443nsc:
0da71265 1444
d4369630
AS
1445 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1446
0da71265
MN
1447 *dst_length= di;
1448 *consumed= si + 1;//+1 for the header
90b5b51e 1449//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1450 return dst;
1451}
1452
1790a5e9 1453int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1454 int v= *src;
1455 int r;
1456
a9c9a240 1457 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1458
1459 for(r=1; r<9; r++){
1460 if(v&1) return r;
1461 v>>=1;
1462 }
1463 return 0;
1464}
1465
1466/**
1412060e 1467 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1468 * @param qp quantization parameter
1469 */
239ea04c 1470static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1471#define stride 16
1472 int i;
1473 int temp[16]; //FIXME check if this is a good idea
1474 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1475 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1476
1477//memset(block, 64, 2*256);
1478//return;
1479 for(i=0; i<4; i++){
1480 const int offset= y_offset[i];
1481 const int z0= block[offset+stride*0] + block[offset+stride*4];
1482 const int z1= block[offset+stride*0] - block[offset+stride*4];
1483 const int z2= block[offset+stride*1] - block[offset+stride*5];
1484 const int z3= block[offset+stride*1] + block[offset+stride*5];
1485
1486 temp[4*i+0]= z0+z3;
1487 temp[4*i+1]= z1+z2;
1488 temp[4*i+2]= z1-z2;
1489 temp[4*i+3]= z0-z3;
1490 }
1491
1492 for(i=0; i<4; i++){
1493 const int offset= x_offset[i];
1494 const int z0= temp[4*0+i] + temp[4*2+i];
1495 const int z1= temp[4*0+i] - temp[4*2+i];
1496 const int z2= temp[4*1+i] - temp[4*3+i];
1497 const int z3= temp[4*1+i] + temp[4*3+i];
1498
1412060e 1499 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1500 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1501 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1502 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1503 }
1504}
1505
#if 0
/**
 * DCT transforms the 16 dc values (disabled code, not compiled).
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    for(i=0; i<4; i++){
        const DCTELEM * const in= block + y_offset[i];
        const int sum04 = in[stride*0] + in[stride*4];
        const int diff04= in[stride*0] - in[stride*4];
        const int diff15= in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04 +sum15;
        temp[4*i+1]= diff04+diff15;
        temp[4*i+2]= diff04-diff15;
        temp[4*i+3]= sum04 -sum15;
    }

    for(i=0; i<4; i++){
        DCTELEM * const out= block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int diff02= temp[4*0+i] - temp[4*2+i];
        const int diff13= temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02  + sum13 )>>1;
        out[stride*2 ]= (diff02 + diff13)>>1;
        out[stride*8 ]= (diff02 - diff13)>>1;
        out[stride*10]= (sum02  - sum13 )>>1;
    }
}
#endif

#undef xStride
#undef stride
1548
239ea04c 1549static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1550 const int stride= 16*2;
1551 const int xStride= 16;
1552 int a,b,c,d,e;
1553
1554 a= block[stride*0 + xStride*0];
1555 b= block[stride*0 + xStride*1];
1556 c= block[stride*1 + xStride*0];
1557 d= block[stride*1 + xStride*1];
1558
1559 e= a-b;
1560 a= a+b;
1561 b= c-d;
1562 c= c+d;
1563
239ea04c
LM
1564 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1565 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1566 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1567 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1568}
1569
e5017ab8 1570#if 0
0da71265
MN
1571static void chroma_dc_dct_c(DCTELEM *block){
1572 const int stride= 16*2;
1573 const int xStride= 16;
1574 int a,b,c,d,e;
1575
1576 a= block[stride*0 + xStride*0];
1577 b= block[stride*0 + xStride*1];
1578 c= block[stride*1 + xStride*0];
1579 d= block[stride*1 + xStride*1];
1580
1581 e= a-b;
1582 a= a+b;
1583 b= c-d;
1584 c= c+d;
1585
1586 block[stride*0 + xStride*0]= (a+c);
1587 block[stride*0 + xStride*1]= (e+b);
1588 block[stride*1 + xStride*0]= (a-c);
1589 block[stride*1 + xStride*1]= (e-b);
1590}
e5017ab8 1591#endif
0da71265
MN
1592
1593/**
1594 * gets the chroma qp.
1595 */
4691a77d 1596static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1597 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1598}
1599
0da71265
MN
1600static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1601 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1602 int src_x_offset, int src_y_offset,
1603 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1604 MpegEncContext * const s = &h->s;
1605 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1606 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1607 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1608 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1609 uint8_t * src_cb, * src_cr;
1610 int extra_width= h->emu_edge_width;
1611 int extra_height= h->emu_edge_height;
0da71265
MN
1612 int emu=0;
1613 const int full_mx= mx>>2;
1614 const int full_my= my>>2;
fbd312fd 1615 const int pic_width = 16*s->mb_width;
0d43dd8c 1616 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1617
0da71265
MN
1618 if(mx&7) extra_width -= 3;
1619 if(my&7) extra_height -= 3;
115329f1
DB
1620
1621 if( full_mx < 0-extra_width
1622 || full_my < 0-extra_height
1623 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1624 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1625 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1626 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1627 emu=1;
1628 }
115329f1 1629
5d18eaad 1630 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1631 if(!square){
5d18eaad 1632 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1633 }
115329f1 1634
49fb20cb 1635 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1636
0d43dd8c 1637 if(MB_FIELD){
5d18eaad 1638 // chroma offset when predicting from a field of opposite parity
2143b118 1639 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1640 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1641 }
1642 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1643 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1644
0da71265 1645 if(emu){
5d18eaad 1646 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1647 src_cb= s->edge_emu_buffer;
1648 }
5d18eaad 1649 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1650
1651 if(emu){
5d18eaad 1652 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1653 src_cr= s->edge_emu_buffer;
1654 }
5d18eaad 1655 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1656}
1657
9f2d1b4f 1658static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1659 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1660 int x_offset, int y_offset,
1661 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1662 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1663 int list0, int list1){
1664 MpegEncContext * const s = &h->s;
1665 qpel_mc_func *qpix_op= qpix_put;
1666 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1667
5d18eaad
LM
1668 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1669 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1670 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1671 x_offset += 8*s->mb_x;
0d43dd8c 1672 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1673
0da71265 1674 if(list0){
1924f3ce 1675 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1676 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1677 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678 qpix_op, chroma_op);
1679
1680 qpix_op= qpix_avg;
1681 chroma_op= chroma_avg;
1682 }
1683
1684 if(list1){
1924f3ce 1685 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
1689 }
1690}
1691
9f2d1b4f
LM
1692static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1693 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1694 int x_offset, int y_offset,
1695 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1696 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1697 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1698 int list0, int list1){
1699 MpegEncContext * const s = &h->s;
1700
5d18eaad
LM
1701 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1702 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1703 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1704 x_offset += 8*s->mb_x;
0d43dd8c 1705 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1706
9f2d1b4f
LM
1707 if(list0 && list1){
1708 /* don't optimize for luma-only case, since B-frames usually
1709 * use implicit weights => chroma too. */
1710 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1711 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1712 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1713 int refn0 = h->ref_cache[0][ scan8[n] ];
1714 int refn1 = h->ref_cache[1][ scan8[n] ];
1715
1716 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1717 dest_y, dest_cb, dest_cr,
1718 x_offset, y_offset, qpix_put, chroma_put);
1719 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1720 tmp_y, tmp_cb, tmp_cr,
1721 x_offset, y_offset, qpix_put, chroma_put);
1722
1723 if(h->use_weight == 2){
1724 int weight0 = h->implicit_weight[refn0][refn1];
1725 int weight1 = 64 - weight0;
5d18eaad
LM
1726 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1727 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1728 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1729 }else{
5d18eaad 1730 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1731 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1732 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1733 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1734 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1735 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1736 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1737 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1738 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1739 }
1740 }else{
1741 int list = list1 ? 1 : 0;
1742 int refn = h->ref_cache[list][ scan8[n] ];
1743 Picture *ref= &h->ref_list[list][refn];
1744 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1745 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1746 qpix_put, chroma_put);
1747
5d18eaad 1748 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1749 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1750 if(h->use_weight_chroma){
5d18eaad 1751 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1752 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1753 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1754 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1755 }
1756 }
1757}
1758
1759static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1760 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1761 int x_offset, int y_offset,
1762 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1763 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1764 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1765 int list0, int list1){
1766 if((h->use_weight==2 && list0 && list1
1767 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1768 || h->use_weight==1)
1769 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1770 x_offset, y_offset, qpix_put, chroma_put,
1771 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1772 else
1773 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1774 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1775}
1776
513fbd8e
LM
1777static inline void prefetch_motion(H264Context *h, int list){
1778 /* fetch pixels for estimated mv 4 macroblocks ahead
1779 * optimized for 64byte cache lines */
1780 MpegEncContext * const s = &h->s;
1781 const int refn = h->ref_cache[list][scan8[0]];
1782 if(refn >= 0){
1783 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1784 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1785 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1786 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1787 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1788 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1789 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1790 }
1791}
1792
0da71265
MN
1793static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1794 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1795 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1796 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1797 MpegEncContext * const s = &h->s;
64514ee8 1798 const int mb_xy= h->mb_xy;
0da71265 1799 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1800
0da71265 1801 assert(IS_INTER(mb_type));
115329f1 1802
513fbd8e
LM
1803 prefetch_motion(h, 0);
1804
0da71265
MN
1805 if(IS_16X16(mb_type)){
1806 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1807 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1808 &weight_op[0], &weight_avg[0],
0da71265
MN
1809 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1810 }else if(IS_16X8(mb_type)){
1811 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1812 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1813 &weight_op[1], &weight_avg[1],
0da71265
MN
1814 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1815 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1816 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1817 &weight_op[1], &weight_avg[1],
0da71265
MN
1818 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1819 }else if(IS_8X16(mb_type)){
5d18eaad 1820 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1821 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1822 &weight_op[2], &weight_avg[2],
0da71265 1823 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1824 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1825 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1826 &weight_op[2], &weight_avg[2],
0da71265
MN
1827 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1828 }else{
1829 int i;
115329f1 1830
0da71265
MN
1831 assert(IS_8X8(mb_type));
1832
1833 for(i=0; i<4; i++){
1834 const int sub_mb_type= h->sub_mb_type[i];
1835 const int n= 4*i;
1836 int x_offset= (i&1)<<2;
1837 int y_offset= (i&2)<<1;
1838
1839 if(IS_SUB_8X8(sub_mb_type)){
1840 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1841 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1842 &weight_op[3], &weight_avg[3],
0da71265
MN
1843 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1844 }else if(IS_SUB_8X4(sub_mb_type)){
1845 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1846 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1847 &weight_op[4], &weight_avg[4],
0da71265
MN
1848 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1849 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1850 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1851 &weight_op[4], &weight_avg[4],
0da71265
MN
1852 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1854 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1855 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1856 &weight_op[5], &weight_avg[5],
0da71265 1857 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1858 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1859 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1860 &weight_op[5], &weight_avg[5],
0da71265
MN
1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1862 }else{
1863 int j;
1864 assert(IS_SUB_4X4(sub_mb_type));
1865 for(j=0; j<4; j++){
1866 int sub_x_offset= x_offset + 2*(j&1);
1867 int sub_y_offset= y_offset + (j&2);
1868 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1870 &weight_op[6], &weight_avg[6],
0da71265
MN
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1872 }
1873 }
1874 }
1875 }
513fbd8e
LM
1876
1877 prefetch_motion(h, 1);
0da71265
MN
1878}
1879
8140955d
MN
1880static av_cold void init_cavlc_level_tab(void){
1881 int suffix_length, mask;
1882 unsigned int i;
1883
1884 for(suffix_length=0; suffix_length<7; suffix_length++){
1885 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1886 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1887 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1888
1889 mask= -(level_code&1);
1890 level_code= (((2+level_code)>>1) ^ mask) - mask;
1891 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1892 cavlc_level_tab[suffix_length][i][0]= level_code;
1893 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1894 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1895 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1896 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1897 }else{
1898 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1899 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1900 }
1901 }
1902 }
1903}
1904
98a6fff9 1905static av_cold void decode_init_vlc(void){
0da71265
MN
1906 static int done = 0;
1907
1908 if (!done) {
1909 int i;
910e3668 1910 int offset;
0da71265
MN
1911 done = 1;
1912
910e3668
AC
1913 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1914 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1915 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1916 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1917 &chroma_dc_coeff_token_bits[0], 1, 1,
1918 INIT_VLC_USE_NEW_STATIC);
0da71265 1919
910e3668 1920 offset = 0;
0da71265 1921 for(i=0; i<4; i++){
910e3668
AC
1922 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1923 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1924 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1925 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1926 &coeff_token_bits[i][0], 1, 1,
1927 INIT_VLC_USE_NEW_STATIC);
1928 offset += coeff_token_vlc_tables_size[i];
0da71265 1929 }
910e3668
AC
1930 /*
1931 * This is a one time safety check to make sure that
1932 * the packed static coeff_token_vlc table sizes
1933 * were initialized correctly.
1934 */
37d3e066 1935 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1936
1937 for(i=0; i<3; i++){
910e3668
AC
1938 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1939 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1940 init_vlc(&chroma_dc_total_zeros_vlc[i],
1941 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1942 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1943 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1944 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1945 }
1946 for(i=0; i<15; i++){
910e3668
AC
1947 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1948 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1949 init_vlc(&total_zeros_vlc[i],
1950 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1951 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1952 &total_zeros_bits[i][0], 1, 1,
1953 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1954 }
1955
1956 for(i=0; i<6; i++){
910e3668
AC
1957 run_vlc[i].table = run_vlc_tables[i];
1958 run_vlc[i].table_allocated = run_vlc_tables_size;
1959 init_vlc(&run_vlc[i],
1960 RUN_VLC_BITS, 7,
0da71265 1961 &run_len [i][0], 1, 1,
910e3668
AC
1962 &run_bits[i][0], 1, 1,
1963 INIT_VLC_USE_NEW_STATIC);
0da71265 1964 }
910e3668
AC
1965 run7_vlc.table = run7_vlc_table,
1966 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1967 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1968 &run_len [6][0], 1, 1,
910e3668
AC
1969 &run_bits[6][0], 1, 1,
1970 INIT_VLC_USE_NEW_STATIC);
8140955d
MN
1971
1972 init_cavlc_level_tab();
0da71265
MN
1973 }
1974}
1975
0da71265 1976static void free_tables(H264Context *h){
7978debd 1977 int i;
afebe2f7 1978 H264Context *hx;
0da71265 1979 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1980 av_freep(&h->chroma_pred_mode_table);
1981 av_freep(&h->cbp_table);
9e528114
LA
1982 av_freep(&h->mvd_table[0]);
1983 av_freep(&h->mvd_table[1]);
5ad984c9 1984 av_freep(&h->direct_table);
0da71265
MN
1985 av_freep(&h->non_zero_count);
1986 av_freep(&h->slice_table_base);
1987 h->slice_table= NULL;
e5017ab8 1988
0da71265
MN
1989 av_freep(&h->mb2b_xy);
1990 av_freep(&h->mb2b8_xy);
9f2d1b4f 1991
afebe2f7
1992 for(i = 0; i < h->s.avctx->thread_count; i++) {
1993 hx = h->thread_context[i];
1994 if(!hx) continue;
1995 av_freep(&hx->top_borders[1]);
1996 av_freep(&hx->top_borders[0]);
1997 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 1998 }
0da71265
MN
1999}
2000
239ea04c
LM
2001static void init_dequant8_coeff_table(H264Context *h){
2002 int i,q,x;
548a1c8a 2003 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2004 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2005 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2006
2007 for(i=0; i<2; i++ ){
2008 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2009 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2010 break;
2011 }
2012
2013 for(q=0; q<52; q++){
d9ec210b
DP
2014 int shift = div6[q];
2015 int idx = rem6[q];
239ea04c 2016 for(x=0; x<64; x++)
548a1c8a
LM
2017 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2018 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2019 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2020 }
2021 }
2022}
2023
2024static void init_dequant4_coeff_table(H264Context *h){
2025 int i,j,q,x;
ab2e3e2c 2026 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2027 for(i=0; i<6; i++ ){
2028 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2029 for(j=0; j<i; j++){
2030 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2031 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2032 break;
2033 }
2034 }
2035 if(j<i)
2036 continue;
2037
2038 for(q=0; q<52; q++){
d9ec210b
DP
2039 int shift = div6[q] + 2;
2040 int idx = rem6[q];
239ea04c 2041 for(x=0; x<16; x++)
ab2e3e2c
LM
2042 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2043 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2044 h->pps.scaling_matrix4[i][x]) << shift;
2045 }
2046 }
2047}
2048
2049static void init_dequant_tables(H264Context *h){
2050 int i,x;
2051 init_dequant4_coeff_table(h);
2052 if(h->pps.transform_8x8_mode)
2053 init_dequant8_coeff_table(h);
2054 if(h->sps.transform_bypass){
2055 for(i=0; i<6; i++)
2056 for(x=0; x<16; x++)
2057 h->dequant4_coeff[i][0][x] = 1<<6;
2058 if(h->pps.transform_8x8_mode)
2059 for(i=0; i<2; i++)
2060 for(x=0; x<64; x++)
2061 h->dequant8_coeff[i][0][x] = 1<<6;
2062 }
2063}
2064
2065
0da71265
MN
2066/**
2067 * allocates tables.
3b66c4c5 2068 * needs width/height
0da71265
MN
2069 */
2070static int alloc_tables(H264Context *h){
2071 MpegEncContext * const s = &h->s;
7bc9090a 2072 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2073 int x,y;
0da71265
MN
2074
2075 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2076
53c05b1e 2077 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2078 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2079 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2080
7526ade2
MN
2081 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2082 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2083 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2084 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2085
b735aeea 2086 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2087 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2088
a55f20bd
LM
2089 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2090 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2091 for(y=0; y<s->mb_height; y++){
2092 for(x=0; x<s->mb_width; x++){
7bc9090a 2093 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2094 const int b_xy = 4*x + 4*y*h->b_stride;
2095 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2096
0da71265
MN
2097 h->mb2b_xy [mb_xy]= b_xy;
2098 h->mb2b8_xy[mb_xy]= b8_xy;
2099 }
2100 }
9f2d1b4f 2101
9c6221ae
GV
2102 s->obmc_scratchpad = NULL;
2103
56edbd81
LM
2104 if(!h->dequant4_coeff[0])
2105 init_dequant_tables(h);
2106
0da71265
MN
2107 return 0;
2108fail:
2109 free_tables(h);
2110 return -1;
2111}
2112
afebe2f7
2113/**
2114 * Mimic alloc_tables(), but for every context thread.
2115 */
2116static void clone_tables(H264Context *dst, H264Context *src){
2117 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2118 dst->non_zero_count = src->non_zero_count;
2119 dst->slice_table = src->slice_table;
2120 dst->cbp_table = src->cbp_table;
2121 dst->mb2b_xy = src->mb2b_xy;
2122 dst->mb2b8_xy = src->mb2b8_xy;
2123 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2124 dst->mvd_table[0] = src->mvd_table[0];
2125 dst->mvd_table[1] = src->mvd_table[1];
2126 dst->direct_table = src->direct_table;
2127
afebe2f7
2128 dst->s.obmc_scratchpad = NULL;
2129 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2130}
2131
2132/**
2133 * Init context
2134 * Allocate buffers which are not shared amongst multiple threads.
2135 */
2136static int context_init(H264Context *h){
afebe2f7
2137 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2138 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2139
afebe2f7
2140 return 0;
2141fail:
2142 return -1; // free_tables will clean up for us
2143}
2144
98a6fff9 2145static av_cold void common_init(H264Context *h){
0da71265 2146 MpegEncContext * const s = &h->s;
0da71265
MN
2147
2148 s->width = s->avctx->width;
2149 s->height = s->avctx->height;
2150 s->codec_id= s->avctx->codec->id;
115329f1 2151
c92a30bb 2152 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2153
239ea04c 2154 h->dequant_coeff_pps= -1;
9a41c2c7 2155 s->unrestricted_mv=1;
0da71265 2156 s->decode=1; //FIXME
56edbd81 2157
a5805aa9
MN
2158 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2159
56edbd81
LM
2160 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2161 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2162}
2163
98a6fff9 2164static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2165 H264Context *h= avctx->priv_data;
2166 MpegEncContext * const s = &h->s;
2167
3edcacde 2168 MPV_decode_defaults(s);
115329f1 2169
0da71265
MN
2170 s->avctx = avctx;
2171 common_init(h);
2172
2173 s->out_format = FMT_H264;
2174 s->workaround_bugs= avctx->workaround_bugs;
2175
2176 // set defaults
0da71265 2177// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2178 s->quarter_sample = 1;
0da71265 2179 s->low_delay= 1;
7a9dba3c
MN
2180
2181 if(avctx->codec_id == CODEC_ID_SVQ3)
2182 avctx->pix_fmt= PIX_FMT_YUVJ420P;
0d3d172f 2183 else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd 2184 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
7a9dba3c 2185 else
1d42f410 2186 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2187
c2212338 2188 decode_init_vlc();
115329f1 2189
26165f99
MR
2190 if(avctx->extradata_size > 0 && avctx->extradata &&
2191 *(char *)avctx->extradata == 1){
4770b1b4
RT
2192 h->is_avc = 1;
2193 h->got_avcC = 0;
26165f99
MR
2194 } else {
2195 h->is_avc = 0;
4770b1b4
RT
2196 }
2197
afebe2f7 2198 h->thread_context[0] = h;
18c7be65 2199 h->outputed_poc = INT_MIN;
e4b8f1fa 2200 h->prev_poc_msb= 1<<16;
37a558fe 2201 h->sei_recovery_frame_cnt = -1;
ff594f81 2202 h->sei_dpb_output_delay = 0;
cf6065ca 2203 h->sei_cpb_removal_delay = -1;
2ea89d92 2204 h->sei_buffering_period_present = 0;
0da71265
MN
2205 return 0;
2206}
2207
af8aa846 2208static int frame_start(H264Context *h){
0da71265
MN
2209 MpegEncContext * const s = &h->s;
2210 int i;
2211
af8aa846
MN
2212 if(MPV_frame_start(s, s->avctx) < 0)
2213 return -1;
0da71265 2214 ff_er_frame_start(s);
3a22d7fa
JD
2215 /*
2216 * MPV_frame_start uses pict_type to derive key_frame.
2217 * This is incorrect for H.264; IDR markings must be used.
1412060e 2218 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2219 * See decode_nal_units().
2220 */
2221 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2222
2223 assert(s->linesize && s->uvlinesize);
2224
2225 for(i=0; i<16; i++){
2226 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2227 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2228 }
2229 for(i=0; i<4; i++){
2230 h->block_offset[16+i]=
2231 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2232 h->block_offset[24+16+i]=
2233 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2234 }
2235
934b0821
LM
2236 /* can't be in alloc_tables because linesize isn't known there.
2237 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2238 for(i = 0; i < s->avctx->thread_count; i++)
2239 if(!h->thread_context[i]->s.obmc_scratchpad)
2240 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2241
2242 /* some macroblocks will be accessed before they're available */
afebe2f7 2243 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2244 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2245
0da71265 2246// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2247
1412060e 2248 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2249 // that if we break out due to an error it can be released automatically
2250 // in the next MPV_frame_start().
2251 // SVQ3 as well as most other codecs have only last/next/current and thus
2252 // get released even with set reference, besides SVQ3 and others do not
2253 // mark frames as reference later "naturally".
2254 if(s->codec_id != CODEC_ID_SVQ3)
2255 s->current_picture_ptr->reference= 0;
357282c6
MN
2256
2257 s->current_picture_ptr->field_poc[0]=
2258 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2259 assert(s->current_picture_ptr->long_ref==0);
357282c6 2260
af8aa846 2261 return 0;
0da71265
MN
2262}
2263
93cc10fa 2264static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2265 MpegEncContext * const s = &h->s;
2266 int i;
5f7f9719
MN
2267 int step = 1;
2268 int offset = 1;
2269 int uvoffset= 1;
2270 int top_idx = 1;
2271 int skiplast= 0;
115329f1 2272
53c05b1e
MN
2273 src_y -= linesize;
2274 src_cb -= uvlinesize;
2275 src_cr -= uvlinesize;
2276
5f7f9719
MN
2277 if(!simple && FRAME_MBAFF){
2278 if(s->mb_y&1){
2279 offset = MB_MBAFF ? 1 : 17;
2280 uvoffset= MB_MBAFF ? 1 : 9;
2281 if(!MB_MBAFF){
2282 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2283 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
49fb20cb 2284 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2286 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2287 }
2288 }
2289 }else{
2290 if(!MB_MBAFF){
2291 h->left_border[0]= h->top_borders[0][s->mb_x][15];
49fb20cb 2292 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2293 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2294 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2295 }
2296 skiplast= 1;
2297 }
2298 offset =
2299 uvoffset=
2300 top_idx = MB_MBAFF ? 0 : 1;
2301 }
2302 step= MB_MBAFF ? 2 : 1;
2303 }
2304
3b66c4c5 2305 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2306 // and the line above the bottom macroblock
5f7f9719
MN
2307 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2308 for(i=1; i<17 - skiplast; i++){
2309 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2310 }
115329f1 2311
5f7f9719
MN
2312 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2313 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2314
49fb20cb 2315 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2316 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2317 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2318 for(i=1; i<9 - skiplast; i++){
2319 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2320 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2321 }
5f7f9719
MN
2322 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2323 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2324 }
2325}
2326
93cc10fa 2327static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2328 MpegEncContext * const s = &h->s;
2329 int temp8, i;
2330 uint64_t temp64;
b69378e2
2331 int deblock_left;
2332 int deblock_top;
2333 int mb_xy;
5f7f9719
MN
2334 int step = 1;
2335 int offset = 1;
2336 int uvoffset= 1;
2337 int top_idx = 1;
2338
2339 if(!simple && FRAME_MBAFF){
2340 if(s->mb_y&1){
2341 offset = MB_MBAFF ? 1 : 17;
2342 uvoffset= MB_MBAFF ? 1 : 9;
2343 }else{
2344 offset =
2345 uvoffset=
2346 top_idx = MB_MBAFF ? 0 : 1;
2347 }
2348 step= MB_MBAFF ? 2 : 1;
2349 }
b69378e2
2350
2351 if(h->deblocking_filter == 2) {
64514ee8 2352 mb_xy = h->mb_xy;
b69378e2
2353 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2354 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2355 } else {
2356 deblock_left = (s->mb_x > 0);
6c805007 2357 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2358 }
53c05b1e
MN
2359
2360 src_y -= linesize + 1;
2361 src_cb -= uvlinesize + 1;
2362 src_cr -= uvlinesize + 1;
2363
2364#define XCHG(a,b,t,xchg)\
2365t= a;\
2366if(xchg)\
2367 a= b;\
2368b= t;
d89dc06a
LM
2369
2370 if(deblock_left){
5f7f9719
MN
2371 for(i = !deblock_top; i<16; i++){
2372 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2373 }
5f7f9719 2374 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2375 }
2376
2377 if(deblock_top){
5f7f9719
MN
2378 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2379 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2380 if(s->mb_x+1 < s->mb_width){
5f7f9719 2381 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2382 }
53c05b1e 2383 }
53c05b1e 2384
49fb20cb 2385 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2386 if(deblock_left){
5f7f9719
MN
2387 for(i = !deblock_top; i<8; i++){
2388 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2389 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2390 }
5f7f9719
MN
2391 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2392 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2393 }
2394 if(deblock_top){
5f7f9719
MN
2395 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2396 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2397 }
53c05b1e
MN
2398 }
2399}
2400
/**
 * Reconstruct the current macroblock (s->mb_x, s->mb_y) into the current
 * picture.
 *
 * The visible steps are: compute the destination pointers, select field or
 * frame strides, then either copy intra PCM samples from h->mb or run intra
 * prediction / hl_motion() followed by adding the luma and chroma residuals,
 * clear h->mb when needed and finally, if h->deblocking_filter is set, save
 * the borders and call the loop filter.
 *
 * @param h      decoder context
 * @param simple when nonzero, every check written as "!simple && ..." is
 *               skipped (field macroblocks, intra PCM, transform bypass,
 *               FRAME_MBAFF filtering) and the CODEC_FLAG_GRAY / SVQ3 tests
 *               are short-circuited
 */
5a6a6cc7 2401static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2402 MpegEncContext * const s = &h->s;
2403 const int mb_x= s->mb_x;
2404 const int mb_y= s->mb_y;
64514ee8 2405 const int mb_xy= h->mb_xy;
0da71265
MN
2406 const int mb_type= s->current_picture.mb_type[mb_xy];
2407 uint8_t *dest_y, *dest_cb, *dest_cr;
2408 int linesize, uvlinesize /*dct_offset*/;
2409 int i;
6867a90b 2410 int *block_offset = &h->block_offset[0];
41e4055b 2411 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 2412 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 2413 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 2414 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2415 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2416
/* Destination of the 16x16 luma and the two 8x8 chroma areas of this macroblock. */
6120a343
MN
2417 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2418 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2419 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 2420
a957c27b
LM
2421 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2422 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2423
/* Field macroblock: doubled strides and the block offsets starting at index 24. */
bd91fee3 2424 if (!simple && MB_FIELD) {
5d18eaad
LM
2425 linesize = h->mb_linesize = s->linesize * 2;
2426 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2427 block_offset = &h->block_offset[24];
1412060e 2428 if(mb_y&1){ //FIXME move out of this function?
0da71265 2429 dest_y -= s->linesize*15;
6867a90b
LLL
2430 dest_cb-= s->uvlinesize*7;
2431 dest_cr-= s->uvlinesize*7;
0da71265 2432 }
5d18eaad
LM
2433 if(FRAME_MBAFF) {
2434 int list;
3425501d 2435 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2436 if(!USES_LIST(mb_type, list))
2437 continue;
2438 if(IS_16X16(mb_type)){
2439 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2440 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2441 }else{
2442 for(i=0; i<16; i+=4){
5d18eaad
LM
2443 int ref = h->ref_cache[list][scan8[i]];
2444 if(ref >= 0)
1710856c 2445 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2446 }
2447 }
2448 }
2449 }
0da71265 2450 } else {
5d18eaad
LM
2451 linesize = h->mb_linesize = s->linesize;
2452 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2453// dct_offset = s->linesize * 16;
2454 }
115329f1 2455
/* Intra PCM: the samples are copied straight out of h->mb. */
bd91fee3 2456 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2457 for (i=0; i<16; i++) {
2458 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2459 }
c1708e8d
MN
2460 for (i=0; i<8; i++) {
2461 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2462 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2463 }
e7e09b49
LLL
2464 } else {
2465 if(IS_INTRA(mb_type)){
/* xchg_mb_border() is called with xchg=1 before prediction and xchg=0 after it. */
5f7f9719 2466 if(h->deblocking_filter)
93cc10fa 2467 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2468
49fb20cb 2469 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2470 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2471 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2472 }
0da71265 2473
e7e09b49 2474 if(IS_INTRA4x4(mb_type)){
bd91fee3 2475 if(simple || !s->encoding){
43efd19a 2476 if(IS_8x8DCT(mb_type)){
1eb96035
MN
2477 if(transform_bypass){
2478 idct_dc_add =
2479 idct_add = s->dsp.add_pixels8;
dae006d7 2480 }else{
1eb96035
MN
2481 idct_dc_add = s->dsp.h264_idct8_dc_add;
2482 idct_add = s->dsp.h264_idct8_add;
2483 }
43efd19a
LM
2484 for(i=0; i<16; i+=4){
2485 uint8_t * const ptr= dest_y + block_offset[i];
2486 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
2487 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2488 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2489 }else{
ac0623b2
MN
2490 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2491 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2492 (h->topright_samples_available<<i)&0x4000, linesize);
2493 if(nnz){
2494 if(nnz == 1 && h->mb[i*16])
2495 idct_dc_add(ptr, h->mb + i*16, linesize);
2496 else
2497 idct_add (ptr, h->mb + i*16, linesize);
2498 }
41e4055b 2499 }
43efd19a 2500 }
1eb96035
MN
2501 }else{
2502 if(transform_bypass){
2503 idct_dc_add =
2504 idct_add = s->dsp.add_pixels4;
2505 }else{
2506 idct_dc_add = s->dsp.h264_idct_dc_add;
2507 idct_add = s->dsp.h264_idct_add;
2508 }
aebb5d6d
MN
2509 for(i=0; i<16; i++){
2510 uint8_t * const ptr= dest_y + block_offset[i];
2511 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 2512
aebb5d6d
MN
2513 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2514 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2515 }else{
2516 uint8_t *topright;
2517 int nnz, tr;
2518 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2519 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2520 assert(mb_y || linesize <= block_offset[i]);
/* Without top-right samples, the sample at ptr[3 - linesize] is replicated four times. */
2521 if(!topright_avail){
2522 tr= ptr[3 - linesize]*0x01010101;
2523 topright= (uint8_t*) &tr;
2524 }else
2525 topright= ptr + 4 - linesize;
ac0623b2 2526 }else
aebb5d6d
MN
2527 topright= NULL;
2528
2529 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2530 nnz = h->non_zero_count_cache[ scan8[i] ];
2531 if(nnz){
2532 if(is_h264){
2533 if(nnz == 1 && h->mb[i*16])
2534 idct_dc_add(ptr, h->mb + i*16, linesize);
2535 else
2536 idct_add (ptr, h->mb + i*16, linesize);
2537 }else
2538 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2539 }
ac0623b2 2540 }
41e4055b 2541 }
8b82a956 2542 }
0da71265 2543 }
e7e09b49 2544 }else{
c92a30bb 2545 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2546 if(is_h264){
36940eca 2547 if(!transform_bypass)
93f0c0a4 2548 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2549 }else
e7e09b49 2550 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2551 }
5f7f9719 2552 if(h->deblocking_filter)
93cc10fa 2553 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2554 }else if(is_h264){
e7e09b49 2555 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2556 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2557 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2558 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2559 }
e7e09b49
LLL
2560
2561
/* Luma residual for all types except intra 4x4, whose residual was added per block above. */
2562 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2563 if(is_h264){
ef9d1d15 2564 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
2565 if(transform_bypass){
2566 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
2567 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2568 }else{
2569 for(i=0; i<16; i++){
2570 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 2571 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2572 }
2fd1f0e0
MN
2573 }
2574 }else{
2575 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 2576 }
49c084a7 2577 }else if(h->cbp&15){
2fd1f0e0 2578 if(transform_bypass){
0a8ca22f 2579 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 2580 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 2581 for(i=0; i<16; i+=di){
62bc966f 2582 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 2583 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2584 }
ef9d1d15 2585 }
2fd1f0e0
MN
2586 }else{
2587 if(IS_8x8DCT(mb_type)){
2588 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2589 }else{
2590 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2591 }
2592 }
4704097a 2593 }
e7e09b49
LLL
2594 }else{
2595 for(i=0; i<16; i++){
2596 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2597 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2598 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2599 }
4704097a 2600 }
0da71265
MN
2601 }
2602 }
0da71265 2603
/* Chroma residual, only when one of the cbp bits in 0x30 is set. */
49fb20cb 2604 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2605 uint8_t *dest[2] = {dest_cb, dest_cr};
2606 if(transform_bypass){
96465b90
MN
2607 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2608 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2609 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2610 }else{
c25ac15a 2611 idct_add = s->dsp.add_pixels4;
96465b90
MN
2612 for(i=16; i<16+8; i++){
2613 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2614 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2615 }
2616 }
ef9d1d15 2617 }else{
4691a77d
2618 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2619 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 2620 if(is_h264){
c25ac15a
MN
2621 idct_add = s->dsp.h264_idct_add;
2622 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
2623 for(i=16; i<16+8; i++){
2624 if(h->non_zero_count_cache[ scan8[i] ])
2625 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2626 else if(h->mb[i*16])
2627 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2628 }
aebb5d6d
MN
2629 }else{
2630 for(i=16; i<16+8; i++){
2631 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2632 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2633 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2634 }
e7e09b49 2635 }
4704097a 2636 }
0da71265
MN
2637 }
2638 }
2639 }
c212fb0c
MN
2640 if(h->cbp || IS_INTRA(mb_type))
2641 s->dsp.clear_blocks(h->mb);
2642
/* Deblocking: save this macroblock's borders, refill the caches and run the loop filter. */
53c05b1e 2643 if(h->deblocking_filter) {
5f7f9719
MN
2644 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2645 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2646 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2647 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2648 if (!simple && FRAME_MBAFF) {
5f7f9719 2649 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2650 } else {
3e20143e 2651 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2652 }
53c05b1e 2653 }
0da71265
MN
2654}
2655
0da71265 2656/**
bd91fee3
AS
2657 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2658 */
2659static void hl_decode_mb_simple(H264Context *h){
2660 hl_decode_mb_internal(h, 1);
2661}
2662
2663/**
2664 * Process a macroblock; this handles edge cases, such as interlacing.
2665 */
2666static void av_noinline hl_decode_mb_complex(H264Context *h){
2667 hl_decode_mb_internal(h, 0);
2668}
2669
2670static void hl_decode_mb(H264Context *h){
2671 MpegEncContext * const s = &h->s;
64514ee8 2672 const int mb_xy= h->mb_xy;
bd91fee3 2673 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 2674 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2675
bd91fee3
AS
2676 if (is_complex)
2677 hl_decode_mb_complex(h);
2678 else hl_decode_mb_simple(h);
2679}
2680
2143b118 2681static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2682 int i;
2683 for (i = 0; i < 4; ++i) {
2143b118 2684 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2685 pic->data[i] += pic->linesize[i];
2143b118 2686 pic->reference = parity;
11cc1d8c
JD
2687 pic->linesize[i] *= 2;
2688 }
2879c75f 2689 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2690}
2691
2692static int split_field_copy(Picture *dest, Picture *src,
2693 int parity, int id_add){
2694 int match = !!(src->reference & parity);
2695
2696 if (match) {
2697 *dest = *src;
d4f7d838 2698 if(parity != PICT_FRAME){
b3e93fd4
MN
2699 pic_as_field(dest, parity);
2700 dest->pic_id *= 2;
2701 dest->pic_id += id_add;
d4f7d838 2702 }
11cc1d8c
JD
2703 }
2704
2705 return match;
2706}
2707
d4f7d838
MN
2708static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2709 int i[2]={0};
2710 int index=0;
11cc1d8c 2711
d4f7d838
MN
2712 while(i[0]<len || i[1]<len){
2713 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2714 i[0]++;
2715 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2716 i[1]++;
2717 if(i[0] < len){
2718 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2719 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2720 }
2721 if(i[1] < len){
2722 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2724 }
2725 }
2726
d4f7d838 2727 return index;
11cc1d8c
JD
2728}
2729
d4f7d838
MN
2730static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2731 int i, best_poc;
2732 int out_i= 0;
11cc1d8c 2733
d4f7d838
MN
2734 for(;;){
2735 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2736
d4f7d838
MN
2737 for(i=0; i<len; i++){
2738 const int poc= src[i]->poc;
2739 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2740 best_poc= poc;
2741 sorted[out_i]= src[i];
2742 }
2743 }
2744 if(best_poc == (dir ? INT_MIN : INT_MAX))
2745 break;
2746 limit= sorted[out_i++]->poc - dir;
2747 }
2748 return out_i;
11cc1d8c
JD
2749}
2750
bd91fee3 2751/**
0da71265
MN
2752 * fills the default_ref_list.
2753 */
2754static int fill_default_ref_list(H264Context *h){
2755 MpegEncContext * const s = &h->s;
d4f7d838 2756 int i, len;
115329f1 2757
9f5c1037 2758 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2759 Picture *sorted[32];
2760 int cur_poc, list;
2761 int lens[2];
11cc1d8c 2762
d4f7d838
MN
2763 if(FIELD_PICTURE)
2764 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2765 else
2766 cur_poc= s->current_picture_ptr->poc;
086acdd5 2767
d4f7d838
MN
2768 for(list= 0; list<2; list++){
2769 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2770 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2771 assert(len<=32);
2772 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2773 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2774 assert(len<=32);
086acdd5 2775
d4f7d838
MN
2776 if(len < h->ref_count[list])
2777 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2778 lens[list]= len;
086acdd5
JD
2779 }
2780
d4f7d838
MN
2781 if(lens[0] == lens[1] && lens[1] > 1){
2782 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2783 if(i == lens[0])
2784 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2785 }
086acdd5 2786 }else{
d4f7d838
MN
2787 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2788 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2789 assert(len <= 32);
2790 if(len < h->ref_count[0])
2791 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2792 }
827c91bf
LLL
2793#ifdef TRACE
2794 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2795 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2796 }
9f5c1037 2797 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2798 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2799 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2800 }
2801 }
2802#endif
0da71265
MN
2803 return 0;
2804}
2805
827c91bf
LLL
2806static void print_short_term(H264Context *h);
2807static void print_long_term(H264Context *h);
2808
949da388
JD
2809/**
2810 * Extract structure information about the picture described by pic_num in
2811 * the current decoding context (frame or field). Note that pic_num is
2812 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2813 * @param pic_num picture number for which to extract structure information
2814 * @param structure one of PICT_XXX describing structure of picture
2815 * with pic_num
2816 * @return frame number (short term) or long term index of picture
2817 * described by pic_num
2818 */
2819static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2820 MpegEncContext * const s = &h->s;
2821
2822 *structure = s->picture_structure;
2823 if(FIELD_PICTURE){
2824 if (!(pic_num & 1))
2825 /* opposite field */
2826 *structure ^= PICT_FRAME;
2827 pic_num >>= 1;
2828 }
2829
2830 return pic_num;
2831}
2832
0da71265
MN
2833static int decode_ref_pic_list_reordering(H264Context *h){
2834 MpegEncContext * const s = &h->s;
949da388 2835 int list, index, pic_structure;
115329f1 2836
827c91bf
LLL
2837 print_short_term(h);
2838 print_long_term(h);
115329f1 2839
3425501d 2840 for(list=0; list<h->list_count; list++){
0da71265
MN
2841 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2842
2843 if(get_bits1(&s->gb)){
2844 int pred= h->curr_pic_num;
0da71265
MN
2845
2846 for(index=0; ; index++){
9963b332 2847 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
88e7a4d1 2848 unsigned int pic_id;
0da71265 2849 int i;
2f944356 2850 Picture *ref = NULL;
115329f1
DB
2851
2852 if(reordering_of_pic_nums_idc==3)
0bc42cad 2853 break;
115329f1 2854
0da71265 2855 if(index >= h->ref_count[list]){
9b879566 2856 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2857 return -1;
2858 }
115329f1 2859
0da71265
MN
2860 if(reordering_of_pic_nums_idc<3){
2861 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2862 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2863 int frame_num;
0da71265 2864
03d3cab8 2865 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2866 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2867 return -1;
2868 }
2869
2870 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2871 else pred+= abs_diff_pic_num;
2872 pred &= h->max_pic_num - 1;
115329f1 2873
949da388
JD
2874 frame_num = pic_num_extract(h, pred, &pic_structure);
2875
0d175622
MN
2876 for(i= h->short_ref_count-1; i>=0; i--){
2877 ref = h->short_ref[i];
949da388 2878 assert(ref->reference);
0d175622 2879 assert(!ref->long_ref);
6edac8e1 2880 if(
af8c5e08
MN
2881 ref->frame_num == frame_num &&
2882 (ref->reference & pic_structure)
6edac8e1 2883 )
0da71265
MN
2884 break;
2885 }
0d175622 2886 if(i>=0)
949da388 2887 ref->pic_id= pred;
0da71265 2888 }else{
949da388 2889 int long_idx;
0da71265 2890 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2891
2892 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2893
2894 if(long_idx>31){
88e7a4d1
MN
2895 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2896 return -1;
2897 }
949da388
JD
2898 ref = h->long_ref[long_idx];
2899 assert(!(ref && !ref->reference));
af8c5e08 2900 if(ref && (ref->reference & pic_structure)){
ac658be5 2901 ref->pic_id= pic_id;
ac658be5
FOL
2902 assert(ref->long_ref);
2903 i=0;
2904 }else{
2905 i=-1;
2906 }
0da71265
MN
2907 }
2908
0d315f28 2909 if (i < 0) {
9b879566 2910 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2911 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2912 } else {
2913 for(i=index; i+1<h->ref_count[list]; i++){
2914 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2915 break;
21be92bf
MN
2916 }
2917 for(; i > index; i--){
2918 h->ref_list[list][i]= h->ref_list[list][i-1];
2919 }
0d175622 2920 h->ref_list[list][index]= *ref;
949da388 2921 if (FIELD_PICTURE){
2143b118 2922 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2923 }
0da71265 2924 }
0bc42cad 2925 }else{
9b879566 2926 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2927 return -1;
2928 }
2929 }
2930 }
0da71265 2931 }
3425501d 2932 for(list=0; list<h->list_count; list++){
6ab87211 2933 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2934 if(!h->ref_list[list][index].data[0]){
2935 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2936 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2937 }
6ab87211 2938 }
6ab87211 2939 }
115329f1 2940
115329f1 2941 return 0;
0da71265
MN
2942}
2943
91c58c94 2944static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2945 int list, i, j;
3425501d 2946 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2947 for(i=0; i<h->ref_count[list]; i++){
2948 Picture *frame = &h->ref_list[list][i];
2949 Picture *field = &h->ref_list[list][16+2*i];
2950 field[0] = *frame;
2951 for(j=0; j<3; j++)
2952 field[0].linesize[j] <<= 1;
2143b118 2953 field[0].reference = PICT_TOP_FIELD;
078f42dd 2954 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2955 field[1] = field[0];
2956 for(j=0; j<3; j++)
2957 field[1].data[j] += frame->linesize[j];
2143b118 2958 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2959 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2960
2961 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2962 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2963 for(j=0; j<2; j++){
2964 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2965 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2966 }
2967 }
2968 }
2969 for(j=0; j<h->ref_count[1]; j++){
2970 for(i=0; i<h->ref_count[0]; i++)
2971 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2972 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2973 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2974 }
2975}
2976
0da71265
MN
2977static int pred_weight_table(H264Context *h){
2978 MpegEncContext * const s = &h->s;
2979 int list, i;
9f2d1b4f 2980 int luma_def, chroma_def;
115329f1 2981
9f2d1b4f
LM
2982 h->use_weight= 0;
2983 h->use_weight_chroma= 0;
0da71265
MN
2984 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2985 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2986 luma_def = 1<<h->luma_log2_weight_denom;
2987 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2988
2989 for(list=0; list<2; list++){
cb99c652
GB
2990 h->luma_weight_flag[list] = 0;
2991 h->chroma_weight_flag[list] = 0;
0da71265
MN
2992 for(i=0; i<h->ref_count[list]; i++){
2993 int luma_weight_flag, chroma_weight_flag;
115329f1 2994
0da71265
MN
2995 luma_weight_flag= get_bits1(&s->gb);
2996 if(luma_weight_flag){
2997 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2998 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f 2999 if( h->luma_weight[list][i] != luma_def
cb99c652 3000 || h->luma_offset[list][i] != 0) {
9f2d1b4f 3001 h->use_weight= 1;
cb99c652
GB
3002 h->luma_weight_flag[list]= 1;
3003 }
9f2d1b4f
LM
3004 }else{
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
0da71265
MN
3007 }
3008
0af6967e 3009 if(CHROMA){
fef744d4
MN
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3012 int j;
3013 for(j=0; j<2; j++){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
cb99c652 3017 || h->chroma_offset[list][i][j] != 0) {
fef744d4 3018 h->use_weight_chroma= 1;
cb99c652
GB
3019 h->chroma_weight_flag[list]= 1;
3020 }
fef744d4
MN
3021 }
3022 }else{
3023 int j;
3024 for(j=0; j<2; j++){
3025 h->chroma_weight[list][i][j]= chroma_def;
3026 h->chroma_offset[list][i][j]= 0;
3027 }
0da71265
MN
3028 }
3029 }
3030 }
9f5c1037 3031 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 3032 }
9f2d1b4f 3033 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3034 return 0;
3035}
3036
9f2d1b4f
LM
3037static void implicit_weight_table(H264Context *h){
3038 MpegEncContext * const s = &h->s;
cb99c652 3039 int ref0, ref1, i;
9f2d1b4f
LM
3040 int cur_poc = s->current_picture_ptr->poc;
3041
ce09f927
GB
3042 for (i = 0; i < 2; i++) {
3043 h->luma_weight_flag[i] = 0;
3044 h->chroma_weight_flag[i] = 0;
3045 }
3046
9f2d1b4f
LM
3047 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3048 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3049 h->use_weight= 0;
3050 h->use_weight_chroma= 0;
3051 return;
3052 }
3053
3054 h->use_weight= 2;
3055 h->use_weight_chroma= 2;
3056 h->luma_log2_weight_denom= 5;
3057 h->chroma_log2_weight_denom= 5;
3058
9f2d1b4f
LM
3059 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3060 int poc0 = h->ref_list[0][ref0].poc;
3061 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3062 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3063 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3064 if(td){
f66e4f5f 3065 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3066 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3067 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3068 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3069 h->implicit_weight[ref0][ref1] = 32;
3070 else
3071 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3072 }else
3073 h->implicit_weight[ref0][ref1] = 32;
3074 }
3075 }
3076}
3077
8fd57a66
JD
3078/**
3079 * Mark a picture as no longer needed for reference. The refmask
3080 * argument allows unreferencing of individual fields or the whole frame.
3081 * If the picture becomes entirely unreferenced, but is being held for
3082 * display purposes, it is marked as such.
3083 * @param refmask mask of fields to unreference; the mask is bitwise
3084 * anded with the reference marking of pic
3085 * @return non-zero if pic becomes entirely unreferenced (except possibly
3086 * for display purposes) zero if one of the fields remains in
3087 * reference
3088 */
3089static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3090 int i;
8fd57a66
JD
3091 if (pic->reference &= refmask) {
3092 return 0;
3093 } else {
79f4494a
MN
3094 for(i = 0; h->delayed_pic[i]; i++)
3095 if(pic == h->delayed_pic[i]){
3096 pic->reference=DELAYED_PIC_REF;
3097 break;
3098 }
8fd57a66
JD
3099 return 1;
3100 }
4e4d983e
LM
3101}
3102
0da71265 3103/**
5175b937 3104 * instantaneous decoder refresh.
0da71265
MN
3105 */
3106static void idr(H264Context *h){
4e4d983e 3107 int i;
0da71265 3108
dc032f33 3109 for(i=0; i<16; i++){
9c0e4624 3110 remove_long(h, i, 0);
0da71265 3111 }
849b9cef 3112 assert(h->long_ref_count==0);
0da71265
MN
3113
3114 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3115 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3116 h->short_ref[i]= NULL;
3117 }
3118 h->short_ref_count=0;
a149c1a5 3119 h->prev_frame_num= 0;
80f8e035
MN
3120 h->prev_frame_num_offset= 0;
3121 h->prev_poc_msb=
3122 h->prev_poc_lsb= 0;
0da71265
MN
3123}
3124
7c33ad19
LM
3125/* forget old pics after a seek */
3126static void flush_dpb(AVCodecContext *avctx){
3127 H264Context *h= avctx->priv_data;
3128 int i;
64b9d48f 3129 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3130 if(h->delayed_pic[i])
3131 h->delayed_pic[i]->reference= 0;
7c33ad19 3132 h->delayed_pic[i]= NULL;
285b570f 3133 }
df8a7dff 3134 h->outputed_poc= INT_MIN;
7c33ad19 3135 idr(h);
ca159196
MR
3136 if(h->s.current_picture_ptr)
3137 h->s.current_picture_ptr->reference= 0;
12d96de3 3138 h->s.first_field= 0;
37a558fe 3139 h->sei_recovery_frame_cnt = -1;
ff594f81 3140 h->sei_dpb_output_delay = 0;
cf6065ca 3141 h->sei_cpb_removal_delay = -1;
2ea89d92 3142 h->sei_buffering_period_present = 0;
e240f898 3143 ff_mpeg_flush(avctx);
7c33ad19
LM
3144}
3145
0da71265 3146/**
47e112f8
JD
3147 * Find a Picture in the short term reference list by frame number.
3148 * @param frame_num frame number to search for
3149 * @param idx the index into h->short_ref where returned picture is found
3150 * undefined if no picture found.
3151 * @return pointer to the found picture, or NULL if no pic with the provided
3152 * frame number is found
0da71265 3153 */
47e112f8 3154static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce 3155 MpegEncContext * const s = &h->s;
0da71265 3156 int i;
115329f1 3157
0da71265
MN