Indent
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265
MN
22/**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265 34#include "golomb.h"
626464fb 35#include "rectangle.h"
0da71265 36
e5017ab8 37#include "cabac.h"
52cb7981 38#ifdef ARCH_X86
a6493a8f 39#include "x86/h264_i386.h"
52cb7981 40#endif
e5017ab8 41
2848ce84 42//#undef NDEBUG
0da71265
MN
43#include <assert.h>
44
2ddcf84b
JD
45/**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
48 */
49#define DELAYED_PIC_REF 4
50
0da71265 51static VLC coeff_token_vlc[4];
910e3668
AC
52static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
54
0da71265 55static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
56static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
58
59static VLC total_zeros_vlc[15];
910e3668
AC
60static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61static const int total_zeros_vlc_tables_size = 512;
62
0da71265 63static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
64static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
66
67static VLC run_vlc[6];
910e3668
AC
68static VLC_TYPE run_vlc_tables[6][8][2];
69static const int run_vlc_tables_size = 8;
70
0da71265 71static VLC run7_vlc;
910e3668
AC
72static VLC_TYPE run7_vlc_table[96][2];
73static const int run7_vlc_table_size = 96;
0da71265 74
8b82a956
MN
75static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 77static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 78static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 79static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 80
/**
 * Packs the low 16 bits of two integers into one 32-bit word.
 *
 * Without WORDS_BIGENDIAN, a fills bits 0..15 and b fills bits 16..31;
 * with WORDS_BIGENDIAN the two halves are swapped.
 *
 * @param a first value (only its low 16 bits survive when it lands in the
 *          low half)
 * @param b second value
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
    /* Shift in uint32_t: left-shifting a negative int, or shifting a set
     * bit into the sign bit, is undefined behaviour in C. */
#ifdef WORDS_BIGENDIAN
    return (b & 0xFFFF) + ((uint32_t)a << 16);
#else
    return (a & 0xFFFF) + ((uint32_t)b << 16);
#endif
}
88
/* rem6[i] == i % 6 for i in 0..51 (presumably indexed by a quantiser
 * value -- confirm at the use sites). */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/* div6[i] == i / 6 for i in 0..51; companion table to rem6. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
96
143d7f14
PK
/*
 * Block-index remapping tables for the left neighbour, one row per case.
 * fill_caches() uses row 0 by default, row 3 when the current macroblock
 * is a field macroblock but the left pair is not, and row 2 - bottom
 * (i.e. row 2 for the top, row 1 for the bottom macroblock of a pair) in
 * the opposite mismatch. Entries 0..3 select rows/blocks for the luma,
 * mv and pred-mode caches; entries 4..7 select the non_zero_count entries
 * loaded into the chroma part of the cache.
 */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};

#define LEVEL_TAB_BITS 8
/* Lookup table with 7 sub-tables of 1<<LEVEL_TAB_BITS two-byte entries;
 * it is filled and consumed outside this section. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
106
70abb407 107static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 108 MpegEncContext * const s = &h->s;
64514ee8 109 const int mb_xy= h->mb_xy;
0da71265
MN
110 int topleft_xy, top_xy, topright_xy, left_xy[2];
111 int topleft_type, top_type, topright_type, left_type[2];
cac55c91 112 const int * left_block;
02f7695b 113 int topleft_partition= -1;
0da71265
MN
114 int i;
115
36e097bc
JD
116 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
117
717b1733 118 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 119 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
120 return;
121
2cab6401
DB
122 /* Wow, what a mess, why didn't they simplify the interlacing & intra
123 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 124
6867a90b
LLL
125 topleft_xy = top_xy - 1;
126 topright_xy= top_xy + 1;
127 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 128 left_block = left_block_options[0];
5d18eaad 129 if(FRAME_MBAFF){
6867a90b
LLL
130 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
131 const int top_pair_xy = pair_xy - s->mb_stride;
132 const int topleft_pair_xy = top_pair_xy - 1;
133 const int topright_pair_xy = top_pair_xy + 1;
6f3c50f2
MN
134 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
135 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
136 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
137 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
138 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
6867a90b 139 const int bottom = (s->mb_y & 1);
6f3c50f2 140 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
60c6ba7a 141
6f3c50f2 142 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
6867a90b
LLL
143 top_xy -= s->mb_stride;
144 }
6f3c50f2 145 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
6867a90b 146 topleft_xy -= s->mb_stride;
6f3c50f2 147 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
02f7695b 148 topleft_xy += s->mb_stride;
1412060e 149 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 150 topleft_partition = 0;
6867a90b 151 }
6f3c50f2 152 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
6867a90b
LLL
153 topright_xy -= s->mb_stride;
154 }
6f3c50f2 155 if (left_mb_field_flag != curr_mb_field_flag) {
6867a90b 156 left_xy[1] = left_xy[0] = pair_xy - 1;
6f3c50f2
MN
157 if (curr_mb_field_flag) {
158 left_xy[1] += s->mb_stride;
159 left_block = left_block_options[3];
160 } else {
03a035e0 161 left_block= left_block_options[2 - bottom];
6867a90b
LLL
162 }
163 }
0da71265
MN
164 }
165
826de46e
LLL
166 h->top_mb_xy = top_xy;
167 h->left_mb_xy[0] = left_xy[0];
168 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 169 if(for_deblock){
717b1733
LM
170 topleft_type = 0;
171 topright_type = 0;
b735aeea
MN
172 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
173 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
174 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 175
e248cb60 176 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 177 int list;
3425501d 178 for(list=0; list<h->list_count; list++){
e248cb60
MN
179 //These values where changed for ease of performing MC, we need to change them back
180 //FIXME maybe we can make MC and loop filter use the same values or prevent
181 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 182 if(USES_LIST(mb_type,list)){
191e8ca7 183 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 184 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 185 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
186 ref += h->b8_stride;
187 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 188 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
189 }
190 }
191 }
46f2f05f
MN
192 }else{
193 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
194 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
195 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
196 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
197 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
198
199 if(IS_INTRA(mb_type)){
faa7e394 200 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
201 h->topleft_samples_available=
202 h->top_samples_available=
0da71265
MN
203 h->left_samples_available= 0xFFFF;
204 h->topright_samples_available= 0xEEEA;
205
faa7e394 206 if(!(top_type & type_mask)){
0da71265
MN
207 h->topleft_samples_available= 0xB3FF;
208 h->top_samples_available= 0x33FF;
209 h->topright_samples_available= 0x26EA;
210 }
d1d10e91
MN
211 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
212 if(IS_INTERLACED(mb_type)){
faa7e394 213 if(!(left_type[0] & type_mask)){
d1d10e91
MN
214 h->topleft_samples_available&= 0xDFFF;
215 h->left_samples_available&= 0x5FFF;
216 }
faa7e394 217 if(!(left_type[1] & type_mask)){
d1d10e91
MN
218 h->topleft_samples_available&= 0xFF5F;
219 h->left_samples_available&= 0xFF5F;
220 }
221 }else{
222 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
223 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
224 assert(left_xy[0] == left_xy[1]);
faa7e394 225 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
226 h->topleft_samples_available&= 0xDF5F;
227 h->left_samples_available&= 0x5F5F;
228 }
229 }
230 }else{
faa7e394 231 if(!(left_type[0] & type_mask)){
0da71265
MN
232 h->topleft_samples_available&= 0xDF5F;
233 h->left_samples_available&= 0x5F5F;
234 }
235 }
115329f1 236
faa7e394 237 if(!(topleft_type & type_mask))
0da71265 238 h->topleft_samples_available&= 0x7FFF;
115329f1 239
faa7e394 240 if(!(topright_type & type_mask))
0da71265 241 h->topright_samples_available&= 0xFBFF;
115329f1 242
0da71265
MN
243 if(IS_INTRA4x4(mb_type)){
244 if(IS_INTRA4x4(top_type)){
245 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
246 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
247 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
248 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
249 }else{
250 int pred;
faa7e394 251 if(!(top_type & type_mask))
0da71265 252 pred= -1;
6fbcaaa0
LLL
253 else{
254 pred= 2;
0da71265
MN
255 }
256 h->intra4x4_pred_mode_cache[4+8*0]=
257 h->intra4x4_pred_mode_cache[5+8*0]=
258 h->intra4x4_pred_mode_cache[6+8*0]=
259 h->intra4x4_pred_mode_cache[7+8*0]= pred;
260 }
261 for(i=0; i<2; i++){
262 if(IS_INTRA4x4(left_type[i])){
263 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
264 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
265 }else{
266 int pred;
faa7e394 267 if(!(left_type[i] & type_mask))
0da71265 268 pred= -1;
6fbcaaa0
LLL
269 else{
270 pred= 2;
0da71265
MN
271 }
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
274 }
275 }
276 }
277 }
29671011 278 }
115329f1
DB
279
280
0da71265 281/*
115329f1
DB
2820 . T T. T T T T
2831 L . .L . . . .
2842 L . .L . . . .
2853 . T TL . . . .
2864 L . .L . . . .
2875 L . .. . . . .
0da71265 288*/
1412060e 289//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 290 if(top_type){
6867a90b
LLL
291 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
292 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
293 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 294 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 295
6867a90b 296 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 297 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 298
6867a90b 299 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 300 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 301
0da71265 302 }else{
115329f1 303 h->non_zero_count_cache[4+8*0]=
0da71265
MN
304 h->non_zero_count_cache[5+8*0]=
305 h->non_zero_count_cache[6+8*0]=
306 h->non_zero_count_cache[7+8*0]=
115329f1 307
0da71265
MN
308 h->non_zero_count_cache[1+8*0]=
309 h->non_zero_count_cache[2+8*0]=
115329f1 310
0da71265 311 h->non_zero_count_cache[1+8*3]=
3981c385 312 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 313
0da71265 314 }
826de46e 315
6867a90b
LLL
316 for (i=0; i<2; i++) {
317 if(left_type[i]){
318 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
319 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
320 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
321 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 322 }else{
115329f1
DB
323 h->non_zero_count_cache[3+8*1 + 2*8*i]=
324 h->non_zero_count_cache[3+8*2 + 2*8*i]=
325 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 326 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
327 }
328 }
329
330 if( h->pps.cabac ) {
331 // top_cbp
332 if(top_type) {
333 h->top_cbp = h->cbp_table[top_xy];
334 } else if(IS_INTRA(mb_type)) {
335 h->top_cbp = 0x1C0;
336 } else {
337 h->top_cbp = 0;
338 }
339 // left_cbp
340 if (left_type[0]) {
341 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
342 } else if(IS_INTRA(mb_type)) {
343 h->left_cbp = 0x1C0;
344 } else {
345 h->left_cbp = 0;
346 }
347 if (left_type[0]) {
348 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
349 }
350 if (left_type[1]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 352 }
0da71265 353 }
6867a90b 354
0da71265 355#if 1
e2e5894a 356 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 357 int list;
3425501d 358 for(list=0; list<h->list_count; list++){
e2e5894a 359 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
360 /*if(!h->mv_cache_clean[list]){
361 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
362 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
363 h->mv_cache_clean[list]= 1;
364 }*/
5ad984c9 365 continue;
0da71265
MN
366 }
367 h->mv_cache_clean[list]= 0;
115329f1 368
53b19144 369 if(USES_LIST(top_type, list)){
0da71265
MN
370 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
371 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
376 h->ref_cache[list][scan8[0] + 0 - 1*8]=
377 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
378 h->ref_cache[list][scan8[0] + 2 - 1*8]=
379 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
380 }else{
115329f1
DB
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
385 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
386 }
387
4672503d
LM
388 for(i=0; i<2; i++){
389 int cache_idx = scan8[0] - 1 + i*2*8;
390 if(USES_LIST(left_type[i], list)){
391 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
392 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
393 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
394 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
395 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
396 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
397 }else{
398 *(uint32_t*)h->mv_cache [list][cache_idx ]=
399 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
400 h->ref_cache[list][cache_idx ]=
401 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
402 }
0da71265
MN
403 }
404
0281d325 405 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
406 continue;
407
53b19144 408 if(USES_LIST(topleft_type, list)){
02f7695b
LM
409 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
410 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
413 }else{
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
416 }
115329f1 417
53b19144 418 if(USES_LIST(topright_type, list)){
e2e5894a
LM
419 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
420 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
421 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 }else{
424 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
426 }
e2e5894a 427
ae08a563 428 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 429 continue;
115329f1
DB
430
431 h->ref_cache[list][scan8[5 ]+1] =
432 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 433 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 434 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
435 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
436 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
437 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 438 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
439 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
440 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
441
442 if( h->pps.cabac ) {
443 /* XXX beurk, Load mvd */
53b19144 444 if(USES_LIST(top_type, list)){
9e528114
LA
445 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
450 }else{
115329f1
DB
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
455 }
53b19144 456 if(USES_LIST(left_type[0], list)){
9e528114
LA
457 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
458 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
460 }else{
461 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
463 }
53b19144 464 if(USES_LIST(left_type[1], list)){
9e528114
LA
465 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
468 }else{
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
471 }
472 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
473 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 474 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
475 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
476 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 477
9f5c1037 478 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
479 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
480
481 if(IS_DIRECT(top_type)){
482 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
483 }else if(IS_8X8(top_type)){
484 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
485 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
486 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
487 }else{
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
489 }
115329f1 490
5d18eaad
LM
491 if(IS_DIRECT(left_type[0]))
492 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
493 else if(IS_8X8(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
495 else
496 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
497
498 if(IS_DIRECT(left_type[1]))
5ad984c9 499 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
500 else if(IS_8X8(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
502 else
5ad984c9 503 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
504 }
505 }
506
507 if(FRAME_MBAFF){
508#define MAP_MVS\
509 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
510 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
515 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
516 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
518 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
519 if(MB_FIELD){
520#define MAP_F2F(idx, mb_type)\
521 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
522 h->ref_cache[list][idx] <<= 1;\
523 h->mv_cache[list][idx][1] /= 2;\
524 h->mvd_cache[list][idx][1] /= 2;\
525 }
526 MAP_MVS
527#undef MAP_F2F
528 }else{
529#define MAP_F2F(idx, mb_type)\
530 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] >>= 1;\
532 h->mv_cache[list][idx][1] <<= 1;\
533 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 534 }
5d18eaad
LM
535 MAP_MVS
536#undef MAP_F2F
5ad984c9 537 }
9e528114 538 }
0da71265 539 }
0da71265
MN
540 }
541#endif
43efd19a
LM
542
543 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
544}
545
546static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 547 const int mb_xy= h->mb_xy;
0da71265
MN
548
549 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
550 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
551 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
552 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
553 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
554 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
555 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
556}
557
558/**
559 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
560 */
561static inline int check_intra4x4_pred_mode(H264Context *h){
562 MpegEncContext * const s = &h->s;
563 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
564 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
565 int i;
115329f1 566
0da71265
MN
567 if(!(h->top_samples_available&0x8000)){
568 for(i=0; i<4; i++){
569 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
570 if(status<0){
9b879566 571 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
572 return -1;
573 } else if(status){
574 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
575 }
576 }
577 }
115329f1 578
d1d10e91
MN
579 if((h->left_samples_available&0x8888)!=0x8888){
580 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 581 for(i=0; i<4; i++){
d1d10e91 582 if(!(h->left_samples_available&mask[i])){
26695973
MN
583 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
584 if(status<0){
585 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
586 return -1;
587 } else if(status){
588 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
589 }
d1d10e91 590 }
0da71265
MN
591 }
592 }
593
594 return 0;
595} //FIXME cleanup like next
596
597/**
598 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
599 */
600static inline int check_intra_pred_mode(H264Context *h, int mode){
601 MpegEncContext * const s = &h->s;
602 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
603 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 604
43ff0714 605 if(mode > 6U) {
5175b937 606 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 607 return -1;
5175b937 608 }
115329f1 609
0da71265
MN
610 if(!(h->top_samples_available&0x8000)){
611 mode= top[ mode ];
612 if(mode<0){
9b879566 613 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
614 return -1;
615 }
616 }
115329f1 617
d1d10e91 618 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 619 mode= left[ mode ];
d1d10e91
MN
620 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
621 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
622 }
0da71265 623 if(mode<0){
9b879566 624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 625 return -1;
115329f1 626 }
0da71265
MN
627 }
628
629 return mode;
630}
631
632/**
633 * gets the predicted intra4x4 prediction mode.
634 */
635static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
640
a9c9a240 641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
642
643 if(min<0) return DC_PRED;
644 else return min;
645}
646
647static inline void write_back_non_zero_count(H264Context *h){
64514ee8 648 const int mb_xy= h->mb_xy;
0da71265 649
6867a90b
LLL
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 657
6867a90b 658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 661
6867a90b 662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
665}
666
667/**
1412060e 668 * gets the predicted number of non-zero coefficients.
0da71265
MN
669 * @param n block index
670 */
671static inline int pred_non_zero_count(H264Context *h, int n){
672 const int index8= scan8[n];
673 const int left= h->non_zero_count_cache[index8 - 1];
674 const int top = h->non_zero_count_cache[index8 - 8];
675 int i= left + top;
115329f1 676
0da71265
MN
677 if(i<64) i= (i+1)>>1;
678
a9c9a240 679 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
680
681 return i&31;
682}
683
1924f3ce
MN
684static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
685 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 686 MpegEncContext *s = &h->s;
1924f3ce 687
5d18eaad
LM
688 /* there is no consistent mapping of mvs to neighboring locations that will
689 * make mbaff happy, so we can't move all this logic to fill_caches */
690 if(FRAME_MBAFF){
191e8ca7 691 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
692 const int16_t *mv;
693 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
694 *C = h->mv_cache[list][scan8[0]-2];
695
696 if(!MB_FIELD
697 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
698 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
699 if(IS_INTERLACED(mb_types[topright_xy])){
700#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
701 const int x4 = X4, y4 = Y4;\
702 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 703 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
704 return LIST_NOT_USED;\
705 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
706 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
707 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
708 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
709
710 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
711 }
712 }
713 if(topright_ref == PART_NOT_AVAILABLE
714 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
715 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
716 if(!MB_FIELD
717 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
718 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
719 }
720 if(MB_FIELD
721 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
722 && i >= scan8[0]+8){
1412060e 723 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 724 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
725 }
726 }
727#undef SET_DIAG_MV
728 }
729
1924f3ce
MN
730 if(topright_ref != PART_NOT_AVAILABLE){
731 *C= h->mv_cache[list][ i - 8 + part_width ];
732 return topright_ref;
733 }else{
a9c9a240 734 tprintf(s->avctx, "topright MV not available\n");
95c26348 735
1924f3ce
MN
736 *C= h->mv_cache[list][ i - 8 - 1 ];
737 return h->ref_cache[list][ i - 8 - 1 ];
738 }
739}
740
0da71265
MN
741/**
742 * gets the predicted MV.
743 * @param n the block index
744 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
745 * @param mx the x component of the predicted motion vector
746 * @param my the y component of the predicted motion vector
747 */
748static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
749 const int index8= scan8[n];
750 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
751 const int left_ref= h->ref_cache[list][ index8 - 1 ];
752 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
753 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
754 const int16_t * C;
755 int diagonal_ref, match_count;
756
0da71265 757 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 758
0da71265 759/* mv_cache
115329f1 760 B . . A T T T T
0da71265
MN
761 U . . L . . , .
762 U . . L . . . .
763 U . . L . . , .
764 . . . L . . . .
765*/
1924f3ce
MN
766
767 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
768 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 769 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
770 if(match_count > 1){ //most common
771 *mx= mid_pred(A[0], B[0], C[0]);
772 *my= mid_pred(A[1], B[1], C[1]);
773 }else if(match_count==1){
774 if(left_ref==ref){
775 *mx= A[0];
115329f1 776 *my= A[1];
1924f3ce
MN
777 }else if(top_ref==ref){
778 *mx= B[0];
115329f1 779 *my= B[1];
0da71265 780 }else{
1924f3ce 781 *mx= C[0];
115329f1 782 *my= C[1];
0da71265
MN
783 }
784 }else{
1924f3ce 785 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 786 *mx= A[0];
115329f1 787 *my= A[1];
0da71265 788 }else{
1924f3ce
MN
789 *mx= mid_pred(A[0], B[0], C[0]);
790 *my= mid_pred(A[1], B[1], C[1]);
0da71265 791 }
0da71265 792 }
115329f1 793
a9c9a240 794 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
795}
796
797/**
798 * gets the directionally predicted 16x8 MV.
799 * @param n the block index
800 * @param mx the x component of the predicted motion vector
801 * @param my the y component of the predicted motion vector
802 */
803static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
804 if(n==0){
805 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
806 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
807
a9c9a240 808 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 809
0da71265
MN
810 if(top_ref == ref){
811 *mx= B[0];
812 *my= B[1];
813 return;
814 }
815 }else{
816 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
817 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 818
a9c9a240 819 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
820
821 if(left_ref == ref){
822 *mx= A[0];
823 *my= A[1];
824 return;
825 }
826 }
827
828 //RARE
829 pred_motion(h, n, 4, list, ref, mx, my);
830}
831
832/**
833 * gets the directionally predicted 8x16 MV.
834 * @param n the block index
835 * @param mx the x component of the predicted motion vector
836 * @param my the y component of the predicted motion vector
837 */
838static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
839 if(n==0){
840 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
841 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 842
a9c9a240 843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
844
845 if(left_ref == ref){
846 *mx= A[0];
847 *my= A[1];
848 return;
849 }
850 }else{
1924f3ce
MN
851 const int16_t * C;
852 int diagonal_ref;
853
854 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 855
a9c9a240 856 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 857
115329f1 858 if(diagonal_ref == ref){
0da71265
MN
859 *mx= C[0];
860 *my= C[1];
861 return;
862 }
0da71265
MN
863 }
864
865 //RARE
866 pred_motion(h, n, 2, list, ref, mx, my);
867}
868
869static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
870 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
871 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
872
a9c9a240 873 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
874
875 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
876 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
877 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
115329f1 878
0da71265
MN
879 *mx = *my = 0;
880 return;
881 }
115329f1 882
0da71265
MN
883 pred_motion(h, 0, 4, 0, 0, mx, my);
884
885 return;
886}
887
8b1fd554
MN
888static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
889 int poc0 = h->ref_list[0][i].poc;
890 int td = av_clip(poc1 - poc0, -128, 127);
891 if(td == 0 || h->ref_list[0][i].long_ref){
892 return 256;
893 }else{
894 int tb = av_clip(poc - poc0, -128, 127);
895 int tx = (16384 + (FFABS(td) >> 1)) / td;
896 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
897 }
898}
899
5ad984c9 900static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
901 MpegEncContext * const s = &h->s;
902 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 903 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
904 int i, field;
905 for(field=0; field<2; field++){
906 const int poc = h->s.current_picture_ptr->field_poc[field];
907 const int poc1 = h->ref_list[1][0].field_poc[field];
908 for(i=0; i < 2*h->ref_count[0]; i++)
909 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 910 }
8b1fd554
MN
911
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 914 }
5ad984c9 915}
f4d3382d
MN
916
917static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
918 MpegEncContext * const s = &h->s;
919 Picture * const ref1 = &h->ref_list[1][0];
920 int j, old_ref, rfield;
921 int start= mbafi ? 16 : 0;
922 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
923 int interl= mbafi || s->picture_structure != PICT_FRAME;
924
925 /* bogus; fills in for missing frames */
926 memset(map[list], 0, sizeof(map[list]));
927
928 for(rfield=0; rfield<2; rfield++){
929 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
930 int poc = ref1->ref_poc[colfield][list][old_ref];
931
932 if (!interl)
933 poc |= 3;
934 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
935 poc= (poc&~3) + rfield + 1;
936
937 for(j=start; j<end; j++){
938 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
939 int cur_ref= mbafi ? (j-16)^field : j;
940 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
941 if(rfield == field)
942 map[list][old_ref] = cur_ref;
943 break;
944 }
945 }
946 }
947 }
948}
949
2f944356
LM
950static inline void direct_ref_list_init(H264Context * const h){
951 MpegEncContext * const s = &h->s;
952 Picture * const ref1 = &h->ref_list[1][0];
953 Picture * const cur = s->current_picture_ptr;
bbc78fb4 954 int list, j, field;
f4d3382d
MN
955 int sidx= (s->picture_structure&1)^1;
956 int ref1sidx= (ref1->reference&1)^1;
aa617518 957
2f944356 958 for(list=0; list<2; list++){
2879c75f 959 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 960 for(j=0; j<h->ref_count[list]; j++)
42de393d 961 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 962 }
aa617518 963
7762cc3d 964 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
965 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
966 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 967 }
aa617518 968
48e025e5 969 cur->mbaff= FRAME_MBAFF;
aa617518 970
9701840b 971 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 972 return;
aa617518 973
2f944356 974 for(list=0; list<2; list++){
f4d3382d
MN
975 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
976 for(field=0; field<2; field++)
977 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
978 }
979}
5ad984c9
LM
980
981static inline void pred_direct_motion(H264Context * const h, int *mb_type){
982 MpegEncContext * const s = &h->s;
d00eac6c
MN
983 int b8_stride = h->b8_stride;
984 int b4_stride = h->b_stride;
985 int mb_xy = h->mb_xy;
986 int mb_type_col[2];
987 const int16_t (*l1mv0)[2], (*l1mv1)[2];
988 const int8_t *l1ref0, *l1ref1;
5ad984c9 989 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 990 unsigned int sub_mb_type;
5ad984c9
LM
991 int i8, i4;
992
5d18eaad 993#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
994
995 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 996 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
997 int cur_poc = s->current_picture_ptr->poc;
998 int *col_poc = h->ref_list[1]->field_poc;
999 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1000 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1001 b8_stride = 0;
60c9b24d 1002 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1003 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1004 mb_xy += s->mb_stride*fieldoff;
1005 }
1006 goto single_col;
1007 }else{ // AFL/AFR/FR/FL -> AFR/FR
1008 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1009 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1010 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1011 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1012 b8_stride *= 3;
1013 b4_stride *= 6;
1014 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1015 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1016 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1017 && !is_b8x8){
1018 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1019 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1020 }else{
1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1023 }
1024 }else{ // AFR/FR -> AFR/FR
1025single_col:
1026 mb_type_col[0] =
1027 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1028 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1029 /* FIXME save sub mb types from previous frames (or derive from MVs)
1030 * so we know exactly what block size to use */
1031 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1032 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1034 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1035 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1036 }else{
1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1039 }
d00eac6c 1040 }
5ad984c9 1041 }
5ad984c9 1042
7d54ecc9
MN
1043 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1044 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1045 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1046 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1047 if(!b8_stride){
1048 if(s->mb_y&1){
1049 l1ref0 += h->b8_stride;
1050 l1ref1 += h->b8_stride;
1051 l1mv0 += 2*b4_stride;
1052 l1mv1 += 2*b4_stride;
1053 }
d00eac6c 1054 }
115329f1 1055
5ad984c9
LM
1056 if(h->direct_spatial_mv_pred){
1057 int ref[2];
1058 int mv[2][2];
1059 int list;
1060
5d18eaad
LM
1061 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1062
5ad984c9
LM
1063 /* ref = min(neighbors) */
1064 for(list=0; list<2; list++){
1065 int refa = h->ref_cache[list][scan8[0] - 1];
1066 int refb = h->ref_cache[list][scan8[0] - 8];
1067 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1068 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1069 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1070 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1071 if(ref[list] < 0)
1072 ref[list] = -1;
1073 }
1074
1075 if(ref[0] < 0 && ref[1] < 0){
1076 ref[0] = ref[1] = 0;
1077 mv[0][0] = mv[0][1] =
1078 mv[1][0] = mv[1][1] = 0;
1079 }else{
1080 for(list=0; list<2; list++){
1081 if(ref[list] >= 0)
1082 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1083 else
1084 mv[list][0] = mv[list][1] = 0;
1085 }
1086 }
1087
1088 if(ref[1] < 0){
50b3ab0f
LM
1089 if(!is_b8x8)
1090 *mb_type &= ~MB_TYPE_L1;
1091 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1092 }else if(ref[0] < 0){
50b3ab0f
LM
1093 if(!is_b8x8)
1094 *mb_type &= ~MB_TYPE_L0;
1095 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1096 }
1097
d00eac6c 1098 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1099 for(i8=0; i8<4; i8++){
1100 int x8 = i8&1;
1101 int y8 = i8>>1;
1102 int xy8 = x8+y8*b8_stride;
1103 int xy4 = 3*x8+y8*b4_stride;
1104 int a=0, b=0;
1105
1106 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1107 continue;
1108 h->sub_mb_type[i8] = sub_mb_type;
1109
1110 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1112 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1113 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1114 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1115 if(ref[0] > 0)
1116 a= pack16to32(mv[0][0],mv[0][1]);
1117 if(ref[1] > 0)
1118 b= pack16to32(mv[1][0],mv[1][1]);
1119 }else{
1120 a= pack16to32(mv[0][0],mv[0][1]);
1121 b= pack16to32(mv[1][0],mv[1][1]);
1122 }
1123 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1124 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1125 }
1126 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1127 int a=0, b=0;
1128
cec93959
LM
1129 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1130 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1131 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1132 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1133 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1134 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1135 if(ref[0] > 0)
d19f5acb 1136 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1137 if(ref[1] > 0)
d19f5acb 1138 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1139 }else{
d19f5acb
MN
1140 a= pack16to32(mv[0][0],mv[0][1]);
1141 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1142 }
d19f5acb
MN
1143 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1144 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1145 }else{
1146 for(i8=0; i8<4; i8++){
1147 const int x8 = i8&1;
1148 const int y8 = i8>>1;
115329f1 1149
5ad984c9
LM
1150 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1151 continue;
1152 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1153
5ad984c9
LM
1154 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1155 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1156 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1157 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1158
5ad984c9 1159 /* col_zero_flag */
2ccd25d0
MN
1160 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1161 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1162 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1163 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1164 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1165 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1166 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1167 if(ref[0] == 0)
1168 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1169 if(ref[1] == 0)
1170 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1171 }
1172 }else
5ad984c9 1173 for(i4=0; i4<4; i4++){
2ccd25d0 1174 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1176 if(ref[0] == 0)
1177 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1178 if(ref[1] == 0)
1179 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1180 }
1181 }
1182 }
1183 }
1184 }
1185 }else{ /* direct temporal mv pred */
5d18eaad
LM
1186 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1187 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1188 int ref_offset= 0;
5d18eaad 1189
cc615d2c 1190 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1191 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1192 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1193 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1194 }
48e025e5 1195 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1196 ref_offset += 16;
48e025e5 1197
cc615d2c
MN
1198 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1199 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1200 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1201
cc615d2c
MN
1202 for(i8=0; i8<4; i8++){
1203 const int x8 = i8&1;
1204 const int y8 = i8>>1;
1205 int ref0, scale;
1206 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1207
cc615d2c
MN
1208 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1209 continue;
1210 h->sub_mb_type[i8] = sub_mb_type;
1211
1212 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1213 if(IS_INTRA(mb_type_col[y8])){
1214 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1216 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1217 continue;
1218 }
1219
1220 ref0 = l1ref0[x8 + y8*b8_stride];
1221 if(ref0 >= 0)
f4d3382d 1222 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1223 else{
f4d3382d 1224 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1225 l1mv= l1mv1;
1226 }
1227 scale = dist_scale_factor[ref0];
1228 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1229
1230 {
1231 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1232 int my_col = (mv_col[1]<<y_shift)/2;
1233 int mx = (scale * mv_col[0] + 128) >> 8;
1234 int my = (scale * my_col + 128) >> 8;
1235 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1236 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1237 }
5d18eaad 1238 }
cc615d2c
MN
1239 return;
1240 }
5d18eaad
LM
1241
1242 /* one-to-one mv scaling */
1243
5ad984c9 1244 if(IS_16X16(*mb_type)){
fda51641
MN
1245 int ref, mv0, mv1;
1246
5ad984c9 1247 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1248 if(IS_INTRA(mb_type_col[0])){
fda51641 1249 ref=mv0=mv1=0;
5ad984c9 1250 }else{
f4d3382d
MN
1251 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1252 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1253 const int scale = dist_scale_factor[ref0];
8583bef8 1254 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1255 int mv_l0[2];
5d18eaad
LM
1256 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1257 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1258 ref= ref0;
1259 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1260 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1261 }
fda51641
MN
1262 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1263 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1264 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1265 }else{
1266 for(i8=0; i8<4; i8++){
1267 const int x8 = i8&1;
1268 const int y8 = i8>>1;
5d18eaad 1269 int ref0, scale;
bf4e3bd2 1270 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1271
5ad984c9
LM
1272 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1273 continue;
1274 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1275 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1276 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1277 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1278 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1279 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1280 continue;
1281 }
115329f1 1282
f4d3382d 1283 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1284 if(ref0 >= 0)
5d18eaad 1285 ref0 = map_col_to_list0[0][ref0];
8583bef8 1286 else{
f4d3382d 1287 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1288 l1mv= l1mv1;
1289 }
5d18eaad 1290 scale = dist_scale_factor[ref0];
115329f1 1291
5ad984c9 1292 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1293 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1294 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1295 int mx = (scale * mv_col[0] + 128) >> 8;
1296 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1297 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1298 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1299 }else
5ad984c9 1300 for(i4=0; i4<4; i4++){
2ccd25d0 1301 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1302 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1303 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1304 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1305 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1306 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1307 }
1308 }
1309 }
1310 }
1311}
1312
0da71265
MN
1313static inline void write_back_motion(H264Context *h, int mb_type){
1314 MpegEncContext * const s = &h->s;
0da71265
MN
1315 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1316 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1317 int list;
1318
2ea39252
LM
1319 if(!USES_LIST(mb_type, 0))
1320 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1321
3425501d 1322 for(list=0; list<h->list_count; list++){
0da71265 1323 int y;
53b19144 1324 if(!USES_LIST(mb_type, list))
5ad984c9 1325 continue;
115329f1 1326
0da71265
MN
1327 for(y=0; y<4; y++){
1328 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1329 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1330 }
9e528114 1331 if( h->pps.cabac ) {
e6e77eb6
LM
1332 if(IS_SKIP(mb_type))
1333 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1334 else
9e528114
LA
1335 for(y=0; y<4; y++){
1336 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1337 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1338 }
1339 }
53b19144
LM
1340
1341 {
191e8ca7 1342 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1343 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1344 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1345 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1346 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1347 }
1348 }
115329f1 1349
9f5c1037 1350 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1351 if(IS_8X8(mb_type)){
53b19144
LM
1352 uint8_t *direct_table = &h->direct_table[b8_xy];
1353 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1354 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1355 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1356 }
1357 }
0da71265
MN
1358}
1359
1360/**
1361 * Decodes a network abstraction layer unit.
1362 * @param consumed is the number of bytes used as input
1363 * @param length is the length of the array
3b66c4c5 1364 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1365 * @returns decoded bytes, might be src+1 if no escapes
0da71265 1366 */
30317501 1367static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1368 int i, si, di;
1369 uint8_t *dst;
24456882 1370 int bufidx;
0da71265 1371
bb270c08 1372// src[0]&0x80; //forbidden bit
0da71265
MN
1373 h->nal_ref_idc= src[0]>>5;
1374 h->nal_unit_type= src[0]&0x1F;
1375
1376 src++; length--;
115329f1 1377#if 0
0da71265
MN
1378 for(i=0; i<length; i++)
1379 printf("%2X ", src[i]);
1380#endif
e08715d3
MN
1381
1382#ifdef HAVE_FAST_UNALIGNED
1383# ifdef HAVE_FAST_64BIT
1384# define RS 7
1385 for(i=0; i+1<length; i+=9){
1386 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1387# else
1388# define RS 3
1389 for(i=0; i+1<length; i+=5){
1390 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1391# endif
1392 continue;
1393 if(i>0 && !src[i]) i--;
1394 while(src[i]) i++;
1395#else
1396# define RS 0
0da71265
MN
1397 for(i=0; i+1<length; i+=2){
1398 if(src[i]) continue;
1399 if(i>0 && src[i-1]==0) i--;
e08715d3 1400#endif
0da71265
MN
1401 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1402 if(src[i+2]!=3){
1403 /* startcode, so we must be past the end */
1404 length=i;
1405 }
1406 break;
1407 }
abb27cfb 1408 i-= RS;
0da71265
MN
1409 }
1410
1411 if(i>=length-1){ //no escaped 0
1412 *dst_length= length;
1413 *consumed= length+1; //+1 for the header
115329f1 1414 return src;
0da71265
MN
1415 }
1416
24456882 1417 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
d4369630 1418 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 1419 dst= h->rbsp_buffer[bufidx];
0da71265 1420
ac658be5
FOL
1421 if (dst == NULL){
1422 return NULL;
1423 }
1424
3b66c4c5 1425//printf("decoding esc\n");
0da71265 1426 si=di=0;
115329f1 1427 while(si<length){
0da71265
MN
1428 //remove escapes (very rare 1:2^22)
1429 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1430 if(src[si+2]==3){ //escape
1431 dst[di++]= 0;
1432 dst[di++]= 0;
1433 si+=3;
c8470cc1 1434 continue;
0da71265
MN
1435 }else //next start code
1436 break;
1437 }
1438
1439 dst[di++]= src[si++];
1440 }
1441
d4369630
AS
1442 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1443
0da71265
MN
1444 *dst_length= di;
1445 *consumed= si + 1;//+1 for the header
90b5b51e 1446//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1447 return dst;
1448}
1449
0da71265
MN
1450/**
1451 * identifies the exact end of the bitstream
1452 * @return the length of the trailing, or 0 if damaged
1453 */
30317501 1454static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1455 int v= *src;
1456 int r;
1457
a9c9a240 1458 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1459
1460 for(r=1; r<9; r++){
1461 if(v&1) return r;
1462 v>>=1;
1463 }
1464 return 0;
1465}
1466
1467/**
1412060e 1468 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1469 * @param qp quantization parameter
1470 */
239ea04c 1471static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1472#define stride 16
1473 int i;
1474 int temp[16]; //FIXME check if this is a good idea
1475 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1476 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1477
1478//memset(block, 64, 2*256);
1479//return;
1480 for(i=0; i<4; i++){
1481 const int offset= y_offset[i];
1482 const int z0= block[offset+stride*0] + block[offset+stride*4];
1483 const int z1= block[offset+stride*0] - block[offset+stride*4];
1484 const int z2= block[offset+stride*1] - block[offset+stride*5];
1485 const int z3= block[offset+stride*1] + block[offset+stride*5];
1486
1487 temp[4*i+0]= z0+z3;
1488 temp[4*i+1]= z1+z2;
1489 temp[4*i+2]= z1-z2;
1490 temp[4*i+3]= z0-z3;
1491 }
1492
1493 for(i=0; i<4; i++){
1494 const int offset= x_offset[i];
1495 const int z0= temp[4*0+i] + temp[4*2+i];
1496 const int z1= temp[4*0+i] - temp[4*2+i];
1497 const int z2= temp[4*1+i] - temp[4*3+i];
1498 const int z3= temp[4*1+i] + temp[4*3+i];
1499
1412060e 1500 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1501 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1502 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1503 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1504 }
1505}
1506
#if 0
/* Disabled (compiled out). Uses the "stride" macro that is #defined inside
 * h264_luma_dc_dequant_idct_c() above. */
/**
 * DCT transforms the 16 dc values.
 * Same two butterfly passes as h264_luma_dc_dequant_idct_c(), but the
 * results are halved (>>1) instead of being dequantized.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    /* first pass: block -> temp */
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    /* second pass: temp -> block, halved */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
1546
0da71265
MN
1547#undef xStride
1548#undef stride
1549
239ea04c 1550static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1551 const int stride= 16*2;
1552 const int xStride= 16;
1553 int a,b,c,d,e;
1554
1555 a= block[stride*0 + xStride*0];
1556 b= block[stride*0 + xStride*1];
1557 c= block[stride*1 + xStride*0];
1558 d= block[stride*1 + xStride*1];
1559
1560 e= a-b;
1561 a= a+b;
1562 b= c-d;
1563 c= c+d;
1564
239ea04c
LM
1565 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1566 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1567 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1568 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1569}
1570
e5017ab8 1571#if 0
/* NOTE: everything up to the matching #endif is compiled out. */
0da71265
MN
/**
 * In-place 2x2 butterfly over the four chroma DC values found at
 * block[0], block[16], block[32] and block[48]; no scaling is applied.
 */
1572static void chroma_dc_dct_c(DCTELEM *block){
1573 const int stride= 16*2;
1574 const int xStride= 16;
1575 int a,b,c,d,e;
1576
1577 a= block[stride*0 + xStride*0];
1578 b= block[stride*0 + xStride*1];
1579 c= block[stride*1 + xStride*0];
1580 d= block[stride*1 + xStride*1];
1581
/* row stage; a, b and c are reused for the intermediate sums/differences */
1582 e= a-b;
1583 a= a+b;
1584 b= c-d;
1585 c= c+d;
1586
1587 block[stride*0 + xStride*0]= (a+c);
1588 block[stride*0 + xStride*1]= (e+b);
1589 block[stride*1 + xStride*0]= (a-c);
1590 block[stride*1 + xStride*1]= (e-b);
1591}
e5017ab8 1592#endif
0da71265
MN
1593
1594/**
1595 * gets the chroma qp.
1596 */
4691a77d 1597static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1598 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1599}
1600
0da71265
MN
/**
 * Motion-compensates one partition from a single reference picture.
 *
 * Luma is produced by qpix_op[] (indexed by the fractional part of the
 * vector), then both chroma planes by chroma_op(). When the referenced area
 * reaches outside the picture by more than the allowed margin, the source is
 * first copied through ff_emulated_edge_mc() into s->edge_emu_buffer.
 *
 * @param pic           reference picture to read from
 * @param n             block index; scan8[n] selects the vector in mv_cache
 * @param square        if 0, a second luma call is made at offset delta
 * @param chroma_height height passed on to chroma_op()
 * @param delta         byte offset of the second luma call (see square)
 * @param list          reference list whose mv_cache entry is used
 * @param src_x_offset  added to the vector after multiplication by 8
 * @param src_y_offset  added to the vector after multiplication by 8
 */
1601static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1602 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1603 int src_x_offset, int src_y_offset,
1604 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1605 MpegEncContext * const s = &h->s;
1606 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1607 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* luma_xy is derived here, before my receives the field-parity adjustment below */
0da71265 1608 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1609 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1610 uint8_t * src_cb, * src_cr;
1611 int extra_width= h->emu_edge_width;
1612 int extra_height= h->emu_edge_height;
0da71265
MN
1613 int emu=0;
1614 const int full_mx= mx>>2;
1615 const int full_my= my>>2;
fbd312fd 1616 const int pic_width = 16*s->mb_width;
0d43dd8c 1617 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1618
/* the usable margin shrinks by 3 whenever the low three bits of the vector component are non-zero */
0da71265
MN
1619 if(mx&7) extra_width -= 3;
1620 if(my&7) extra_height -= 3;
115329f1
DB
1621
1622 if( full_mx < 0-extra_width
1623 || full_my < 0-extra_height
1624 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1625 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
/* build a (16+5)x(16+5) luma patch starting 2 samples up/left, then point src_y back at its centre */
1626 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1627 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1628 emu=1;
1629 }
115329f1 1630
5d18eaad 1631 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1632 if(!square){
5d18eaad 1633 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1634 }
115329f1 1635
/* gray-only decoding: chroma is skipped entirely */
87352549 1636 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1637
0d43dd8c 1638 if(MB_FIELD){
5d18eaad 1639 // chroma offset when predicting from a field of opposite parity
2143b118 1640 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1641 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1642 }
1643 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1644 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645
/* the same edge buffer is reused for Cb and then Cr, so each plane is consumed before the next is built */
0da71265 1646 if(emu){
5d18eaad 1647 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1648 src_cb= s->edge_emu_buffer;
1649 }
5d18eaad 1650 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1651
1652 if(emu){
5d18eaad 1653 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1654 src_cr= s->edge_emu_buffer;
1655 }
5d18eaad 1656 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1657}
1658
9f2d1b4f 1659static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1660 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1661 int x_offset, int y_offset,
1662 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1663 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1664 int list0, int list1){
1665 MpegEncContext * const s = &h->s;
1666 qpel_mc_func *qpix_op= qpix_put;
1667 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1668
5d18eaad
LM
1669 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1670 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1671 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1672 x_offset += 8*s->mb_x;
0d43dd8c 1673 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1674
0da71265 1675 if(list0){
1924f3ce 1676 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1677 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1678 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1679 qpix_op, chroma_op);
1680
1681 qpix_op= qpix_avg;
1682 chroma_op= chroma_avg;
1683 }
1684
1685 if(list1){
1924f3ce 1686 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1687 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1688 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1689 qpix_op, chroma_op);
1690 }
1691}
1692
9f2d1b4f
LM
/**
 * Weighted motion compensation of one partition.
 *
 * Bi-predicted case (list0 && list1): list0 is predicted into the
 * destination, list1 into temporaries inside s->obmc_scratchpad, and the two
 * are merged by the biweight functions. With h->use_weight == 2 the weights
 * come from h->implicit_weight (weight1 = 64 - weight0, log2 denominator 5,
 * offset 0); otherwise the explicit luma and chroma weight/offset tables
 * are used.
 *
 * Single-list case: the partition is predicted with the "put" functions and
 * then weighted in place; chroma is only weighted if h->use_weight_chroma.
 *
 * @param n             block index, looked up through scan8[] in ref_cache
 * @param square, chroma_height, delta  forwarded unchanged to mc_dir_part()
 * @param x_offset, y_offset  partition offset inside the macroblock
 * @param list0, list1  non-zero if the respective reference list is used
 */
1693static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1694 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1695 int x_offset, int y_offset,
1696 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1697 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1698 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1699 int list0, int list1){
1700 MpegEncContext * const s = &h->s;
1701
5d18eaad
LM
1702 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1703 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1704 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1705 x_offset += 8*s->mb_x;
0d43dd8c 1706 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1707
9f2d1b4f
LM
1708 if(list0 && list1){
1709 /* don't optimize for luma-only case, since B-frames usually
1710 * use implicit weights => chroma too. */
/* scratchpad layout: Cb at +0, Cr at +8, luma after 8 chroma lines */
1711 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1712 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1713 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1714 int refn0 = h->ref_cache[0][ scan8[n] ];
1715 int refn1 = h->ref_cache[1][ scan8[n] ];
1716
1717 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1718 dest_y, dest_cb, dest_cr,
1719 x_offset, y_offset, qpix_put, chroma_put);
1720 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1721 tmp_y, tmp_cb, tmp_cr,
1722 x_offset, y_offset, qpix_put, chroma_put);
1723
1724 if(h->use_weight == 2){
1725 int weight0 = h->implicit_weight[refn0][refn1];
1726 int weight1 = 64 - weight0;
5d18eaad
LM
1727 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1728 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1730 }else{
5d18eaad 1731 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1732 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1733 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1734 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1735 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1736 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1737 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1738 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1739 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1740 }
1741 }else{
/* exactly one list is in use: list 1 if list1 is set, otherwise list 0 */
1742 int list = list1 ? 1 : 0;
1743 int refn = h->ref_cache[list][ scan8[n] ];
1744 Picture *ref= &h->ref_list[list][refn];
1745 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1746 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1747 qpix_put, chroma_put);
1748
5d18eaad 1749 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1750 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1751 if(h->use_weight_chroma){
5d18eaad 1752 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1753 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1754 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1755 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1756 }
1757 }
1758}
1759
1760static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1761 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1762 int x_offset, int y_offset,
1763 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1764 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1765 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1766 int list0, int list1){
1767 if((h->use_weight==2 && list0 && list1
1768 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1769 || h->use_weight==1)
1770 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1771 x_offset, y_offset, qpix_put, chroma_put,
1772 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1773 else
1774 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1775 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1776}
1777
513fbd8e
LM
1778static inline void prefetch_motion(H264Context *h, int list){
1779 /* fetch pixels for estimated mv 4 macroblocks ahead
1780 * optimized for 64byte cache lines */
1781 MpegEncContext * const s = &h->s;
1782 const int refn = h->ref_cache[list][scan8[0]];
1783 if(refn >= 0){
1784 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1785 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1786 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1787 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1788 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1789 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1790 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1791 }
1792}
1793
0da71265
MN
/**
 * Inter prediction for the current macroblock.
 *
 * Reads the macroblock type of h->mb_xy and calls mc_part() once per
 * partition (16x16, 16x8, 8x16, or 8x8 with its sub-partitions), choosing the
 * function-table entries that belong to the partition size. prefetch_motion()
 * is called for list 0 before and for list 1 after the compensation.
 *
 * @param dest_y, dest_cb, dest_cr  destination of the macroblock
 * @param qpix_put, qpix_avg        luma function tables, indexed by size class
 * @param chroma_put, chroma_avg    chroma function tables, indexed by size class
 * @param weight_op, weight_avg     weighting function tables; mc_part() receives
 *                                  a pointer to the entry for the partition shape
 */
1794static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1795 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1796 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1797 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1798 MpegEncContext * const s = &h->s;
64514ee8 1799 const int mb_xy= h->mb_xy;
0da71265 1800 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1801
0da71265 1802 assert(IS_INTER(mb_type));
115329f1 1803
513fbd8e
LM
1804 prefetch_motion(h, 0);
1805
0da71265
MN
1806 if(IS_16X16(mb_type)){
1807 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1808 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1809 &weight_op[0], &weight_avg[0],
0da71265
MN
1810 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1811 }else if(IS_16X8(mb_type)){
/* two partitions, blocks 0 and 8; the second one sits at y_offset 4 */
1812 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1813 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1814 &weight_op[1], &weight_avg[1],
0da71265
MN
1815 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1816 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1817 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1818 &weight_op[1], &weight_avg[1],
0da71265
MN
1819 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1820 }else if(IS_8X16(mb_type)){
/* two partitions, blocks 0 and 4; the second one sits at x_offset 4 */
5d18eaad 1821 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1822 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1823 &weight_op[2], &weight_avg[2],
0da71265 1824 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1825 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1826 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1827 &weight_op[2], &weight_avg[2],
0da71265
MN
1828 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1829 }else{
1830 int i;
115329f1 1831
0da71265
MN
1832 assert(IS_8X8(mb_type));
1833
/* four 8x8 quadrants; each carries its own sub type and prediction directions */
1834 for(i=0; i<4; i++){
1835 const int sub_mb_type= h->sub_mb_type[i];
1836 const int n= 4*i;
1837 int x_offset= (i&1)<<2;
1838 int y_offset= (i&2)<<1;
1839
1840 if(IS_SUB_8X8(sub_mb_type)){
1841 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1842 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1843 &weight_op[3], &weight_avg[3],
0da71265
MN
1844 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1845 }else if(IS_SUB_8X4(sub_mb_type)){
1846 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1847 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1848 &weight_op[4], &weight_avg[4],
0da71265
MN
1849 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1850 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1851 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1852 &weight_op[4], &weight_avg[4],
0da71265
MN
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1855 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1856 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1857 &weight_op[5], &weight_avg[5],
0da71265 1858 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1859 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1860 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1861 &weight_op[5], &weight_avg[5],
0da71265
MN
1862 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863 }else{
1864 int j;
1865 assert(IS_SUB_4X4(sub_mb_type));
/* four 4x4 blocks inside the quadrant */
1866 for(j=0; j<4; j++){
1867 int sub_x_offset= x_offset + 2*(j&1);
1868 int sub_y_offset= y_offset + (j&2);
1869 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1870 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1871 &weight_op[6], &weight_avg[6],
0da71265
MN
1872 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1873 }
1874 }
1875 }
1876 }
513fbd8e
LM
1877
1878 prefetch_motion(h, 1);
0da71265
MN
1879}
1880
8140955d
MN
1881static av_cold void init_cavlc_level_tab(void){
1882 int suffix_length, mask;
1883 unsigned int i;
1884
1885 for(suffix_length=0; suffix_length<7; suffix_length++){
1886 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1887 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1888 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1889
1890 mask= -(level_code&1);
1891 level_code= (((2+level_code)>>1) ^ mask) - mask;
1892 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1893 cavlc_level_tab[suffix_length][i][0]= level_code;
1894 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1895 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1896 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1897 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1898 }else{
1899 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1900 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1901 }
1902 }
1903 }
1904}
1905
98a6fff9 1906static av_cold void decode_init_vlc(void){
0da71265
MN
1907 static int done = 0;
1908
1909 if (!done) {
1910 int i;
910e3668 1911 int offset;
0da71265
MN
1912 done = 1;
1913
910e3668
AC
1914 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1915 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1916 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1917 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1918 &chroma_dc_coeff_token_bits[0], 1, 1,
1919 INIT_VLC_USE_NEW_STATIC);
0da71265 1920
910e3668 1921 offset = 0;
0da71265 1922 for(i=0; i<4; i++){
910e3668
AC
1923 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1924 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1925 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1926 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1927 &coeff_token_bits[i][0], 1, 1,
1928 INIT_VLC_USE_NEW_STATIC);
1929 offset += coeff_token_vlc_tables_size[i];
0da71265 1930 }
910e3668
AC
1931 /*
1932 * This is a one time safety check to make sure that
1933 * the packed static coeff_token_vlc table sizes
1934 * were initialized correctly.
1935 */
37d3e066 1936 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1937
1938 for(i=0; i<3; i++){
910e3668
AC
1939 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1940 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1941 init_vlc(&chroma_dc_total_zeros_vlc[i],
1942 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1943 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1944 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1945 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1946 }
1947 for(i=0; i<15; i++){
910e3668
AC
1948 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1949 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1950 init_vlc(&total_zeros_vlc[i],
1951 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1952 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1953 &total_zeros_bits[i][0], 1, 1,
1954 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1955 }
1956
1957 for(i=0; i<6; i++){
910e3668
AC
1958 run_vlc[i].table = run_vlc_tables[i];
1959 run_vlc[i].table_allocated = run_vlc_tables_size;
1960 init_vlc(&run_vlc[i],
1961 RUN_VLC_BITS, 7,
0da71265 1962 &run_len [i][0], 1, 1,
910e3668
AC
1963 &run_bits[i][0], 1, 1,
1964 INIT_VLC_USE_NEW_STATIC);
0da71265 1965 }
910e3668
AC
1966 run7_vlc.table = run7_vlc_table,
1967 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1968 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1969 &run_len [6][0], 1, 1,
910e3668
AC
1970 &run_bits[6][0], 1, 1,
1971 INIT_VLC_USE_NEW_STATIC);
8140955d
MN
1972
1973 init_cavlc_level_tab();
0da71265
MN
1974 }
1975}
1976
0da71265 1977static void free_tables(H264Context *h){
7978debd 1978 int i;
afebe2f7 1979 H264Context *hx;
0da71265 1980 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1981 av_freep(&h->chroma_pred_mode_table);
1982 av_freep(&h->cbp_table);
9e528114
LA
1983 av_freep(&h->mvd_table[0]);
1984 av_freep(&h->mvd_table[1]);
5ad984c9 1985 av_freep(&h->direct_table);
0da71265
MN
1986 av_freep(&h->non_zero_count);
1987 av_freep(&h->slice_table_base);
1988 h->slice_table= NULL;
e5017ab8 1989
0da71265
MN
1990 av_freep(&h->mb2b_xy);
1991 av_freep(&h->mb2b8_xy);
9f2d1b4f 1992
afebe2f7
1993 for(i = 0; i < h->s.avctx->thread_count; i++) {
1994 hx = h->thread_context[i];
1995 if(!hx) continue;
1996 av_freep(&hx->top_borders[1]);
1997 av_freep(&hx->top_borders[0]);
1998 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 1999 }
0da71265
MN
2000}
2001
239ea04c
LM
2002static void init_dequant8_coeff_table(H264Context *h){
2003 int i,q,x;
548a1c8a 2004 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2005 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2006 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2007
2008 for(i=0; i<2; i++ ){
2009 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2010 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2011 break;
2012 }
2013
2014 for(q=0; q<52; q++){
d9ec210b
DP
2015 int shift = div6[q];
2016 int idx = rem6[q];
239ea04c 2017 for(x=0; x<64; x++)
548a1c8a
LM
2018 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2019 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2020 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2021 }
2022 }
2023}
2024
2025static void init_dequant4_coeff_table(H264Context *h){
2026 int i,j,q,x;
ab2e3e2c 2027 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2028 for(i=0; i<6; i++ ){
2029 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2030 for(j=0; j<i; j++){
2031 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2032 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2033 break;
2034 }
2035 }
2036 if(j<i)
2037 continue;
2038
2039 for(q=0; q<52; q++){
d9ec210b
DP
2040 int shift = div6[q] + 2;
2041 int idx = rem6[q];
239ea04c 2042 for(x=0; x<16; x++)
ab2e3e2c
LM
2043 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2044 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2045 h->pps.scaling_matrix4[i][x]) << shift;
2046 }
2047 }
2048}
2049
2050static void init_dequant_tables(H264Context *h){
2051 int i,x;
2052 init_dequant4_coeff_table(h);
2053 if(h->pps.transform_8x8_mode)
2054 init_dequant8_coeff_table(h);
2055 if(h->sps.transform_bypass){
2056 for(i=0; i<6; i++)
2057 for(x=0; x<16; x++)
2058 h->dequant4_coeff[i][0][x] = 1<<6;
2059 if(h->pps.transform_8x8_mode)
2060 for(i=0; i<2; i++)
2061 for(x=0; x<64; x++)
2062 h->dequant8_coeff[i][0][x] = 1<<6;
2063 }
2064}
2065
2066
0da71265
MN
2067/**
2068 * allocates tables.
3b66c4c5 2069 * needs width/height
0da71265
MN
2070 */
2071static int alloc_tables(H264Context *h){
2072 MpegEncContext * const s = &h->s;
7bc9090a 2073 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2074 int x,y;
0da71265
MN
2075
2076 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2077
53c05b1e 2078 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2079 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2080 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2081
7526ade2
MN
2082 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2083 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2084 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2085 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2086
b735aeea 2087 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2088 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2089
a55f20bd
LM
2090 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2091 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2092 for(y=0; y<s->mb_height; y++){
2093 for(x=0; x<s->mb_width; x++){
7bc9090a 2094 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2095 const int b_xy = 4*x + 4*y*h->b_stride;
2096 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2097
0da71265
MN
2098 h->mb2b_xy [mb_xy]= b_xy;
2099 h->mb2b8_xy[mb_xy]= b8_xy;
2100 }
2101 }
9f2d1b4f 2102
9c6221ae
GV
2103 s->obmc_scratchpad = NULL;
2104
56edbd81
LM
2105 if(!h->dequant4_coeff[0])
2106 init_dequant_tables(h);
2107
0da71265
MN
2108 return 0;
2109fail:
2110 free_tables(h);
2111 return -1;
2112}
2113
afebe2f7
2114/**
2115 * Mimic alloc_tables(), but for every context thread.
2116 */
2117static void clone_tables(H264Context *dst, H264Context *src){
2118 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2119 dst->non_zero_count = src->non_zero_count;
2120 dst->slice_table = src->slice_table;
2121 dst->cbp_table = src->cbp_table;
2122 dst->mb2b_xy = src->mb2b_xy;
2123 dst->mb2b8_xy = src->mb2b8_xy;
2124 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2125 dst->mvd_table[0] = src->mvd_table[0];
2126 dst->mvd_table[1] = src->mvd_table[1];
2127 dst->direct_table = src->direct_table;
2128
afebe2f7
2129 dst->s.obmc_scratchpad = NULL;
2130 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2131}
2132
2133/**
2134 * Init context
2135 * Allocate buffers which are not shared amongst multiple threads.
2136 */
2137static int context_init(H264Context *h){
afebe2f7
2138 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2139 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140
afebe2f7
2141 return 0;
2142fail:
2143 return -1; // free_tables will clean up for us
2144}
2145
98a6fff9 2146static av_cold void common_init(H264Context *h){
0da71265 2147 MpegEncContext * const s = &h->s;
0da71265
MN
2148
2149 s->width = s->avctx->width;
2150 s->height = s->avctx->height;
2151 s->codec_id= s->avctx->codec->id;
115329f1 2152
c92a30bb 2153 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2154
239ea04c 2155 h->dequant_coeff_pps= -1;
9a41c2c7 2156 s->unrestricted_mv=1;
0da71265 2157 s->decode=1; //FIXME
56edbd81 2158
a5805aa9
MN
2159 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2160
56edbd81
LM
2161 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2162 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2163}
2164
98a6fff9 2165static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2166 H264Context *h= avctx->priv_data;
2167 MpegEncContext * const s = &h->s;
2168
3edcacde 2169 MPV_decode_defaults(s);
115329f1 2170
0da71265
MN
2171 s->avctx = avctx;
2172 common_init(h);
2173
2174 s->out_format = FMT_H264;
2175 s->workaround_bugs= avctx->workaround_bugs;
2176
2177 // set defaults
0da71265 2178// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2179 s->quarter_sample = 1;
0da71265 2180 s->low_delay= 1;
7a9dba3c
MN
2181
2182 if(avctx->codec_id == CODEC_ID_SVQ3)
2183 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2184 else
1d42f410 2185 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2186
c2212338 2187 decode_init_vlc();
115329f1 2188
26165f99
MR
2189 if(avctx->extradata_size > 0 && avctx->extradata &&
2190 *(char *)avctx->extradata == 1){
4770b1b4
RT
2191 h->is_avc = 1;
2192 h->got_avcC = 0;
26165f99
MR
2193 } else {
2194 h->is_avc = 0;
4770b1b4
RT
2195 }
2196
afebe2f7 2197 h->thread_context[0] = h;
18c7be65 2198 h->outputed_poc = INT_MIN;
e4b8f1fa 2199 h->prev_poc_msb= 1<<16;
0da71265
MN
2200 return 0;
2201}
2202
af8aa846 2203static int frame_start(H264Context *h){
0da71265
MN
2204 MpegEncContext * const s = &h->s;
2205 int i;
2206
af8aa846
MN
2207 if(MPV_frame_start(s, s->avctx) < 0)
2208 return -1;
0da71265 2209 ff_er_frame_start(s);
3a22d7fa
JD
2210 /*
2211 * MPV_frame_start uses pict_type to derive key_frame.
2212 * This is incorrect for H.264; IDR markings must be used.
1412060e 2213 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2214 * See decode_nal_units().
2215 */
2216 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2217
2218 assert(s->linesize && s->uvlinesize);
2219
2220 for(i=0; i<16; i++){
2221 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2222 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2223 }
2224 for(i=0; i<4; i++){
2225 h->block_offset[16+i]=
2226 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2227 h->block_offset[24+16+i]=
2228 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2229 }
2230
934b0821
LM
2231 /* can't be in alloc_tables because linesize isn't known there.
2232 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2233 for(i = 0; i < s->avctx->thread_count; i++)
2234 if(!h->thread_context[i]->s.obmc_scratchpad)
2235 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2236
2237 /* some macroblocks will be accessed before they're available */
afebe2f7 2238 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2239 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2240
0da71265 2241// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2242
1412060e 2243 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2244 // that if we break out due to an error it can be released automatically
2245 // in the next MPV_frame_start().
2246 // SVQ3 as well as most other codecs have only last/next/current and thus
2247 // get released even with set reference, besides SVQ3 and others do not
2248 // mark frames as reference later "naturally".
2249 if(s->codec_id != CODEC_ID_SVQ3)
2250 s->current_picture_ptr->reference= 0;
357282c6
MN
2251
2252 s->current_picture_ptr->field_poc[0]=
2253 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2254 assert(s->current_picture_ptr->long_ref==0);
357282c6 2255
af8aa846 2256 return 0;
0da71265
MN
2257}
2258
/**
 * Saves the border pixels of the current macroblock.
 *
 * The right-most luma and chroma columns are stored in h->left_border[], the
 * bottom luma and chroma rows in h->top_borders[top_idx][s->mb_x]. Chroma is
 * skipped in gray-only decoding. With MBAFF (only considered when simple is
 * 0) the positions inside left_border[] and the top border slot depend on
 * whether this is the top or bottom macroblock of a pair and on MB_MBAFF.
 *
 * @param src_y, src_cb, src_cr  first sample of the macroblock in each plane
 * @param linesize, uvlinesize   line sizes used to step through the planes
 * @param simple                 non-zero skips the MBAFF handling and always
 *                               stores chroma
 */
93cc10fa 2259static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2260 MpegEncContext * const s = &h->s;
2261 int i;
5f7f9719
MN
2262 int step = 1;
2263 int offset = 1;
2264 int uvoffset= 1;
2265 int top_idx = 1;
2266 int skiplast= 0;
115329f1 2267
/* step back one line, so that line index i below refers to macroblock row i-1 */
53c05b1e
MN
2268 src_y -= linesize;
2269 src_cb -= uvlinesize;
2270 src_cr -= uvlinesize;
2271
5f7f9719
MN
2272 if(!simple && FRAME_MBAFF){
2273 if(s->mb_y&1){
/* odd mb_y: bottom macroblock of the pair */
2274 offset = MB_MBAFF ? 1 : 17;
2275 uvoffset= MB_MBAFF ? 1 : 9;
2276 if(!MB_MBAFF){
2277 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2278 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2279 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2280 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2281 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2282 }
2283 }
2284 }else{
/* even mb_y: top macroblock of the pair */
2285 if(!MB_MBAFF){
2286 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2287 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2288 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2289 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2290 }
2291 skiplast= 1;
2292 }
2293 offset =
2294 uvoffset=
2295 top_idx = MB_MBAFF ? 0 : 1;
2296 }
2297 step= MB_MBAFF ? 2 : 1;
2298 }
2299
3b66c4c5 2300 // There are two lines saved, the line above the top macroblock of a pair,
6867a90b 2301 // and the line above the bottom macroblock
5f7f9719
MN
/* luma: previous top-right corner first, then column 15 of each row */
2302 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2303 for(i=1; i<17 - skiplast; i++){
2304 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2305 }
115329f1 2306
5f7f9719
MN
/* luma: last row of the macroblock, copied as two 8 byte words */
2307 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2308 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2309
87352549 2310 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
/* chroma: Cb column starts at left_border[34], Cr column at left_border[34+18] */
2311 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2312 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2313 for(i=1; i<9 - skiplast; i++){
2314 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2315 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2316 }
5f7f9719
MN
2317 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2318 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2319 }
2320}
2321
93cc10fa 2322static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2323 MpegEncContext * const s = &h->s;
2324 int temp8, i;
2325 uint64_t temp64;
b69378e2
2326 int deblock_left;
2327 int deblock_top;
2328 int mb_xy;
5f7f9719
MN
2329 int step = 1;
2330 int offset = 1;
2331 int uvoffset= 1;
2332 int top_idx = 1;
2333
2334 if(!simple && FRAME_MBAFF){
2335 if(s->mb_y&1){
2336 offset = MB_MBAFF ? 1 : 17;
2337 uvoffset= MB_MBAFF ? 1 : 9;
2338 }else{
2339 offset =
2340 uvoffset=
2341 top_idx = MB_MBAFF ? 0 : 1;
2342 }
2343 step= MB_MBAFF ? 2 : 1;
2344 }
b69378e2
2345
2346 if(h->deblocking_filter == 2) {
64514ee8 2347 mb_xy = h->mb_xy;
b69378e2
2348 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2349 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2350 } else {
2351 deblock_left = (s->mb_x > 0);
6c805007 2352 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2353 }
53c05b1e
MN
2354
2355 src_y -= linesize + 1;
2356 src_cb -= uvlinesize + 1;
2357 src_cr -= uvlinesize + 1;
2358
2359#define XCHG(a,b,t,xchg)\
2360t= a;\
2361if(xchg)\
2362 a= b;\
2363b= t;
d89dc06a
LM
2364
2365 if(deblock_left){
5f7f9719
MN
2366 for(i = !deblock_top; i<16; i++){
2367 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2368 }
5f7f9719 2369 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2370 }
2371
2372 if(deblock_top){
5f7f9719
MN
2373 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2374 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2375 if(s->mb_x+1 < s->mb_width){
5f7f9719 2376 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2377 }
53c05b1e 2378 }
53c05b1e 2379
87352549 2380 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2381 if(deblock_left){
5f7f9719
MN
2382 for(i = !deblock_top; i<8; i++){
2383 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2384 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2385 }
5f7f9719
MN
2386 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2387 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2388 }
2389 if(deblock_top){
5f7f9719
MN
2390 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2391 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2392 }
53c05b1e
MN
2393 }
2394}
2395
/**
 * Reconstruct the current macroblock into the current picture.
 *
 * Steps, in order: compute the destination pointers, select frame or field
 * strides, then either copy intra PCM samples, or run intra prediction /
 * motion compensation followed by adding the luma and chroma residuals.
 * Afterwards the coefficient buffer is cleared and, if the deblocking
 * filter is enabled, the borders are backed up and the macroblock filtered.
 *
 * @param h      decoder context
 * @param simple when non-zero the checks guarded by "!simple" (field/MBAFF
 *               handling, intra PCM, transform bypass, non-H.264 codec ids,
 *               encoder mode, gray-only decoding) are skipped; the function
 *               is always-inline so the flag is expected to be a constant
 */
5a6a6cc7 2396static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2397 MpegEncContext * const s = &h->s;
2398 const int mb_x= s->mb_x;
2399 const int mb_y= s->mb_y;
64514ee8 2400 const int mb_xy= h->mb_xy;
0da71265
MN
2401 const int mb_type= s->current_picture.mb_type[mb_xy];
2402 uint8_t *dest_y, *dest_cb, *dest_cr;
2403 int linesize, uvlinesize /*dct_offset*/;
2404 int i;
6867a90b 2405 int *block_offset = &h->block_offset[0];
41e4055b
MN
2406 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2407 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
36940eca 2408 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2409 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2410
6120a343
MN
2411 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2412 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2413 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 2414
a957c27b
LM
2415 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2416 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2417
/* Field macroblock: doubled strides and the second half of block_offset. */
bd91fee3 2418 if (!simple && MB_FIELD) {
5d18eaad
LM
2419 linesize = h->mb_linesize = s->linesize * 2;
2420 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2421 block_offset = &h->block_offset[24];
1412060e 2422 if(mb_y&1){ //FIXME move out of this function?
0da71265 2423 dest_y -= s->linesize*15;
6867a90b
LLL
2424 dest_cb-= s->uvlinesize*7;
2425 dest_cr-= s->uvlinesize*7;
0da71265 2426 }
5d18eaad
LM
2427 if(FRAME_MBAFF) {
2428 int list;
3425501d 2429 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2430 if(!USES_LIST(mb_type, list))
2431 continue;
2432 if(IS_16X16(mb_type)){
2433 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2434 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2435 }else{
2436 for(i=0; i<16; i+=4){
5d18eaad
LM
2437 int ref = h->ref_cache[list][scan8[i]];
2438 if(ref >= 0)
1710856c 2439 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2440 }
2441 }
2442 }
2443 }
0da71265 2444 } else {
5d18eaad
LM
2445 linesize = h->mb_linesize = s->linesize;
2446 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2447// dct_offset = s->linesize * 16;
2448 }
115329f1 2449
/* Intra PCM: the samples are copied row by row straight out of h->mb. */
bd91fee3 2450 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2451 for (i=0; i<16; i++) {
2452 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2453 }
c1708e8d
MN
2454 for (i=0; i<8; i++) {
2455 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2456 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2457 }
e7e09b49
LLL
2458 } else {
2459 if(IS_INTRA(mb_type)){
/* Swap in the saved border samples for the duration of intra prediction;
 * the matching call with xchg=0 follows after prediction. */
5f7f9719 2460 if(h->deblocking_filter)
93cc10fa 2461 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2462
87352549 2463 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2464 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2465 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2466 }
0da71265 2467
e7e09b49 2468 if(IS_INTRA4x4(mb_type)){
bd91fee3 2469 if(simple || !s->encoding){
43efd19a 2470 if(IS_8x8DCT(mb_type)){
1eb96035
MN
2471 if(transform_bypass){
2472 idct_dc_add =
2473 idct_add = s->dsp.add_pixels8;
dae006d7 2474 }else{
1eb96035
MN
2475 idct_dc_add = s->dsp.h264_idct8_dc_add;
2476 idct_add = s->dsp.h264_idct8_add;
2477 }
43efd19a
LM
2478 for(i=0; i<16; i+=4){
2479 uint8_t * const ptr= dest_y + block_offset[i];
2480 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
2481 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2482 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2483 }else{
ac0623b2
MN
2484 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2485 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2486 (h->topright_samples_available<<i)&0x4000, linesize);
2487 if(nnz){
2488 if(nnz == 1 && h->mb[i*16])
2489 idct_dc_add(ptr, h->mb + i*16, linesize);
2490 else
2491 idct_add (ptr, h->mb + i*16, linesize);
2492 }
41e4055b 2493 }
43efd19a 2494 }
1eb96035
MN
2495 }else{
2496 if(transform_bypass){
2497 idct_dc_add =
2498 idct_add = s->dsp.add_pixels4;
2499 }else{
2500 idct_dc_add = s->dsp.h264_idct_dc_add;
2501 idct_add = s->dsp.h264_idct_add;
2502 }
aebb5d6d
MN
2503 for(i=0; i<16; i++){
2504 uint8_t * const ptr= dest_y + block_offset[i];
2505 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 2506
aebb5d6d
MN
2507 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2508 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2509 }else{
2510 uint8_t *topright;
2511 int nnz, tr;
2512 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2513 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2514 assert(mb_y || linesize <= block_offset[i]);
/* No top-right samples: replicate the sample above the block's last column
 * into all four bytes of tr and predict from that. */
2515 if(!topright_avail){
2516 tr= ptr[3 - linesize]*0x01010101;
2517 topright= (uint8_t*) &tr;
2518 }else
2519 topright= ptr + 4 - linesize;
ac0623b2 2520 }else
aebb5d6d
MN
2521 topright= NULL;
2522
2523 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2524 nnz = h->non_zero_count_cache[ scan8[i] ];
2525 if(nnz){
2526 if(is_h264){
2527 if(nnz == 1 && h->mb[i*16])
2528 idct_dc_add(ptr, h->mb + i*16, linesize);
2529 else
2530 idct_add (ptr, h->mb + i*16, linesize);
2531 }else
2532 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2533 }
ac0623b2 2534 }
41e4055b 2535 }
8b82a956 2536 }
0da71265 2537 }
e7e09b49 2538 }else{
c92a30bb 2539 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2540 if(is_h264){
36940eca 2541 if(!transform_bypass)
93f0c0a4 2542 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2543 }else
e7e09b49 2544 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2545 }
5f7f9719 2546 if(h->deblocking_filter)
93cc10fa 2547 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2548 }else if(is_h264){
e7e09b49 2549 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2550 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2551 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2552 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2553 }
e7e09b49
LLL
2554
2555
/* Luma residual for everything except intra 4x4, which added its residual
 * block by block above. */
2556 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2557 if(is_h264){
ef9d1d15 2558 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
2559 if(transform_bypass){
2560 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
2561 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2562 }else{
2563 for(i=0; i<16; i++){
2564 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 2565 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2566 }
2fd1f0e0
MN
2567 }
2568 }else{
2569 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 2570 }
49c084a7 2571 }else if(h->cbp&15){
2fd1f0e0 2572 if(transform_bypass){
0a8ca22f 2573 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 2574 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 2575 for(i=0; i<16; i+=di){
62bc966f 2576 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 2577 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2578 }
ef9d1d15 2579 }
2fd1f0e0
MN
2580 }else{
2581 if(IS_8x8DCT(mb_type)){
2582 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2583 }else{
2584 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2585 }
2586 }
4704097a 2587 }
e7e09b49
LLL
2588 }else{
2589 for(i=0; i<16; i++){
2590 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2591 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2592 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2593 }
4704097a 2594 }
0da71265
MN
2595 }
2596 }
0da71265 2597
/* Chroma residual: only when chroma is decoded and cbp has a chroma bit set.
 * Blocks 16..19 go to dest[0] (Cb), blocks 20..23 to dest[1] (Cr). */
621561cd 2598 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2599 uint8_t *dest[2] = {dest_cb, dest_cr};
2600 if(transform_bypass){
96465b90
MN
2601 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2602 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2603 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2604 }else{
c25ac15a 2605 idct_add = s->dsp.add_pixels4;
96465b90
MN
2606 for(i=16; i<16+8; i++){
2607 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2608 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2609 }
2610 }
ef9d1d15 2611 }else{
4691a77d
2612 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2613 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 2614 if(is_h264){
c25ac15a
MN
2615 idct_add = s->dsp.h264_idct_add;
2616 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
2617 for(i=16; i<16+8; i++){
2618 if(h->non_zero_count_cache[ scan8[i] ])
2619 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2620 else if(h->mb[i*16])
2621 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2622 }
aebb5d6d
MN
2623 }else{
2624 for(i=16; i<16+8; i++){
2625 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2626 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2627 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2628 }
e7e09b49 2629 }
4704097a 2630 }
0da71265
MN
2631 }
2632 }
2633 }
c212fb0c
MN
2634 if(h->cbp || IS_INTRA(mb_type))
2635 s->dsp.clear_blocks(h->mb);
2636
/* Deblocking: save the unfiltered borders first, then filter the macroblock
 * (generic filter for MBAFF frames in the non-simple path, fast one otherwise). */
53c05b1e 2637 if(h->deblocking_filter) {
5f7f9719
MN
2638 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2639 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2640 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2641 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2642 if (!simple && FRAME_MBAFF) {
5f7f9719 2643 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2644 } else {
3e20143e 2645 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2646 }
53c05b1e 2647 }
0da71265
MN
2648}
2649
0da71265 2650/**
bd91fee3
AS
2651 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2652 */
2653static void hl_decode_mb_simple(H264Context *h){
2654 hl_decode_mb_internal(h, 1);
2655}
2656
2657/**
2658 * Process a macroblock; this handles edge cases, such as interlacing.
2659 */
2660static void av_noinline hl_decode_mb_complex(H264Context *h){
2661 hl_decode_mb_internal(h, 0);
2662}
2663
2664static void hl_decode_mb(H264Context *h){
2665 MpegEncContext * const s = &h->s;
64514ee8 2666 const int mb_xy= h->mb_xy;
bd91fee3 2667 const int mb_type= s->current_picture.mb_type[mb_xy];
1dd488e9 2668 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2669
fedec603 2670 if(ENABLE_H264_ENCODER && !s->decode)
bd91fee3
AS
2671 return;
2672
2673 if (is_complex)
2674 hl_decode_mb_complex(h);
2675 else hl_decode_mb_simple(h);
2676}
2677
2143b118 2678static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2679 int i;
2680 for (i = 0; i < 4; ++i) {
2143b118 2681 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2682 pic->data[i] += pic->linesize[i];
2143b118 2683 pic->reference = parity;
11cc1d8c
JD
2684 pic->linesize[i] *= 2;
2685 }
2879c75f 2686 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2687}
2688
2689static int split_field_copy(Picture *dest, Picture *src,
2690 int parity, int id_add){
2691 int match = !!(src->reference & parity);
2692
2693 if (match) {
2694 *dest = *src;
d4f7d838 2695 if(parity != PICT_FRAME){
b3e93fd4
MN
2696 pic_as_field(dest, parity);
2697 dest->pic_id *= 2;
2698 dest->pic_id += id_add;
d4f7d838 2699 }
11cc1d8c
JD
2700 }
2701
2702 return match;
2703}
2704
d4f7d838
MN
2705static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2706 int i[2]={0};
2707 int index=0;
11cc1d8c 2708
d4f7d838
MN
2709 while(i[0]<len || i[1]<len){
2710 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2711 i[0]++;
2712 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2713 i[1]++;
2714 if(i[0] < len){
2715 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2716 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2717 }
2718 if(i[1] < len){
2719 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2720 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2721 }
2722 }
2723
d4f7d838 2724 return index;
11cc1d8c
JD
2725}
2726
d4f7d838
MN
2727static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2728 int i, best_poc;
2729 int out_i= 0;
11cc1d8c 2730
d4f7d838
MN
2731 for(;;){
2732 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2733
d4f7d838
MN
2734 for(i=0; i<len; i++){
2735 const int poc= src[i]->poc;
2736 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2737 best_poc= poc;
2738 sorted[out_i]= src[i];
2739 }
2740 }
2741 if(best_poc == (dir ? INT_MIN : INT_MAX))
2742 break;
2743 limit= sorted[out_i++]->poc - dir;
2744 }
2745 return out_i;
11cc1d8c
JD
2746}
2747
bd91fee3 2748/**
0da71265
MN
2749 * fills the default_ref_list.
2750 */
2751static int fill_default_ref_list(H264Context *h){
2752 MpegEncContext * const s = &h->s;
d4f7d838 2753 int i, len;
115329f1 2754
9f5c1037 2755 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2756 Picture *sorted[32];
2757 int cur_poc, list;
2758 int lens[2];
11cc1d8c 2759
d4f7d838
MN
2760 if(FIELD_PICTURE)
2761 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2762 else
2763 cur_poc= s->current_picture_ptr->poc;
086acdd5 2764
d4f7d838
MN
2765 for(list= 0; list<2; list++){
2766 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2767 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2768 assert(len<=32);
2769 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2770 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2771 assert(len<=32);
086acdd5 2772
d4f7d838
MN
2773 if(len < h->ref_count[list])
2774 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2775 lens[list]= len;
086acdd5
JD
2776 }
2777
d4f7d838
MN
2778 if(lens[0] == lens[1] && lens[1] > 1){
2779 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2780 if(i == lens[0])
2781 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2782 }
086acdd5 2783 }else{
d4f7d838
MN
2784 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2785 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2786 assert(len <= 32);
2787 if(len < h->ref_count[0])
2788 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2789 }
827c91bf
LLL
2790#ifdef TRACE
2791 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2792 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2793 }
9f5c1037 2794 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2795 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2796 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2797 }
2798 }
2799#endif
0da71265
MN
2800 return 0;
2801}
2802
827c91bf
LLL
2803static void print_short_term(H264Context *h);
2804static void print_long_term(H264Context *h);
2805
949da388
JD
2806/**
2807 * Extract structure information about the picture described by pic_num in
2808 * the current decoding context (frame or field). Note that pic_num is
2809 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2810 * @param pic_num picture number for which to extract structure information
2811 * @param structure one of PICT_XXX describing structure of picture
2812 * with pic_num
2813 * @return frame number (short term) or long term index of picture
2814 * described by pic_num
2815 */
2816static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2817 MpegEncContext * const s = &h->s;
2818
2819 *structure = s->picture_structure;
2820 if(FIELD_PICTURE){
2821 if (!(pic_num & 1))
2822 /* opposite field */
2823 *structure ^= PICT_FRAME;
2824 pic_num >>= 1;
2825 }
2826
2827 return pic_num;
2828}
2829
/**
 * Parse the reference picture list reordering commands of a slice header
 * and build h->ref_list from h->default_ref_list.
 *
 * Each list starts as a copy of the default list. If the per-list flag bit
 * is set, commands are read until reordering_of_pic_nums_idc == 3:
 *  - idc 0 / 1: a short term picture is selected by subtracting / adding
 *    abs_diff_pic_num from the running prediction pred;
 *  - idc 2: a long term picture is selected by its index.
 * The selected picture is inserted at position index and the following
 * entries are shifted down. A picture that cannot be found is logged and
 * its slot zeroed. Finally every slot that still has no data[0] is logged
 * and replaced by the current picture.
 *
 * @return 0 on success, -1 if more commands than ref_count entries are
 *         given, abs_diff_pic_num or the long term index is out of range,
 *         or the idc value is illegal
 */
0da71265
MN
2830static int decode_ref_pic_list_reordering(H264Context *h){
2831 MpegEncContext * const s = &h->s;
949da388 2832 int list, index, pic_structure;
115329f1 2833
827c91bf
LLL
2834 print_short_term(h);
2835 print_long_term(h);
115329f1 2836
3425501d 2837 for(list=0; list<h->list_count; list++){
0da71265
MN
2838 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2839
2840 if(get_bits1(&s->gb)){
2841 int pred= h->curr_pic_num;
0da71265
MN
2842
2843 for(index=0; ; index++){
88e7a4d1
MN
2844 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2845 unsigned int pic_id;
0da71265 2846 int i;
2f944356 2847 Picture *ref = NULL;
115329f1
DB
2848
2849 if(reordering_of_pic_nums_idc==3)
0bc42cad 2850 break;
115329f1 2851
0da71265 2852 if(index >= h->ref_count[list]){
9b879566 2853 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2854 return -1;
2855 }
115329f1 2856
0da71265
MN
2857 if(reordering_of_pic_nums_idc<3){
2858 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2859 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2860 int frame_num;
0da71265 2861
03d3cab8 2862 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2863 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2864 return -1;
2865 }
2866
2867 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2868 else pred+= abs_diff_pic_num;
/* wrap the prediction; the mask assumes max_pic_num is a power of two -- TODO confirm */
2869 pred &= h->max_pic_num - 1;
115329f1 2870
949da388
JD
2871 frame_num = pic_num_extract(h, pred, &pic_structure);
2872
/* search the short term list from its end; i < 0 afterwards means "not found" */
0d175622
MN
2873 for(i= h->short_ref_count-1; i>=0; i--){
2874 ref = h->short_ref[i];
949da388 2875 assert(ref->reference);
0d175622 2876 assert(!ref->long_ref);
6edac8e1 2877 if(
af8c5e08
MN
2878 ref->frame_num == frame_num &&
2879 (ref->reference & pic_structure)
6edac8e1 2880 )
0da71265
MN
2881 break;
2882 }
0d175622 2883 if(i>=0)
949da388 2884 ref->pic_id= pred;
0da71265 2885 }else{
949da388 2886 int long_idx;
0da71265 2887 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2888
2889 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2890
2891 if(long_idx>31){
88e7a4d1
MN
2892 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2893 return -1;
2894 }
949da388
JD
2895 ref = h->long_ref[long_idx];
2896 assert(!(ref && !ref->reference));
af8c5e08 2897 if(ref && (ref->reference & pic_structure)){
ac658be5 2898 ref->pic_id= pic_id;
ac658be5
FOL
2899 assert(ref->long_ref);
2900 i=0;
2901 }else{
2902 i=-1;
2903 }
0da71265
MN
2904 }
2905
0d315f28 2906 if (i < 0) {
9b879566 2907 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2908 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2909 } else {
/* find where the picture currently sits (or stop at the last slot), then
 * shift the entries in between down by one to make room at index */
2910 for(i=index; i+1<h->ref_count[list]; i++){
2911 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2912 break;
21be92bf
MN
2913 }
2914 for(; i > index; i--){
2915 h->ref_list[list][i]= h->ref_list[list][i-1];
2916 }
0d175622 2917 h->ref_list[list][index]= *ref;
949da388 2918 if (FIELD_PICTURE){
2143b118 2919 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2920 }
0da71265 2921 }
0bc42cad 2922 }else{
9b879566 2923 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2924 return -1;
2925 }
2926 }
2927 }
0da71265 2928 }
/* make sure no list entry is left without picture data */
3425501d 2929 for(list=0; list<h->list_count; list++){
6ab87211 2930 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2931 if(!h->ref_list[list][index].data[0]){
2932 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2933 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2934 }
6ab87211 2935 }
6ab87211 2936 }
115329f1 2937
115329f1 2938 return 0;
0da71265
MN
2939}
2940
91c58c94 2941static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2942 int list, i, j;
3425501d 2943 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2944 for(i=0; i<h->ref_count[list]; i++){
2945 Picture *frame = &h->ref_list[list][i];
2946 Picture *field = &h->ref_list[list][16+2*i];
2947 field[0] = *frame;
2948 for(j=0; j<3; j++)
2949 field[0].linesize[j] <<= 1;
2143b118 2950 field[0].reference = PICT_TOP_FIELD;
078f42dd 2951 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2952 field[1] = field[0];
2953 for(j=0; j<3; j++)
2954 field[1].data[j] += frame->linesize[j];
2143b118 2955 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2956 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2957
2958 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2959 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2960 for(j=0; j<2; j++){
2961 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2962 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2963 }
2964 }
2965 }
2966 for(j=0; j<h->ref_count[1]; j++){
2967 for(i=0; i<h->ref_count[0]; i++)
2968 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2969 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2970 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2971 }
2972}
2973
0da71265
MN
2974static int pred_weight_table(H264Context *h){
2975 MpegEncContext * const s = &h->s;
2976 int list, i;
9f2d1b4f 2977 int luma_def, chroma_def;
115329f1 2978
9f2d1b4f
LM
2979 h->use_weight= 0;
2980 h->use_weight_chroma= 0;
0da71265
MN
2981 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2982 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2983 luma_def = 1<<h->luma_log2_weight_denom;
2984 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2985
2986 for(list=0; list<2; list++){
2987 for(i=0; i<h->ref_count[list]; i++){
2988 int luma_weight_flag, chroma_weight_flag;
115329f1 2989
0da71265
MN
2990 luma_weight_flag= get_bits1(&s->gb);
2991 if(luma_weight_flag){
2992 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2993 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f
LM
2994 if( h->luma_weight[list][i] != luma_def
2995 || h->luma_offset[list][i] != 0)
2996 h->use_weight= 1;
2997 }else{
2998 h->luma_weight[list][i]= luma_def;
2999 h->luma_offset[list][i]= 0;
0da71265
MN
3000 }
3001
0af6967e 3002 if(CHROMA){
fef744d4
MN
3003 chroma_weight_flag= get_bits1(&s->gb);
3004 if(chroma_weight_flag){
3005 int j;
3006 for(j=0; j<2; j++){
3007 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3008 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3009 if( h->chroma_weight[list][i][j] != chroma_def
3010 || h->chroma_offset[list][i][j] != 0)
3011 h->use_weight_chroma= 1;
3012 }
3013 }else{
3014 int j;
3015 for(j=0; j<2; j++){
3016 h->chroma_weight[list][i][j]= chroma_def;
3017 h->chroma_offset[list][i][j]= 0;
3018 }
0da71265
MN
3019 }
3020 }
3021 }
9f5c1037 3022 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 3023 }
9f2d1b4f 3024 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3025 return 0;
3026}
3027
9f2d1b4f
LM
3028static void implicit_weight_table(H264Context *h){
3029 MpegEncContext * const s = &h->s;
9f2d1b4f
LM
3030 int ref0, ref1;
3031 int cur_poc = s->current_picture_ptr->poc;
3032
3033 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3034 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3035 h->use_weight= 0;
3036 h->use_weight_chroma= 0;
3037 return;
3038 }
3039
3040 h->use_weight= 2;
3041 h->use_weight_chroma= 2;
3042 h->luma_log2_weight_denom= 5;
3043 h->chroma_log2_weight_denom= 5;
3044
9f2d1b4f
LM
3045 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3046 int poc0 = h->ref_list[0][ref0].poc;
3047 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3048 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3049 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3050 if(td){
f66e4f5f 3051 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3052 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3053 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3054 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3055 h->implicit_weight[ref0][ref1] = 32;
3056 else
3057 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3058 }else
3059 h->implicit_weight[ref0][ref1] = 32;
3060 }
3061 }
3062}
3063
8fd57a66
JD
3064/**
3065 * Mark a picture as no longer needed for reference. The refmask
3066 * argument allows unreferencing of individual fields or the whole frame.
3067 * If the picture becomes entirely unreferenced, but is being held for
3068 * display purposes, it is marked as such.
3069 * @param refmask mask of fields to unreference; the mask is bitwise
3070 * anded with the reference marking of pic
3071 * @return non-zero if pic becomes entirely unreferenced (except possibly
3072 * for display purposes) zero if one of the fields remains in
3073 * reference
3074 */
3075static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3076 int i;
8fd57a66
JD
3077 if (pic->reference &= refmask) {
3078 return 0;
3079 } else {
79f4494a
MN
3080 for(i = 0; h->delayed_pic[i]; i++)
3081 if(pic == h->delayed_pic[i]){
3082 pic->reference=DELAYED_PIC_REF;
3083 break;
3084 }
8fd57a66
JD
3085 return 1;
3086 }
4e4d983e
LM
3087}
3088
0da71265 3089/**
5175b937 3090 * instantaneous decoder refresh.
0da71265
MN
3091 */
3092static void idr(H264Context *h){
4e4d983e 3093 int i;
0da71265 3094
dc032f33 3095 for(i=0; i<16; i++){
9c0e4624 3096 remove_long(h, i, 0);
0da71265 3097 }
849b9cef 3098 assert(h->long_ref_count==0);
0da71265
MN
3099
3100 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3101 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3102 h->short_ref[i]= NULL;
3103 }
3104 h->short_ref_count=0;
a149c1a5 3105 h->prev_frame_num= 0;
80f8e035
MN
3106 h->prev_frame_num_offset= 0;
3107 h->prev_poc_msb=
3108 h->prev_poc_lsb= 0;
0da71265
MN
3109}
3110
7c33ad19
LM
3111/* forget old pics after a seek */
3112static void flush_dpb(AVCodecContext *avctx){
3113 H264Context *h= avctx->priv_data;
3114 int i;
64b9d48f 3115 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3116 if(h->delayed_pic[i])
3117 h->delayed_pic[i]->reference= 0;
7c33ad19 3118 h->delayed_pic[i]= NULL;
285b570f 3119 }
df8a7dff 3120 h->outputed_poc= INT_MIN;
7c33ad19 3121 idr(h);
ca159196
MR
3122 if(h->s.current_picture_ptr)
3123 h->s.current_picture_ptr->reference= 0;
12d96de3 3124 h->s.first_field= 0;
e240f898 3125 ff_mpeg_flush(avctx);
7c33ad19
LM
3126}
3127
0da71265 3128/**
47e112f8
JD
3129 * Find a Picture in the short term reference list by frame number.
3130 * @param frame_num frame number to search for
3131 * @param idx the index into h->short_ref where returned picture is found
3132 * undefined if no picture found.
3133 * @return pointer to the found picture, or NULL if no pic with the provided
3134 * frame number is found
0da71265 3135 */
47e112f8 3136static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce 3137 MpegEncContext * const s = &h->s;
0da71265 3138 int i;
115329f1 3139
0da71265
MN
3140 for(i=0; i<h->short_ref_count; i++){
3141 Picture *pic= h->short_ref[i];
1924f3ce 3142 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3143 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
47e112f8
JD
3144 if(pic->frame_num == frame_num) {
3145 *idx = i;
0da71265
MN
3146 return pic;
3147 }
3148 }
3149 return NULL;
3150}
3151
3152/**
47e112f8
JD
3153 * Remove a picture from the short term reference list by its index in
3154 * that list. This does no checking on the provided index; it is assumed
3155 * to be valid. Other list entries are shifted down.
3156 * @param i index into h->short_ref of picture to remove.
3157 */
3158static void remove_short_at_index(H264Context *h, int i){
e1f15d38 3159 assert(i >= 0 && i < h->short_ref_count);
47e112f8
JD
3160 h->short_ref[i]= NULL;
3161 if (--h->short_ref_count)
3162 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3163}
3164
3165/**
3166 *
3167 * @return the removed picture or NULL if an error occurs
3168 */
d9e32422 3169static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
47e112f8
JD
3170 MpegEncContext * const s = &h->s;
3171 Picture *pic;
3172 int i;
3173
3174 if(s->avctx->debug&FF_DEBUG_MMCO)
3175 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3176
3177 pic = find_short(h, frame_num, &i);
d9e32422
MN
3178 if (pic){
3179 if(unreference_pic(h, pic, ref_mask))
47e112f8 3180 remove_short_at_index(h, i);
d9e32422 3181 }
47e112f8
JD