100l, I broke H.264 again, forgot one hunk.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265
MN
22/**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265 34#include "golomb.h"
626464fb 35#include "rectangle.h"
0da71265 36
e5017ab8 37#include "cabac.h"
52cb7981 38#ifdef ARCH_X86
a6493a8f 39#include "x86/h264_i386.h"
52cb7981 40#endif
e5017ab8 41
2848ce84 42//#undef NDEBUG
0da71265
MN
43#include <assert.h>
44
2ddcf84b
JD
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
50
0da71265 51static VLC coeff_token_vlc[4];
910e3668
AC
52static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
54
0da71265 55static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
56static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
58
59static VLC total_zeros_vlc[15];
910e3668
AC
60static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61static const int total_zeros_vlc_tables_size = 512;
62
0da71265 63static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
64static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
66
67static VLC run_vlc[6];
910e3668
AC
68static VLC_TYPE run_vlc_tables[6][8][2];
69static const int run_vlc_tables_size = 8;
70
0da71265 71static VLC run7_vlc;
910e3668
AC
72static VLC_TYPE run7_vlc_table[96][2];
73static const int run7_vlc_table_size = 96;
0da71265 74
8b82a956
MN
75static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 77static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 78static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 79static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 80
849f1035 81static av_always_inline uint32_t pack16to32(int a, int b){
377ec888
MN
82#ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
84#else
85 return (a&0xFFFF) + (b<<16);
86#endif
87}
88
/** rem6[i] == i % 6 for i in 0..51. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/** div6[i] == i / 6 for i in 0..51. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
96
143d7f14
PK
/**
 * Index sets used by fill_caches() to pick entries of the left neighbour(s).
 * fill_caches() uses row 0 by default; in MBAFF, when the field flag of the
 * current and the left macroblock differ, it switches to row 3 (current is
 * field) or row 2 - bottom (current is frame).
 * Columns 0..3 index the left macroblock's intra4x4_pred_mode /
 * non_zero_count entries (and select motion vector rows); columns 4..7
 * index further non_zero_count entries of the left macroblock.
 */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
acd8d10f 103
70abb407 104static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 105 MpegEncContext * const s = &h->s;
64514ee8 106 const int mb_xy= h->mb_xy;
0da71265
MN
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
cac55c91 109 const int * left_block;
02f7695b 110 int topleft_partition= -1;
0da71265
MN
111 int i;
112
36e097bc
JD
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
114
717b1733 115 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
117 return;
118
2cab6401
DB
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 121
6867a90b
LLL
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 125 left_block = left_block_options[0];
5d18eaad 126 if(FRAME_MBAFF){
6867a90b
LLL
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
6f3c50f2
MN
131 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
6867a90b 136 const int bottom = (s->mb_y & 1);
6f3c50f2 137 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
60c6ba7a 138
6f3c50f2 139 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
6867a90b
LLL
140 top_xy -= s->mb_stride;
141 }
6f3c50f2 142 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
6867a90b 143 topleft_xy -= s->mb_stride;
6f3c50f2 144 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
02f7695b 145 topleft_xy += s->mb_stride;
1412060e 146 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 147 topleft_partition = 0;
6867a90b 148 }
6f3c50f2 149 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
6867a90b
LLL
150 topright_xy -= s->mb_stride;
151 }
6f3c50f2 152 if (left_mb_field_flag != curr_mb_field_flag) {
6867a90b 153 left_xy[1] = left_xy[0] = pair_xy - 1;
6f3c50f2
MN
154 if (curr_mb_field_flag) {
155 left_xy[1] += s->mb_stride;
156 left_block = left_block_options[3];
157 } else {
03a035e0 158 left_block= left_block_options[2 - bottom];
6867a90b
LLL
159 }
160 }
0da71265
MN
161 }
162
826de46e
LLL
163 h->top_mb_xy = top_xy;
164 h->left_mb_xy[0] = left_xy[0];
165 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 166 if(for_deblock){
717b1733
LM
167 topleft_type = 0;
168 topright_type = 0;
b735aeea
MN
169 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
170 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
171 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 172
e248cb60 173 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 174 int list;
3425501d 175 for(list=0; list<h->list_count; list++){
e248cb60
MN
176 //These values where changed for ease of performing MC, we need to change them back
177 //FIXME maybe we can make MC and loop filter use the same values or prevent
178 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 179 if(USES_LIST(mb_type,list)){
191e8ca7 180 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 181 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 182 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
183 ref += h->b8_stride;
184 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 185 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
186 }
187 }
188 }
46f2f05f
MN
189 }else{
190 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
191 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
192 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
193 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
194 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
195
196 if(IS_INTRA(mb_type)){
faa7e394 197 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
198 h->topleft_samples_available=
199 h->top_samples_available=
0da71265
MN
200 h->left_samples_available= 0xFFFF;
201 h->topright_samples_available= 0xEEEA;
202
faa7e394 203 if(!(top_type & type_mask)){
0da71265
MN
204 h->topleft_samples_available= 0xB3FF;
205 h->top_samples_available= 0x33FF;
206 h->topright_samples_available= 0x26EA;
207 }
d1d10e91
MN
208 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
209 if(IS_INTERLACED(mb_type)){
faa7e394 210 if(!(left_type[0] & type_mask)){
d1d10e91
MN
211 h->topleft_samples_available&= 0xDFFF;
212 h->left_samples_available&= 0x5FFF;
213 }
faa7e394 214 if(!(left_type[1] & type_mask)){
d1d10e91
MN
215 h->topleft_samples_available&= 0xFF5F;
216 h->left_samples_available&= 0xFF5F;
217 }
218 }else{
219 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
220 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
221 assert(left_xy[0] == left_xy[1]);
faa7e394 222 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
223 h->topleft_samples_available&= 0xDF5F;
224 h->left_samples_available&= 0x5F5F;
225 }
226 }
227 }else{
faa7e394 228 if(!(left_type[0] & type_mask)){
0da71265
MN
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
231 }
232 }
115329f1 233
faa7e394 234 if(!(topleft_type & type_mask))
0da71265 235 h->topleft_samples_available&= 0x7FFF;
115329f1 236
faa7e394 237 if(!(topright_type & type_mask))
0da71265 238 h->topright_samples_available&= 0xFBFF;
115329f1 239
0da71265
MN
240 if(IS_INTRA4x4(mb_type)){
241 if(IS_INTRA4x4(top_type)){
242 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
243 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
244 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
245 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
246 }else{
247 int pred;
faa7e394 248 if(!(top_type & type_mask))
0da71265 249 pred= -1;
6fbcaaa0
LLL
250 else{
251 pred= 2;
0da71265
MN
252 }
253 h->intra4x4_pred_mode_cache[4+8*0]=
254 h->intra4x4_pred_mode_cache[5+8*0]=
255 h->intra4x4_pred_mode_cache[6+8*0]=
256 h->intra4x4_pred_mode_cache[7+8*0]= pred;
257 }
258 for(i=0; i<2; i++){
259 if(IS_INTRA4x4(left_type[i])){
260 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
261 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
262 }else{
263 int pred;
faa7e394 264 if(!(left_type[i] & type_mask))
0da71265 265 pred= -1;
6fbcaaa0
LLL
266 else{
267 pred= 2;
0da71265
MN
268 }
269 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
270 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
271 }
272 }
273 }
274 }
29671011 275 }
115329f1
DB
276
277
0da71265 278/*
115329f1
DB
2790 . T T. T T T T
2801 L . .L . . . .
2812 L . .L . . . .
2823 . T TL . . . .
2834 L . .L . . . .
2845 L . .. . . . .
0da71265 285*/
1412060e 286//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 287 if(top_type){
6867a90b
LLL
288 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
289 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
290 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 291 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 292
6867a90b 293 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 294 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 295
6867a90b 296 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 297 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 298
0da71265 299 }else{
115329f1 300 h->non_zero_count_cache[4+8*0]=
0da71265
MN
301 h->non_zero_count_cache[5+8*0]=
302 h->non_zero_count_cache[6+8*0]=
303 h->non_zero_count_cache[7+8*0]=
115329f1 304
0da71265
MN
305 h->non_zero_count_cache[1+8*0]=
306 h->non_zero_count_cache[2+8*0]=
115329f1 307
0da71265 308 h->non_zero_count_cache[1+8*3]=
3981c385 309 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 310
0da71265 311 }
826de46e 312
6867a90b
LLL
313 for (i=0; i<2; i++) {
314 if(left_type[i]){
315 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
316 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
317 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
318 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 319 }else{
115329f1
DB
320 h->non_zero_count_cache[3+8*1 + 2*8*i]=
321 h->non_zero_count_cache[3+8*2 + 2*8*i]=
322 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 323 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
324 }
325 }
326
327 if( h->pps.cabac ) {
328 // top_cbp
329 if(top_type) {
330 h->top_cbp = h->cbp_table[top_xy];
331 } else if(IS_INTRA(mb_type)) {
332 h->top_cbp = 0x1C0;
333 } else {
334 h->top_cbp = 0;
335 }
336 // left_cbp
337 if (left_type[0]) {
338 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
339 } else if(IS_INTRA(mb_type)) {
340 h->left_cbp = 0x1C0;
341 } else {
342 h->left_cbp = 0;
343 }
344 if (left_type[0]) {
345 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
346 }
347 if (left_type[1]) {
348 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 349 }
0da71265 350 }
6867a90b 351
0da71265 352#if 1
e2e5894a 353 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 354 int list;
3425501d 355 for(list=0; list<h->list_count; list++){
e2e5894a 356 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
357 /*if(!h->mv_cache_clean[list]){
358 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
359 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
360 h->mv_cache_clean[list]= 1;
361 }*/
5ad984c9 362 continue;
0da71265
MN
363 }
364 h->mv_cache_clean[list]= 0;
115329f1 365
53b19144 366 if(USES_LIST(top_type, list)){
0da71265
MN
367 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
368 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
369 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
370 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
371 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
373 h->ref_cache[list][scan8[0] + 0 - 1*8]=
374 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
375 h->ref_cache[list][scan8[0] + 2 - 1*8]=
376 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
377 }else{
115329f1
DB
378 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
379 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
380 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
382 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
383 }
384
4672503d
LM
385 for(i=0; i<2; i++){
386 int cache_idx = scan8[0] - 1 + i*2*8;
387 if(USES_LIST(left_type[i], list)){
388 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
389 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
390 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
391 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
392 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
393 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
394 }else{
395 *(uint32_t*)h->mv_cache [list][cache_idx ]=
396 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
397 h->ref_cache[list][cache_idx ]=
398 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
399 }
0da71265
MN
400 }
401
0281d325 402 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
403 continue;
404
53b19144 405 if(USES_LIST(topleft_type, list)){
02f7695b
LM
406 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
407 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
408 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
409 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
410 }else{
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
413 }
115329f1 414
53b19144 415 if(USES_LIST(topright_type, list)){
e2e5894a
LM
416 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
417 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
418 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
419 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 }else{
421 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
423 }
e2e5894a 424
ae08a563 425 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 426 continue;
115329f1
DB
427
428 h->ref_cache[list][scan8[5 ]+1] =
429 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 430 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 431 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
432 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
433 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
434 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 435 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
436 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
437 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
438
439 if( h->pps.cabac ) {
440 /* XXX beurk, Load mvd */
53b19144 441 if(USES_LIST(top_type, list)){
9e528114
LA
442 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
445 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
447 }else{
115329f1
DB
448 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
449 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
450 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
452 }
53b19144 453 if(USES_LIST(left_type[0], list)){
9e528114
LA
454 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
457 }else{
458 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
460 }
53b19144 461 if(USES_LIST(left_type[1], list)){
9e528114
LA
462 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
463 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
465 }else{
466 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
468 }
469 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
470 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 471 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
472 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
473 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 474
9f5c1037 475 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
476 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
477
478 if(IS_DIRECT(top_type)){
479 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
480 }else if(IS_8X8(top_type)){
481 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
482 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
483 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
484 }else{
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
486 }
115329f1 487
5d18eaad
LM
488 if(IS_DIRECT(left_type[0]))
489 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
490 else if(IS_8X8(left_type[0]))
491 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
492 else
493 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
494
495 if(IS_DIRECT(left_type[1]))
5ad984c9 496 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
497 else if(IS_8X8(left_type[1]))
498 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
499 else
5ad984c9 500 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
501 }
502 }
503
504 if(FRAME_MBAFF){
505#define MAP_MVS\
506 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
507 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
508 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
509 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
510 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
512 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
513 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
514 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
515 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
516 if(MB_FIELD){
517#define MAP_F2F(idx, mb_type)\
518 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
519 h->ref_cache[list][idx] <<= 1;\
520 h->mv_cache[list][idx][1] /= 2;\
521 h->mvd_cache[list][idx][1] /= 2;\
522 }
523 MAP_MVS
524#undef MAP_F2F
525 }else{
526#define MAP_F2F(idx, mb_type)\
527 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] >>= 1;\
529 h->mv_cache[list][idx][1] <<= 1;\
530 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 531 }
5d18eaad
LM
532 MAP_MVS
533#undef MAP_F2F
5ad984c9 534 }
9e528114 535 }
0da71265 536 }
0da71265
MN
537 }
538#endif
43efd19a
LM
539
540 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
541}
542
543static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 544 const int mb_xy= h->mb_xy;
0da71265
MN
545
546 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
547 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
548 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
549 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
550 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
551 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
552 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
553}
554
555/**
556 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
557 */
558static inline int check_intra4x4_pred_mode(H264Context *h){
559 MpegEncContext * const s = &h->s;
560 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
561 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
562 int i;
115329f1 563
0da71265
MN
564 if(!(h->top_samples_available&0x8000)){
565 for(i=0; i<4; i++){
566 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
567 if(status<0){
9b879566 568 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
569 return -1;
570 } else if(status){
571 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
572 }
573 }
574 }
115329f1 575
d1d10e91
MN
576 if((h->left_samples_available&0x8888)!=0x8888){
577 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 578 for(i=0; i<4; i++){
d1d10e91 579 if(!(h->left_samples_available&mask[i])){
26695973
MN
580 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
581 if(status<0){
582 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 return -1;
584 } else if(status){
585 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
586 }
d1d10e91 587 }
0da71265
MN
588 }
589 }
590
591 return 0;
592} //FIXME cleanup like next
593
594/**
595 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
596 */
597static inline int check_intra_pred_mode(H264Context *h, int mode){
598 MpegEncContext * const s = &h->s;
599 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
600 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 601
43ff0714 602 if(mode > 6U) {
5175b937 603 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 604 return -1;
5175b937 605 }
115329f1 606
0da71265
MN
607 if(!(h->top_samples_available&0x8000)){
608 mode= top[ mode ];
609 if(mode<0){
9b879566 610 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
611 return -1;
612 }
613 }
115329f1 614
d1d10e91 615 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 616 mode= left[ mode ];
d1d10e91
MN
617 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
618 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
619 }
0da71265 620 if(mode<0){
9b879566 621 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 622 return -1;
115329f1 623 }
0da71265
MN
624 }
625
626 return mode;
627}
628
629/**
630 * gets the predicted intra4x4 prediction mode.
631 */
632static inline int pred_intra_mode(H264Context *h, int n){
633 const int index8= scan8[n];
634 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
635 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
636 const int min= FFMIN(left, top);
637
a9c9a240 638 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
639
640 if(min<0) return DC_PRED;
641 else return min;
642}
643
644static inline void write_back_non_zero_count(H264Context *h){
64514ee8 645 const int mb_xy= h->mb_xy;
0da71265 646
6867a90b
LLL
647 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
648 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
649 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 650 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
651 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
652 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
653 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 654
6867a90b 655 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 656 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 657 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 658
6867a90b 659 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 660 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 661 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
662}
663
664/**
1412060e 665 * gets the predicted number of non-zero coefficients.
0da71265
MN
666 * @param n block index
667 */
668static inline int pred_non_zero_count(H264Context *h, int n){
669 const int index8= scan8[n];
670 const int left= h->non_zero_count_cache[index8 - 1];
671 const int top = h->non_zero_count_cache[index8 - 8];
672 int i= left + top;
115329f1 673
0da71265
MN
674 if(i<64) i= (i+1)>>1;
675
a9c9a240 676 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
677
678 return i&31;
679}
680
1924f3ce
MN
681static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
682 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 683 MpegEncContext *s = &h->s;
1924f3ce 684
5d18eaad
LM
685 /* there is no consistent mapping of mvs to neighboring locations that will
686 * make mbaff happy, so we can't move all this logic to fill_caches */
687 if(FRAME_MBAFF){
191e8ca7 688 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
689 const int16_t *mv;
690 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
691 *C = h->mv_cache[list][scan8[0]-2];
692
693 if(!MB_FIELD
694 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
695 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
696 if(IS_INTERLACED(mb_types[topright_xy])){
697#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
698 const int x4 = X4, y4 = Y4;\
699 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 700 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
701 return LIST_NOT_USED;\
702 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
703 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
704 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
705 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
706
707 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
708 }
709 }
710 if(topright_ref == PART_NOT_AVAILABLE
711 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
712 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
713 if(!MB_FIELD
714 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
715 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
716 }
717 if(MB_FIELD
718 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
719 && i >= scan8[0]+8){
1412060e 720 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 721 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
722 }
723 }
724#undef SET_DIAG_MV
725 }
726
1924f3ce
MN
727 if(topright_ref != PART_NOT_AVAILABLE){
728 *C= h->mv_cache[list][ i - 8 + part_width ];
729 return topright_ref;
730 }else{
a9c9a240 731 tprintf(s->avctx, "topright MV not available\n");
95c26348 732
1924f3ce
MN
733 *C= h->mv_cache[list][ i - 8 - 1 ];
734 return h->ref_cache[list][ i - 8 - 1 ];
735 }
736}
737
0da71265
MN
738/**
739 * gets the predicted MV.
740 * @param n the block index
741 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
742 * @param mx the x component of the predicted motion vector
743 * @param my the y component of the predicted motion vector
744 */
745static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
746 const int index8= scan8[n];
747 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
748 const int left_ref= h->ref_cache[list][ index8 - 1 ];
749 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
750 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
751 const int16_t * C;
752 int diagonal_ref, match_count;
753
0da71265 754 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 755
0da71265 756/* mv_cache
115329f1 757 B . . A T T T T
0da71265
MN
758 U . . L . . , .
759 U . . L . . . .
760 U . . L . . , .
761 . . . L . . . .
762*/
1924f3ce
MN
763
764 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
765 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 766 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
767 if(match_count > 1){ //most common
768 *mx= mid_pred(A[0], B[0], C[0]);
769 *my= mid_pred(A[1], B[1], C[1]);
770 }else if(match_count==1){
771 if(left_ref==ref){
772 *mx= A[0];
115329f1 773 *my= A[1];
1924f3ce
MN
774 }else if(top_ref==ref){
775 *mx= B[0];
115329f1 776 *my= B[1];
0da71265 777 }else{
1924f3ce 778 *mx= C[0];
115329f1 779 *my= C[1];
0da71265
MN
780 }
781 }else{
1924f3ce 782 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 783 *mx= A[0];
115329f1 784 *my= A[1];
0da71265 785 }else{
1924f3ce
MN
786 *mx= mid_pred(A[0], B[0], C[0]);
787 *my= mid_pred(A[1], B[1], C[1]);
0da71265 788 }
0da71265 789 }
115329f1 790
a9c9a240 791 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
792}
793
794/**
795 * gets the directionally predicted 16x8 MV.
796 * @param n the block index
797 * @param mx the x component of the predicted motion vector
798 * @param my the y component of the predicted motion vector
799 */
800static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
801 if(n==0){
802 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
803 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
804
a9c9a240 805 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 806
0da71265
MN
807 if(top_ref == ref){
808 *mx= B[0];
809 *my= B[1];
810 return;
811 }
812 }else{
813 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
814 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 815
a9c9a240 816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
817
818 if(left_ref == ref){
819 *mx= A[0];
820 *my= A[1];
821 return;
822 }
823 }
824
825 //RARE
826 pred_motion(h, n, 4, list, ref, mx, my);
827}
828
829/**
830 * gets the directionally predicted 8x16 MV.
831 * @param n the block index
832 * @param mx the x component of the predicted motion vector
833 * @param my the y component of the predicted motion vector
834 */
835static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
836 if(n==0){
837 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
838 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 839
a9c9a240 840 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
841
842 if(left_ref == ref){
843 *mx= A[0];
844 *my= A[1];
845 return;
846 }
847 }else{
1924f3ce
MN
848 const int16_t * C;
849 int diagonal_ref;
850
851 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 852
a9c9a240 853 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 854
115329f1 855 if(diagonal_ref == ref){
0da71265
MN
856 *mx= C[0];
857 *my= C[1];
858 return;
859 }
0da71265
MN
860 }
861
862 //RARE
863 pred_motion(h, n, 2, list, ref, mx, my);
864}
865
866static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
867 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
868 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
869
a9c9a240 870 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
871
872 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
873 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
874 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
115329f1 875
0da71265
MN
876 *mx = *my = 0;
877 return;
878 }
115329f1 879
0da71265
MN
880 pred_motion(h, 0, 4, 0, 0, mx, my);
881
882 return;
883}
884
8b1fd554
MN
885static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
886 int poc0 = h->ref_list[0][i].poc;
887 int td = av_clip(poc1 - poc0, -128, 127);
888 if(td == 0 || h->ref_list[0][i].long_ref){
889 return 256;
890 }else{
891 int tb = av_clip(poc - poc0, -128, 127);
892 int tx = (16384 + (FFABS(td) >> 1)) / td;
893 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
894 }
895}
896
5ad984c9 897static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
898 MpegEncContext * const s = &h->s;
899 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 900 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
901 int i, field;
902 for(field=0; field<2; field++){
903 const int poc = h->s.current_picture_ptr->field_poc[field];
904 const int poc1 = h->ref_list[1][0].field_poc[field];
905 for(i=0; i < 2*h->ref_count[0]; i++)
906 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 907 }
8b1fd554
MN
908
909 for(i=0; i<h->ref_count[0]; i++){
910 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 911 }
5ad984c9 912}
f4d3382d
MN
913
914static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
915 MpegEncContext * const s = &h->s;
916 Picture * const ref1 = &h->ref_list[1][0];
917 int j, old_ref, rfield;
918 int start= mbafi ? 16 : 0;
919 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
920 int interl= mbafi || s->picture_structure != PICT_FRAME;
921
922 /* bogus; fills in for missing frames */
923 memset(map[list], 0, sizeof(map[list]));
924
925 for(rfield=0; rfield<2; rfield++){
926 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
927 int poc = ref1->ref_poc[colfield][list][old_ref];
928
929 if (!interl)
930 poc |= 3;
931 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
932 poc= (poc&~3) + rfield + 1;
933
934 for(j=start; j<end; j++){
935 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
936 int cur_ref= mbafi ? (j-16)^field : j;
937 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
938 if(rfield == field)
939 map[list][old_ref] = cur_ref;
940 break;
941 }
942 }
943 }
944 }
945}
946
2f944356
LM
947static inline void direct_ref_list_init(H264Context * const h){
948 MpegEncContext * const s = &h->s;
949 Picture * const ref1 = &h->ref_list[1][0];
950 Picture * const cur = s->current_picture_ptr;
bbc78fb4 951 int list, j, field;
f4d3382d
MN
952 int sidx= (s->picture_structure&1)^1;
953 int ref1sidx= (ref1->reference&1)^1;
aa617518 954
2f944356 955 for(list=0; list<2; list++){
2879c75f 956 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 957 for(j=0; j<h->ref_count[list]; j++)
42de393d 958 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 959 }
aa617518 960
7762cc3d 961 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
962 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
963 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 964 }
aa617518 965
48e025e5 966 cur->mbaff= FRAME_MBAFF;
aa617518 967
9701840b 968 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 969 return;
aa617518 970
2f944356 971 for(list=0; list<2; list++){
f4d3382d
MN
972 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
973 for(field=0; field<2; field++)
974 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
975 }
976}
5ad984c9
LM
977
978static inline void pred_direct_motion(H264Context * const h, int *mb_type){
979 MpegEncContext * const s = &h->s;
d00eac6c
MN
980 int b8_stride = h->b8_stride;
981 int b4_stride = h->b_stride;
982 int mb_xy = h->mb_xy;
983 int mb_type_col[2];
984 const int16_t (*l1mv0)[2], (*l1mv1)[2];
985 const int8_t *l1ref0, *l1ref1;
5ad984c9 986 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 987 unsigned int sub_mb_type;
5ad984c9
LM
988 int i8, i4;
989
5d18eaad 990#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
991
992 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 993 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
994 int cur_poc = s->current_picture_ptr->poc;
995 int *col_poc = h->ref_list[1]->field_poc;
996 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
997 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
998 b8_stride = 0;
60c9b24d 999 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1000 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1001 mb_xy += s->mb_stride*fieldoff;
1002 }
1003 goto single_col;
1004 }else{ // AFL/AFR/FR/FL -> AFR/FR
1005 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1006 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1007 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1008 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1009 b8_stride *= 3;
1010 b4_stride *= 6;
1011 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1012 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1013 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1014 && !is_b8x8){
1015 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1016 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1017 }else{
1018 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1019 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1020 }
1021 }else{ // AFR/FR -> AFR/FR
1022single_col:
1023 mb_type_col[0] =
1024 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1025 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1026 /* FIXME save sub mb types from previous frames (or derive from MVs)
1027 * so we know exactly what block size to use */
1028 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1029 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1030 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1031 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1032 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1033 }else{
1034 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1035 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1036 }
d00eac6c 1037 }
5ad984c9 1038 }
5ad984c9 1039
7d54ecc9
MN
1040 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1041 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1042 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1043 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1044 if(!b8_stride){
1045 if(s->mb_y&1){
1046 l1ref0 += h->b8_stride;
1047 l1ref1 += h->b8_stride;
1048 l1mv0 += 2*b4_stride;
1049 l1mv1 += 2*b4_stride;
1050 }
d00eac6c 1051 }
115329f1 1052
5ad984c9
LM
1053 if(h->direct_spatial_mv_pred){
1054 int ref[2];
1055 int mv[2][2];
1056 int list;
1057
5d18eaad
LM
1058 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1059
5ad984c9
LM
1060 /* ref = min(neighbors) */
1061 for(list=0; list<2; list++){
1062 int refa = h->ref_cache[list][scan8[0] - 1];
1063 int refb = h->ref_cache[list][scan8[0] - 8];
1064 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1065 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1066 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1067 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1068 if(ref[list] < 0)
1069 ref[list] = -1;
1070 }
1071
1072 if(ref[0] < 0 && ref[1] < 0){
1073 ref[0] = ref[1] = 0;
1074 mv[0][0] = mv[0][1] =
1075 mv[1][0] = mv[1][1] = 0;
1076 }else{
1077 for(list=0; list<2; list++){
1078 if(ref[list] >= 0)
1079 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1080 else
1081 mv[list][0] = mv[list][1] = 0;
1082 }
1083 }
1084
1085 if(ref[1] < 0){
50b3ab0f
LM
1086 if(!is_b8x8)
1087 *mb_type &= ~MB_TYPE_L1;
1088 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1089 }else if(ref[0] < 0){
50b3ab0f
LM
1090 if(!is_b8x8)
1091 *mb_type &= ~MB_TYPE_L0;
1092 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1093 }
1094
d00eac6c 1095 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1096 for(i8=0; i8<4; i8++){
1097 int x8 = i8&1;
1098 int y8 = i8>>1;
1099 int xy8 = x8+y8*b8_stride;
1100 int xy4 = 3*x8+y8*b4_stride;
1101 int a=0, b=0;
1102
1103 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1104 continue;
1105 h->sub_mb_type[i8] = sub_mb_type;
1106
1107 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1108 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1109 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1110 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1111 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1112 if(ref[0] > 0)
1113 a= pack16to32(mv[0][0],mv[0][1]);
1114 if(ref[1] > 0)
1115 b= pack16to32(mv[1][0],mv[1][1]);
1116 }else{
1117 a= pack16to32(mv[0][0],mv[0][1]);
1118 b= pack16to32(mv[1][0],mv[1][1]);
1119 }
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1122 }
1123 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1124 int a=0, b=0;
1125
cec93959
LM
1126 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1127 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1128 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1129 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1130 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1131 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1132 if(ref[0] > 0)
d19f5acb 1133 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1134 if(ref[1] > 0)
d19f5acb 1135 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1136 }else{
d19f5acb
MN
1137 a= pack16to32(mv[0][0],mv[0][1]);
1138 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1139 }
d19f5acb
MN
1140 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1141 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1142 }else{
1143 for(i8=0; i8<4; i8++){
1144 const int x8 = i8&1;
1145 const int y8 = i8>>1;
115329f1 1146
5ad984c9
LM
1147 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1148 continue;
1149 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1150
5ad984c9
LM
1151 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1152 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1153 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1154 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1155
5ad984c9 1156 /* col_zero_flag */
2ccd25d0
MN
1157 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1158 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1159 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1160 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1161 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1162 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1163 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1164 if(ref[0] == 0)
1165 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1166 if(ref[1] == 0)
1167 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1168 }
1169 }else
5ad984c9 1170 for(i4=0; i4<4; i4++){
2ccd25d0 1171 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1172 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1173 if(ref[0] == 0)
1174 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1175 if(ref[1] == 0)
1176 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1177 }
1178 }
1179 }
1180 }
1181 }
1182 }else{ /* direct temporal mv pred */
5d18eaad
LM
1183 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1184 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1185 int ref_offset= 0;
5d18eaad 1186
cc615d2c 1187 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1188 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1189 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1190 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1191 }
48e025e5 1192 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1193 ref_offset += 16;
48e025e5 1194
cc615d2c
MN
1195 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1196 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1197 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1198
cc615d2c
MN
1199 for(i8=0; i8<4; i8++){
1200 const int x8 = i8&1;
1201 const int y8 = i8>>1;
1202 int ref0, scale;
1203 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1204
cc615d2c
MN
1205 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1206 continue;
1207 h->sub_mb_type[i8] = sub_mb_type;
1208
1209 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1210 if(IS_INTRA(mb_type_col[y8])){
1211 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1212 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1213 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1214 continue;
1215 }
1216
1217 ref0 = l1ref0[x8 + y8*b8_stride];
1218 if(ref0 >= 0)
f4d3382d 1219 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1220 else{
f4d3382d 1221 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1222 l1mv= l1mv1;
1223 }
1224 scale = dist_scale_factor[ref0];
1225 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1226
1227 {
1228 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1229 int my_col = (mv_col[1]<<y_shift)/2;
1230 int mx = (scale * mv_col[0] + 128) >> 8;
1231 int my = (scale * my_col + 128) >> 8;
1232 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1233 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1234 }
5d18eaad 1235 }
cc615d2c
MN
1236 return;
1237 }
5d18eaad
LM
1238
1239 /* one-to-one mv scaling */
1240
5ad984c9 1241 if(IS_16X16(*mb_type)){
fda51641
MN
1242 int ref, mv0, mv1;
1243
5ad984c9 1244 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1245 if(IS_INTRA(mb_type_col[0])){
fda51641 1246 ref=mv0=mv1=0;
5ad984c9 1247 }else{
f4d3382d
MN
1248 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1249 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1250 const int scale = dist_scale_factor[ref0];
8583bef8 1251 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1252 int mv_l0[2];
5d18eaad
LM
1253 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1254 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1255 ref= ref0;
1256 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1257 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1258 }
fda51641
MN
1259 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1260 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1261 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1262 }else{
1263 for(i8=0; i8<4; i8++){
1264 const int x8 = i8&1;
1265 const int y8 = i8>>1;
5d18eaad 1266 int ref0, scale;
bf4e3bd2 1267 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1268
5ad984c9
LM
1269 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1270 continue;
1271 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1272 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1273 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1274 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1275 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1276 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1277 continue;
1278 }
115329f1 1279
f4d3382d 1280 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1281 if(ref0 >= 0)
5d18eaad 1282 ref0 = map_col_to_list0[0][ref0];
8583bef8 1283 else{
f4d3382d 1284 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1285 l1mv= l1mv1;
1286 }
5d18eaad 1287 scale = dist_scale_factor[ref0];
115329f1 1288
5ad984c9 1289 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1290 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1291 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1292 int mx = (scale * mv_col[0] + 128) >> 8;
1293 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1294 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1295 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1296 }else
5ad984c9 1297 for(i4=0; i4<4; i4++){
2ccd25d0 1298 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1299 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1300 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1301 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1302 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1303 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1304 }
1305 }
1306 }
1307 }
1308}
1309
0da71265
MN
1310static inline void write_back_motion(H264Context *h, int mb_type){
1311 MpegEncContext * const s = &h->s;
0da71265
MN
1312 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1313 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1314 int list;
1315
2ea39252
LM
1316 if(!USES_LIST(mb_type, 0))
1317 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1318
3425501d 1319 for(list=0; list<h->list_count; list++){
0da71265 1320 int y;
53b19144 1321 if(!USES_LIST(mb_type, list))
5ad984c9 1322 continue;
115329f1 1323
0da71265
MN
1324 for(y=0; y<4; y++){
1325 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1326 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1327 }
9e528114 1328 if( h->pps.cabac ) {
e6e77eb6
LM
1329 if(IS_SKIP(mb_type))
1330 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1331 else
9e528114
LA
1332 for(y=0; y<4; y++){
1333 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1335 }
1336 }
53b19144
LM
1337
1338 {
191e8ca7 1339 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1340 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1341 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1342 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1343 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1344 }
1345 }
115329f1 1346
9f5c1037 1347 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1348 if(IS_8X8(mb_type)){
53b19144
LM
1349 uint8_t *direct_table = &h->direct_table[b8_xy];
1350 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1351 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1352 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1353 }
1354 }
0da71265
MN
1355}
1356
1357/**
1358 * Decodes a network abstraction layer unit.
1359 * @param consumed is the number of bytes used as input
1360 * @param length is the length of the array
3b66c4c5 1361 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1362 * @returns decoded bytes, might be src+1 if no escapes
0da71265 1363 */
30317501 1364static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1365 int i, si, di;
1366 uint8_t *dst;
24456882 1367 int bufidx;
0da71265 1368
bb270c08 1369// src[0]&0x80; //forbidden bit
0da71265
MN
1370 h->nal_ref_idc= src[0]>>5;
1371 h->nal_unit_type= src[0]&0x1F;
1372
1373 src++; length--;
115329f1 1374#if 0
0da71265
MN
1375 for(i=0; i<length; i++)
1376 printf("%2X ", src[i]);
1377#endif
e08715d3
MN
1378
1379#ifdef HAVE_FAST_UNALIGNED
1380# ifdef HAVE_FAST_64BIT
1381# define RS 7
1382 for(i=0; i+1<length; i+=9){
1383 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1384# else
1385# define RS 3
1386 for(i=0; i+1<length; i+=5){
1387 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1388# endif
1389 continue;
1390 if(i>0 && !src[i]) i--;
1391 while(src[i]) i++;
1392#else
1393# define RS 0
0da71265
MN
1394 for(i=0; i+1<length; i+=2){
1395 if(src[i]) continue;
1396 if(i>0 && src[i-1]==0) i--;
e08715d3 1397#endif
0da71265
MN
1398 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1399 if(src[i+2]!=3){
1400 /* startcode, so we must be past the end */
1401 length=i;
1402 }
1403 break;
1404 }
abb27cfb 1405 i-= RS;
0da71265
MN
1406 }
1407
1408 if(i>=length-1){ //no escaped 0
1409 *dst_length= length;
1410 *consumed= length+1; //+1 for the header
115329f1 1411 return src;
0da71265
MN
1412 }
1413
24456882 1414 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
d4369630 1415 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 1416 dst= h->rbsp_buffer[bufidx];
0da71265 1417
ac658be5
FOL
1418 if (dst == NULL){
1419 return NULL;
1420 }
1421
3b66c4c5 1422//printf("decoding esc\n");
0da71265 1423 si=di=0;
115329f1 1424 while(si<length){
0da71265
MN
1425 //remove escapes (very rare 1:2^22)
1426 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1427 if(src[si+2]==3){ //escape
1428 dst[di++]= 0;
1429 dst[di++]= 0;
1430 si+=3;
c8470cc1 1431 continue;
0da71265
MN
1432 }else //next start code
1433 break;
1434 }
1435
1436 dst[di++]= src[si++];
1437 }
1438
d4369630
AS
1439 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1440
0da71265
MN
1441 *dst_length= di;
1442 *consumed= si + 1;//+1 for the header
90b5b51e 1443//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1444 return dst;
1445}
1446
0da71265
MN
1447/**
1448 * identifies the exact end of the bitstream
1449 * @return the length of the trailing, or 0 if damaged
1450 */
30317501 1451static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1452 int v= *src;
1453 int r;
1454
a9c9a240 1455 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1456
1457 for(r=1; r<9; r++){
1458 if(v&1) return r;
1459 v>>=1;
1460 }
1461 return 0;
1462}
1463
1464/**
1412060e 1465 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1466 * @param qp quantization parameter
1467 */
239ea04c 1468static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1469#define stride 16
1470 int i;
1471 int temp[16]; //FIXME check if this is a good idea
1472 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1473 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1474
1475//memset(block, 64, 2*256);
1476//return;
1477 for(i=0; i<4; i++){
1478 const int offset= y_offset[i];
1479 const int z0= block[offset+stride*0] + block[offset+stride*4];
1480 const int z1= block[offset+stride*0] - block[offset+stride*4];
1481 const int z2= block[offset+stride*1] - block[offset+stride*5];
1482 const int z3= block[offset+stride*1] + block[offset+stride*5];
1483
1484 temp[4*i+0]= z0+z3;
1485 temp[4*i+1]= z1+z2;
1486 temp[4*i+2]= z1-z2;
1487 temp[4*i+3]= z0-z3;
1488 }
1489
1490 for(i=0; i<4; i++){
1491 const int offset= x_offset[i];
1492 const int z0= temp[4*0+i] + temp[4*2+i];
1493 const int z1= temp[4*0+i] - temp[4*2+i];
1494 const int z2= temp[4*1+i] - temp[4*3+i];
1495 const int z3= temp[4*1+i] + temp[4*3+i];
1496
1412060e 1497 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1498 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1499 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1500 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1501 }
1502}
1503
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; relies on the stride macro defined by the preceding function.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int k;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(k=0; k<4; k++){
        const DCTELEM * const in = block + y_offset[k];
        const int sum04  = in[stride*0] + in[stride*4];
        const int diff04 = in[stride*0] - in[stride*4];
        const int diff15 = in[stride*1] - in[stride*5];
        const int sum15  = in[stride*1] + in[stride*5];

        temp[4*k+0]= sum04 +sum15;
        temp[4*k+1]= diff04+diff15;
        temp[4*k+2]= diff04-diff15;
        temp[4*k+3]= sum04 -sum15;
    }

    for(k=0; k<4; k++){
        DCTELEM * const out = block + x_offset[k];
        const int sum02  = temp[4*0+k] + temp[4*2+k];
        const int diff02 = temp[4*0+k] - temp[4*2+k];
        const int diff13 = temp[4*1+k] - temp[4*3+k];
        const int sum13  = temp[4*1+k] + temp[4*3+k];

        out[stride*0 ]= (sum02  + sum13 )>>1;
        out[stride*2 ]= (diff02 + diff13)>>1;
        out[stride*8 ]= (diff02 - diff13)>>1;
        out[stride*10]= (sum02  - sum13 )>>1;
    }
}
#endif
1543
0da71265
MN
1544#undef xStride
1545#undef stride
1546
239ea04c 1547static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1548 const int stride= 16*2;
1549 const int xStride= 16;
1550 int a,b,c,d,e;
1551
1552 a= block[stride*0 + xStride*0];
1553 b= block[stride*0 + xStride*1];
1554 c= block[stride*1 + xStride*0];
1555 d= block[stride*1 + xStride*1];
1556
1557 e= a-b;
1558 a= a+b;
1559 b= c-d;
1560 c= c+d;
1561
239ea04c
LM
1562 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1563 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1564 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1565 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1566}
1567
#if 0
/**
 * Applies a 2x2 butterfly to the four chroma DC values in place
 * (currently compiled out). No scaling is applied.
 *
 * @param block coefficient array; the DC values sit at offsets 0, 16, 32, 48
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    const int v00= block[stride*0 + xStride*0];
    const int v01= block[stride*0 + xStride*1];
    const int v10= block[stride*1 + xStride*0];
    const int v11= block[stride*1 + xStride*1];
    const int top_sum = v00 + v01;
    const int top_diff= v00 - v01;
    const int bot_sum = v10 + v11;
    const int bot_diff= v10 - v11;

    block[stride*0 + xStride*0]= (top_sum  + bot_sum );
    block[stride*0 + xStride*1]= (top_diff + bot_diff);
    block[stride*1 + xStride*0]= (top_sum  - bot_sum );
    block[stride*1 + xStride*1]= (top_diff - bot_diff);
}
#endif
0da71265
MN
1590
1591/**
1592 * gets the chroma qp.
1593 */
4691a77d 1594static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1595 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1596}
1597
0da71265
MN
1598static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1599 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1600 int src_x_offset, int src_y_offset,
1601 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1602 MpegEncContext * const s = &h->s;
1603 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1604 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1605 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1606 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1607 uint8_t * src_cb, * src_cr;
1608 int extra_width= h->emu_edge_width;
1609 int extra_height= h->emu_edge_height;
0da71265
MN
1610 int emu=0;
1611 const int full_mx= mx>>2;
1612 const int full_my= my>>2;
fbd312fd 1613 const int pic_width = 16*s->mb_width;
0d43dd8c 1614 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1615
0da71265
MN
1616 if(mx&7) extra_width -= 3;
1617 if(my&7) extra_height -= 3;
115329f1
DB
1618
1619 if( full_mx < 0-extra_width
1620 || full_my < 0-extra_height
1621 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1622 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1623 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1624 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1625 emu=1;
1626 }
115329f1 1627
5d18eaad 1628 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1629 if(!square){
5d18eaad 1630 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1631 }
115329f1 1632
87352549 1633 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1634
0d43dd8c 1635 if(MB_FIELD){
5d18eaad 1636 // chroma offset when predicting from a field of opposite parity
2143b118 1637 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1638 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1639 }
1640 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1641 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1642
0da71265 1643 if(emu){
5d18eaad 1644 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1645 src_cb= s->edge_emu_buffer;
1646 }
5d18eaad 1647 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1648
1649 if(emu){
5d18eaad 1650 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1651 src_cr= s->edge_emu_buffer;
1652 }
5d18eaad 1653 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1654}
1655
9f2d1b4f 1656static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1657 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1658 int x_offset, int y_offset,
1659 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1660 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1661 int list0, int list1){
1662 MpegEncContext * const s = &h->s;
1663 qpel_mc_func *qpix_op= qpix_put;
1664 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1665
5d18eaad
LM
1666 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1667 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1668 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1669 x_offset += 8*s->mb_x;
0d43dd8c 1670 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1671
0da71265 1672 if(list0){
1924f3ce 1673 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1674 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1675 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1676 qpix_op, chroma_op);
1677
1678 qpix_op= qpix_avg;
1679 chroma_op= chroma_avg;
1680 }
1681
1682 if(list1){
1924f3ce 1683 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1684 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1685 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1686 qpix_op, chroma_op);
1687 }
1688}
1689
9f2d1b4f
LM
1690static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1691 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1692 int x_offset, int y_offset,
1693 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1694 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1695 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1696 int list0, int list1){
1697 MpegEncContext * const s = &h->s;
1698
5d18eaad
LM
1699 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1700 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1701 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1702 x_offset += 8*s->mb_x;
0d43dd8c 1703 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1704
9f2d1b4f
LM
1705 if(list0 && list1){
1706 /* don't optimize for luma-only case, since B-frames usually
1707 * use implicit weights => chroma too. */
1708 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1709 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1710 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1711 int refn0 = h->ref_cache[0][ scan8[n] ];
1712 int refn1 = h->ref_cache[1][ scan8[n] ];
1713
1714 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1715 dest_y, dest_cb, dest_cr,
1716 x_offset, y_offset, qpix_put, chroma_put);
1717 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1718 tmp_y, tmp_cb, tmp_cr,
1719 x_offset, y_offset, qpix_put, chroma_put);
1720
1721 if(h->use_weight == 2){
1722 int weight0 = h->implicit_weight[refn0][refn1];
1723 int weight1 = 64 - weight0;
5d18eaad
LM
1724 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1725 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1726 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1727 }else{
5d18eaad 1728 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1729 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1730 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1731 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1732 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1733 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1734 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1735 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1736 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1737 }
1738 }else{
1739 int list = list1 ? 1 : 0;
1740 int refn = h->ref_cache[list][ scan8[n] ];
1741 Picture *ref= &h->ref_list[list][refn];
1742 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1743 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1744 qpix_put, chroma_put);
1745
5d18eaad 1746 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1747 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1748 if(h->use_weight_chroma){
5d18eaad 1749 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1750 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1751 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1752 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1753 }
1754 }
1755}
1756
1757static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1758 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1759 int x_offset, int y_offset,
1760 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1761 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1762 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1763 int list0, int list1){
1764 if((h->use_weight==2 && list0 && list1
1765 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1766 || h->use_weight==1)
1767 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1768 x_offset, y_offset, qpix_put, chroma_put,
1769 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1770 else
1771 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1773}
1774
513fbd8e
LM
1775static inline void prefetch_motion(H264Context *h, int list){
1776 /* fetch pixels for estimated mv 4 macroblocks ahead
1777 * optimized for 64byte cache lines */
1778 MpegEncContext * const s = &h->s;
1779 const int refn = h->ref_cache[list][scan8[0]];
1780 if(refn >= 0){
1781 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1782 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1783 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1784 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1785 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1786 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1787 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1788 }
1789}
1790
0da71265
MN
1791static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1792 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1793 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1794 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1795 MpegEncContext * const s = &h->s;
64514ee8 1796 const int mb_xy= h->mb_xy;
0da71265 1797 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1798
0da71265 1799 assert(IS_INTER(mb_type));
115329f1 1800
513fbd8e
LM
1801 prefetch_motion(h, 0);
1802
0da71265
MN
1803 if(IS_16X16(mb_type)){
1804 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1805 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1806 &weight_op[0], &weight_avg[0],
0da71265
MN
1807 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808 }else if(IS_16X8(mb_type)){
1809 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1810 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1811 &weight_op[1], &weight_avg[1],
0da71265
MN
1812 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1813 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1815 &weight_op[1], &weight_avg[1],
0da71265
MN
1816 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1817 }else if(IS_8X16(mb_type)){
5d18eaad 1818 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1819 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1820 &weight_op[2], &weight_avg[2],
0da71265 1821 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1822 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1824 &weight_op[2], &weight_avg[2],
0da71265
MN
1825 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1826 }else{
1827 int i;
115329f1 1828
0da71265
MN
1829 assert(IS_8X8(mb_type));
1830
1831 for(i=0; i<4; i++){
1832 const int sub_mb_type= h->sub_mb_type[i];
1833 const int n= 4*i;
1834 int x_offset= (i&1)<<2;
1835 int y_offset= (i&2)<<1;
1836
1837 if(IS_SUB_8X8(sub_mb_type)){
1838 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1840 &weight_op[3], &weight_avg[3],
0da71265
MN
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842 }else if(IS_SUB_8X4(sub_mb_type)){
1843 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1844 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1845 &weight_op[4], &weight_avg[4],
0da71265
MN
1846 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1847 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1849 &weight_op[4], &weight_avg[4],
0da71265
MN
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1852 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1853 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1854 &weight_op[5], &weight_avg[5],
0da71265 1855 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1856 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1858 &weight_op[5], &weight_avg[5],
0da71265
MN
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 }else{
1861 int j;
1862 assert(IS_SUB_4X4(sub_mb_type));
1863 for(j=0; j<4; j++){
1864 int sub_x_offset= x_offset + 2*(j&1);
1865 int sub_y_offset= y_offset + (j&2);
1866 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1867 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1868 &weight_op[6], &weight_avg[6],
0da71265
MN
1869 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1870 }
1871 }
1872 }
1873 }
513fbd8e
LM
1874
1875 prefetch_motion(h, 1);
0da71265
MN
1876}
1877
98a6fff9 1878static av_cold void decode_init_vlc(void){
0da71265
MN
1879 static int done = 0;
1880
1881 if (!done) {
1882 int i;
910e3668 1883 int offset;
0da71265
MN
1884 done = 1;
1885
910e3668
AC
1886 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1887 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1888 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1889 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1890 &chroma_dc_coeff_token_bits[0], 1, 1,
1891 INIT_VLC_USE_NEW_STATIC);
0da71265 1892
910e3668 1893 offset = 0;
0da71265 1894 for(i=0; i<4; i++){
910e3668
AC
1895 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1896 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1897 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1898 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1899 &coeff_token_bits[i][0], 1, 1,
1900 INIT_VLC_USE_NEW_STATIC);
1901 offset += coeff_token_vlc_tables_size[i];
0da71265 1902 }
910e3668
AC
1903 /*
1904 * This is a one time safety check to make sure that
1905 * the packed static coeff_token_vlc table sizes
1906 * were initialized correctly.
1907 */
37d3e066 1908 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1909
1910 for(i=0; i<3; i++){
910e3668
AC
1911 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1912 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1913 init_vlc(&chroma_dc_total_zeros_vlc[i],
1914 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1915 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1916 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1917 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1918 }
1919 for(i=0; i<15; i++){
910e3668
AC
1920 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1921 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1922 init_vlc(&total_zeros_vlc[i],
1923 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1924 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1925 &total_zeros_bits[i][0], 1, 1,
1926 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1927 }
1928
1929 for(i=0; i<6; i++){
910e3668
AC
1930 run_vlc[i].table = run_vlc_tables[i];
1931 run_vlc[i].table_allocated = run_vlc_tables_size;
1932 init_vlc(&run_vlc[i],
1933 RUN_VLC_BITS, 7,
0da71265 1934 &run_len [i][0], 1, 1,
910e3668
AC
1935 &run_bits[i][0], 1, 1,
1936 INIT_VLC_USE_NEW_STATIC);
0da71265 1937 }
910e3668
AC
1938 run7_vlc.table = run7_vlc_table,
1939 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1940 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1941 &run_len [6][0], 1, 1,
910e3668
AC
1942 &run_bits[6][0], 1, 1,
1943 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1944 }
1945}
1946
0da71265 1947static void free_tables(H264Context *h){
7978debd 1948 int i;
afebe2f7 1949 H264Context *hx;
0da71265 1950 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1951 av_freep(&h->chroma_pred_mode_table);
1952 av_freep(&h->cbp_table);
9e528114
LA
1953 av_freep(&h->mvd_table[0]);
1954 av_freep(&h->mvd_table[1]);
5ad984c9 1955 av_freep(&h->direct_table);
0da71265
MN
1956 av_freep(&h->non_zero_count);
1957 av_freep(&h->slice_table_base);
1958 h->slice_table= NULL;
e5017ab8 1959
0da71265
MN
1960 av_freep(&h->mb2b_xy);
1961 av_freep(&h->mb2b8_xy);
9f2d1b4f 1962
afebe2f7
1963 for(i = 0; i < h->s.avctx->thread_count; i++) {
1964 hx = h->thread_context[i];
1965 if(!hx) continue;
1966 av_freep(&hx->top_borders[1]);
1967 av_freep(&hx->top_borders[0]);
1968 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 1969 }
0da71265
MN
1970}
1971
239ea04c
LM
1972static void init_dequant8_coeff_table(H264Context *h){
1973 int i,q,x;
548a1c8a 1974 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
1975 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1976 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1977
1978 for(i=0; i<2; i++ ){
1979 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1980 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1981 break;
1982 }
1983
1984 for(q=0; q<52; q++){
d9ec210b
DP
1985 int shift = div6[q];
1986 int idx = rem6[q];
239ea04c 1987 for(x=0; x<64; x++)
548a1c8a
LM
1988 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1989 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1990 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
1991 }
1992 }
1993}
1994
1995static void init_dequant4_coeff_table(H264Context *h){
1996 int i,j,q,x;
ab2e3e2c 1997 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
1998 for(i=0; i<6; i++ ){
1999 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2000 for(j=0; j<i; j++){
2001 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2002 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2003 break;
2004 }
2005 }
2006 if(j<i)
2007 continue;
2008
2009 for(q=0; q<52; q++){
d9ec210b
DP
2010 int shift = div6[q] + 2;
2011 int idx = rem6[q];
239ea04c 2012 for(x=0; x<16; x++)
ab2e3e2c
LM
2013 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2014 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2015 h->pps.scaling_matrix4[i][x]) << shift;
2016 }
2017 }
2018}
2019
2020static void init_dequant_tables(H264Context *h){
2021 int i,x;
2022 init_dequant4_coeff_table(h);
2023 if(h->pps.transform_8x8_mode)
2024 init_dequant8_coeff_table(h);
2025 if(h->sps.transform_bypass){
2026 for(i=0; i<6; i++)
2027 for(x=0; x<16; x++)
2028 h->dequant4_coeff[i][0][x] = 1<<6;
2029 if(h->pps.transform_8x8_mode)
2030 for(i=0; i<2; i++)
2031 for(x=0; x<64; x++)
2032 h->dequant8_coeff[i][0][x] = 1<<6;
2033 }
2034}
2035
2036
0da71265
MN
2037/**
2038 * allocates tables.
3b66c4c5 2039 * needs width/height
0da71265
MN
2040 */
2041static int alloc_tables(H264Context *h){
2042 MpegEncContext * const s = &h->s;
7bc9090a 2043 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2044 int x,y;
0da71265
MN
2045
2046 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2047
53c05b1e 2048 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2049 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2050 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2051
7526ade2
MN
2052 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2053 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2054 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2055 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2056
b735aeea 2057 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2058 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2059
a55f20bd
LM
2060 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2061 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2062 for(y=0; y<s->mb_height; y++){
2063 for(x=0; x<s->mb_width; x++){
7bc9090a 2064 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2065 const int b_xy = 4*x + 4*y*h->b_stride;
2066 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2067
0da71265
MN
2068 h->mb2b_xy [mb_xy]= b_xy;
2069 h->mb2b8_xy[mb_xy]= b8_xy;
2070 }
2071 }
9f2d1b4f 2072
9c6221ae
GV
2073 s->obmc_scratchpad = NULL;
2074
56edbd81
LM
2075 if(!h->dequant4_coeff[0])
2076 init_dequant_tables(h);
2077
0da71265
MN
2078 return 0;
2079fail:
2080 free_tables(h);
2081 return -1;
2082}
2083
afebe2f7
2084/**
2085 * Mimic alloc_tables(), but for every context thread.
2086 */
2087static void clone_tables(H264Context *dst, H264Context *src){
2088 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2089 dst->non_zero_count = src->non_zero_count;
2090 dst->slice_table = src->slice_table;
2091 dst->cbp_table = src->cbp_table;
2092 dst->mb2b_xy = src->mb2b_xy;
2093 dst->mb2b8_xy = src->mb2b8_xy;
2094 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2095 dst->mvd_table[0] = src->mvd_table[0];
2096 dst->mvd_table[1] = src->mvd_table[1];
2097 dst->direct_table = src->direct_table;
2098
afebe2f7
2099 dst->s.obmc_scratchpad = NULL;
2100 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2101}
2102
2103/**
2104 * Init context
2105 * Allocate buffers which are not shared amongst multiple threads.
2106 */
2107static int context_init(H264Context *h){
afebe2f7
2108 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2109 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2110
afebe2f7
2111 return 0;
2112fail:
2113 return -1; // free_tables will clean up for us
2114}
2115
98a6fff9 2116static av_cold void common_init(H264Context *h){
0da71265 2117 MpegEncContext * const s = &h->s;
0da71265
MN
2118
2119 s->width = s->avctx->width;
2120 s->height = s->avctx->height;
2121 s->codec_id= s->avctx->codec->id;
115329f1 2122
c92a30bb 2123 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2124
239ea04c 2125 h->dequant_coeff_pps= -1;
9a41c2c7 2126 s->unrestricted_mv=1;
0da71265 2127 s->decode=1; //FIXME
56edbd81 2128
a5805aa9
MN
2129 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2130
56edbd81
LM
2131 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2132 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2133}
2134
98a6fff9 2135static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2136 H264Context *h= avctx->priv_data;
2137 MpegEncContext * const s = &h->s;
2138
3edcacde 2139 MPV_decode_defaults(s);
115329f1 2140
0da71265
MN
2141 s->avctx = avctx;
2142 common_init(h);
2143
2144 s->out_format = FMT_H264;
2145 s->workaround_bugs= avctx->workaround_bugs;
2146
2147 // set defaults
0da71265 2148// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2149 s->quarter_sample = 1;
0da71265 2150 s->low_delay= 1;
7a9dba3c
MN
2151
2152 if(avctx->codec_id == CODEC_ID_SVQ3)
2153 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2154 else
1d42f410 2155 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2156
c2212338 2157 decode_init_vlc();
115329f1 2158
26165f99
MR
2159 if(avctx->extradata_size > 0 && avctx->extradata &&
2160 *(char *)avctx->extradata == 1){
4770b1b4
RT
2161 h->is_avc = 1;
2162 h->got_avcC = 0;
26165f99
MR
2163 } else {
2164 h->is_avc = 0;
4770b1b4
RT
2165 }
2166
afebe2f7 2167 h->thread_context[0] = h;
18c7be65 2168 h->outputed_poc = INT_MIN;
e4b8f1fa 2169 h->prev_poc_msb= 1<<16;
0da71265
MN
2170 return 0;
2171}
2172
af8aa846 2173static int frame_start(H264Context *h){
0da71265
MN
2174 MpegEncContext * const s = &h->s;
2175 int i;
2176
af8aa846
MN
2177 if(MPV_frame_start(s, s->avctx) < 0)
2178 return -1;
0da71265 2179 ff_er_frame_start(s);
3a22d7fa
JD
2180 /*
2181 * MPV_frame_start uses pict_type to derive key_frame.
2182 * This is incorrect for H.264; IDR markings must be used.
1412060e 2183 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2184 * See decode_nal_units().
2185 */
2186 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2187
2188 assert(s->linesize && s->uvlinesize);
2189
2190 for(i=0; i<16; i++){
2191 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2192 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2193 }
2194 for(i=0; i<4; i++){
2195 h->block_offset[16+i]=
2196 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2197 h->block_offset[24+16+i]=
2198 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2199 }
2200
934b0821
LM
2201 /* can't be in alloc_tables because linesize isn't known there.
2202 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2203 for(i = 0; i < s->avctx->thread_count; i++)
2204 if(!h->thread_context[i]->s.obmc_scratchpad)
2205 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2206
2207 /* some macroblocks will be accessed before they're available */
afebe2f7 2208 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2209 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2210
0da71265 2211// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2212
1412060e 2213 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2214 // that if we break out due to an error it can be released automatically
2215 // in the next MPV_frame_start().
2216 // SVQ3 as well as most other codecs have only last/next/current and thus
2217 // get released even with set reference, besides SVQ3 and others do not
2218 // mark frames as reference later "naturally".
2219 if(s->codec_id != CODEC_ID_SVQ3)
2220 s->current_picture_ptr->reference= 0;
357282c6
MN
2221
2222 s->current_picture_ptr->field_poc[0]=
2223 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2224 assert(s->current_picture_ptr->long_ref==0);
357282c6 2225
af8aa846 2226 return 0;
0da71265
MN
2227}
2228
93cc10fa 2229static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2230 MpegEncContext * const s = &h->s;
2231 int i;
5f7f9719
MN
2232 int step = 1;
2233 int offset = 1;
2234 int uvoffset= 1;
2235 int top_idx = 1;
2236 int skiplast= 0;
115329f1 2237
53c05b1e
MN
2238 src_y -= linesize;
2239 src_cb -= uvlinesize;
2240 src_cr -= uvlinesize;
2241
5f7f9719
MN
2242 if(!simple && FRAME_MBAFF){
2243 if(s->mb_y&1){
2244 offset = MB_MBAFF ? 1 : 17;
2245 uvoffset= MB_MBAFF ? 1 : 9;
2246 if(!MB_MBAFF){
2247 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2248 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2249 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2250 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2251 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2252 }
2253 }
2254 }else{
2255 if(!MB_MBAFF){
2256 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2257 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2258 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2259 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2260 }
2261 skiplast= 1;
2262 }
2263 offset =
2264 uvoffset=
2265 top_idx = MB_MBAFF ? 0 : 1;
2266 }
2267 step= MB_MBAFF ? 2 : 1;
2268 }
2269
3b66c4c5 2270 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2271 // and the line above the bottom macroblock
5f7f9719
MN
2272 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2273 for(i=1; i<17 - skiplast; i++){
2274 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2275 }
115329f1 2276
5f7f9719
MN
2277 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2278 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2279
87352549 2280 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2281 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2282 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2283 for(i=1; i<9 - skiplast; i++){
2284 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2285 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2286 }
5f7f9719
MN
2287 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2288 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2289 }
2290}
2291
/**
 * Exchange (or restore) the pixels bordering a macroblock with the copies
 * kept in h->left_border and h->top_borders.
 *
 * The three source pointers are moved back by one row and one column, so the
 * code below touches the column to the left of and the row above the
 * macroblock. hl_decode_mb_internal() calls this with xchg=1 before intra
 * prediction and with xchg=0 afterwards, in both cases only when
 * h->deblocking_filter is set.
 *
 * @param h          decoder context holding the saved borders
 * @param src_y      luma destination of the current macroblock
 * @param src_cb     Cb destination of the current macroblock
 * @param src_cr     Cr destination of the current macroblock
 * @param linesize   luma stride used for this macroblock
 * @param uvlinesize chroma stride used for this macroblock
 * @param xchg       if non-zero the picture and the saved border are swapped;
 *                   if zero only the picture is overwritten from the value
 *                   that was in the saved border (see the XCHG macro)
 * @param simple     non-zero skips the FRAME_MBAFF and CODEC_FLAG_GRAY checks
 */
93cc10fa 2292static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2293 MpegEncContext * const s = &h->s;
2294 int temp8, i;
2295 uint64_t temp64;
b69378e2
2296 int deblock_left;
2297 int deblock_top;
2298 int mb_xy;
5f7f9719
MN
2299 int step = 1;
2300 int offset = 1;
2301 int uvoffset= 1;
2302 int top_idx = 1;
2303
/* In MBAFF frames the position inside left_border, the stride through it and
 * the top_borders row depend on the macroblock row parity and on MB_MBAFF. */
2304 if(!simple && FRAME_MBAFF){
2305 if(s->mb_y&1){
2306 offset = MB_MBAFF ? 1 : 17;
2307 uvoffset= MB_MBAFF ? 1 : 9;
2308 }else{
2309 offset =
2310 uvoffset=
2311 top_idx = MB_MBAFF ? 0 : 1;
2312 }
2313 step= MB_MBAFF ? 2 : 1;
2314 }
b69378e2
2315
/* With deblocking_filter == 2 a border is only handled when the neighbour
 * has the same slice_table entry; otherwise only the picture edges are
 * excluded. */
2316 if(h->deblocking_filter == 2) {
64514ee8 2317 mb_xy = h->mb_xy;
b69378e2
2318 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2319 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2320 } else {
2321 deblock_left = (s->mb_x > 0);
6c805007 2322 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2323 }
53c05b1e
MN
2324
/* Point at the top-left neighbour pixel of each plane. */
2325 src_y -= linesize + 1;
2326 src_cb -= uvlinesize + 1;
2327 src_cr -= uvlinesize + 1;
2328
/* XCHG(a,b,t,xchg): b always receives the old value of a; a receives the old
 * value of b only when xchg is non-zero. The macro is not #undef'ed in this
 * function. */
2329#define XCHG(a,b,t,xchg)\
2330t= a;\
2331if(xchg)\
2332 a= b;\
2333b= t;
d89dc06a
LM
2334
/* Luma left column; row 0 (the top-left corner) is skipped when the top
 * border is not handled. The entry after the loop (i == 16) is always
 * swapped. */
2335 if(deblock_left){
5f7f9719
MN
2336 for(i = !deblock_top; i<16; i++){
2337 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2338 }
5f7f9719 2339 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2340 }
2341
/* Luma top row, handled 8 bytes at a time; the third access covers the
 * first 8 bytes above the macroblock to the right, if there is one. */
2342 if(deblock_top){
5f7f9719
MN
2343 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2344 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2345 if(s->mb_x+1 < s->mb_width){
5f7f9719 2346 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2347 }
53c05b1e 2348 }
53c05b1e 2349
/* Same for both chroma planes; Cb starts at left_border offset 34 and Cr
 * 18 entries later, and they use bytes 16..23 / 24..31 of top_borders. */
87352549 2350 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2351 if(deblock_left){
5f7f9719
MN
2352 for(i = !deblock_top; i<8; i++){
2353 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2354 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2355 }
5f7f9719
MN
2356 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2357 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2358 }
2359 if(deblock_top){
5f7f9719
MN
2360 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2361 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2362 }
53c05b1e
MN
2363 }
2364}
2365
/**
 * Reconstruct the current macroblock into s->current_picture.
 *
 * Steps, in order: compute the destination pointers, select the line sizes
 * and block offsets (doubled strides for field macroblocks), then either copy
 * PCM samples straight from h->mb or run intra prediction / hl_motion()
 * followed by adding the luma and chroma residuals, clear h->mb, and finally
 * save the macroblock borders and run the loop filter when
 * h->deblocking_filter is set.
 *
 * @param h      decoder context
 * @param simple when non-zero, every branch guarded by "!simple" (field
 *               macroblocks, intra PCM, transform bypass, MBAFF filtering)
 *               is skipped, the CODEC_FLAG_GRAY test is not evaluated and
 *               the codec is assumed to be H.264
 */
5a6a6cc7 2366static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2367 MpegEncContext * const s = &h->s;
2368 const int mb_x= s->mb_x;
2369 const int mb_y= s->mb_y;
64514ee8 2370 const int mb_xy= h->mb_xy;
0da71265
MN
2371 const int mb_type= s->current_picture.mb_type[mb_xy];
2372 uint8_t *dest_y, *dest_cb, *dest_cr;
2373 int linesize, uvlinesize /*dct_offset*/;
2374 int i;
6867a90b 2375 int *block_offset = &h->block_offset[0];
41e4055b
MN
2376 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2377 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
36940eca 2378 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2379 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2380
/* Top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma). */
6120a343
MN
2381 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2382 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2383 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 2384
a957c27b
LM
2385 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2386 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2387
/* Field macroblock: double the strides, use the second block_offset table
 * (from entry 24) and, for odd mb_y, move the destination up by 15 luma /
 * 7 chroma lines. */
bd91fee3 2388 if (!simple && MB_FIELD) {
5d18eaad
LM
2389 linesize = h->mb_linesize = s->linesize * 2;
2390 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2391 block_offset = &h->block_offset[24];
1412060e 2392 if(mb_y&1){ //FIXME move out of this function?
0da71265 2393 dest_y -= s->linesize*15;
6867a90b
LLL
2394 dest_cb-= s->uvlinesize*7;
2395 dest_cr-= s->uvlinesize*7;
0da71265 2396 }
5d18eaad
LM
/* In MBAFF frames, rewrite the cached reference indices of every used list
 * to (16+ref)^(mb_y&1). */
2397 if(FRAME_MBAFF) {
2398 int list;
3425501d 2399 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2400 if(!USES_LIST(mb_type, list))
2401 continue;
2402 if(IS_16X16(mb_type)){
2403 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2404 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2405 }else{
2406 for(i=0; i<16; i+=4){
5d18eaad
LM
2407 int ref = h->ref_cache[list][scan8[i]];
2408 if(ref >= 0)
1710856c 2409 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2410 }
2411 }
2412 }
2413 }
0da71265 2414 } else {
5d18eaad
LM
2415 linesize = h->mb_linesize = s->linesize;
2416 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2417// dct_offset = s->linesize * 16;
2418 }
115329f1 2419
/* Intra PCM: the samples are copied row by row out of h->mb (16 luma rows,
 * then 8 rows each for Cb and Cr). */
bd91fee3 2420 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2421 for (i=0; i<16; i++) {
2422 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2423 }
c1708e8d
MN
2424 for (i=0; i<8; i++) {
2425 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2426 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2427 }
e7e09b49
LLL
2428 } else {
2429 if(IS_INTRA(mb_type)){
/* Swap in the saved borders for the duration of intra prediction; the
 * matching call with xchg=0 follows after the prediction. */
5f7f9719 2430 if(h->deblocking_filter)
93cc10fa 2431 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2432
87352549 2433 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2434 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2435 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2436 }
0da71265 2437
e7e09b49 2438 if(IS_INTRA4x4(mb_type)){
bd91fee3 2439 if(simple || !s->encoding){
/* 8x8 transform: four 8x8 blocks, prediction followed by the residual. */
43efd19a 2440 if(IS_8x8DCT(mb_type)){
1eb96035
MN
2441 if(transform_bypass){
2442 idct_dc_add =
2443 idct_add = s->dsp.add_pixels8;
dae006d7 2444 }else{
1eb96035
MN
2445 idct_dc_add = s->dsp.h264_idct8_dc_add;
2446 idct_add = s->dsp.h264_idct8_add;
2447 }
43efd19a
LM
2448 for(i=0; i<16; i+=4){
2449 uint8_t * const ptr= dest_y + block_offset[i];
2450 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
2451 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2452 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2453 }else{
ac0623b2
MN
2454 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2455 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2456 (h->topright_samples_available<<i)&0x4000, linesize);
2457 if(nnz){
2458 if(nnz == 1 && h->mb[i*16])
2459 idct_dc_add(ptr, h->mb + i*16, linesize);
2460 else
2461 idct_add (ptr, h->mb + i*16, linesize);
2462 }
41e4055b 2463 }
43efd19a 2464 }
1eb96035
MN
/* 4x4 transform: sixteen 4x4 blocks. */
2465 }else{
2466 if(transform_bypass){
2467 idct_dc_add =
2468 idct_add = s->dsp.add_pixels4;
2469 }else{
2470 idct_dc_add = s->dsp.h264_idct_dc_add;
2471 idct_add = s->dsp.h264_idct_add;
2472 }
aebb5d6d
MN
2473 for(i=0; i<16; i++){
2474 uint8_t * const ptr= dest_y + block_offset[i];
2475 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 2476
aebb5d6d
MN
2477 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2478 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2479 }else{
2480 uint8_t *topright;
2481 int nnz, tr;
/* Only these two prediction modes receive a top-right pointer. When the
 * top-right samples are unavailable, the last sample of the row above is
 * replicated into the four bytes of tr instead.
 * NOTE(review): ptr[] is promoted to int before the multiplication, so a
 * sample value >= 128 makes the product exceed INT_MAX (signed overflow);
 * an unsigned constant would avoid that - confirm before changing. */
2482 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2483 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2484 assert(mb_y || linesize <= block_offset[i]);
2485 if(!topright_avail){
2486 tr= ptr[3 - linesize]*0x01010101;
2487 topright= (uint8_t*) &tr;
2488 }else
2489 topright= ptr + 4 - linesize;
ac0623b2 2490 }else
aebb5d6d
MN
2491 topright= NULL;
2492
2493 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2494 nnz = h->non_zero_count_cache[ scan8[i] ];
2495 if(nnz){
2496 if(is_h264){
2497 if(nnz == 1 && h->mb[i*16])
2498 idct_dc_add(ptr, h->mb + i*16, linesize);
2499 else
2500 idct_add (ptr, h->mb + i*16, linesize);
2501 }else
2502 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2503 }
ac0623b2 2504 }
41e4055b 2505 }
8b82a956 2506 }
0da71265 2507 }
/* Intra macroblocks that are not INTRA4x4: 16x16 prediction, then the luma
 * DC transform (not applied for H.264 when transform_bypass is set). */
e7e09b49 2508 }else{
c92a30bb 2509 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2510 if(is_h264){
36940eca 2511 if(!transform_bypass)
93f0c0a4 2512 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2513 }else
e7e09b49 2514 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2515 }
5f7f9719 2516 if(h->deblocking_filter)
93cc10fa 2517 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* Non-intra macroblock: hl_motion() is only called on the H.264 path. */
bd91fee3 2518 }else if(is_h264){
e7e09b49 2519 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2520 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2521 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2522 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2523 }
e7e09b49
LLL
2524
2525
/* Luma residual for everything except INTRA4x4, whose residual was already
 * added block by block above. */
2526 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2527 if(is_h264){
ef9d1d15 2528 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
2529 if(transform_bypass){
2530 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
2531 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2532 }else{
2533 for(i=0; i<16; i++){
2534 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 2535 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2536 }
2fd1f0e0
MN
2537 }
2538 }else{
2539 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 2540 }
49c084a7 2541 }else if(h->cbp&15){
2fd1f0e0 2542 if(transform_bypass){
0a8ca22f 2543 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 2544 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 2545 for(i=0; i<16; i+=di){
62bc966f 2546 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 2547 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2548 }
ef9d1d15 2549 }
2fd1f0e0
MN
2550 }else{
2551 if(IS_8x8DCT(mb_type)){
2552 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2553 }else{
2554 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2555 }
2556 }
4704097a 2557 }
e7e09b49
LLL
2558 }else{
2559 for(i=0; i<16; i++){
2560 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2561 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2562 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2563 }
4704097a 2564 }
0da71265
MN
2565 }
2566 }
0da71265 2567
/* Chroma residual, only when cbp has one of the 0x30 bits set. Blocks 16..19
 * go to dest[0] (Cb) and 20..23 to dest[1] (Cr), selected by (i&4)>>2. */
621561cd 2568 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2569 uint8_t *dest[2] = {dest_cb, dest_cr};
2570 if(transform_bypass){
96465b90
MN
2571 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2572 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2573 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2574 }else{
c25ac15a 2575 idct_add = s->dsp.add_pixels4;
96465b90
MN
2576 for(i=16; i<16+8; i++){
2577 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2578 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2579 }
2580 }
ef9d1d15 2581 }else{
4691a77d
2582 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2583 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 2584 if(is_h264){
c25ac15a
MN
2585 idct_add = s->dsp.h264_idct_add;
2586 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
2587 for(i=16; i<16+8; i++){
2588 if(h->non_zero_count_cache[ scan8[i] ])
2589 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2590 else if(h->mb[i*16])
2591 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2592 }
aebb5d6d
MN
2593 }else{
2594 for(i=16; i<16+8; i++){
2595 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2596 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2597 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2598 }
e7e09b49 2599 }
4704097a 2600 }
0da71265
MN
2601 }
2602 }
2603 }
c212fb0c
MN
/* The coefficient buffer is cleared whenever it may have been written. */
2604 if(h->cbp || IS_INTRA(mb_type))
2605 s->dsp.clear_blocks(h->mb);
2606
/* Loop filter: save the unfiltered borders first, refill the caches for the
 * filter, recompute both chroma QPs from the stored qscale and filter the
 * macroblock (filter_mb for MBAFF frames, filter_mb_fast otherwise). */
53c05b1e 2607 if(h->deblocking_filter) {
5f7f9719
MN
2608 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2609 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2610 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2611 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2612 if (!simple && FRAME_MBAFF) {
5f7f9719 2613 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2614 } else {
3e20143e 2615 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2616 }
53c05b1e 2617 }
0da71265
MN
2618}
2619
0da71265 2620/**
bd91fee3
AS
2621 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2622 */
2623static void hl_decode_mb_simple(H264Context *h){
2624 hl_decode_mb_internal(h, 1);
2625}
2626
2627/**
2628 * Process a macroblock; this handles edge cases, such as interlacing.
2629 */
2630static void av_noinline hl_decode_mb_complex(H264Context *h){
2631 hl_decode_mb_internal(h, 0);
2632}
2633
2634static void hl_decode_mb(H264Context *h){
2635 MpegEncContext * const s = &h->s;
64514ee8 2636 const int mb_xy= h->mb_xy;
bd91fee3 2637 const int mb_type= s->current_picture.mb_type[mb_xy];
1dd488e9 2638 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2639
fedec603 2640 if(ENABLE_H264_ENCODER && !s->decode)
bd91fee3
AS
2641 return;
2642
2643 if (is_complex)
2644 hl_decode_mb_complex(h);
2645 else hl_decode_mb_simple(h);
2646}
2647
2143b118 2648static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2649 int i;
2650 for (i = 0; i < 4; ++i) {
2143b118 2651 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2652 pic->data[i] += pic->linesize[i];
2143b118 2653 pic->reference = parity;
11cc1d8c
JD
2654 pic->linesize[i] *= 2;
2655 }
2879c75f 2656 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2657}
2658
2659static int split_field_copy(Picture *dest, Picture *src,
2660 int parity, int id_add){
2661 int match = !!(src->reference & parity);
2662
2663 if (match) {
2664 *dest = *src;
d4f7d838 2665 if(parity != PICT_FRAME){
b3e93fd4
MN
2666 pic_as_field(dest, parity);
2667 dest->pic_id *= 2;
2668 dest->pic_id += id_add;
d4f7d838 2669 }
11cc1d8c
JD
2670 }
2671
2672 return match;
2673}
2674
d4f7d838
MN
2675static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2676 int i[2]={0};
2677 int index=0;
11cc1d8c 2678
d4f7d838
MN
2679 while(i[0]<len || i[1]<len){
2680 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2681 i[0]++;
2682 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2683 i[1]++;
2684 if(i[0] < len){
2685 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2686 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2687 }
2688 if(i[1] < len){
2689 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2690 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2691 }
2692 }
2693
d4f7d838 2694 return index;
11cc1d8c
JD
2695}
2696
d4f7d838
MN
2697static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2698 int i, best_poc;
2699 int out_i= 0;
11cc1d8c 2700
d4f7d838
MN
2701 for(;;){
2702 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2703
d4f7d838
MN
2704 for(i=0; i<len; i++){
2705 const int poc= src[i]->poc;
2706 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2707 best_poc= poc;
2708 sorted[out_i]= src[i];
2709 }
2710 }
2711 if(best_poc == (dir ? INT_MIN : INT_MAX))
2712 break;
2713 limit= sorted[out_i++]->poc - dir;
2714 }
2715 return out_i;
11cc1d8c
JD
2716}
2717
bd91fee3 2718/**
0da71265
MN
2719 * fills the default_ref_list.
2720 */
2721static int fill_default_ref_list(H264Context *h){
2722 MpegEncContext * const s = &h->s;
d4f7d838 2723 int i, len;
115329f1 2724
9f5c1037 2725 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2726 Picture *sorted[32];
2727 int cur_poc, list;
2728 int lens[2];
11cc1d8c 2729
d4f7d838
MN
2730 if(FIELD_PICTURE)
2731 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2732 else
2733 cur_poc= s->current_picture_ptr->poc;
086acdd5 2734
d4f7d838
MN
2735 for(list= 0; list<2; list++){
2736 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2737 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2738 assert(len<=32);
2739 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2740 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2741 assert(len<=32);
086acdd5 2742
d4f7d838
MN
2743 if(len < h->ref_count[list])
2744 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2745 lens[list]= len;
086acdd5
JD
2746 }
2747
d4f7d838
MN
2748 if(lens[0] == lens[1] && lens[1] > 1){
2749 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2750 if(i == lens[0])
2751 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2752 }
086acdd5 2753 }else{
d4f7d838
MN
2754 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2755 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2756 assert(len <= 32);
2757 if(len < h->ref_count[0])
2758 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2759 }
827c91bf
LLL
2760#ifdef TRACE
2761 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2762 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2763 }
9f5c1037 2764 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2765 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2766 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2767 }
2768 }
2769#endif
0da71265
MN
2770 return 0;
2771}
2772
827c91bf
LLL
2773static void print_short_term(H264Context *h);
2774static void print_long_term(H264Context *h);
2775
949da388
JD
2776/**
2777 * Extract structure information about the picture described by pic_num in
2778 * the current decoding context (frame or field). Note that pic_num is
2779 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2780 * @param pic_num picture number for which to extract structure information
2781 * @param structure one of PICT_XXX describing structure of picture
2782 * with pic_num
2783 * @return frame number (short term) or long term index of picture
2784 * described by pic_num
2785 */
2786static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2787 MpegEncContext * const s = &h->s;
2788
2789 *structure = s->picture_structure;
2790 if(FIELD_PICTURE){
2791 if (!(pic_num & 1))
2792 /* opposite field */
2793 *structure ^= PICT_FRAME;
2794 pic_num >>= 1;
2795 }
2796
2797 return pic_num;
2798}
2799
0da71265
MN
2800static int decode_ref_pic_list_reordering(H264Context *h){
2801 MpegEncContext * const s = &h->s;
949da388 2802 int list, index, pic_structure;
115329f1 2803
827c91bf
LLL
2804 print_short_term(h);
2805 print_long_term(h);
115329f1 2806
3425501d 2807 for(list=0; list<h->list_count; list++){
0da71265
MN
2808 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2809
2810 if(get_bits1(&s->gb)){
2811 int pred= h->curr_pic_num;
0da71265
MN
2812
2813 for(index=0; ; index++){
88e7a4d1
MN
2814 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2815 unsigned int pic_id;
0da71265 2816 int i;
2f944356 2817 Picture *ref = NULL;
115329f1
DB
2818
2819 if(reordering_of_pic_nums_idc==3)
0bc42cad 2820 break;
115329f1 2821
0da71265 2822 if(index >= h->ref_count[list]){
9b879566 2823 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2824 return -1;
2825 }
115329f1 2826
0da71265
MN
2827 if(reordering_of_pic_nums_idc<3){
2828 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2829 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2830 int frame_num;
0da71265 2831
03d3cab8 2832 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2833 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2834 return -1;
2835 }
2836
2837 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2838 else pred+= abs_diff_pic_num;
2839 pred &= h->max_pic_num - 1;
115329f1 2840
949da388
JD
2841 frame_num = pic_num_extract(h, pred, &pic_structure);
2842
0d175622
MN
2843 for(i= h->short_ref_count-1; i>=0; i--){
2844 ref = h->short_ref[i];
949da388 2845 assert(ref->reference);
0d175622 2846 assert(!ref->long_ref);
6edac8e1 2847 if(
af8c5e08
MN
2848 ref->frame_num == frame_num &&
2849 (ref->reference & pic_structure)
6edac8e1 2850 )
0da71265
MN
2851 break;
2852 }
0d175622 2853 if(i>=0)
949da388 2854 ref->pic_id= pred;
0da71265 2855 }else{
949da388 2856 int long_idx;
0da71265 2857 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2858
2859 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2860
2861 if(long_idx>31){
88e7a4d1
MN
2862 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2863 return -1;
2864 }
949da388
JD
2865 ref = h->long_ref[long_idx];
2866 assert(!(ref && !ref->reference));
af8c5e08 2867 if(ref && (ref->reference & pic_structure)){
ac658be5 2868 ref->pic_id= pic_id;
ac658be5
FOL
2869 assert(ref->long_ref);
2870 i=0;
2871 }else{
2872 i=-1;
2873 }
0da71265
MN
2874 }
2875
0d315f28 2876 if (i < 0) {
9b879566 2877 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2878 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2879 } else {
2880 for(i=index; i+1<h->ref_count[list]; i++){
2881 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2882 break;
21be92bf
MN
2883 }
2884 for(; i > index; i--){
2885 h->ref_list[list][i]= h->ref_list[list][i-1];
2886 }
0d175622 2887 h->ref_list[list][index]= *ref;
949da388 2888 if (FIELD_PICTURE){
2143b118 2889 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2890 }
0da71265 2891 }
0bc42cad 2892 }else{
9b879566 2893 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2894 return -1;
2895 }
2896 }
2897 }
0da71265 2898 }
3425501d 2899 for(list=0; list<h->list_count; list++){
6ab87211 2900 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2901 if(!h->ref_list[list][index].data[0]){
2902 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2903 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2904 }
6ab87211 2905 }
6ab87211 2906 }
115329f1 2907
115329f1 2908 return 0;
0da71265
MN
2909}
2910
91c58c94 2911static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2912 int list, i, j;
3425501d 2913 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2914 for(i=0; i<h->ref_count[list]; i++){
2915 Picture *frame = &h->ref_list[list][i];
2916 Picture *field = &h->ref_list[list][16+2*i];
2917 field[0] = *frame;
2918 for(j=0; j<3; j++)
2919 field[0].linesize[j] <<= 1;
2143b118 2920 field[0].reference = PICT_TOP_FIELD;
078f42dd 2921 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2922 field[1] = field[0];
2923 for(j=0; j<3; j++)
2924 field[1].data[j] += frame->linesize[j];
2143b118 2925 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2926 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2927
2928 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2929 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2930 for(j=0; j<2; j++){
2931 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2932 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2933 }
2934 }
2935 }
2936 for(j=0; j<h->ref_count[1]; j++){
2937 for(i=0; i<h->ref_count[0]; i++)
2938 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2939 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2940 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2941 }
2942}
2943
0da71265
MN
2944static int pred_weight_table(H264Context *h){
2945 MpegEncContext * const s = &h->s;
2946 int list, i;
9f2d1b4f 2947 int luma_def, chroma_def;
115329f1 2948
9f2d1b4f
LM
2949 h->use_weight= 0;
2950 h->use_weight_chroma= 0;
0da71265
MN
2951 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2952 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2953 luma_def = 1<<h->luma_log2_weight_denom;
2954 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2955
2956 for(list=0; list<2; list++){
2957 for(i=0; i<h->ref_count[list]; i++){
2958 int luma_weight_flag, chroma_weight_flag;
115329f1 2959
0da71265
MN
2960 luma_weight_flag= get_bits1(&s->gb);
2961 if(luma_weight_flag){
2962 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2963 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f
LM
2964 if( h->luma_weight[list][i] != luma_def
2965 || h->luma_offset[list][i] != 0)
2966 h->use_weight= 1;
2967 }else{
2968 h->luma_weight[list][i]= luma_def;
2969 h->luma_offset[list][i]= 0;
0da71265
MN
2970 }
2971
0af6967e 2972 if(CHROMA){
fef744d4
MN
2973 chroma_weight_flag= get_bits1(&s->gb);
2974 if(chroma_weight_flag){
2975 int j;
2976 for(j=0; j<2; j++){
2977 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2978 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2979 if( h->chroma_weight[list][i][j] != chroma_def
2980 || h->chroma_offset[list][i][j] != 0)
2981 h->use_weight_chroma= 1;
2982 }
2983 }else{
2984 int j;
2985 for(j=0; j<2; j++){
2986 h->chroma_weight[list][i][j]= chroma_def;
2987 h->chroma_offset[list][i][j]= 0;
2988 }
0da71265
MN
2989 }
2990 }
2991 }
9f5c1037 2992 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 2993 }
9f2d1b4f 2994 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
2995 return 0;
2996}
2997
9f2d1b4f
LM
2998static void implicit_weight_table(H264Context *h){
2999 MpegEncContext * const s = &h->s;
9f2d1b4f
LM
3000 int ref0, ref1;
3001 int cur_poc = s->current_picture_ptr->poc;
3002
3003 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3004 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3005 h->use_weight= 0;
3006 h->use_weight_chroma= 0;
3007 return;
3008 }
3009
3010 h->use_weight= 2;
3011 h->use_weight_chroma= 2;
3012 h->luma_log2_weight_denom= 5;
3013 h->chroma_log2_weight_denom= 5;
3014
9f2d1b4f
LM
3015 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3016 int poc0 = h->ref_list[0][ref0].poc;
3017 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3018 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3019 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3020 if(td){
f66e4f5f 3021 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3022 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3023 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3024 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3025 h->implicit_weight[ref0][ref1] = 32;
3026 else
3027 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3028 }else
3029 h->implicit_weight[ref0][ref1] = 32;
3030 }
3031 }
3032}
3033
8fd57a66
JD
3034/**
3035 * Mark a picture as no longer needed for reference. The refmask
3036 * argument allows unreferencing of individual fields or the whole frame.
3037 * If the picture becomes entirely unreferenced, but is being held for
3038 * display purposes, it is marked as such.
3039 * @param refmask mask of fields to unreference; the mask is bitwise
3040 * anded with the reference marking of pic
3041 * @return non-zero if pic becomes entirely unreferenced (except possibly
3042 * for display purposes) zero if one of the fields remains in
3043 * reference
3044 */
3045static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3046 int i;
8fd57a66
JD
3047 if (pic->reference &= refmask) {
3048 return 0;
3049 } else {
79f4494a
MN
3050 for(i = 0; h->delayed_pic[i]; i++)
3051 if(pic == h->delayed_pic[i]){
3052 pic->reference=DELAYED_PIC_REF;
3053 break;
3054 }
8fd57a66
JD
3055 return 1;
3056 }
4e4d983e
LM
3057}
3058
0da71265 3059/**
5175b937 3060 * instantaneous decoder refresh.
0da71265
MN
3061 */
3062static void idr(H264Context *h){
4e4d983e 3063 int i;
0da71265 3064
dc032f33 3065 for(i=0; i<16; i++){
9c0e4624 3066 remove_long(h, i, 0);
0da71265 3067 }
849b9cef 3068 assert(h->long_ref_count==0);
0da71265
MN
3069
3070 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3071 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3072 h->short_ref[i]= NULL;
3073 }
3074 h->short_ref_count=0;
a149c1a5 3075 h->prev_frame_num= 0;
80f8e035
MN
3076 h->prev_frame_num_offset= 0;
3077 h->prev_poc_msb=
3078 h->prev_poc_lsb= 0;
0da71265
MN
3079}
3080
7c33ad19
LM
3081/* forget old pics after a seek */
3082static void flush_dpb(AVCodecContext *avctx){
3083 H264Context *h= avctx->priv_data;
3084 int i;
64b9d48f 3085 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3086 if(h->delayed_pic[i])
3087 h->delayed_pic[i]->reference= 0;
7c33ad19 3088 h->delayed_pic[i]= NULL;
285b570f 3089 }
df8a7dff 3090 h->outputed_poc= INT_MIN;
7c33ad19 3091 idr(h);
ca159196
MR
3092 if(h->s.current_picture_ptr)
3093 h->s.current_picture_ptr->reference= 0;
12d96de3 3094 h->s.first_field= 0;
e240f898 3095 ff_mpeg_flush(avctx);
7c33ad19
LM
3096}
3097
0da71265 3098/**
47e112f8
JD
3099 * Find a Picture in the short term reference list by frame number.
3100 * @param frame_num frame number to search for
3101 * @param idx the index into h->short_ref where returned picture is found
3102 * undefined if no picture found.
3103 * @return pointer to the found picture, or NULL if no pic with the provided
3104 * frame number is found
0da71265 3105 */
47e112f8 3106static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce 3107 MpegEncContext * const s = &h->s;
0da71265 3108 int i;
115329f1 3109
0da71265
MN
3110 for(i=0; i<h->short_ref_count; i++){
3111 Picture *pic= h->short_ref[i];
1924f3ce 3112 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3113 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
47e112f8
JD
3114 if(pic->frame_num == frame_num) {
3115 *idx = i;
0da71265
MN
3116 return pic;
3117 }
3118 }
3119 return NULL;
3120}
3121
3122/**
47e112f8
JD
3123 * Remove a picture from the short term reference list by its index in
3124 * that list. This does no checking on the provided index; it is assumed
3125 * to be valid. Other list entries are shifted down.
3126 * @param i index into h->short_ref of picture to remove.
3127 */
3128static void remove_short_at_index(H264Context *h, int i){
e1f15d38 3129 assert(i >= 0 && i < h->short_ref_count);
47e112f8
JD
3130 h->short_ref[i]= NULL;
3131 if (--h->short_ref_count)
3132 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3133}
3134
3135/**
3136 *
3137 * @return the removed picture or NULL if an error occurs
3138 */
d9e32422 3139static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
47e112f8
JD
3140 MpegEncContext * const s = &h->s;
3141 Picture *pic;
3142 int i;
3143
3144 if(s->avctx->debug&FF_DEBUG_MMCO)
3145 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3146
3147 pic = find_short(h, frame_num, &i);
d9e32422
MN
3148 if (pic){
3149 if(unreference_pic(h, pic, ref_mask))
47e112f8 3150 remove_short_at_index(h, i);
d9e32422 3151 }
47e112f8
JD
3152
3153 return pic;
3154}
3155
3156/**
24231e4c 3157 * Remove a picture from the long term reference list by its index in
1cea5d0d 3158 * that list.
3b66c4c5 3159 * @return the removed picture or NULL if an error occurs
0da71265 3160 */
9c0e4624 3161static Picture * remove_long(H264Context *h, int i, int ref_mask){
0da71265
MN
3162 Picture *pic;
3163
0da71265 3164 pic= h->long_ref[i];
1cea5d0d 3165 if (pic){
9c0e4624
MN
3166 if(unreference_pic(h, pic, ref_mask)){
3167 assert(h->long_ref[i]->long_ref == 1);
3168 h->long_ref[i]->long_ref= 0;
3169 h->long_ref[i]= NULL;
3170 h->long_ref_count--;
3171 }
1cea5d0d 3172 }
0da71265
MN
3173
3174 return pic;
3175}
3176