Only warn about "Invalid and inefficient vfw-avi packed B frames" once.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265
MN
22/**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265 34#include "golomb.h"
626464fb 35#include "rectangle.h"
0da71265 36
e5017ab8 37#include "cabac.h"
52cb7981
JD
38#ifdef ARCH_X86
39#include "i386/h264_i386.h"
40#endif
e5017ab8 41
2848ce84 42//#undef NDEBUG
0da71265
MN
43#include <assert.h>
44
2ddcf84b
JD
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
50
0da71265 51static VLC coeff_token_vlc[4];
910e3668
AC
52static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
54
0da71265 55static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
56static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
58
59static VLC total_zeros_vlc[15];
910e3668
AC
60static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61static const int total_zeros_vlc_tables_size = 512;
62
0da71265 63static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
64static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
66
67static VLC run_vlc[6];
910e3668
AC
68static VLC_TYPE run_vlc_tables[6][8][2];
69static const int run_vlc_tables_size = 8;
70
0da71265 71static VLC run7_vlc;
910e3668
AC
72static VLC_TYPE run7_vlc_table[96][2];
73static const int run7_vlc_table_size = 96;
0da71265 74
8b82a956
MN
75static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 77static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 78static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 79static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 80
849f1035 81static av_always_inline uint32_t pack16to32(int a, int b){
377ec888
MN
82#ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
84#else
85 return (a&0xFFFF) + (b<<16);
86#endif
87}
88
/** Lookup table: rem6[q] == q % 6 for q in 0..51. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
92
/** Lookup table: div6[q] == q / 6 for q in 0..51. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
96
143d7f14
PK
/**
 * Index sets used by fill_caches() to pick values out of the left neighbour.
 *
 * fill_caches() selects one row:
 *  [0] by default,
 *  [1] current MB is frame coded, left pair is field coded, bottom MB of the pair,
 *  [2] current MB is frame coded, left pair is field coded, top MB of the pair,
 *  [3] current MB is field coded, left pair is frame coded.
 *
 * Within a row, entries 0..3 are used as indices into the left MB's
 * intra4x4_pred_mode / non_zero_count arrays and as row offsets into its
 * motion data; entries 4..7 are used as further non_zero_count indices.
 */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
acd8d10f 103
70abb407 104static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 105 MpegEncContext * const s = &h->s;
64514ee8 106 const int mb_xy= h->mb_xy;
0da71265
MN
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
143d7f14 109 int * left_block;
02f7695b 110 int topleft_partition= -1;
0da71265
MN
111 int i;
112
36e097bc
JD
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
114
717b1733 115 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
117 return;
118
2cab6401
DB
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 121
6867a90b
LLL
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 125 left_block = left_block_options[0];
5d18eaad 126 if(FRAME_MBAFF){
6867a90b
LLL
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
a9c9a240 137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
6867a90b
LLL
138 if (bottom
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
141 ) {
142 top_xy -= s->mb_stride;
143 }
144 if (bottom
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
147 ) {
148 topleft_xy -= s->mb_stride;
02f7695b
LM
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
1412060e 151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 152 topleft_partition = 0;
6867a90b
LLL
153 }
154 if (bottom
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
157 ) {
158 topright_xy -= s->mb_stride;
159 }
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
163 if (bottom) {
143d7f14 164 left_block = left_block_options[1];
6867a90b 165 } else {
143d7f14 166 left_block= left_block_options[2];
6867a90b
LLL
167 }
168 } else {
169 left_xy[1] += s->mb_stride;
143d7f14 170 left_block = left_block_options[3];
6867a90b
LLL
171 }
172 }
0da71265
MN
173 }
174
826de46e
LLL
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 178 if(for_deblock){
717b1733
LM
179 topleft_type = 0;
180 topright_type = 0;
b735aeea
MN
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 184
e248cb60 185 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 186 int list;
3425501d 187 for(list=0; list<h->list_count; list++){
e248cb60
MN
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 191 if(USES_LIST(mb_type,list)){
191e8ca7 192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
195 ref += h->b8_stride;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
198 }
199 }
200 }
46f2f05f
MN
201 }else{
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
207
208 if(IS_INTRA(mb_type)){
faa7e394 209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
210 h->topleft_samples_available=
211 h->top_samples_available=
0da71265
MN
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
214
faa7e394 215 if(!(top_type & type_mask)){
0da71265
MN
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
219 }
d1d10e91
MN
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
faa7e394 222 if(!(left_type[0] & type_mask)){
d1d10e91
MN
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
225 }
faa7e394 226 if(!(left_type[1] & type_mask)){
d1d10e91
MN
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
229 }
230 }else{
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
faa7e394 234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
237 }
238 }
239 }else{
faa7e394 240 if(!(left_type[0] & type_mask)){
0da71265
MN
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
243 }
244 }
115329f1 245
faa7e394 246 if(!(topleft_type & type_mask))
0da71265 247 h->topleft_samples_available&= 0x7FFF;
115329f1 248
faa7e394 249 if(!(topright_type & type_mask))
0da71265 250 h->topright_samples_available&= 0xFBFF;
115329f1 251
0da71265
MN
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 }else{
259 int pred;
faa7e394 260 if(!(top_type & type_mask))
0da71265 261 pred= -1;
6fbcaaa0
LLL
262 else{
263 pred= 2;
0da71265
MN
264 }
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 }
270 for(i=0; i<2; i++){
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 }else{
275 int pred;
faa7e394 276 if(!(left_type[i] & type_mask))
0da71265 277 pred= -1;
6fbcaaa0
LLL
278 else{
279 pred= 2;
0da71265
MN
280 }
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
283 }
284 }
285 }
286 }
29671011 287 }
115329f1
DB
288
289
0da71265 290/*
115329f1
DB
2910 . T T. T T T T
2921 L . .L . . . .
2932 L . .L . . . .
2943 . T TL . . . .
2954 L . .L . . . .
2965 L . .. . . . .
0da71265 297*/
1412060e 298//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 299 if(top_type){
6867a90b
LLL
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 304
6867a90b 305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 307
6867a90b 308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 310
0da71265 311 }else{
115329f1 312 h->non_zero_count_cache[4+8*0]=
0da71265
MN
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
115329f1 316
0da71265
MN
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
115329f1 319
0da71265 320 h->non_zero_count_cache[1+8*3]=
3981c385 321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 322
0da71265 323 }
826de46e 324
6867a90b
LLL
325 for (i=0; i<2; i++) {
326 if(left_type[i]){
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 331 }else{
115329f1
DB
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
336 }
337 }
338
339 if( h->pps.cabac ) {
340 // top_cbp
341 if(top_type) {
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
344 h->top_cbp = 0x1C0;
345 } else {
346 h->top_cbp = 0;
347 }
348 // left_cbp
349 if (left_type[0]) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
352 h->left_cbp = 0x1C0;
353 } else {
354 h->left_cbp = 0;
355 }
356 if (left_type[0]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
358 }
359 if (left_type[1]) {
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 361 }
0da71265 362 }
6867a90b 363
0da71265 364#if 1
e2e5894a 365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 366 int list;
3425501d 367 for(list=0; list<h->list_count; list++){
e2e5894a 368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
373 }*/
5ad984c9 374 continue;
0da71265
MN
375 }
376 h->mv_cache_clean[list]= 0;
115329f1 377
53b19144 378 if(USES_LIST(top_type, list)){
0da71265
MN
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
389 }else{
115329f1
DB
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
395 }
396
4672503d
LM
397 for(i=0; i<2; i++){
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
406 }else{
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 }
0da71265
MN
412 }
413
0281d325 414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
415 continue;
416
53b19144 417 if(USES_LIST(topleft_type, list)){
02f7695b
LM
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
422 }else{
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
425 }
115329f1 426
53b19144 427 if(USES_LIST(topright_type, list)){
e2e5894a
LM
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
432 }else{
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
435 }
e2e5894a 436
ae08a563 437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 438 continue;
115329f1
DB
439
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 443 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
450
451 if( h->pps.cabac ) {
452 /* XXX beurk, Load mvd */
53b19144 453 if(USES_LIST(top_type, list)){
9e528114
LA
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
459 }else{
115329f1
DB
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
464 }
53b19144 465 if(USES_LIST(left_type[0], list)){
9e528114
LA
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
469 }else{
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
472 }
53b19144 473 if(USES_LIST(left_type[1], list)){
9e528114
LA
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
477 }else{
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
480 }
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 486
9f5c1037 487 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
489
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
496 }else{
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
498 }
115329f1 499
5d18eaad
LM
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
504 else
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
506
507 if(IS_DIRECT(left_type[1]))
5ad984c9 508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
511 else
5ad984c9 512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
513 }
514 }
515
516 if(FRAME_MBAFF){
517#define MAP_MVS\
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
528 if(MB_FIELD){
529#define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
534 }
535 MAP_MVS
536#undef MAP_F2F
537 }else{
538#define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 543 }
5d18eaad
LM
544 MAP_MVS
545#undef MAP_F2F
5ad984c9 546 }
9e528114 547 }
0da71265 548 }
0da71265
MN
549 }
550#endif
43efd19a
LM
551
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
553}
554
555static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 556 const int mb_xy= h->mb_xy;
0da71265
MN
557
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565}
566
567/**
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
569 */
570static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 int i;
115329f1 575
0da71265
MN
576 if(!(h->top_samples_available&0x8000)){
577 for(i=0; i<4; i++){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
579 if(status<0){
9b879566 580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
581 return -1;
582 } else if(status){
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
584 }
585 }
586 }
115329f1 587
d1d10e91
MN
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 590 for(i=0; i<4; i++){
d1d10e91 591 if(!(h->left_samples_available&mask[i])){
0da71265
MN
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
593 if(status<0){
9b879566 594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 }
d1d10e91 599 }
0da71265
MN
600 }
601 }
602
603 return 0;
604} //FIXME cleanup like next
605
606/**
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
608 */
609static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 613
43ff0714 614 if(mode > 6U) {
5175b937 615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 616 return -1;
5175b937 617 }
115329f1 618
0da71265
MN
619 if(!(h->top_samples_available&0x8000)){
620 mode= top[ mode ];
621 if(mode<0){
9b879566 622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
623 return -1;
624 }
625 }
115329f1 626
d1d10e91 627 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 628 mode= left[ mode ];
d1d10e91
MN
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
631 }
0da71265 632 if(mode<0){
9b879566 633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 634 return -1;
115329f1 635 }
0da71265
MN
636 }
637
638 return mode;
639}
640
641/**
642 * gets the predicted intra4x4 prediction mode.
643 */
644static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
649
a9c9a240 650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
651
652 if(min<0) return DC_PRED;
653 else return min;
654}
655
656static inline void write_back_non_zero_count(H264Context *h){
64514ee8 657 const int mb_xy= h->mb_xy;
0da71265 658
6867a90b
LLL
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 666
6867a90b 667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 670
6867a90b 671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
674}
675
676/**
1412060e 677 * gets the predicted number of non-zero coefficients.
0da71265
MN
678 * @param n block index
679 */
680static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
684 int i= left + top;
115329f1 685
0da71265
MN
686 if(i<64) i= (i+1)>>1;
687
a9c9a240 688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
689
690 return i&31;
691}
692
1924f3ce
MN
693static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 695 MpegEncContext *s = &h->s;
1924f3ce 696
5d18eaad
LM
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
699 if(FRAME_MBAFF){
191e8ca7 700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
701 const int16_t *mv;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
704
705 if(!MB_FIELD
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 712 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
720 }
721 }
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 if(!MB_FIELD
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
728 }
729 if(MB_FIELD
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 && i >= scan8[0]+8){
1412060e 732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
734 }
735 }
736#undef SET_DIAG_MV
737 }
738
1924f3ce
MN
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
741 return topright_ref;
742 }else{
a9c9a240 743 tprintf(s->avctx, "topright MV not available\n");
95c26348 744
1924f3ce
MN
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
747 }
748}
749
0da71265
MN
750/**
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
756 */
757static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
763 const int16_t * C;
764 int diagonal_ref, match_count;
765
0da71265 766 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 767
0da71265 768/* mv_cache
115329f1 769 B . . A T T T T
0da71265
MN
770 U . . L . . , .
771 U . . L . . . .
772 U . . L . . , .
773 . . . L . . . .
774*/
1924f3ce
MN
775
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
783 if(left_ref==ref){
784 *mx= A[0];
115329f1 785 *my= A[1];
1924f3ce
MN
786 }else if(top_ref==ref){
787 *mx= B[0];
115329f1 788 *my= B[1];
0da71265 789 }else{
1924f3ce 790 *mx= C[0];
115329f1 791 *my= C[1];
0da71265
MN
792 }
793 }else{
1924f3ce 794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 795 *mx= A[0];
115329f1 796 *my= A[1];
0da71265 797 }else{
1924f3ce
MN
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
0da71265 800 }
0da71265 801 }
115329f1 802
a9c9a240 803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
804}
805
806/**
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
811 */
812static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
813 if(n==0){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816
a9c9a240 817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 818
0da71265
MN
819 if(top_ref == ref){
820 *mx= B[0];
821 *my= B[1];
822 return;
823 }
824 }else{
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 827
a9c9a240 828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
829
830 if(left_ref == ref){
831 *mx= A[0];
832 *my= A[1];
833 return;
834 }
835 }
836
837 //RARE
838 pred_motion(h, n, 4, list, ref, mx, my);
839}
840
841/**
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
846 */
847static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
848 if(n==0){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 851
a9c9a240 852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
853
854 if(left_ref == ref){
855 *mx= A[0];
856 *my= A[1];
857 return;
858 }
859 }else{
1924f3ce
MN
860 const int16_t * C;
861 int diagonal_ref;
862
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 864
a9c9a240 865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 866
115329f1 867 if(diagonal_ref == ref){
0da71265
MN
868 *mx= C[0];
869 *my= C[1];
870 return;
871 }
0da71265
MN
872 }
873
874 //RARE
875 pred_motion(h, n, 2, list, ref, mx, my);
876}
877
878static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881
a9c9a240 882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
883
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
115329f1 887
0da71265
MN
888 *mx = *my = 0;
889 return;
890 }
115329f1 891
0da71265
MN
892 pred_motion(h, 0, 4, 0, 0, mx, my);
893
894 return;
895}
896
8b1fd554
MN
897static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
901 return 256;
902 }else{
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
906 }
907}
908
5ad984c9 909static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 912 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
913 int i, field;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 919 }
8b1fd554
MN
920
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 923 }
5ad984c9 924}
f4d3382d
MN
925
926static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
933
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
936
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
940
941 if (!interl)
942 poc |= 3;
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
945
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
950 if(rfield == field)
951 map[list][old_ref] = cur_ref;
952 break;
953 }
954 }
955 }
956 }
957}
958
2f944356
LM
959static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
bbc78fb4 963 int list, j, field;
f4d3382d
MN
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
aa617518 966
2f944356 967 for(list=0; list<2; list++){
2879c75f 968 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 969 for(j=0; j<h->ref_count[list]; j++)
42de393d 970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 971 }
aa617518 972
7762cc3d 973 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 976 }
aa617518 977
48e025e5 978 cur->mbaff= FRAME_MBAFF;
aa617518 979
9701840b 980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 981 return;
aa617518 982
2f944356 983 for(list=0; list<2; list++){
f4d3382d
MN
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
987 }
988}
5ad984c9
LM
989
990static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
d00eac6c
MN
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
995 int mb_type_col[2];
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
5ad984c9 998 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 999 unsigned int sub_mb_type;
5ad984c9
LM
1000 int i8, i4;
1001
5d18eaad 1002#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
1003
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1010 b8_stride = 0;
60c9b24d 1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1014 }
1015 goto single_col;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1021 b8_stride *= 3;
1022 b4_stride *= 6;
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1026 && !is_b8x8){
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1029 }else{
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1032 }
1033 }else{ // AFR/FR -> AFR/FR
1034single_col:
1035 mb_type_col[0] =
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1045 }else{
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1048 }
d00eac6c 1049 }
5ad984c9 1050 }
5ad984c9 1051
7d54ecc9
MN
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1056 if(!b8_stride){
1057 if(s->mb_y&1){
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
1062 }
d00eac6c 1063 }
115329f1 1064
5ad984c9
LM
1065 if(h->direct_spatial_mv_pred){
1066 int ref[2];
1067 int mv[2][2];
1068 int list;
1069
5d18eaad
LM
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1071
5ad984c9
LM
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1077 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1080 if(ref[list] < 0)
1081 ref[list] = -1;
1082 }
1083
1084 if(ref[0] < 0 && ref[1] < 0){
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1088 }else{
1089 for(list=0; list<2; list++){
1090 if(ref[list] >= 0)
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1092 else
1093 mv[list][0] = mv[list][1] = 0;
1094 }
1095 }
1096
1097 if(ref[1] < 0){
50b3ab0f
LM
1098 if(!is_b8x8)
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1101 }else if(ref[0] < 0){
50b3ab0f
LM
1102 if(!is_b8x8)
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1105 }
1106
d00eac6c 1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1108 for(i8=0; i8<4; i8++){
1109 int x8 = i8&1;
1110 int y8 = i8>>1;
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1113 int a=0, b=0;
1114
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1116 continue;
1117 h->sub_mb_type[i8] = sub_mb_type;
1118
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1121 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1124 if(ref[0] > 0)
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 if(ref[1] > 0)
1127 b= pack16to32(mv[1][0],mv[1][1]);
1128 }else{
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1131 }
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1134 }
1135 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1136 int a=0, b=0;
1137
cec93959
LM
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1140 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1143 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1144 if(ref[0] > 0)
d19f5acb 1145 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1146 if(ref[1] > 0)
d19f5acb 1147 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1148 }else{
d19f5acb
MN
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1151 }
d19f5acb
MN
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1154 }else{
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
115329f1 1158
5ad984c9
LM
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1160 continue;
1161 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1162
5ad984c9
LM
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1167
5ad984c9 1168 /* col_zero_flag */
2ccd25d0
MN
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1171 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1173 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1176 if(ref[0] == 0)
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 if(ref[1] == 0)
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1180 }
1181 }else
5ad984c9 1182 for(i4=0; i4<4; i4++){
2ccd25d0 1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1185 if(ref[0] == 0)
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1187 if(ref[1] == 0)
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1189 }
1190 }
1191 }
1192 }
1193 }
1194 }else{ /* direct temporal mv pred */
5d18eaad
LM
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1197 int ref_offset= 0;
5d18eaad 1198
cc615d2c 1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1203 }
48e025e5 1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1205 ref_offset += 16;
48e025e5 1206
cc615d2c
MN
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1210
cc615d2c
MN
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1214 int ref0, scale;
1215 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1216
cc615d2c
MN
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 continue;
1219 h->sub_mb_type[i8] = sub_mb_type;
1220
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1226 continue;
1227 }
1228
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1230 if(ref0 >= 0)
f4d3382d 1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1232 else{
f4d3382d 1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1234 l1mv= l1mv1;
1235 }
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1238
1239 {
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1246 }
5d18eaad 1247 }
cc615d2c
MN
1248 return;
1249 }
5d18eaad
LM
1250
1251 /* one-to-one mv scaling */
1252
5ad984c9 1253 if(IS_16X16(*mb_type)){
fda51641
MN
1254 int ref, mv0, mv1;
1255
5ad984c9 1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1257 if(IS_INTRA(mb_type_col[0])){
fda51641 1258 ref=mv0=mv1=0;
5ad984c9 1259 }else{
f4d3382d
MN
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1262 const int scale = dist_scale_factor[ref0];
8583bef8 1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1264 int mv_l0[2];
5d18eaad
LM
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1267 ref= ref0;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1270 }
fda51641
MN
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1274 }else{
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
5d18eaad 1278 int ref0, scale;
bf4e3bd2 1279 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1280
5ad984c9
LM
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1282 continue;
1283 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1285 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1289 continue;
1290 }
115329f1 1291
f4d3382d 1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1293 if(ref0 >= 0)
5d18eaad 1294 ref0 = map_col_to_list0[0][ref0];
8583bef8 1295 else{
f4d3382d 1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1297 l1mv= l1mv1;
1298 }
5d18eaad 1299 scale = dist_scale_factor[ref0];
115329f1 1300
5ad984c9 1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1302 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1308 }else
5ad984c9 1309 for(i4=0; i4<4; i4++){
2ccd25d0 1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1316 }
1317 }
1318 }
1319 }
1320}
1321
0da71265
MN
1322static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
0da71265
MN
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1326 int list;
1327
2ea39252
LM
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1330
3425501d 1331 for(list=0; list<h->list_count; list++){
0da71265 1332 int y;
53b19144 1333 if(!USES_LIST(mb_type, list))
5ad984c9 1334 continue;
115329f1 1335
0da71265
MN
1336 for(y=0; y<4; y++){
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1339 }
9e528114 1340 if( h->pps.cabac ) {
e6e77eb6
LM
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1343 else
9e528114
LA
1344 for(y=0; y<4; y++){
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1347 }
1348 }
53b19144
LM
1349
1350 {
191e8ca7 1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1356 }
1357 }
115329f1 1358
9f5c1037 1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1360 if(IS_8X8(mb_type)){
53b19144
LM
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1365 }
1366 }
0da71265
MN
1367}
1368
1369/**
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
3b66c4c5 1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1374 * @returns decoded bytes, might be src+1 if no escapes
0da71265 1375 */
30317501 1376static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1377 int i, si, di;
1378 uint8_t *dst;
24456882 1379 int bufidx;
0da71265 1380
bb270c08 1381// src[0]&0x80; //forbidden bit
0da71265
MN
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1384
1385 src++; length--;
115329f1 1386#if 0
0da71265
MN
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
1389#endif
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1394 if(src[i+2]!=3){
1395 /* startcode, so we must be past the end */
1396 length=i;
1397 }
1398 break;
1399 }
1400 }
1401
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
115329f1 1405 return src;
0da71265
MN
1406 }
1407
24456882
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
0da71265 1411
ac658be5
FOL
1412 if (dst == NULL){
1413 return NULL;
1414 }
1415
3b66c4c5 1416//printf("decoding esc\n");
0da71265 1417 si=di=0;
115329f1 1418 while(si<length){
0da71265
MN
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1422 dst[di++]= 0;
1423 dst[di++]= 0;
1424 si+=3;
c8470cc1 1425 continue;
0da71265
MN
1426 }else //next start code
1427 break;
1428 }
1429
1430 dst[di++]= src[si++];
1431 }
1432
1433 *dst_length= di;
1434 *consumed= si + 1;//+1 for the header
90b5b51e 1435//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1436 return dst;
1437}
1438
0da71265
MN
1439/**
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
1442 */
30317501 1443static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1444 int v= *src;
1445 int r;
1446
a9c9a240 1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1448
1449 for(r=1; r<9; r++){
1450 if(v&1) return r;
1451 v>>=1;
1452 }
1453 return 0;
1454}
1455
1456/**
1412060e 1457 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1458 * @param qp quantization parameter
1459 */
239ea04c 1460static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1461#define stride 16
1462 int i;
1463 int temp[16]; //FIXME check if this is a good idea
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1466
1467//memset(block, 64, 2*256);
1468//return;
1469 for(i=0; i<4; i++){
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
1475
1476 temp[4*i+0]= z0+z3;
1477 temp[4*i+1]= z1+z2;
1478 temp[4*i+2]= z1-z2;
1479 temp[4*i+3]= z0-z3;
1480 }
1481
1482 for(i=0; i<4; i++){
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1488
1412060e 1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1493 }
1494}
1495
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; relies on the "stride" macro being defined.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    for(i=0; i<4; i++){
        const DCTELEM * const in= block + y_offset[i];
        const int sum04 = in[stride*0] + in[stride*4];
        const int dif04 = in[stride*0] - in[stride*4];
        const int dif15 = in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04+sum15;
        temp[4*i+1]= dif04+dif15;
        temp[4*i+2]= dif04-dif15;
        temp[4*i+3]= sum04-sum15;
    }

    for(i=0; i<4; i++){
        DCTELEM * const out= block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int dif02 = temp[4*0+i] - temp[4*2+i];
        const int dif13 = temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
1535
0da71265
MN
1536#undef xStride
1537#undef stride
1538
239ea04c 1539static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1540 const int stride= 16*2;
1541 const int xStride= 16;
1542 int a,b,c,d,e;
1543
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1548
1549 e= a-b;
1550 a= a+b;
1551 b= c-d;
1552 c= c+d;
1553
239ea04c
LM
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1558}
1559
e5017ab8 1560#if 0
0da71265
MN
1561static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1564 int a,b,c,d,e;
1565
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1570
1571 e= a-b;
1572 a= a+b;
1573 b= c-d;
1574 c= c+d;
1575
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1580}
e5017ab8 1581#endif
0da71265
MN
1582
1583/**
1584 * gets the chroma qp.
1585 */
4691a77d 1586static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1587 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1588}
1589
2cab6401 1590//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
0afd2a92
DB
1591//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1592static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
0da71265
MN
1593 int i;
1594 const int * const quant_table= quant_coeff[qscale];
1595 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1596 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597 const unsigned int threshold2= (threshold1<<1);
1598 int last_non_zero;
1599
0afd2a92 1600 if(separate_dc){
0da71265
MN
1601 if(qscale<=18){
1602 //avoid overflows
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1606
1607 int level= block[0]*quant_coeff[qscale+18][0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1609 if(level>0){
1610 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1611 block[0]= level;
1612 }else{
1613 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1614 block[0]= -level;
1615 }
1616// last_non_zero = i;
1617 }else{
1618 block[0]=0;
1619 }
1620 }else{
1621 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623 const unsigned int dc_threshold2= (dc_threshold1<<1);
1624
1625 int level= block[0]*quant_table[0];
1626 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1627 if(level>0){
1628 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1629 block[0]= level;
1630 }else{
1631 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1632 block[0]= -level;
1633 }
1634// last_non_zero = i;
1635 }else{
1636 block[0]=0;
1637 }
1638 }
1639 last_non_zero= 0;
1640 i=1;
1641 }else{
1642 last_non_zero= -1;
1643 i=0;
1644 }
1645
1646 for(; i<16; i++){
1647 const int j= scantable[i];
1648 int level= block[j]*quant_table[j];
1649
1650// if( bias+level >= (1<<(QMAT_SHIFT - 3))
1651// || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652 if(((unsigned)(level+threshold1))>threshold2){
1653 if(level>0){
1654 level= (bias + level)>>QUANT_SHIFT;
1655 block[j]= level;
1656 }else{
1657 level= (bias - level)>>QUANT_SHIFT;
1658 block[j]= -level;
1659 }
1660 last_non_zero = i;
1661 }else{
1662 block[j]=0;
1663 }
1664 }
1665
1666 return last_non_zero;
1667}
1668
0da71265
MN
1669static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671 int src_x_offset, int src_y_offset,
1672 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673 MpegEncContext * const s = &h->s;
1674 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1675 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1676 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1677 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678 uint8_t * src_cb, * src_cr;
1679 int extra_width= h->emu_edge_width;
1680 int extra_height= h->emu_edge_height;
0da71265
MN
1681 int emu=0;
1682 const int full_mx= mx>>2;
1683 const int full_my= my>>2;
fbd312fd 1684 const int pic_width = 16*s->mb_width;
0d43dd8c 1685 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1686
1412060e 1687 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
171c4076 1688 return;
115329f1 1689
0da71265
MN
1690 if(mx&7) extra_width -= 3;
1691 if(my&7) extra_height -= 3;
115329f1
DB
1692
1693 if( full_mx < 0-extra_width
1694 || full_my < 0-extra_height
1695 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1696 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1697 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1699 emu=1;
1700 }
115329f1 1701
5d18eaad 1702 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1703 if(!square){
5d18eaad 1704 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1705 }
115329f1 1706
87352549 1707 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1708
0d43dd8c 1709 if(MB_FIELD){
5d18eaad 1710 // chroma offset when predicting from a field of opposite parity
2143b118 1711 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1712 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1713 }
1714 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1716
0da71265 1717 if(emu){
5d18eaad 1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1719 src_cb= s->edge_emu_buffer;
1720 }
5d18eaad 1721 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1722
1723 if(emu){
5d18eaad 1724 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1725 src_cr= s->edge_emu_buffer;
1726 }
5d18eaad 1727 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1728}
1729
9f2d1b4f 1730static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1731 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732 int x_offset, int y_offset,
1733 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735 int list0, int list1){
1736 MpegEncContext * const s = &h->s;
1737 qpel_mc_func *qpix_op= qpix_put;
1738 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1739
5d18eaad
LM
1740 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1741 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1742 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1743 x_offset += 8*s->mb_x;
0d43dd8c 1744 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1745
0da71265 1746 if(list0){
1924f3ce 1747 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1748 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750 qpix_op, chroma_op);
1751
1752 qpix_op= qpix_avg;
1753 chroma_op= chroma_avg;
1754 }
1755
1756 if(list1){
1924f3ce 1757 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1758 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760 qpix_op, chroma_op);
1761 }
1762}
1763
9f2d1b4f
LM
1764static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1772
5d18eaad
LM
1773 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1774 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1775 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1776 x_offset += 8*s->mb_x;
0d43dd8c 1777 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1778
9f2d1b4f
LM
1779 if(list0 && list1){
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
1787
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
1794
1795 if(h->use_weight == 2){
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
5d18eaad
LM
1798 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1801 }else{
5d18eaad 1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1811 }
1812 }else{
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1819
5d18eaad 1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
5d18eaad 1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1827 }
1828 }
1829}
1830
1831static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1844 else
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1847}
1848
513fbd8e
LM
1849static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
1854 if(refn >= 0){
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1862 }
1863}
1864
0da71265
MN
1865static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1869 MpegEncContext * const s = &h->s;
64514ee8 1870 const int mb_xy= h->mb_xy;
0da71265 1871 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1872
0da71265 1873 assert(IS_INTER(mb_type));
115329f1 1874
513fbd8e
LM
1875 prefetch_motion(h, 0);
1876
0da71265
MN
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1880 &weight_op[0], &weight_avg[0],
0da71265
MN
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1882 }else if(IS_16X8(mb_type)){
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1885 &weight_op[1], &weight_avg[1],
0da71265
MN
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1889 &weight_op[1], &weight_avg[1],
0da71265
MN
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1891 }else if(IS_8X16(mb_type)){
5d18eaad 1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1894 &weight_op[2], &weight_avg[2],
0da71265 1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1898 &weight_op[2], &weight_avg[2],
0da71265
MN
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1900 }else{
1901 int i;
115329f1 1902
0da71265
MN
1903 assert(IS_8X8(mb_type));
1904
1905 for(i=0; i<4; i++){
1906 const int sub_mb_type= h->sub_mb_type[i];
1907 const int n= 4*i;
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1910
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1914 &weight_op[3], &weight_avg[3],
0da71265
MN
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1919 &weight_op[4], &weight_avg[4],
0da71265
MN
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1923 &weight_op[4], &weight_avg[4],
0da71265
MN
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1926 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1928 &weight_op[5], &weight_avg[5],
0da71265 1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1932 &weight_op[5], &weight_avg[5],
0da71265
MN
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1934 }else{
1935 int j;
1936 assert(IS_SUB_4X4(sub_mb_type));
1937 for(j=0; j<4; j++){
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset + (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1942 &weight_op[6], &weight_avg[6],
0da71265
MN
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1944 }
1945 }
1946 }
1947 }
513fbd8e
LM
1948
1949 prefetch_motion(h, 1);
0da71265
MN
1950}
1951
98a6fff9 1952static av_cold void decode_init_vlc(void){
0da71265
MN
1953 static int done = 0;
1954
1955 if (!done) {
1956 int i;
910e3668 1957 int offset;
0da71265
MN
1958 done = 1;
1959
910e3668
AC
1960 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1961 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1962 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1963 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1964 &chroma_dc_coeff_token_bits[0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
0da71265 1966
910e3668 1967 offset = 0;
0da71265 1968 for(i=0; i<4; i++){
910e3668
AC
1969 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1970 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1971 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1972 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1973 &coeff_token_bits[i][0], 1, 1,
1974 INIT_VLC_USE_NEW_STATIC);
1975 offset += coeff_token_vlc_tables_size[i];
0da71265 1976 }
910e3668
AC
1977 /*
1978 * This is a one time safety check to make sure that
1979 * the packed static coeff_token_vlc table sizes
1980 * were initialized correctly.
1981 */
37d3e066 1982 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1983
1984 for(i=0; i<3; i++){
910e3668
AC
1985 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1986 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1987 init_vlc(&chroma_dc_total_zeros_vlc[i],
1988 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1989 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1990 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1991 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1992 }
1993 for(i=0; i<15; i++){
910e3668
AC
1994 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1995 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1996 init_vlc(&total_zeros_vlc[i],
1997 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1998 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1999 &total_zeros_bits[i][0], 1, 1,
2000 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
2001 }
2002
2003 for(i=0; i<6; i++){
910e3668
AC
2004 run_vlc[i].table = run_vlc_tables[i];
2005 run_vlc[i].table_allocated = run_vlc_tables_size;
2006 init_vlc(&run_vlc[i],
2007 RUN_VLC_BITS, 7,
0da71265 2008 &run_len [i][0], 1, 1,
910e3668
AC
2009 &run_bits[i][0], 1, 1,
2010 INIT_VLC_USE_NEW_STATIC);
0da71265 2011 }
910e3668
AC
2012 run7_vlc.table = run7_vlc_table,
2013 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 2014 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 2015 &run_len [6][0], 1, 1,
910e3668
AC
2016 &run_bits[6][0], 1, 1,
2017 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
2018 }
2019}
2020
0da71265 2021static void free_tables(H264Context *h){
7978debd 2022 int i;
afebe2f7 2023 H264Context *hx;
0da71265 2024 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
2025 av_freep(&h->chroma_pred_mode_table);
2026 av_freep(&h->cbp_table);
9e528114
LA
2027 av_freep(&h->mvd_table[0]);
2028 av_freep(&h->mvd_table[1]);
5ad984c9 2029 av_freep(&h->direct_table);
0da71265
MN
2030 av_freep(&h->non_zero_count);
2031 av_freep(&h->slice_table_base);
2032 h->slice_table= NULL;
e5017ab8 2033
0da71265
MN
2034 av_freep(&h->mb2b_xy);
2035 av_freep(&h->mb2b8_xy);
9f2d1b4f 2036
afebe2f7
2037 for(i = 0; i < h->s.avctx->thread_count; i++) {
2038 hx = h->thread_context[i];
2039 if(!hx) continue;
2040 av_freep(&hx->top_borders[1]);
2041 av_freep(&hx->top_borders[0]);
2042 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 2043 }
0da71265
MN
2044}
2045
239ea04c
LM
2046static void init_dequant8_coeff_table(H264Context *h){
2047 int i,q,x;
548a1c8a 2048 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
2049 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2050 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2051
2052 for(i=0; i<2; i++ ){
2053 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2054 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2055 break;
2056 }
2057
2058 for(q=0; q<52; q++){
d9ec210b
DP
2059 int shift = div6[q];
2060 int idx = rem6[q];
239ea04c 2061 for(x=0; x<64; x++)
548a1c8a
LM
2062 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2063 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2064 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
2065 }
2066 }
2067}
2068
2069static void init_dequant4_coeff_table(H264Context *h){
2070 int i,j,q,x;
ab2e3e2c 2071 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
2072 for(i=0; i<6; i++ ){
2073 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2074 for(j=0; j<i; j++){
2075 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2076 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2077 break;
2078 }
2079 }
2080 if(j<i)
2081 continue;
2082
2083 for(q=0; q<52; q++){
d9ec210b
DP
2084 int shift = div6[q] + 2;
2085 int idx = rem6[q];
239ea04c 2086 for(x=0; x<16; x++)
ab2e3e2c
LM
2087 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2088 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2089 h->pps.scaling_matrix4[i][x]) << shift;
2090 }
2091 }
2092}
2093
2094static void init_dequant_tables(H264Context *h){
2095 int i,x;
2096 init_dequant4_coeff_table(h);
2097 if(h->pps.transform_8x8_mode)
2098 init_dequant8_coeff_table(h);
2099 if(h->sps.transform_bypass){
2100 for(i=0; i<6; i++)
2101 for(x=0; x<16; x++)
2102 h->dequant4_coeff[i][0][x] = 1<<6;
2103 if(h->pps.transform_8x8_mode)
2104 for(i=0; i<2; i++)
2105 for(x=0; x<64; x++)
2106 h->dequant8_coeff[i][0][x] = 1<<6;
2107 }
2108}
2109
2110
0da71265
MN
2111/**
2112 * allocates tables.
3b66c4c5 2113 * needs width/height
0da71265
MN
2114 */
2115static int alloc_tables(H264Context *h){
2116 MpegEncContext * const s = &h->s;
7bc9090a 2117 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2118 int x,y;
0da71265
MN
2119
2120 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2121
53c05b1e 2122 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2123 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2124 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2125
7526ade2
MN
2126 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2127 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2129 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2130
b735aeea 2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2133
a55f20bd
LM
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
7bc9090a 2138 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2141
0da71265
MN
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
2144 }
2145 }
9f2d1b4f 2146
9c6221ae
GV
2147 s->obmc_scratchpad = NULL;
2148
56edbd81
LM
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
2151
0da71265
MN
2152 return 0;
2153fail:
2154 free_tables(h);
2155 return -1;
2156}
2157
afebe2f7
2158/**
2159 * Mimic alloc_tables(), but for every context thread.
2160 */
2161static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
2172
afebe2f7
2173 dst->s.obmc_scratchpad = NULL;
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2175}
2176
2177/**
2178 * Init context
2179 * Allocate buffers which are not shared amongst multiple threads.
2180 */
2181static int context_init(H264Context *h){
afebe2f7
2182 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2183 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2184
afebe2f7
2185 return 0;
2186fail:
2187 return -1; // free_tables will clean up for us
2188}
2189
98a6fff9 2190static av_cold void common_init(H264Context *h){
0da71265 2191 MpegEncContext * const s = &h->s;
0da71265
MN
2192
2193 s->width = s->avctx->width;
2194 s->height = s->avctx->height;
2195 s->codec_id= s->avctx->codec->id;
115329f1 2196
c92a30bb 2197 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2198
239ea04c 2199 h->dequant_coeff_pps= -1;
9a41c2c7 2200 s->unrestricted_mv=1;
0da71265 2201 s->decode=1; //FIXME
56edbd81
LM
2202
2203 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2204 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2205}
2206
98a6fff9 2207static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2208 H264Context *h= avctx->priv_data;
2209 MpegEncContext * const s = &h->s;
2210
3edcacde 2211 MPV_decode_defaults(s);
115329f1 2212
0da71265
MN
2213 s->avctx = avctx;
2214 common_init(h);
2215
2216 s->out_format = FMT_H264;
2217 s->workaround_bugs= avctx->workaround_bugs;
2218
2219 // set defaults
0da71265 2220// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2221 s->quarter_sample = 1;
0da71265 2222 s->low_delay= 1;
7a9dba3c
MN
2223
2224 if(avctx->codec_id == CODEC_ID_SVQ3)
2225 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2226 else
1d42f410 2227 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2228
c2212338 2229 decode_init_vlc();
115329f1 2230
26165f99
MR
2231 if(avctx->extradata_size > 0 && avctx->extradata &&
2232 *(char *)avctx->extradata == 1){
4770b1b4
RT
2233 h->is_avc = 1;
2234 h->got_avcC = 0;
26165f99
MR
2235 } else {
2236 h->is_avc = 0;
4770b1b4
RT
2237 }
2238
afebe2f7 2239 h->thread_context[0] = h;
18c7be65 2240 h->outputed_poc = INT_MIN;
e4b8f1fa 2241 h->prev_poc_msb= 1<<16;
0da71265
MN
2242 return 0;
2243}
2244
af8aa846 2245static int frame_start(H264Context *h){
0da71265
MN
2246 MpegEncContext * const s = &h->s;
2247 int i;
2248
af8aa846
MN
2249 if(MPV_frame_start(s, s->avctx) < 0)
2250 return -1;
0da71265 2251 ff_er_frame_start(s);
3a22d7fa
JD
2252 /*
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
1412060e 2255 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2256 * See decode_nal_units().
2257 */
2258 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2259
2260 assert(s->linesize && s->uvlinesize);
2261
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2265 }
2266 for(i=0; i<4; i++){
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2271 }
2272
934b0821
LM
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2278
2279 /* some macroblocks will be accessed before they're available */
afebe2f7 2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2282
0da71265 2283// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2284
1412060e 2285 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2286 // that if we break out due to an error it can be released automatically
2287 // in the next MPV_frame_start().
2288 // SVQ3 as well as most other codecs have only last/next/current and thus
2289 // get released even with set reference, besides SVQ3 and others do not
2290 // mark frames as reference later "naturally".
2291 if(s->codec_id != CODEC_ID_SVQ3)
2292 s->current_picture_ptr->reference= 0;
357282c6
MN
2293
2294 s->current_picture_ptr->field_poc[0]=
2295 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2296 assert(s->current_picture_ptr->long_ref==0);
357282c6 2297
af8aa846 2298 return 0;
0da71265
MN
2299}
2300
/**
 * Save the right-most pixel column and the bottom pixel row of the current
 * macroblock into h->left_border and h->top_borders[..][s->mb_x].
 * In this file it is called from hl_decode_mb_internal() when
 * h->deblocking_filter is set, just before the macroblock is loop filtered.
 * @param src_y      luma pointer of the macroblock (hl_decode_mb_internal passes dest_y)
 * @param src_cb     Cb pointer of the macroblock
 * @param src_cr     Cr pointer of the macroblock
 * @param linesize   luma stride used for this macroblock
 * @param uvlinesize chroma stride used for this macroblock
 * @param simple     non-zero skips the FRAME_MBAFF handling and the
 *                   CODEC_FLAG_GRAY test
 */
93cc10fa 2301static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2302 MpegEncContext * const s = &h->s;
2303 int i;
5f7f9719
MN
/* defaults used when the MBAFF branch below is not taken */
2304 int step = 1;
2305 int offset = 1;
2306 int uvoffset= 1;
2307 int top_idx = 1;
2308 int skiplast= 0;
115329f1 2309
53c05b1e
MN
/* step back one row: from here on index i*linesize with i=1..16 addresses
 * macroblock rows 0..15 (i=1..8 rows 0..7 for chroma) */
2310 src_y -= linesize;
2311 src_cb -= uvlinesize;
2312 src_cr -= uvlinesize;
2313
5f7f9719
MN
/* MBAFF frames: choose the slot offsets and stride inside left_border and
 * which of the two top_borders lines is written further down */
2314 if(!simple && FRAME_MBAFF){
2315 if(s->mb_y&1){
2316 offset = MB_MBAFF ? 1 : 17;
2317 uvoffset= MB_MBAFF ? 1 : 9;
2318 if(!MB_MBAFF){
/* odd mb_y without MB_MBAFF: additionally store the second-to-last row
 * (luma row 14, chroma row 6) in top_borders[0] */
2319 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2320 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2321 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2322 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2323 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2324 }
2325 }
2326 }else{
2327 if(!MB_MBAFF){
/* even mb_y without MB_MBAFF: seed slot 0 of each left_border section from
 * top_borders[0] and stop the column loops one row early (skiplast) */
2328 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2329 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2331 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2332 }
2333 skiplast= 1;
2334 }
2335 offset =
2336 uvoffset=
2337 top_idx = MB_MBAFF ? 0 : 1;
2338 }
2339 step= MB_MBAFF ? 2 : 1;
2340 }
2341
3b66c4c5 2342 // There are two lines saved, the line above the top macroblock of a pair,
6867a90b 2343 // and the line above the bottom macroblock
5f7f9719
MN
/* The previously saved top line is read BEFORE it is overwritten below: its
 * last luma sample becomes the first entry of the saved column. */
2344 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
/* luma column x=15, one sample per macroblock row */
2345 for(i=1; i<17 - skiplast; i++){
2346 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2347 }
115329f1 2348
5f7f9719
MN
/* bottom luma row (row 15), copied as two 8-byte words */
2349 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2350 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2351
87352549 2352 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
/* chroma: Cb lives at left_border[34..], Cr at left_border[34+18..];
 * in top_borders Cb uses bytes 16..23 and Cr bytes 24..31 */
2353 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2354 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2355 for(i=1; i<9 - skiplast; i++){
2356 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2357 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2358 }
5f7f9719
MN
2359 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2360 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2361 }
2362}
2363
/**
 * Exchange the pixels to the left of and above the current macroblock in the
 * picture with the values held in h->left_border / h->top_borders.
 * hl_decode_mb_internal() calls it with xchg=1 before intra prediction and
 * with xchg=0 afterwards (only when h->deblocking_filter is set).
 * @param src_y      luma pointer of the macroblock
 * @param src_cb     Cb pointer of the macroblock
 * @param src_cr     Cr pointer of the macroblock
 * @param linesize   luma stride used for this macroblock
 * @param uvlinesize chroma stride used for this macroblock
 * @param xchg       non-zero: true swap; zero: most positions only copy the
 *                   saved value into the picture (see XCHG below)
 * @param simple     non-zero skips the FRAME_MBAFF handling and the
 *                   CODEC_FLAG_GRAY test
 */
93cc10fa 2364static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2365 MpegEncContext * const s = &h->s;
2366 int temp8, i;
2367 uint64_t temp64;
b69378e2
2368 int deblock_left;
2369 int deblock_top;
2370 int mb_xy;
5f7f9719
MN
2371 int step = 1;
2372 int offset = 1;
2373 int uvoffset= 1;
2374 int top_idx = 1;
2375
/* same slot selection as in backup_mb_border() so that both functions address
 * the same left_border / top_borders entries */
2376 if(!simple && FRAME_MBAFF){
2377 if(s->mb_y&1){
2378 offset = MB_MBAFF ? 1 : 17;
2379 uvoffset= MB_MBAFF ? 1 : 9;
2380 }else{
2381 offset =
2382 uvoffset=
2383 top_idx = MB_MBAFF ? 0 : 1;
2384 }
2385 step= MB_MBAFF ? 2 : 1;
2386 }
b69378e2
2387
/* Decide which borders take part: with deblocking_filter == 2 a border is
 * used only if the neighbour has the same slice_table entry, otherwise it
 * only depends on the macroblock position. */
2388 if(h->deblocking_filter == 2) {
64514ee8 2389 mb_xy = h->mb_xy;
b69378e2
2390 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2391 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2392 } else {
2393 deblock_left = (s->mb_x > 0);
6c805007 2394 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2395 }
53c05b1e
MN
2396
/* move to the sample up-left of the macroblock: index 0 is the corner,
 * i*linesize (i>=1) walks down the column left of the macroblock and
 * +1.. walks along the row above it */
2397 src_y -= linesize + 1;
2398 src_cb -= uvlinesize + 1;
2399 src_cr -= uvlinesize + 1;
2400
/* XCHG(a,b,t,xchg): b always receives the old value of a; a receives the old
 * value of b only when xchg is non-zero. Multi-statement macro without
 * do{}while(0), so it must only be used as a full statement inside braces. */
2401#define XCHG(a,b,t,xchg)\
2402t= a;\
2403if(xchg)\
2404 a= b;\
2405b= t;
d89dc06a
LM
2406
2407 if(deblock_left){
5f7f9719
MN
/* luma left column; the corner (i=0) is skipped when the top is not used */
2408 for(i = !deblock_top; i<16; i++){
2409 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2410 }
/* last row (i == 16 here) is always swapped, regardless of xchg */
5f7f9719 2411 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2412 }
2413
2414 if(deblock_top){
5f7f9719
MN
/* luma row above: first 8 samples follow xchg, the next 8 and the first 8 of
 * the macroblock to the right (if there is one) are always swapped */
2415 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2416 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2417 if(s->mb_x+1 < s->mb_width){
5f7f9719 2418 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2419 }
53c05b1e 2420 }
53c05b1e 2421
87352549 2422 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2423 if(deblock_left){
5f7f9719
MN
/* chroma left columns: Cb at left_border[34..], Cr at left_border[34+18..] */
2424 for(i = !deblock_top; i<8; i++){
2425 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2426 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2427 }
5f7f9719
MN
2428 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2429 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2430 }
2431 if(deblock_top){
5f7f9719
MN
/* chroma rows above are always swapped */
2432 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2433 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2434 }
53c05b1e
MN
2435 }
2436}
2437
/**
 * Reconstruct the current macroblock (position s->mb_x / s->mb_y) into
 * s->current_picture: intra prediction or motion compensation, addition of
 * the residual held in h->mb, and, if h->deblocking_filter is set, border
 * backup plus loop filtering.
 * @param simple the two wrappers below pass the literals 1 and 0; with 1 all
 *               branches guarded by !simple (field/MBAFF macroblocks, intra
 *               PCM, non-H.264 codec ids, gray flag, encoding) are skipped
 */
5a6a6cc7 2438static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2439 MpegEncContext * const s = &h->s;
2440 const int mb_x= s->mb_x;
2441 const int mb_y= s->mb_y;
64514ee8 2442 const int mb_xy= h->mb_xy;
0da71265
MN
2443 const int mb_type= s->current_picture.mb_type[mb_xy];
2444 uint8_t *dest_y, *dest_cb, *dest_cr;
2445 int linesize, uvlinesize /*dct_offset*/;
2446 int i;
6867a90b 2447 int *block_offset = &h->block_offset[0];
bd91fee3 2448 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
36940eca 2449 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2450 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2451
0da71265
MN
/* top-left sample of the macroblock in each plane: 16x16 luma, 8x8 chroma */
2452 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2453 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2454 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2455
a957c27b
LM
2456 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2457 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2458
bd91fee3 2459 if (!simple && MB_FIELD) {
5d18eaad
LM
/* field macroblock: doubled strides and the second set of block offsets */
2460 linesize = h->mb_linesize = s->linesize * 2;
2461 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2462 block_offset = &h->block_offset[24];
1412060e 2463 if(mb_y&1){ //FIXME move out of this function?
/* odd mb_y: start on the second row of the macroblock above, so that with the
 * doubled stride every other row of the pair is addressed */
0da71265 2464 dest_y -= s->linesize*15;
6867a90b
LLL
2465 dest_cb-= s->uvlinesize*7;
2466 dest_cr-= s->uvlinesize*7;
0da71265 2467 }
5d18eaad
LM
2468 if(FRAME_MBAFF) {
2469 int list;
/* rewrite the cached reference indices of every used list to (16+ref)^(mb_y&1);
 * presumably this selects the per-field entries stored from index 16 on by
 * fill_mbaff_ref_list() - TODO confirm */
3425501d 2470 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2471 if(!USES_LIST(mb_type, list))
2472 continue;
2473 if(IS_16X16(mb_type)){
2474 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2475 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2476 }else{
2477 for(i=0; i<16; i+=4){
5d18eaad
LM
2478 int ref = h->ref_cache[list][scan8[i]];
2479 if(ref >= 0)
1710856c 2480 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2481 }
2482 }
2483 }
2484 }
0da71265 2485 } else {
5d18eaad
LM
2486 linesize = h->mb_linesize = s->linesize;
2487 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2488// dct_offset = s->linesize * 16;
2489 }
115329f1 2490
ef9d1d15
LM
/* pick the luma residual-add routines: plain pixel add for transform bypass,
 * otherwise the 8x8 or 4x4 H.264 IDCT (full and DC-only variants) */
2491 if(transform_bypass){
2492 idct_dc_add =
2493 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2494 }else if(IS_8x8DCT(mb_type)){
2495 idct_dc_add = s->dsp.h264_idct8_dc_add;
2496 idct_add = s->dsp.h264_idct8_add;
2497 }else{
2498 idct_dc_add = s->dsp.h264_idct_dc_add;
2499 idct_add = s->dsp.h264_idct_add;
2500 }
0da71265 2501
bd91fee3 2502 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
/* intra PCM: h->mb holds the raw samples, copy them row by row */
2503 for (i=0; i<16; i++) {
2504 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2505 }
c1708e8d
MN
2506 for (i=0; i<8; i++) {
2507 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2508 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2509 }
e7e09b49
LLL
2510 } else {
2511 if(IS_INTRA(mb_type)){
/* with deblocking enabled, swap in the saved border samples for the duration
 * of intra prediction (swapped back by the xchg=0 call further down) */
5f7f9719 2512 if(h->deblocking_filter)
93cc10fa 2513 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2514
87352549 2515 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2516 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2517 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2518 }
0da71265 2519
e7e09b49 2520 if(IS_INTRA4x4(mb_type)){
bd91fee3 2521 if(simple || !s->encoding){
43efd19a
LM
2522 if(IS_8x8DCT(mb_type)){
/* intra 8x8: predict each 8x8 block, then add its residual right away */
2523 for(i=0; i<16; i+=4){
2524 uint8_t * const ptr= dest_y + block_offset[i];
2525 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
ef9d1d15 2526 const int nnz = h->non_zero_count_cache[ scan8[i] ];
c92a30bb 2527 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
4672503d 2528 (h->topright_samples_available<<i)&0x4000, linesize);
ef9d1d15
LM
2529 if(nnz){
2530 if(nnz == 1 && h->mb[i*16])
2531 idct_dc_add(ptr, h->mb + i*16, linesize);
2532 else
2533 idct_add(ptr, h->mb + i*16, linesize);
2534 }
43efd19a
LM
2535 }
2536 }else
/* intra 4x4: predict each 4x4 block, then add its residual right away */
e7e09b49 2537 for(i=0; i<16; i++){
6867a90b 2538 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2539 uint8_t *topright;
2540 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
ef9d1d15 2541 int nnz, tr;
e7e09b49
LLL
2542
2543 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2544 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
6867a90b 2545 assert(mb_y || linesize <= block_offset[i]);
e7e09b49
LLL
2546 if(!topright_avail){
/* no top-right samples: replicate the last sample of the row above 4 times */
2547 tr= ptr[3 - linesize]*0x01010101;
2548 topright= (uint8_t*) &tr;
115329f1 2549 }else
e7e09b49 2550 topright= ptr + 4 - linesize;
a9799653 2551 }else
e7e09b49
LLL
2552 topright= NULL;
2553
c92a30bb 2554 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
ef9d1d15
LM
2555 nnz = h->non_zero_count_cache[ scan8[i] ];
2556 if(nnz){
bd91fee3 2557 if(is_h264){
ef9d1d15
LM
2558 if(nnz == 1 && h->mb[i*16])
2559 idct_dc_add(ptr, h->mb + i*16, linesize);
2560 else
2561 idct_add(ptr, h->mb + i*16, linesize);
2562 }else
e7e09b49
LLL
2563 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2564 }
8b82a956 2565 }
0da71265 2566 }
e7e09b49 2567 }else{
/* intra 16x16: whole-macroblock prediction, then the luma DC transform;
 * the per-block residual is added in the !IS_INTRA4x4 section below */
c92a30bb 2568 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2569 if(is_h264){
36940eca 2570 if(!transform_bypass)
93f0c0a4 2571 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2572 }else
e7e09b49 2573 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2574 }
5f7f9719 2575 if(h->deblocking_filter)
93cc10fa 2576 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2577 }else if(is_h264){
/* inter macroblock: motion compensation */
e7e09b49 2578 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2579 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2580 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2581 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2582 }
e7e09b49
LLL
2583
2584
/* luma residual for everything except intra 4x4/8x8 (handled above) */
2585 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2586 if(is_h264){
ef9d1d15
LM
2587 if(IS_INTRA16x16(mb_type)){
2588 for(i=0; i<16; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ])
2590 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2591 else if(h->mb[i*16])
2592 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2593 }
2594 }else{
/* step over four 4x4 indices at a time when the 8x8 transform is used */
2595 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2596 for(i=0; i<16; i+=di){
2597 int nnz = h->non_zero_count_cache[ scan8[i] ];
2598 if(nnz){
2599 if(nnz==1 && h->mb[i*16])
2600 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2601 else
2602 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2603 }
e7e09b49 2604 }
4704097a 2605 }
e7e09b49
LLL
2606 }else{
2607 for(i=0; i<16; i++){
2608 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2609 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2610 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2611 }
4704097a 2612 }
0da71265
MN
2613 }
2614 }
0da71265 2615
/* chroma residual: blocks 16..19 go to dest[0] (Cb), 20..23 to dest[1] (Cr) */
87352549 2616 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
ef9d1d15
LM
2617 uint8_t *dest[2] = {dest_cb, dest_cr};
2618 if(transform_bypass){
2619 idct_add = idct_dc_add = s->dsp.add_pixels4;
2620 }else{
2621 idct_add = s->dsp.h264_idct_add;
2622 idct_dc_add = s->dsp.h264_idct_dc_add;
4691a77d
2623 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2624 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
36940eca 2625 }
bd91fee3 2626 if(is_h264){
ef9d1d15
LM
2627 for(i=16; i<16+8; i++){
2628 if(h->non_zero_count_cache[ scan8[i] ])
2629 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2630 else if(h->mb[i*16])
2631 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
4704097a 2632 }
e7e09b49 2633 }else{
ef9d1d15 2634 for(i=16; i<16+8; i++){
e7e09b49 2635 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
ef9d1d15 2636 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
e7e09b49
LLL
2637 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2638 }
4704097a 2639 }
0da71265
MN
2640 }
2641 }
2642 }
/* deblocking: save the unfiltered borders first, refill the caches, recompute
 * both chroma QPs from the stored qscale and run the loop filter */
53c05b1e 2643 if(h->deblocking_filter) {
5f7f9719
MN
2644 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2645 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2646 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2647 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2648 if (!simple && FRAME_MBAFF) {
5f7f9719 2649 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2650 } else {
3e20143e 2651 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2652 }
53c05b1e 2653 }
0da71265
MN
2654}
2655
0da71265 2656/**
bd91fee3
AS
2657 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2658 */
2659static void hl_decode_mb_simple(H264Context *h){
2660 hl_decode_mb_internal(h, 1);
2661}
2662
2663/**
2664 * Process a macroblock; this handles edge cases, such as interlacing.
2665 */
2666static void av_noinline hl_decode_mb_complex(H264Context *h){
2667 hl_decode_mb_internal(h, 0);
2668}
2669
2670static void hl_decode_mb(H264Context *h){
2671 MpegEncContext * const s = &h->s;
64514ee8 2672 const int mb_xy= h->mb_xy;
bd91fee3 2673 const int mb_type= s->current_picture.mb_type[mb_xy];
58cc7dd9
AS
2674 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2675 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
bd91fee3 2676
fedec603 2677 if(ENABLE_H264_ENCODER && !s->decode)
bd91fee3
AS
2678 return;
2679
2680 if (is_complex)
2681 hl_decode_mb_complex(h);
2682 else hl_decode_mb_simple(h);
2683}
2684
2143b118 2685static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2686 int i;
2687 for (i = 0; i < 4; ++i) {
2143b118 2688 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2689 pic->data[i] += pic->linesize[i];
2143b118 2690 pic->reference = parity;
11cc1d8c
JD
2691 pic->linesize[i] *= 2;
2692 }
2879c75f 2693 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2694}
2695
2696static int split_field_copy(Picture *dest, Picture *src,
2697 int parity, int id_add){
2698 int match = !!(src->reference & parity);
2699
2700 if (match) {
2701 *dest = *src;
d4f7d838 2702 if(parity != PICT_FRAME){
b3e93fd4
MN
2703 pic_as_field(dest, parity);
2704 dest->pic_id *= 2;
2705 dest->pic_id += id_add;
d4f7d838 2706 }
11cc1d8c
JD
2707 }
2708
2709 return match;
2710}
2711
d4f7d838
MN
2712static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2713 int i[2]={0};
2714 int index=0;
11cc1d8c 2715
d4f7d838
MN
2716 while(i[0]<len || i[1]<len){
2717 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2718 i[0]++;
2719 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2720 i[1]++;
2721 if(i[0] < len){
2722 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2724 }
2725 if(i[1] < len){
2726 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2728 }
2729 }
2730
d4f7d838 2731 return index;
11cc1d8c
JD
2732}
2733
d4f7d838
MN
2734static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2735 int i, best_poc;
2736 int out_i= 0;
11cc1d8c 2737
d4f7d838
MN
2738 for(;;){
2739 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2740
d4f7d838
MN
2741 for(i=0; i<len; i++){
2742 const int poc= src[i]->poc;
2743 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2744 best_poc= poc;
2745 sorted[out_i]= src[i];
2746 }
2747 }
2748 if(best_poc == (dir ? INT_MIN : INT_MAX))
2749 break;
2750 limit= sorted[out_i++]->poc - dir;
2751 }
2752 return out_i;
11cc1d8c
JD
2753}
2754
bd91fee3 2755/**
0da71265
MN
2756 * fills the default_ref_list.
2757 */
2758static int fill_default_ref_list(H264Context *h){
2759 MpegEncContext * const s = &h->s;
d4f7d838 2760 int i, len;
115329f1 2761
9f5c1037 2762 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2763 Picture *sorted[32];
2764 int cur_poc, list;
2765 int lens[2];
11cc1d8c 2766
d4f7d838
MN
2767 if(FIELD_PICTURE)
2768 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2769 else
2770 cur_poc= s->current_picture_ptr->poc;
086acdd5 2771
d4f7d838
MN
2772 for(list= 0; list<2; list++){
2773 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2774 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2775 assert(len<=32);
2776 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2777 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2778 assert(len<=32);
086acdd5 2779
d4f7d838
MN
2780 if(len < h->ref_count[list])
2781 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2782 lens[list]= len;
086acdd5
JD
2783 }
2784
d4f7d838
MN
2785 if(lens[0] == lens[1] && lens[1] > 1){
2786 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2787 if(i == lens[0])
2788 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2789 }
086acdd5 2790 }else{
d4f7d838
MN
2791 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2792 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2793 assert(len <= 32);
2794 if(len < h->ref_count[0])
2795 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2796 }
827c91bf
LLL
2797#ifdef TRACE
2798 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2799 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2800 }
9f5c1037 2801 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2802 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2803 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2804 }
2805 }
2806#endif
0da71265
MN
2807 return 0;
2808}
2809
827c91bf
LLL
2810static void print_short_term(H264Context *h);
2811static void print_long_term(H264Context *h);
2812
949da388
JD
2813/**
2814 * Extract structure information about the picture described by pic_num in
2815 * the current decoding context (frame or field). Note that pic_num is
2816 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2817 * @param pic_num picture number for which to extract structure information
2818 * @param structure one of PICT_XXX describing structure of picture
2819 * with pic_num
2820 * @return frame number (short term) or long term index of picture
2821 * described by pic_num
2822 */
2823static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2824 MpegEncContext * const s = &h->s;
2825
2826 *structure = s->picture_structure;
2827 if(FIELD_PICTURE){
2828 if (!(pic_num & 1))
2829 /* opposite field */
2830 *structure ^= PICT_FRAME;
2831 pic_num >>= 1;
2832 }
2833
2834 return pic_num;
2835}
2836
0da71265
MN
/**
 * Build h->ref_list from h->default_ref_list and apply the reference picture
 * list reordering commands read from the slice header bitstream (s->gb).
 * Entries that end up without picture data are replaced by the current
 * picture.
 * @return 0 on success, -1 if the bitstream contains an invalid command,
 *         too many commands or an out-of-range value
 */
2837static int decode_ref_pic_list_reordering(H264Context *h){
2838 MpegEncContext * const s = &h->s;
949da388 2839 int list, index, pic_structure;
115329f1 2840
827c91bf
LLL
2841 print_short_term(h);
2842 print_long_term(h);
115329f1 2843
3425501d 2844 for(list=0; list<h->list_count; list++){
0da71265
MN
/* start from the default list */
2845 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2846
/* one flag bit per list tells whether reordering commands follow */
2847 if(get_bits1(&s->gb)){
/* running picture number prediction for the short-term commands */
2848 int pred= h->curr_pic_num;
0da71265
MN
2849
/* each command places one picture at position index; idc 3 ends the list */
2850 for(index=0; ; index++){
88e7a4d1
MN
2851 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2852 unsigned int pic_id;
0da71265 2853 int i;
2f944356 2854 Picture *ref = NULL;
115329f1
DB
2855
2856 if(reordering_of_pic_nums_idc==3)
0bc42cad 2857 break;
115329f1 2858
0da71265 2859 if(index >= h->ref_count[list]){
9b879566 2860 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2861 return -1;
2862 }
115329f1 2863
0da71265
MN
2864 if(reordering_of_pic_nums_idc<3){
2865 if(reordering_of_pic_nums_idc<2){
/* idc 0/1: short-term picture, addressed by a difference that is subtracted
 * from (0) or added to (1) the prediction */
88e7a4d1 2866 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2867 int frame_num;
0da71265 2868
03d3cab8 2869 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2870 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2871 return -1;
2872 }
2873
2874 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2875 else pred+= abs_diff_pic_num;
/* wrap around; the mask form assumes max_pic_num is a power of two - TODO confirm */
2876 pred &= h->max_pic_num - 1;
115329f1 2877
949da388
JD
2878 frame_num = pic_num_extract(h, pred, &pic_structure);
2879
0d175622
MN
/* search the short-term list from its end for a picture with this frame_num
 * that is a reference for the wanted parity; i ends at -1 if none is found */
2880 for(i= h->short_ref_count-1; i>=0; i--){
2881 ref = h->short_ref[i];
949da388 2882 assert(ref->reference);
0d175622 2883 assert(!ref->long_ref);
6edac8e1 2884 if(
af8c5e08
MN
2885 ref->frame_num == frame_num &&
2886 (ref->reference & pic_structure)
6edac8e1 2887 )
0da71265
MN
2888 break;
2889 }
0d175622 2890 if(i>=0)
949da388 2891 ref->pic_id= pred;
0da71265 2892 }else{
/* idc 2: long-term picture, addressed directly */
949da388 2893 int long_idx;
0da71265 2894 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2895
2896 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2897
2898 if(long_idx>31){
88e7a4d1
MN
2899 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2900 return -1;
2901 }
949da388
JD
2902 ref = h->long_ref[long_idx];
2903 assert(!(ref && !ref->reference));
/* i only serves as found (0) / missing (-1) marker in this branch */
af8c5e08 2904 if(ref && (ref->reference & pic_structure)){
ac658be5 2905 ref->pic_id= pic_id;
ac658be5
FOL
2906 assert(ref->long_ref);
2907 i=0;
2908 }else{
2909 i=-1;
2910 }
0da71265
MN
2911 }
2912
0d315f28 2913 if (i < 0) {
9b879566 2914 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2915 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2916 } else {
/* find where this picture currently sits at or after index (or stop at the
 * last slot), shift the entries in between down by one and insert it at index */
2917 for(i=index; i+1<h->ref_count[list]; i++){
2918 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2919 break;
21be92bf
MN
2920 }
2921 for(; i > index; i--){
2922 h->ref_list[list][i]= h->ref_list[list][i-1];
2923 }
0d175622 2924 h->ref_list[list][index]= *ref;
949da388 2925 if (FIELD_PICTURE){
2143b118 2926 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2927 }
0da71265 2928 }
0bc42cad 2929 }else{
9b879566 2930 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2931 return -1;
2932 }
2933 }
2934 }
0da71265 2935 }
/* make sure every used entry points to picture data */
3425501d 2936 for(list=0; list<h->list_count; list++){
6ab87211 2937 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2938 if(!h->ref_list[list][index].data[0]){
2939 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2940 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2941 }
6ab87211 2942 }
6ab87211 2943 }
115329f1 2944
115329f1 2945 return 0;
0da71265
MN
2946}
2947
91c58c94 2948static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2949 int list, i, j;
3425501d 2950 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2951 for(i=0; i<h->ref_count[list]; i++){
2952 Picture *frame = &h->ref_list[list][i];
2953 Picture *field = &h->ref_list[list][16+2*i];
2954 field[0] = *frame;
2955 for(j=0; j<3; j++)
2956 field[0].linesize[j] <<= 1;
2143b118 2957 field[0].reference = PICT_TOP_FIELD;
078f42dd 2958 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2959 field[1] = field[0];
2960 for(j=0; j<3; j++)
2961 field[1].data[j] += frame->linesize[j];
2143b118 2962 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2963 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2964
2965 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2966 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2967 for(j=0; j<2; j++){
2968 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2969 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2970 }
2971 }
2972 }
2973 for(j=0; j<h->ref_count[1]; j++){
2974 for(i=0; i<h->ref_count[0]; i++)
2975 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2976 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2977 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2978 }
2979}
2980
0da71265
MN
2981static int pred_weight_table(H264Context *h){
2982 MpegEncContext * const s = &h->s;
2983 int list, i;
9f2d1b4f 2984 int luma_def, chroma_def;
115329f1 2985
9f2d1b4f
LM
2986 h->use_weight= 0;
2987 h->use_weight_chroma= 0;
0da71265
MN
2988 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2989 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2990 luma_def = 1<<h->luma_log2_weight_denom;
2991 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2992
2993 for(list=0; list<2; list++){
2994 for(i=0; i<h->ref_count[list]; i++){
2995 int luma_weight_flag, chroma_weight_flag;
115329f1 2996
0da71265
MN
2997 luma_weight_flag= get_bits1(&s->gb);
2998 if(luma_weight_flag){
2999 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3000 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f
LM
3001 if( h->luma_weight[list][i] != luma_def
3002 || h->luma_offset[list][i] != 0)
3003 h->use_weight= 1;
3004 }else{
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
0da71265
MN
3007 }
3008
0af6967e 3009 if(CHROMA){
fef744d4
MN
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3012 int j;
3013 for(j=0; j<2; j++){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
3017 || h->chroma_offset[list][i][j] != 0)
3018 h->use_weight_chroma= 1;
3019 }
3020 }else{
3021 int j;
3022 for(j=0; j<2; j++){
3023 h->chroma_weight[list][i][j]= chroma_def;
3024 h->chroma_offset[list][i][j]= 0;
3025 }
0da71265
MN
3026 }
3027 }
3028 }
9f5c1037 3029 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 3030 }
9f2d1b4f 3031 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
3032 return 0;
3033}
3034
9f2d1b4f
LM
3035static void implicit_weight_table(H264Context *h){
3036 MpegEncContext * const s = &h->s;
9f2d1b4f
LM
3037 int ref0, ref1;
3038 int cur_poc = s->current_picture_ptr->poc;
3039
3040 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3041 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3042 h->use_weight= 0;
3043 h->use_weight_chroma= 0;
3044 return;
3045 }
3046
3047 h->use_weight= 2;
3048 h->use_weight_chroma= 2;
3049 h->luma_log2_weight_denom= 5;
3050 h->chroma_log2_weight_denom= 5;
3051
9f2d1b4f
LM
3052 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3053 int poc0 = h->ref_list[0][ref0].poc;
3054 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3055 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3056 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3057 if(td){
f66e4f5f 3058 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3059 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3060 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3061 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3062 h->implicit_weight[ref0][ref1] = 32;
3063 else
3064 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3065 }else
3066 h->implicit_weight[ref0][ref1] = 32;
3067 }
3068 }
3069}
3070
8fd57a66
JD
3071/**
3072 * Mark a picture as no longer needed for reference. The refmask
3073 * argument allows unreferencing of individual fields or the whole frame.
3074 * If the picture becomes entirely unreferenced, but is being held for
3075 * display purposes, it is marked as such.
3076 * @param refmask mask of fields to unreference; the mask is bitwise
3077 * anded with the reference marking of pic
3078 * @return non-zero if pic becomes entirely unreferenced (except possibly
3079 * for display purposes) zero if one of the fields remains in
3080 * reference
3081 */
3082static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3083 int i;
8fd57a66
JD
3084 if (pic->reference &= refmask) {
3085 return 0;
3086 } else {
79f4494a
MN
3087 for(i = 0; h->delayed_pic[i]; i++)
3088 if(pic == h->delayed_pic[i]){
3089 pic->reference=DELAYED_PIC_REF;
3090 break;
3091 }
8fd57a66
JD
3092 return 1;
3093 }
4e4d983e
LM
3094}
3095
0da71265 3096/**
5175b937 3097 * instantaneous decoder refresh.
0da71265
MN
3098 */
3099static void idr(H264Context *h){
4e4d983e 3100 int i;
0da71265 3101
dc032f33 3102 for(i=0; i<16; i++){
9c0e4624 3103 remove_long(h, i, 0);
0da71265 3104 }
849b9cef 3105 assert(h->long_ref_count==0);
0da71265
MN
3106
3107 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3108 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3109 h->short_ref[i]= NULL;
3110 }
3111 h->short_ref_count=0;
a149c1a5 3112 h->prev_frame_num= 0;
80f8e035
MN
3113 h->prev_frame_num_offset= 0;
3114 h->prev_poc_msb=
3115 h->prev_poc_lsb= 0;
0da71265
MN
3116}
3117
7c33ad19
LM
3118/* forget old pics after a seek */
3119static void flush_dpb(AVCodecContext *avctx){
3120 H264Context *h= avctx->priv_data;
3121 int i;
64b9d48f 3122 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3123 if(h->delayed_pic[i])
3124 h->delayed_pic[i]->reference= 0;
7c33ad19 3125 h->delayed_pic[i]= NULL;
285b570f 3126 }
df8a7dff 3127 h->outputed_poc= INT_MIN;
7c33ad19 3128 idr(h);
ca159196
MR
3129 if(h->s.current_picture_ptr)
3130 h->s.current_picture_ptr->reference= 0;
12d96de3 3131 h->s.first_field= 0;
e240f898 3132 ff_mpeg_flush(avctx);
7c33ad19
LM
3133}
3134
0da71265 3135/**
47e112f8
JD
3136 * Find a Picture in the short term reference list by frame number.
3137 * @param frame_num frame number to search for
3138 * @param idx the index into h->short_ref where returned picture is found
3139 * undefined if no picture found.
3140 * @return pointer to the found picture, or NULL if no pic with the provided
3141 * frame number is found
0da71265 3142 */
47e112f8 3143static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce 3144 MpegEncContext * const s = &h->s;
0da71265 3145 int i;
115329f1 3146
0da71265
MN
3147 for(i=0; i<h->short_ref_count; i++){
3148 Picture *pic= h->short_ref[i];
1924f3ce 3149 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3150 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
47e112f8
JD
3151 if(pic->frame_num == frame_num) {
3152 *idx = i;
0da71265
MN
3153 return pic;
3154 }
3155 }
3156 return NULL;
3157}
3158
3159/**
47e112f8
JD
3160 * Remove a picture from the short term reference list by its index in
3161 * that list. This does no checking on the provided index; it is assumed
3162 * to be valid. Other list entries are shifted down.
3163 * @param i index into h->short_ref of picture to remove.
3164 */
3165static void remove_short_at_index(H264Context *h, int i){
e1f15d38 3166 assert(i >= 0 && i < h->short_ref_count);
47e112f8
JD
3167 h->short_ref[i]= NULL;
3168 if (--h->short_ref_count)
3169 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3170}
3171
3172/**
3173 *
3174 * @return the removed picture or NULL if an error occurs
3175 */
d9e32422 3176static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
47e112f8
JD
3177 MpegEncContext * const s = &h->s;
3178 Picture *pic;
3179 int i;
3180
3181 if(s->avctx->debug&FF_DEBUG_MMCO)
3182 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3183
3184 pic = find_short(h, frame_num, &i);
d9e32422
MN
3185 if (pic){
3186 if(unreference_pic(h, pic, ref_mask))
47e112f8 3187 remove_short_at_index(h, i);
d9e32422 3188 }
47e112f8
JD
3189
3190 return pic;
3191}
3192
3193/**
24231e4c 3194 * Remove a picture from the long term reference list by its index in
1cea5d0d 3195 * that list.
3b66c4c5 3196 * @return the removed picture or NULL if an error occurs
0da71265 3197 */
9c0e4624 3198static Picture * remove_long(H264Context *h, int i, int ref_mask){
0da71265
MN