Reorder ifs in chroma hl_decode_mb to avoid a duplicate transform_bypass
[libav.git] / libavcodec / h264.c
CommitLineData
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

0da71265
MN
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
26b4fe82 31#include "h264.h"
0da71265 32#include "h264data.h"
26b4fe82 33#include "h264_parser.h"
0da71265 34#include "golomb.h"
626464fb 35#include "rectangle.h"
0da71265 36
e5017ab8 37#include "cabac.h"
52cb7981
JD
38#ifdef ARCH_X86
39#include "i386/h264_i386.h"
40#endif
e5017ab8 41
2848ce84 42//#undef NDEBUG
0da71265
MN
43#include <assert.h>
44
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4

0da71265 51static VLC coeff_token_vlc[4];
910e3668
AC
52static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
54
0da71265 55static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
56static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
58
59static VLC total_zeros_vlc[15];
910e3668
AC
60static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61static const int total_zeros_vlc_tables_size = 512;
62
0da71265 63static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
64static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
66
67static VLC run_vlc[6];
910e3668
AC
68static VLC_TYPE run_vlc_tables[6][8][2];
69static const int run_vlc_tables_size = 8;
70
0da71265 71static VLC run7_vlc;
910e3668
AC
72static VLC_TYPE run7_vlc_table[96][2];
73static const int run7_vlc_table_size = 96;
0da71265 74
8b82a956
MN
75static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
6ba71fc4 77static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
3e20143e 78static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
9c0e4624 79static Picture * remove_long(H264Context *h, int i, int ref_mask);
8b82a956 80
849f1035 81static av_always_inline uint32_t pack16to32(int a, int b){
377ec888
MN
82#ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
84#else
85 return (a&0xFFFF) + (b<<16);
86#endif
87}
88
/** Lookup table: rem6[q] == q % 6 for q in 0..51. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/** Lookup table: div6[q] == q / 6 for q in 0..51. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

/**
 * Index tables selecting which entries of the left neighbour macroblock(s)
 * are read by fill_caches(). Row 0 is the default; rows 1-3 are chosen in
 * MBAFF frames when the left pair and the current macroblock differ in
 * frame/field coding (row 1: current is a bottom frame MB, row 2: current is
 * a top frame MB, row 3: current is a field MB).
 * Entries 0-3 are used as luma row/block indices, entries 4-7 index the
 * chroma non-zero-count entries.
 */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};

70abb407 104static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 105 MpegEncContext * const s = &h->s;
64514ee8 106 const int mb_xy= h->mb_xy;
0da71265
MN
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
cac55c91 109 const int * left_block;
02f7695b 110 int topleft_partition= -1;
0da71265
MN
111 int i;
112
36e097bc
JD
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
114
717b1733 115 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
117 return;
118
2cab6401
DB
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 121
6867a90b
LLL
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 125 left_block = left_block_options[0];
5d18eaad 126 if(FRAME_MBAFF){
6867a90b
LLL
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
a9c9a240 137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
6867a90b
LLL
138 if (bottom
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
141 ) {
142 top_xy -= s->mb_stride;
143 }
144 if (bottom
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
147 ) {
148 topleft_xy -= s->mb_stride;
02f7695b
LM
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
1412060e 151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 152 topleft_partition = 0;
6867a90b
LLL
153 }
154 if (bottom
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
157 ) {
158 topright_xy -= s->mb_stride;
159 }
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
163 if (bottom) {
143d7f14 164 left_block = left_block_options[1];
6867a90b 165 } else {
143d7f14 166 left_block= left_block_options[2];
6867a90b
LLL
167 }
168 } else {
169 left_xy[1] += s->mb_stride;
143d7f14 170 left_block = left_block_options[3];
6867a90b
LLL
171 }
172 }
0da71265
MN
173 }
174
826de46e
LLL
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
6ba71fc4 178 if(for_deblock){
717b1733
LM
179 topleft_type = 0;
180 topright_type = 0;
b735aeea
MN
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 184
e248cb60 185 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 186 int list;
3425501d 187 for(list=0; list<h->list_count; list++){
e248cb60
MN
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 191 if(USES_LIST(mb_type,list)){
191e8ca7 192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
195 ref += h->b8_stride;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
198 }
199 }
200 }
46f2f05f
MN
201 }else{
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
207
208 if(IS_INTRA(mb_type)){
faa7e394 209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
210 h->topleft_samples_available=
211 h->top_samples_available=
0da71265
MN
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
214
faa7e394 215 if(!(top_type & type_mask)){
0da71265
MN
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
219 }
d1d10e91
MN
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
faa7e394 222 if(!(left_type[0] & type_mask)){
d1d10e91
MN
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
225 }
faa7e394 226 if(!(left_type[1] & type_mask)){
d1d10e91
MN
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
229 }
230 }else{
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
faa7e394 234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
237 }
238 }
239 }else{
faa7e394 240 if(!(left_type[0] & type_mask)){
0da71265
MN
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
243 }
244 }
115329f1 245
faa7e394 246 if(!(topleft_type & type_mask))
0da71265 247 h->topleft_samples_available&= 0x7FFF;
115329f1 248
faa7e394 249 if(!(topright_type & type_mask))
0da71265 250 h->topright_samples_available&= 0xFBFF;
115329f1 251
0da71265
MN
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 }else{
259 int pred;
faa7e394 260 if(!(top_type & type_mask))
0da71265 261 pred= -1;
6fbcaaa0
LLL
262 else{
263 pred= 2;
0da71265
MN
264 }
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 }
270 for(i=0; i<2; i++){
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 }else{
275 int pred;
faa7e394 276 if(!(left_type[i] & type_mask))
0da71265 277 pred= -1;
6fbcaaa0
LLL
278 else{
279 pred= 2;
0da71265
MN
280 }
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
283 }
284 }
285 }
286 }
29671011 287 }
115329f1
DB
288
289
0da71265 290/*
115329f1
DB
2910 . T T. T T T T
2921 L . .L . . . .
2932 L . .L . . . .
2943 . T TL . . . .
2954 L . .L . . . .
2965 L . .. . . . .
0da71265 297*/
1412060e 298//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 299 if(top_type){
6867a90b
LLL
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 304
6867a90b 305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 307
6867a90b 308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 310
0da71265 311 }else{
115329f1 312 h->non_zero_count_cache[4+8*0]=
0da71265
MN
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
115329f1 316
0da71265
MN
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
115329f1 319
0da71265 320 h->non_zero_count_cache[1+8*3]=
3981c385 321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 322
0da71265 323 }
826de46e 324
6867a90b
LLL
325 for (i=0; i<2; i++) {
326 if(left_type[i]){
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 331 }else{
115329f1
DB
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
336 }
337 }
338
339 if( h->pps.cabac ) {
340 // top_cbp
341 if(top_type) {
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
344 h->top_cbp = 0x1C0;
345 } else {
346 h->top_cbp = 0;
347 }
348 // left_cbp
349 if (left_type[0]) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
352 h->left_cbp = 0x1C0;
353 } else {
354 h->left_cbp = 0;
355 }
356 if (left_type[0]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
358 }
359 if (left_type[1]) {
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 361 }
0da71265 362 }
6867a90b 363
0da71265 364#if 1
e2e5894a 365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 366 int list;
3425501d 367 for(list=0; list<h->list_count; list++){
e2e5894a 368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
373 }*/
5ad984c9 374 continue;
0da71265
MN
375 }
376 h->mv_cache_clean[list]= 0;
115329f1 377
53b19144 378 if(USES_LIST(top_type, list)){
0da71265
MN
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
389 }else{
115329f1
DB
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
395 }
396
4672503d
LM
397 for(i=0; i<2; i++){
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
406 }else{
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 }
0da71265
MN
412 }
413
0281d325 414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
415 continue;
416
53b19144 417 if(USES_LIST(topleft_type, list)){
02f7695b
LM
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
422 }else{
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
425 }
115329f1 426
53b19144 427 if(USES_LIST(topright_type, list)){
e2e5894a
LM
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
432 }else{
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
435 }
e2e5894a 436
ae08a563 437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 438 continue;
115329f1
DB
439
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 443 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
450
451 if( h->pps.cabac ) {
452 /* XXX beurk, Load mvd */
53b19144 453 if(USES_LIST(top_type, list)){
9e528114
LA
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
459 }else{
115329f1
DB
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
464 }
53b19144 465 if(USES_LIST(left_type[0], list)){
9e528114
LA
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
469 }else{
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
472 }
53b19144 473 if(USES_LIST(left_type[1], list)){
9e528114
LA
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
477 }else{
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
480 }
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 486
9f5c1037 487 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
489
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
496 }else{
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
498 }
115329f1 499
5d18eaad
LM
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
504 else
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
506
507 if(IS_DIRECT(left_type[1]))
5ad984c9 508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
511 else
5ad984c9 512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
513 }
514 }
515
516 if(FRAME_MBAFF){
517#define MAP_MVS\
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
528 if(MB_FIELD){
529#define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
534 }
535 MAP_MVS
536#undef MAP_F2F
537 }else{
538#define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 543 }
5d18eaad
LM
544 MAP_MVS
545#undef MAP_F2F
5ad984c9 546 }
9e528114 547 }
0da71265 548 }
0da71265
MN
549 }
550#endif
43efd19a
LM
551
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
553}
554
555static inline void write_back_intra_pred_mode(H264Context *h){
64514ee8 556 const int mb_xy= h->mb_xy;
0da71265
MN
557
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565}
566
567/**
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
569 */
570static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 int i;
115329f1 575
0da71265
MN
576 if(!(h->top_samples_available&0x8000)){
577 for(i=0; i<4; i++){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
579 if(status<0){
9b879566 580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
581 return -1;
582 } else if(status){
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
584 }
585 }
586 }
115329f1 587
d1d10e91
MN
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
0da71265 590 for(i=0; i<4; i++){
d1d10e91 591 if(!(h->left_samples_available&mask[i])){
0da71265
MN
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
593 if(status<0){
9b879566 594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
0da71265
MN
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 }
d1d10e91 599 }
0da71265
MN
600 }
601 }
602
603 return 0;
604} //FIXME cleanup like next
605
606/**
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
608 */
609static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 613
43ff0714 614 if(mode > 6U) {
5175b937 615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 616 return -1;
5175b937 617 }
115329f1 618
0da71265
MN
619 if(!(h->top_samples_available&0x8000)){
620 mode= top[ mode ];
621 if(mode<0){
9b879566 622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
623 return -1;
624 }
625 }
115329f1 626
d1d10e91 627 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 628 mode= left[ mode ];
d1d10e91
MN
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
631 }
0da71265 632 if(mode<0){
9b879566 633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 634 return -1;
115329f1 635 }
0da71265
MN
636 }
637
638 return mode;
639}
640
641/**
642 * gets the predicted intra4x4 prediction mode.
643 */
644static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
649
a9c9a240 650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
651
652 if(min<0) return DC_PRED;
653 else return min;
654}
655
656static inline void write_back_non_zero_count(H264Context *h){
64514ee8 657 const int mb_xy= h->mb_xy;
0da71265 658
6867a90b
LLL
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 666
6867a90b 667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 670
6867a90b 671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
674}
675
676/**
1412060e 677 * gets the predicted number of non-zero coefficients.
0da71265
MN
678 * @param n block index
679 */
680static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
684 int i= left + top;
115329f1 685
0da71265
MN
686 if(i<64) i= (i+1)>>1;
687
a9c9a240 688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
689
690 return i&31;
691}
692
1924f3ce
MN
693static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
a9c9a240 695 MpegEncContext *s = &h->s;
1924f3ce 696
5d18eaad
LM
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
699 if(FRAME_MBAFF){
191e8ca7 700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
5d18eaad
LM
701 const int16_t *mv;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
704
705 if(!MB_FIELD
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
02f7695b 712 if(!USES_LIST(mb_type,list))\
5d18eaad
LM
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
720 }
721 }
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 if(!MB_FIELD
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
728 }
729 if(MB_FIELD
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 && i >= scan8[0]+8){
1412060e 732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
02f7695b 733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
5d18eaad
LM
734 }
735 }
736#undef SET_DIAG_MV
737 }
738
1924f3ce
MN
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
741 return topright_ref;
742 }else{
a9c9a240 743 tprintf(s->avctx, "topright MV not available\n");
95c26348 744
1924f3ce
MN
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
747 }
748}
749
0da71265
MN
750/**
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
756 */
757static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
0da71265
MN
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
0da71265
MN
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1924f3ce
MN
763 const int16_t * C;
764 int diagonal_ref, match_count;
765
0da71265 766 assert(part_width==1 || part_width==2 || part_width==4);
1924f3ce 767
0da71265 768/* mv_cache
115329f1 769 B . . A T T T T
0da71265
MN
770 U . . L . . , .
771 U . . L . . . .
772 U . . L . . , .
773 . . . L . . . .
774*/
1924f3ce
MN
775
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
a9c9a240 778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1924f3ce
MN
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
783 if(left_ref==ref){
784 *mx= A[0];
115329f1 785 *my= A[1];
1924f3ce
MN
786 }else if(top_ref==ref){
787 *mx= B[0];
115329f1 788 *my= B[1];
0da71265 789 }else{
1924f3ce 790 *mx= C[0];
115329f1 791 *my= C[1];
0da71265
MN
792 }
793 }else{
1924f3ce 794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
0da71265 795 *mx= A[0];
115329f1 796 *my= A[1];
0da71265 797 }else{
1924f3ce
MN
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
0da71265 800 }
0da71265 801 }
115329f1 802
a9c9a240 803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
804}
805
806/**
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
811 */
812static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
813 if(n==0){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816
a9c9a240 817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 818
0da71265
MN
819 if(top_ref == ref){
820 *mx= B[0];
821 *my= B[1];
822 return;
823 }
824 }else{
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 827
a9c9a240 828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
829
830 if(left_ref == ref){
831 *mx= A[0];
832 *my= A[1];
833 return;
834 }
835 }
836
837 //RARE
838 pred_motion(h, n, 4, list, ref, mx, my);
839}
840
841/**
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
846 */
847static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
848 if(n==0){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 851
a9c9a240 852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
853
854 if(left_ref == ref){
855 *mx= A[0];
856 *my= A[1];
857 return;
858 }
859 }else{
1924f3ce
MN
860 const int16_t * C;
861 int diagonal_ref;
862
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 864
a9c9a240 865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 866
115329f1 867 if(diagonal_ref == ref){
0da71265
MN
868 *mx= C[0];
869 *my= C[1];
870 return;
871 }
0da71265
MN
872 }
873
874 //RARE
875 pred_motion(h, n, 2, list, ref, mx, my);
876}
877
878static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881
a9c9a240 882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
883
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
115329f1 887
0da71265
MN
888 *mx = *my = 0;
889 return;
890 }
115329f1 891
0da71265
MN
892 pred_motion(h, 0, 4, 0, 0, mx, my);
893
894 return;
895}
896
8b1fd554
MN
897static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
901 return 256;
902 }else{
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
906 }
907}
908
5ad984c9 909static inline void direct_dist_scale_factor(H264Context * const h){
2879c75f
MN
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
5ad984c9 912 const int poc1 = h->ref_list[1][0].poc;
8b1fd554
MN
913 int i, field;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
5ad984c9 919 }
8b1fd554
MN
920
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
5d18eaad 923 }
5ad984c9 924}
f4d3382d
MN
925
926static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
933
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
936
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
940
941 if (!interl)
942 poc |= 3;
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
945
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
950 if(rfield == field)
951 map[list][old_ref] = cur_ref;
952 break;
953 }
954 }
955 }
956 }
957}
958
2f944356
LM
959static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
bbc78fb4 963 int list, j, field;
f4d3382d
MN
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
aa617518 966
2f944356 967 for(list=0; list<2; list++){
2879c75f 968 cur->ref_count[sidx][list] = h->ref_count[list];
2f944356 969 for(j=0; j<h->ref_count[list]; j++)
42de393d 970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
2f944356 971 }
aa617518 972
7762cc3d 973 if(s->picture_structure == PICT_FRAME){
f4d3382d
MN
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
7762cc3d 976 }
aa617518 977
48e025e5 978 cur->mbaff= FRAME_MBAFF;
aa617518 979
9701840b 980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
2f944356 981 return;
aa617518 982
2f944356 983 for(list=0; list<2; list++){
f4d3382d
MN
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
2f944356
LM
987 }
988}
5ad984c9
LM
989
990static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
d00eac6c
MN
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
995 int mb_type_col[2];
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
5ad984c9 998 const int is_b8x8 = IS_8X8(*mb_type);
88e7a4d1 999 unsigned int sub_mb_type;
5ad984c9
LM
1000 int i8, i4;
1001
5d18eaad 1002#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
d00eac6c
MN
1003
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
53c193a9 1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
471341a7
MN
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1010 b8_stride = 0;
60c9b24d 1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
d00eac6c
MN
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1014 }
1015 goto single_col;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1021 b8_stride *= 3;
1022 b4_stride *= 6;
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1026 && !is_b8x8){
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1029 }else{
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1032 }
1033 }else{ // AFR/FR -> AFR/FR
1034single_col:
1035 mb_type_col[0] =
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
cc615d2c
MN
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1045 }else{
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1048 }
d00eac6c 1049 }
5ad984c9 1050 }
5ad984c9 1051
7d54ecc9
MN
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
9b5fab91
MN
1056 if(!b8_stride){
1057 if(s->mb_y&1){
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
1062 }
d00eac6c 1063 }
115329f1 1064
5ad984c9
LM
1065 if(h->direct_spatial_mv_pred){
1066 int ref[2];
1067 int mv[2][2];
1068 int list;
1069
5d18eaad
LM
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1071
5ad984c9
LM
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
9bec77fe 1077 if(refc == PART_NOT_AVAILABLE)
5ad984c9 1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
29d05ebc 1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
5ad984c9
LM
1080 if(ref[list] < 0)
1081 ref[list] = -1;
1082 }
1083
1084 if(ref[0] < 0 && ref[1] < 0){
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1088 }else{
1089 for(list=0; list<2; list++){
1090 if(ref[list] >= 0)
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1092 else
1093 mv[list][0] = mv[list][1] = 0;
1094 }
1095 }
1096
1097 if(ref[1] < 0){
50b3ab0f
LM
1098 if(!is_b8x8)
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
5ad984c9 1101 }else if(ref[0] < 0){
50b3ab0f
LM
1102 if(!is_b8x8)
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
5ad984c9
LM
1105 }
1106
d00eac6c 1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
50b3ab0f
LM
1108 for(i8=0; i8<4; i8++){
1109 int x8 = i8&1;
1110 int y8 = i8>>1;
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1113 int a=0, b=0;
1114
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1116 continue;
1117 h->sub_mb_type[i8] = sub_mb_type;
1118
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
d00eac6c 1121 if(!IS_INTRA(mb_type_col[y8])
50b3ab0f
LM
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1124 if(ref[0] > 0)
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 if(ref[1] > 0)
1127 b= pack16to32(mv[1][0],mv[1][1]);
1128 }else{
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1131 }
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1134 }
1135 }else if(IS_16X16(*mb_type)){
d19f5acb
MN
1136 int a=0, b=0;
1137
cec93959
LM
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
d00eac6c 1140 if(!IS_INTRA(mb_type_col[0])
c26abfa5
DB
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
bf4e3bd2 1143 && (h->x264_build>33 || !h->x264_build)))){
5ad984c9 1144 if(ref[0] > 0)
d19f5acb 1145 a= pack16to32(mv[0][0],mv[0][1]);
5ad984c9 1146 if(ref[1] > 0)
d19f5acb 1147 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1148 }else{
d19f5acb
MN
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
5ad984c9 1151 }
d19f5acb
MN
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
5ad984c9
LM
1154 }else{
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
115329f1 1158
5ad984c9
LM
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1160 continue;
1161 h->sub_mb_type[i8] = sub_mb_type;
115329f1 1162
5ad984c9
LM
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
cec93959
LM
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
115329f1 1167
5ad984c9 1168 /* col_zero_flag */
2ccd25d0
MN
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
bf4e3bd2 1171 && (h->x264_build>33 || !h->x264_build)))){
2ccd25d0 1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
f1f17e54 1173 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
c26abfa5 1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
f1f17e54
LM
1176 if(ref[0] == 0)
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 if(ref[1] == 0)
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1180 }
1181 }else
5ad984c9 1182 for(i4=0; i4<4; i4++){
2ccd25d0 1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
c26abfa5 1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
5ad984c9
LM
1185 if(ref[0] == 0)
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1187 if(ref[1] == 0)
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1189 }
1190 }
1191 }
1192 }
1193 }
1194 }else{ /* direct temporal mv pred */
5d18eaad
LM
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
f4d3382d 1197 int ref_offset= 0;
5d18eaad 1198
cc615d2c 1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
8b1fd554
MN
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
cc615d2c 1203 }
48e025e5 1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
f4d3382d 1205 ref_offset += 16;
48e025e5 1206
cc615d2c
MN
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
c210fa61 1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
5d18eaad 1210
cc615d2c
MN
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1214 int ref0, scale;
1215 const int16_t (*l1mv)[2]= l1mv0;
5d18eaad 1216
cc615d2c
MN
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 continue;
1219 h->sub_mb_type[i8] = sub_mb_type;
1220
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1226 continue;
1227 }
1228
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1230 if(ref0 >= 0)
f4d3382d 1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
cc615d2c 1232 else{
f4d3382d 1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
cc615d2c
MN
1234 l1mv= l1mv1;
1235 }
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1238
1239 {
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
5d18eaad 1246 }
5d18eaad 1247 }
cc615d2c
MN
1248 return;
1249 }
5d18eaad
LM
1250
1251 /* one-to-one mv scaling */
1252
5ad984c9 1253 if(IS_16X16(*mb_type)){
fda51641
MN
1254 int ref, mv0, mv1;
1255
5ad984c9 1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
d00eac6c 1257 if(IS_INTRA(mb_type_col[0])){
fda51641 1258 ref=mv0=mv1=0;
5ad984c9 1259 }else{
f4d3382d
MN
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
5d18eaad 1262 const int scale = dist_scale_factor[ref0];
8583bef8 1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
5ad984c9 1264 int mv_l0[2];
5d18eaad
LM
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
fda51641
MN
1267 ref= ref0;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
5ad984c9 1270 }
fda51641
MN
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
5ad984c9
LM
1274 }else{
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
5d18eaad 1278 int ref0, scale;
bf4e3bd2 1279 const int16_t (*l1mv)[2]= l1mv0;
8583bef8 1280
5ad984c9
LM
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1282 continue;
1283 h->sub_mb_type[i8] = sub_mb_type;
5d18eaad 1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
d00eac6c 1285 if(IS_INTRA(mb_type_col[0])){
5ad984c9 1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
5ad984c9
LM
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1289 continue;
1290 }
115329f1 1291
f4d3382d 1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
2f944356 1293 if(ref0 >= 0)
5d18eaad 1294 ref0 = map_col_to_list0[0][ref0];
8583bef8 1295 else{
f4d3382d 1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
8583bef8
MN
1297 l1mv= l1mv1;
1298 }
5d18eaad 1299 scale = dist_scale_factor[ref0];
115329f1 1300
5ad984c9 1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
f1f17e54 1302 if(IS_SUB_8X8(sub_mb_type)){
2ccd25d0 1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
5d18eaad
LM
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
f1f17e54
LM
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1308 }else
5ad984c9 1309 for(i4=0; i4<4; i4++){
2ccd25d0 1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
5ad984c9 1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
5d18eaad
LM
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
5ad984c9
LM
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1316 }
1317 }
1318 }
1319 }
1320}
1321
0da71265
MN
1322static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
0da71265
MN
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1326 int list;
1327
2ea39252
LM
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1330
3425501d 1331 for(list=0; list<h->list_count; list++){
0da71265 1332 int y;
53b19144 1333 if(!USES_LIST(mb_type, list))
5ad984c9 1334 continue;
115329f1 1335
0da71265
MN
1336 for(y=0; y<4; y++){
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1339 }
9e528114 1340 if( h->pps.cabac ) {
e6e77eb6
LM
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1343 else
9e528114
LA
1344 for(y=0; y<4; y++){
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1347 }
1348 }
53b19144
LM
1349
1350 {
191e8ca7 1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
1356 }
1357 }
115329f1 1358
9f5c1037 1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 1360 if(IS_8X8(mb_type)){
53b19144
LM
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
1365 }
1366 }
0da71265
MN
1367}
1368
1369/**
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
3b66c4c5 1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
115329f1 1374 * @returns decoded bytes, might be src+1 if no escapes
0da71265 1375 */
30317501 1376static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
1377 int i, si, di;
1378 uint8_t *dst;
24456882 1379 int bufidx;
0da71265 1380
bb270c08 1381// src[0]&0x80; //forbidden bit
0da71265
MN
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1384
1385 src++; length--;
115329f1 1386#if 0
0da71265
MN
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
1389#endif
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1394 if(src[i+2]!=3){
1395 /* startcode, so we must be past the end */
1396 length=i;
1397 }
1398 break;
1399 }
1400 }
1401
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
115329f1 1405 return src;
0da71265
MN
1406 }
1407
24456882
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
0da71265 1411
ac658be5
FOL
1412 if (dst == NULL){
1413 return NULL;
1414 }
1415
3b66c4c5 1416//printf("decoding esc\n");
0da71265 1417 si=di=0;
115329f1 1418 while(si<length){
0da71265
MN
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1422 dst[di++]= 0;
1423 dst[di++]= 0;
1424 si+=3;
c8470cc1 1425 continue;
0da71265
MN
1426 }else //next start code
1427 break;
1428 }
1429
1430 dst[di++]= src[si++];
1431 }
1432
1433 *dst_length= di;
1434 *consumed= si + 1;//+1 for the header
90b5b51e 1435//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
1436 return dst;
1437}
1438
0da71265
MN
1439/**
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
1442 */
30317501 1443static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
1444 int v= *src;
1445 int r;
1446
a9c9a240 1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
1448
1449 for(r=1; r<9; r++){
1450 if(v&1) return r;
1451 v>>=1;
1452 }
1453 return 0;
1454}
1455
1456/**
1412060e 1457 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
1458 * @param qp quantization parameter
1459 */
239ea04c 1460static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1461#define stride 16
1462 int i;
1463 int temp[16]; //FIXME check if this is a good idea
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1466
1467//memset(block, 64, 2*256);
1468//return;
1469 for(i=0; i<4; i++){
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
1475
1476 temp[4*i+0]= z0+z3;
1477 temp[4*i+1]= z1+z2;
1478 temp[4*i+2]= z1-z2;
1479 temp[4*i+3]= z0-z3;
1480 }
1481
1482 for(i=0; i<4; i++){
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1488
1412060e 1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
1493 }
1494}
1495
#if 0
/**
 * DCT transforms the 16 dc values.
 * Same butterfly structure as h264_luma_dc_dequant_idct_c(), but the
 * results are halved (>>1) instead of dequantized. Currently compiled out.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const DCTELEM * const in= block + y_offset[i];
        const int sum04 = in[stride*0] + in[stride*4];
        const int dif04 = in[stride*0] - in[stride*4];
        const int dif15 = in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04+sum15;
        temp[4*i+1]= dif04+dif15;
        temp[4*i+2]= dif04-dif15;
        temp[4*i+3]= sum04-sum15;
    }

    for(i=0; i<4; i++){
        DCTELEM * const out= block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int dif02 = temp[4*0+i] - temp[4*2+i];
        const int dif13 = temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
1535
0da71265
MN
1536#undef xStride
1537#undef stride
1538
239ea04c 1539static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
1540 const int stride= 16*2;
1541 const int xStride= 16;
1542 int a,b,c,d,e;
1543
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1548
1549 e= a-b;
1550 a= a+b;
1551 b= c-d;
1552 c= c+d;
1553
239ea04c
LM
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
1558}
1559
#if 0
/**
 * In-place 2x2 butterfly transform of four chroma DC values, without any
 * scaling (currently compiled out by the #if 0).
 *
 * The four values live at block[0], block[16], block[32] and block[48].
 *
 * @param block coefficient buffer, modified in place
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* horizontal butterflies; e holds a-b because a is overwritten next */
    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    /* vertical butterflies */
    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
0da71265
MN
1582
1583/**
1584 * gets the chroma qp.
1585 */
4691a77d 1586static inline int get_chroma_qp(H264Context *h, int t, int qscale){
5a78bfbd 1587 return h->pps.chroma_qp_table[t][qscale];
0da71265
MN
1588}
1589
0da71265
MN
1590static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1591 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1592 int src_x_offset, int src_y_offset,
1593 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1594 MpegEncContext * const s = &h->s;
1595 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1596 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1597 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1598 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1599 uint8_t * src_cb, * src_cr;
1600 int extra_width= h->emu_edge_width;
1601 int extra_height= h->emu_edge_height;
0da71265
MN
1602 int emu=0;
1603 const int full_mx= mx>>2;
1604 const int full_my= my>>2;
fbd312fd 1605 const int pic_width = 16*s->mb_width;
0d43dd8c 1606 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1607
1412060e 1608 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
171c4076 1609 return;
115329f1 1610
0da71265
MN
1611 if(mx&7) extra_width -= 3;
1612 if(my&7) extra_height -= 3;
115329f1
DB
1613
1614 if( full_mx < 0-extra_width
1615 || full_my < 0-extra_height
1616 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1617 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1618 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1619 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1620 emu=1;
1621 }
115329f1 1622
5d18eaad 1623 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1624 if(!square){
5d18eaad 1625 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1626 }
115329f1 1627
87352549 1628 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1629
0d43dd8c 1630 if(MB_FIELD){
5d18eaad 1631 // chroma offset when predicting from a field of opposite parity
2143b118 1632 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1633 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1634 }
1635 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1636 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1637
0da71265 1638 if(emu){
5d18eaad 1639 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1640 src_cb= s->edge_emu_buffer;
1641 }
5d18eaad 1642 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1643
1644 if(emu){
5d18eaad 1645 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1646 src_cr= s->edge_emu_buffer;
1647 }
5d18eaad 1648 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1649}
1650
9f2d1b4f 1651static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1652 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1653 int x_offset, int y_offset,
1654 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1655 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1656 int list0, int list1){
1657 MpegEncContext * const s = &h->s;
1658 qpel_mc_func *qpix_op= qpix_put;
1659 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1660
5d18eaad
LM
1661 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1662 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1663 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1664 x_offset += 8*s->mb_x;
0d43dd8c 1665 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1666
0da71265 1667 if(list0){
1924f3ce 1668 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1669 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1670 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1671 qpix_op, chroma_op);
1672
1673 qpix_op= qpix_avg;
1674 chroma_op= chroma_avg;
1675 }
1676
1677 if(list1){
1924f3ce 1678 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1679 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1680 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1681 qpix_op, chroma_op);
1682 }
1683}
1684
9f2d1b4f
LM
1685static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1686 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1687 int x_offset, int y_offset,
1688 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1689 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1690 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1691 int list0, int list1){
1692 MpegEncContext * const s = &h->s;
1693
5d18eaad
LM
1694 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1695 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1696 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1697 x_offset += 8*s->mb_x;
0d43dd8c 1698 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1699
9f2d1b4f
LM
1700 if(list0 && list1){
1701 /* don't optimize for luma-only case, since B-frames usually
1702 * use implicit weights => chroma too. */
1703 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1704 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1705 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1706 int refn0 = h->ref_cache[0][ scan8[n] ];
1707 int refn1 = h->ref_cache[1][ scan8[n] ];
1708
1709 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1710 dest_y, dest_cb, dest_cr,
1711 x_offset, y_offset, qpix_put, chroma_put);
1712 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1713 tmp_y, tmp_cb, tmp_cr,
1714 x_offset, y_offset, qpix_put, chroma_put);
1715
1716 if(h->use_weight == 2){
1717 int weight0 = h->implicit_weight[refn0][refn1];
1718 int weight1 = 64 - weight0;
5d18eaad
LM
1719 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1720 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1721 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1722 }else{
5d18eaad 1723 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1724 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1725 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1726 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1727 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1728 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1729 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1730 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1731 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1732 }
1733 }else{
1734 int list = list1 ? 1 : 0;
1735 int refn = h->ref_cache[list][ scan8[n] ];
1736 Picture *ref= &h->ref_list[list][refn];
1737 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1738 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1739 qpix_put, chroma_put);
1740
5d18eaad 1741 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1742 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1743 if(h->use_weight_chroma){
5d18eaad 1744 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1745 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1746 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1747 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1748 }
1749 }
1750}
1751
1752static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1753 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1754 int x_offset, int y_offset,
1755 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1756 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1757 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1758 int list0, int list1){
1759 if((h->use_weight==2 && list0 && list1
1760 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1761 || h->use_weight==1)
1762 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1763 x_offset, y_offset, qpix_put, chroma_put,
1764 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1765 else
1766 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1767 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1768}
1769
513fbd8e
LM
1770static inline void prefetch_motion(H264Context *h, int list){
1771 /* fetch pixels for estimated mv 4 macroblocks ahead
1772 * optimized for 64byte cache lines */
1773 MpegEncContext * const s = &h->s;
1774 const int refn = h->ref_cache[list][scan8[0]];
1775 if(refn >= 0){
1776 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1777 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1778 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1779 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1780 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1781 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1782 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1783 }
1784}
1785
0da71265
MN
1786static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1788 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1789 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1790 MpegEncContext * const s = &h->s;
64514ee8 1791 const int mb_xy= h->mb_xy;
0da71265 1792 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1793
0da71265 1794 assert(IS_INTER(mb_type));
115329f1 1795
513fbd8e
LM
1796 prefetch_motion(h, 0);
1797
0da71265
MN
1798 if(IS_16X16(mb_type)){
1799 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1800 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1801 &weight_op[0], &weight_avg[0],
0da71265
MN
1802 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1803 }else if(IS_16X8(mb_type)){
1804 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1805 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1806 &weight_op[1], &weight_avg[1],
0da71265
MN
1807 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1809 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1810 &weight_op[1], &weight_avg[1],
0da71265
MN
1811 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1812 }else if(IS_8X16(mb_type)){
5d18eaad 1813 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1814 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1815 &weight_op[2], &weight_avg[2],
0da71265 1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1817 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1818 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1819 &weight_op[2], &weight_avg[2],
0da71265
MN
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else{
1822 int i;
115329f1 1823
0da71265
MN
1824 assert(IS_8X8(mb_type));
1825
1826 for(i=0; i<4; i++){
1827 const int sub_mb_type= h->sub_mb_type[i];
1828 const int n= 4*i;
1829 int x_offset= (i&1)<<2;
1830 int y_offset= (i&2)<<1;
1831
1832 if(IS_SUB_8X8(sub_mb_type)){
1833 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1834 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1835 &weight_op[3], &weight_avg[3],
0da71265
MN
1836 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1837 }else if(IS_SUB_8X4(sub_mb_type)){
1838 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1840 &weight_op[4], &weight_avg[4],
0da71265
MN
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1843 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1844 &weight_op[4], &weight_avg[4],
0da71265
MN
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1847 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1848 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1849 &weight_op[5], &weight_avg[5],
0da71265 1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1851 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1852 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1853 &weight_op[5], &weight_avg[5],
0da71265
MN
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else{
1856 int j;
1857 assert(IS_SUB_4X4(sub_mb_type));
1858 for(j=0; j<4; j++){
1859 int sub_x_offset= x_offset + 2*(j&1);
1860 int sub_y_offset= y_offset + (j&2);
1861 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1862 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1863 &weight_op[6], &weight_avg[6],
0da71265
MN
1864 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1865 }
1866 }
1867 }
1868 }
513fbd8e
LM
1869
1870 prefetch_motion(h, 1);
0da71265
MN
1871}
1872
98a6fff9 1873static av_cold void decode_init_vlc(void){
0da71265
MN
1874 static int done = 0;
1875
1876 if (!done) {
1877 int i;
910e3668 1878 int offset;
0da71265
MN
1879 done = 1;
1880
910e3668
AC
1881 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1882 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1883 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1884 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1885 &chroma_dc_coeff_token_bits[0], 1, 1,
1886 INIT_VLC_USE_NEW_STATIC);
0da71265 1887
910e3668 1888 offset = 0;
0da71265 1889 for(i=0; i<4; i++){
910e3668
AC
1890 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1891 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1892 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1893 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1894 &coeff_token_bits[i][0], 1, 1,
1895 INIT_VLC_USE_NEW_STATIC);
1896 offset += coeff_token_vlc_tables_size[i];
0da71265 1897 }
910e3668
AC
1898 /*
1899 * This is a one time safety check to make sure that
1900 * the packed static coeff_token_vlc table sizes
1901 * were initialized correctly.
1902 */
37d3e066 1903 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1904
1905 for(i=0; i<3; i++){
910e3668
AC
1906 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1907 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1908 init_vlc(&chroma_dc_total_zeros_vlc[i],
1909 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1910 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1911 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1912 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1913 }
1914 for(i=0; i<15; i++){
910e3668
AC
1915 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1916 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1917 init_vlc(&total_zeros_vlc[i],
1918 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1919 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1920 &total_zeros_bits[i][0], 1, 1,
1921 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1922 }
1923
1924 for(i=0; i<6; i++){
910e3668
AC
1925 run_vlc[i].table = run_vlc_tables[i];
1926 run_vlc[i].table_allocated = run_vlc_tables_size;
1927 init_vlc(&run_vlc[i],
1928 RUN_VLC_BITS, 7,
0da71265 1929 &run_len [i][0], 1, 1,
910e3668
AC
1930 &run_bits[i][0], 1, 1,
1931 INIT_VLC_USE_NEW_STATIC);
0da71265 1932 }
910e3668
AC
1933 run7_vlc.table = run7_vlc_table,
1934 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1935 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1936 &run_len [6][0], 1, 1,
910e3668
AC
1937 &run_bits[6][0], 1, 1,
1938 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1939 }
1940}
1941
0da71265 1942static void free_tables(H264Context *h){
7978debd 1943 int i;
afebe2f7 1944 H264Context *hx;
0da71265 1945 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1946 av_freep(&h->chroma_pred_mode_table);
1947 av_freep(&h->cbp_table);
9e528114
LA
1948 av_freep(&h->mvd_table[0]);
1949 av_freep(&h->mvd_table[1]);
5ad984c9 1950 av_freep(&h->direct_table);
0da71265
MN
1951 av_freep(&h->non_zero_count);
1952 av_freep(&h->slice_table_base);
1953 h->slice_table= NULL;
e5017ab8 1954
0da71265
MN
1955 av_freep(&h->mb2b_xy);
1956 av_freep(&h->mb2b8_xy);
9f2d1b4f 1957
afebe2f7
1958 for(i = 0; i < h->s.avctx->thread_count; i++) {
1959 hx = h->thread_context[i];
1960 if(!hx) continue;
1961 av_freep(&hx->top_borders[1]);
1962 av_freep(&hx->top_borders[0]);
1963 av_freep(&hx->s.obmc_scratchpad);
afebe2f7 1964 }
0da71265
MN
1965}
1966
239ea04c
LM
1967static void init_dequant8_coeff_table(H264Context *h){
1968 int i,q,x;
548a1c8a 1969 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
1970 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1971 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1972
1973 for(i=0; i<2; i++ ){
1974 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1975 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1976 break;
1977 }
1978
1979 for(q=0; q<52; q++){
d9ec210b
DP
1980 int shift = div6[q];
1981 int idx = rem6[q];
239ea04c 1982 for(x=0; x<64; x++)
548a1c8a
LM
1983 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1984 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1985 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
1986 }
1987 }
1988}
1989
1990static void init_dequant4_coeff_table(H264Context *h){
1991 int i,j,q,x;
ab2e3e2c 1992 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
1993 for(i=0; i<6; i++ ){
1994 h->dequant4_coeff[i] = h->dequant4_buffer[i];
1995 for(j=0; j<i; j++){
1996 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1997 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1998 break;
1999 }
2000 }
2001 if(j<i)
2002 continue;
2003
2004 for(q=0; q<52; q++){
d9ec210b
DP
2005 int shift = div6[q] + 2;
2006 int idx = rem6[q];
239ea04c 2007 for(x=0; x<16; x++)
ab2e3e2c
LM
2008 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2009 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
2010 h->pps.scaling_matrix4[i][x]) << shift;
2011 }
2012 }
2013}
2014
2015static void init_dequant_tables(H264Context *h){
2016 int i,x;
2017 init_dequant4_coeff_table(h);
2018 if(h->pps.transform_8x8_mode)
2019 init_dequant8_coeff_table(h);
2020 if(h->sps.transform_bypass){
2021 for(i=0; i<6; i++)
2022 for(x=0; x<16; x++)
2023 h->dequant4_coeff[i][0][x] = 1<<6;
2024 if(h->pps.transform_8x8_mode)
2025 for(i=0; i<2; i++)
2026 for(x=0; x<64; x++)
2027 h->dequant8_coeff[i][0][x] = 1<<6;
2028 }
2029}
2030
2031
0da71265
MN
2032/**
2033 * allocates tables.
3b66c4c5 2034 * needs width/height
0da71265
MN
2035 */
2036static int alloc_tables(H264Context *h){
2037 MpegEncContext * const s = &h->s;
7bc9090a 2038 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 2039 int x,y;
0da71265
MN
2040
2041 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
e5017ab8 2042
53c05b1e 2043 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
b735aeea 2044 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
5d0e4cb8 2045 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
0da71265 2046
7526ade2
MN
2047 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2048 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2049 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2050 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
e5017ab8 2051
b735aeea 2052 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 2053 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 2054
a55f20bd
LM
2055 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2056 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
0da71265
MN
2057 for(y=0; y<s->mb_height; y++){
2058 for(x=0; x<s->mb_width; x++){
7bc9090a 2059 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
2060 const int b_xy = 4*x + 4*y*h->b_stride;
2061 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 2062
0da71265
MN
2063 h->mb2b_xy [mb_xy]= b_xy;
2064 h->mb2b8_xy[mb_xy]= b8_xy;
2065 }
2066 }
9f2d1b4f 2067
9c6221ae
GV
2068 s->obmc_scratchpad = NULL;
2069
56edbd81
LM
2070 if(!h->dequant4_coeff[0])
2071 init_dequant_tables(h);
2072
0da71265
MN
2073 return 0;
2074fail:
2075 free_tables(h);
2076 return -1;
2077}
2078
afebe2f7
2079/**
2080 * Mimic alloc_tables(), but for every context thread.
2081 */
2082static void clone_tables(H264Context *dst, H264Context *src){
2083 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2084 dst->non_zero_count = src->non_zero_count;
2085 dst->slice_table = src->slice_table;
2086 dst->cbp_table = src->cbp_table;
2087 dst->mb2b_xy = src->mb2b_xy;
2088 dst->mb2b8_xy = src->mb2b8_xy;
2089 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2090 dst->mvd_table[0] = src->mvd_table[0];
2091 dst->mvd_table[1] = src->mvd_table[1];
2092 dst->direct_table = src->direct_table;
2093
afebe2f7
2094 dst->s.obmc_scratchpad = NULL;
2095 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
2096}
2097
2098/**
2099 * Init context
2100 * Allocate buffers which are not shared amongst multiple threads.
2101 */
2102static int context_init(H264Context *h){
afebe2f7
2103 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2105
afebe2f7
2106 return 0;
2107fail:
2108 return -1; // free_tables will clean up for us
2109}
2110
98a6fff9 2111static av_cold void common_init(H264Context *h){
0da71265 2112 MpegEncContext * const s = &h->s;
0da71265
MN
2113
2114 s->width = s->avctx->width;
2115 s->height = s->avctx->height;
2116 s->codec_id= s->avctx->codec->id;
115329f1 2117
c92a30bb 2118 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 2119
239ea04c 2120 h->dequant_coeff_pps= -1;
9a41c2c7 2121 s->unrestricted_mv=1;
0da71265 2122 s->decode=1; //FIXME
56edbd81
LM
2123
2124 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2125 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
2126}
2127
98a6fff9 2128static av_cold int decode_init(AVCodecContext *avctx){
0da71265
MN
2129 H264Context *h= avctx->priv_data;
2130 MpegEncContext * const s = &h->s;
2131
3edcacde 2132 MPV_decode_defaults(s);
115329f1 2133
0da71265
MN
2134 s->avctx = avctx;
2135 common_init(h);
2136
2137 s->out_format = FMT_H264;
2138 s->workaround_bugs= avctx->workaround_bugs;
2139
2140 // set defaults
0da71265 2141// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 2142 s->quarter_sample = 1;
0da71265 2143 s->low_delay= 1;
7a9dba3c
MN
2144
2145 if(avctx->codec_id == CODEC_ID_SVQ3)
2146 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2147 else
1d42f410 2148 avctx->pix_fmt= PIX_FMT_YUV420P;
0da71265 2149
c2212338 2150 decode_init_vlc();
115329f1 2151
26165f99
MR
2152 if(avctx->extradata_size > 0 && avctx->extradata &&
2153 *(char *)avctx->extradata == 1){
4770b1b4
RT
2154 h->is_avc = 1;
2155 h->got_avcC = 0;
26165f99
MR
2156 } else {
2157 h->is_avc = 0;
4770b1b4
RT
2158 }
2159
afebe2f7 2160 h->thread_context[0] = h;
18c7be65 2161 h->outputed_poc = INT_MIN;
e4b8f1fa 2162 h->prev_poc_msb= 1<<16;
0da71265
MN
2163 return 0;
2164}
2165
af8aa846 2166static int frame_start(H264Context *h){
0da71265
MN
2167 MpegEncContext * const s = &h->s;
2168 int i;
2169
af8aa846
MN
2170 if(MPV_frame_start(s, s->avctx) < 0)
2171 return -1;
0da71265 2172 ff_er_frame_start(s);
3a22d7fa
JD
2173 /*
2174 * MPV_frame_start uses pict_type to derive key_frame.
2175 * This is incorrect for H.264; IDR markings must be used.
1412060e 2176 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
2177 * See decode_nal_units().
2178 */
2179 s->current_picture_ptr->key_frame= 0;
0da71265
MN
2180
2181 assert(s->linesize && s->uvlinesize);
2182
2183 for(i=0; i<16; i++){
2184 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 2185 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2186 }
2187 for(i=0; i<4; i++){
2188 h->block_offset[16+i]=
2189 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
2190 h->block_offset[24+16+i]=
2191 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
2192 }
2193
934b0821
LM
2194 /* can't be in alloc_tables because linesize isn't known there.
2195 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
2196 for(i = 0; i < s->avctx->thread_count; i++)
2197 if(!h->thread_context[i]->s.obmc_scratchpad)
2198 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
2199
2200 /* some macroblocks will be accessed before they're available */
afebe2f7 2201 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 2202 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 2203
0da71265 2204// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 2205
1412060e 2206 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
2207 // that if we break out due to an error it can be released automatically
2208 // in the next MPV_frame_start().
2209 // SVQ3 as well as most other codecs have only last/next/current and thus
2210 // get released even with set reference, besides SVQ3 and others do not
2211 // mark frames as reference later "naturally".
2212 if(s->codec_id != CODEC_ID_SVQ3)
2213 s->current_picture_ptr->reference= 0;
357282c6
MN
2214
2215 s->current_picture_ptr->field_poc[0]=
2216 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 2217 assert(s->current_picture_ptr->long_ref==0);
357282c6 2218
af8aa846 2219 return 0;
0da71265
MN
2220}
2221
93cc10fa 2222static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
2223 MpegEncContext * const s = &h->s;
2224 int i;
5f7f9719
MN
2225 int step = 1;
2226 int offset = 1;
2227 int uvoffset= 1;
2228 int top_idx = 1;
2229 int skiplast= 0;
115329f1 2230
53c05b1e
MN
2231 src_y -= linesize;
2232 src_cb -= uvlinesize;
2233 src_cr -= uvlinesize;
2234
5f7f9719
MN
2235 if(!simple && FRAME_MBAFF){
2236 if(s->mb_y&1){
2237 offset = MB_MBAFF ? 1 : 17;
2238 uvoffset= MB_MBAFF ? 1 : 9;
2239 if(!MB_MBAFF){
2240 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2242 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2243 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2245 }
2246 }
2247 }else{
2248 if(!MB_MBAFF){
2249 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2250 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2251 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2252 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2253 }
2254 skiplast= 1;
2255 }
2256 offset =
2257 uvoffset=
2258 top_idx = MB_MBAFF ? 0 : 1;
2259 }
2260 step= MB_MBAFF ? 2 : 1;
2261 }
2262
3b66c4c5 2263 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 2264 // and the line above the bottom macroblock
5f7f9719
MN
2265 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2266 for(i=1; i<17 - skiplast; i++){
2267 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 2268 }
115329f1 2269
5f7f9719
MN
2270 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 2272
87352549 2273 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
2274 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2275 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2276 for(i=1; i<9 - skiplast; i++){
2277 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2278 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 2279 }
5f7f9719
MN
2280 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
2282 }
2283}
2284
/**
 * Exchange (or copy back) the row above and the column left of the current
 * macroblock with the samples saved in h->left_border / h->top_borders.
 *
 * Every transfer goes through the local XCHG macro: the picture side always
 * receives the previously saved border value; the saved border is overwritten
 * with the picture value only when the macro's last argument is non-zero.
 * hl_decode_mb_internal() calls this with xchg=1 before intra prediction and
 * with xchg=0 afterwards.
 *
 * @param h          decoder context (border buffers, slice table, MBAFF state)
 * @param src_y      luma destination of the current macroblock
 * @param src_cb     Cb destination of the current macroblock
 * @param src_cr     Cr destination of the current macroblock
 * @param linesize   luma stride used for this macroblock
 * @param uvlinesize chroma stride used for this macroblock
 * @param xchg       non-zero: swap; zero: only copy border -> picture
 *                   (a few transfers below pass a literal 1 regardless)
 * @param simple     non-zero skips the FRAME_MBAFF and gray-flag checks
 */
93cc10fa 2285static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
2286 MpegEncContext * const s = &h->s;
2287 int temp8, i;
2288 uint64_t temp64;
b69378e2
2289 int deblock_left;
2290 int deblock_top;
2291 int mb_xy;
5f7f9719
MN
2292 int step = 1;
2293 int offset = 1;
2294 int uvoffset= 1;
2295 int top_idx = 1;
2296
/* In MBAFF frames the index/stride into left_border and the top_borders row
 * depend on MB_MBAFF and on the parity of mb_y. */
2297 if(!simple && FRAME_MBAFF){
2298 if(s->mb_y&1){
2299 offset = MB_MBAFF ? 1 : 17;
2300 uvoffset= MB_MBAFF ? 1 : 9;
2301 }else{
2302 offset =
2303 uvoffset=
2304 top_idx = MB_MBAFF ? 0 : 1;
2305 }
2306 step= MB_MBAFF ? 2 : 1;
2307 }
b69378e2
2308
/* deblocking_filter == 2: an edge is handled only when the neighbouring
 * macroblock has the same slice_table entry; otherwise only the picture
 * position decides. */
2309 if(h->deblocking_filter == 2) {
64514ee8 2310 mb_xy = h->mb_xy;
b69378e2
2311 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2312 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2313 } else {
2314 deblock_left = (s->mb_x > 0);
6c805007 2315 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 2316 }
53c05b1e
MN
2317
/* Step back one row and one column so index 0 addresses the top-left
 * neighbour sample of each plane. */
2318 src_y -= linesize + 1;
2319 src_cb -= uvlinesize + 1;
2320 src_cr -= uvlinesize + 1;
2321
/* XCHG(a,b,t,xchg): b receives the old a; a receives the old b only if xchg.
 * The macro expands to several statements and is not #undef'd afterwards. */
2322#define XCHG(a,b,t,xchg)\
2323t= a;\
2324if(xchg)\
2325 a= b;\
2326b= t;
d89dc06a
LM
2327
/* Luma: left column (the top-left corner is skipped when the top edge is not
 * handled), then the 16 samples above plus 8 samples from the next macroblock
 * to the right when one exists. */
2328 if(deblock_left){
5f7f9719
MN
2329 for(i = !deblock_top; i<16; i++){
2330 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 2331 }
5f7f9719 2332 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
2333 }
2334
2335 if(deblock_top){
5f7f9719
MN
2336 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 2338 if(s->mb_x+1 < s->mb_width){
5f7f9719 2339 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 2340 }
53c05b1e 2341 }
53c05b1e 2342
/* Chroma planes, skipped when gray-only decoding is requested. */
87352549 2343 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 2344 if(deblock_left){
5f7f9719
MN
2345 for(i = !deblock_top; i<8; i++){
2346 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2347 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 2348 }
5f7f9719
MN
2349 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2350 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
2351 }
2352 if(deblock_top){
5f7f9719
MN
2353 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 2355 }
53c05b1e
MN
2356 }
2357}
2358
/**
 * Reconstruct the current macroblock into s->current_picture and, when
 * h->deblocking_filter is set, run the loop filter on it.
 *
 * Steps, in order: compute destination pointers and strides (doubled for
 * field macroblocks), then either copy raw intra-PCM samples from h->mb, or
 * run intra prediction / hl_motion() followed by adding the luma and chroma
 * residuals; finally back up the borders and call the deblocking filter.
 *
 * @param h      decoder context
 * @param simple when non-zero the MB_FIELD / FRAME_MBAFF, intra-PCM,
 *               gray-flag, transform-bypass, non-H.264 (SVQ3) and s->encoding
 *               checks are short-circuited, so the compiler can drop them
 */
5a6a6cc7 2359static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
2360 MpegEncContext * const s = &h->s;
2361 const int mb_x= s->mb_x;
2362 const int mb_y= s->mb_y;
64514ee8 2363 const int mb_xy= h->mb_xy;
0da71265
MN
2364 const int mb_type= s->current_picture.mb_type[mb_xy];
2365 uint8_t *dest_y, *dest_cb, *dest_cr;
2366 int linesize, uvlinesize /*dct_offset*/;
2367 int i;
6867a90b 2368 int *block_offset = &h->block_offset[0];
41e4055b
MN
2369 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2370 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
36940eca 2371 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 2372 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 2373
6120a343
MN
2374 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2375 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2376 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 2377
a957c27b
LM
2378 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2379 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2380
/* Field macroblock: double the strides, switch to the second half of
 * block_offset, and for odd mb_y move the destinations back so the rows
 * interleave with the macroblock above. */
bd91fee3 2381 if (!simple && MB_FIELD) {
5d18eaad
LM
2382 linesize = h->mb_linesize = s->linesize * 2;
2383 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 2384 block_offset = &h->block_offset[24];
1412060e 2385 if(mb_y&1){ //FIXME move out of this function?
0da71265 2386 dest_y -= s->linesize*15;
6867a90b
LLL
2387 dest_cb-= s->uvlinesize*7;
2388 dest_cr-= s->uvlinesize*7;
0da71265 2389 }
/* In MBAFF frames the cached reference indices of the used lists are
 * rewritten as (16+ref)^(mb_y&1). */
5d18eaad
LM
2390 if(FRAME_MBAFF) {
2391 int list;
3425501d 2392 for(list=0; list<h->list_count; list++){
5d18eaad
LM
2393 if(!USES_LIST(mb_type, list))
2394 continue;
2395 if(IS_16X16(mb_type)){
2396 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 2397 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
2398 }else{
2399 for(i=0; i<16; i+=4){
5d18eaad
LM
2400 int ref = h->ref_cache[list][scan8[i]];
2401 if(ref >= 0)
1710856c 2402 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
2403 }
2404 }
2405 }
2406 }
0da71265 2407 } else {
5d18eaad
LM
2408 linesize = h->mb_linesize = s->linesize;
2409 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
2410// dct_offset = s->linesize * 16;
2411 }
115329f1 2412
/* Intra PCM: the samples are stored in h->mb and copied out row by row
 * (16 rows of 16 bytes luma, 8 rows of 8 bytes per chroma plane). */
bd91fee3 2413 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
2414 for (i=0; i<16; i++) {
2415 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 2416 }
c1708e8d
MN
2417 for (i=0; i<8; i++) {
2418 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2419 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 2420 }
e7e09b49
LLL
2421 } else {
2422 if(IS_INTRA(mb_type)){
/* Swap in the saved border samples for the duration of intra prediction;
 * the matching call with xchg=0 follows the prediction code. */
5f7f9719 2423 if(h->deblocking_filter)
93cc10fa 2424 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 2425
87352549 2426 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
2427 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2428 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 2429 }
0da71265 2430
/* Intra 4x4 / 8x8: predict and add the residual block by block, because each
 * block's prediction reads the reconstruction of earlier blocks. */
e7e09b49 2431 if(IS_INTRA4x4(mb_type)){
bd91fee3 2432 if(simple || !s->encoding){
43efd19a 2433 if(IS_8x8DCT(mb_type)){
1eb96035
MN
2434 if(transform_bypass){
2435 idct_dc_add =
2436 idct_add = s->dsp.add_pixels8;
dae006d7 2437 }else{
1eb96035
MN
2438 idct_dc_add = s->dsp.h264_idct8_dc_add;
2439 idct_add = s->dsp.h264_idct8_add;
2440 }
43efd19a
LM
2441 for(i=0; i<16; i+=4){
2442 uint8_t * const ptr= dest_y + block_offset[i];
2443 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
2444 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2445 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2446 }else{
ac0623b2
MN
2447 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2448 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2449 (h->topright_samples_available<<i)&0x4000, linesize);
2450 if(nnz){
2451 if(nnz == 1 && h->mb[i*16])
2452 idct_dc_add(ptr, h->mb + i*16, linesize);
2453 else
2454 idct_add (ptr, h->mb + i*16, linesize);
2455 }
41e4055b 2456 }
43efd19a 2457 }
1eb96035
MN
2458 }else{
2459 if(transform_bypass){
2460 idct_dc_add =
2461 idct_add = s->dsp.add_pixels4;
2462 }else{
2463 idct_dc_add = s->dsp.h264_idct_dc_add;
2464 idct_add = s->dsp.h264_idct_add;
2465 }
e7e09b49 2466 for(i=0; i<16; i++){
6867a90b 2467 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49 2468 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 2469
41e4055b
MN
2470 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2471 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2472 }else{
ac0623b2
MN
2473 uint8_t *topright;
2474 int nnz, tr;
2475 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2476 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2477 assert(mb_y || linesize <= block_offset[i]);
/* Top-right samples unavailable: replicate the last top sample four times.
 * NOTE(review): the multiplication below is done in int; for sample values
 * >= 0x80 the product exceeds INT_MAX - an unsigned constant would avoid the
 * signed overflow. */
2478 if(!topright_avail){
2479 tr= ptr[3 - linesize]*0x01010101;
2480 topright= (uint8_t*) &tr;
2481 }else
2482 topright= ptr + 4 - linesize;
115329f1 2483 }else
ac0623b2 2484 topright= NULL;
e7e09b49 2485
ac0623b2
MN
2486 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2487 nnz = h->non_zero_count_cache[ scan8[i] ];
2488 if(nnz){
2489 if(is_h264){
2490 if(nnz == 1 && h->mb[i*16])
2491 idct_dc_add(ptr, h->mb + i*16, linesize);
2492 else
2493 idct_add (ptr, h->mb + i*16, linesize);
2494 }else
2495 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2496 }
41e4055b 2497 }
8b82a956 2498 }
1eb96035 2499 }
0da71265 2500 }
e7e09b49 2501 }else{
/* Intra 16x16: predict the whole luma block, then dequantise/transform the
 * luma DC coefficients (skipped for transform bypass). */
c92a30bb 2502 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 2503 if(is_h264){
36940eca 2504 if(!transform_bypass)
93f0c0a4 2505 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 2506 }else
e7e09b49 2507 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 2508 }
5f7f9719 2509 if(h->deblocking_filter)
93cc10fa 2510 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
bd91fee3 2511 }else if(is_h264){
e7e09b49 2512 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
2513 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2514 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 2515 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 2516 }
e7e09b49
LLL
2517
2518
/* Luma residual for everything except intra 4x4/8x8, which was already
 * handled block by block above. */
2519 if(!IS_INTRA4x4(mb_type)){
bd91fee3 2520 if(is_h264){
ef9d1d15 2521 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
2522 if(transform_bypass){
2523 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
2524 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2525 }else{
2526 for(i=0; i<16; i++){
2527 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 2528 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2529 }
2fd1f0e0
MN
2530 }
2531 }else{
2532 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 2533 }
49c084a7 2534 }else if(h->cbp&15){
2fd1f0e0 2535 if(transform_bypass){
0a8ca22f 2536 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 2537 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 2538 for(i=0; i<16; i+=di){
62bc966f 2539 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 2540 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 2541 }
ef9d1d15 2542 }
2fd1f0e0
MN
2543 }else{
2544 if(IS_8x8DCT(mb_type)){
2545 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2546 }else{
2547 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2548 }
2549 }
4704097a 2550 }
e7e09b49
LLL
2551 }else{
2552 for(i=0; i<16; i++){
2553 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 2554 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
2555 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2556 }
4704097a 2557 }
0da71265
MN
2558 }
2559 }
0da71265 2560
/* Chroma residual: only when chroma is decoded and cbp has chroma bits set.
 * Blocks 16..19 go to dest_cb and 20..23 to dest_cr (selected by bit 2 of i). */
621561cd 2561 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2562 uint8_t *dest[2] = {dest_cb, dest_cr};
2563 if(transform_bypass){
96465b90
MN
2564 idct_add = s->dsp.add_pixels4;
2565 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2566 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2567 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2568 }else{
2569 for(i=16; i<16+8; i++){
2570 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2571 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2572 }
2573 }
ef9d1d15
LM
2574 }else{
2575 idct_add = s->dsp.h264_idct_add;
2576 idct_dc_add = s->dsp.h264_idct_dc_add;
4691a77d
2577 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2578 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
bd91fee3 2579 if(is_h264){
ac0623b2
MN
2580 for(i=16; i<16+8; i++){
2581 if(h->non_zero_count_cache[ scan8[i] ])
2582 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2583 else if(h->mb[i*16])
2584 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2585 }
e7e09b49 2586 }else{
ef9d1d15 2587 for(i=16; i<16+8; i++){
e7e09b49 2588 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
ef9d1d15 2589 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
e7e09b49
LLL
2590 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2591 }
4704097a 2592 }
96465b90 2593 }
0da71265
MN
2594 }
2595 }
2596 }
/* Deblocking: save the border samples, refill the caches for the filter,
 * recompute both chroma QPs from the stored qscale, then filter (the general
 * filter_mb in MBAFF frames, filter_mb_fast otherwise). */
53c05b1e 2597 if(h->deblocking_filter) {
5f7f9719
MN
2598 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2599 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2600 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2601 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2602 if (!simple && FRAME_MBAFF) {
5f7f9719 2603 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2604 } else {
3e20143e 2605 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2606 }
53c05b1e 2607 }
0da71265
MN
2608}
2609
0da71265 2610/**
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 *
 * Thin wrapper that calls hl_decode_mb_internal() with simple=1, so the
 * branches guarded by "!simple" in that function are compiled out of this
 * instance.
 *
 * @param h decoder context positioned on the macroblock to reconstruct
 */
2613static void hl_decode_mb_simple(H264Context *h){
2614 hl_decode_mb_internal(h, 1);
2615}
2616
2617/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 *
 * Thin wrapper that calls hl_decode_mb_internal() with simple=0, keeping
 * every "!simple" branch of that function. Marked av_noinline.
 *
 * @param h decoder context positioned on the macroblock to reconstruct
 */
2620static void av_noinline hl_decode_mb_complex(H264Context *h){
2621 hl_decode_mb_internal(h, 0);
2622}
2623
2624static void hl_decode_mb(H264Context *h){
2625 MpegEncContext * const s = &h->s;
64514ee8 2626 const int mb_xy= h->mb_xy;
bd91fee3 2627 const int mb_type= s->current_picture.mb_type[mb_xy];
1dd488e9 2628 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2629
fedec603 2630 if(ENABLE_H264_ENCODER && !s->decode)
bd91fee3
AS
2631 return;
2632
2633 if (is_complex)
2634 hl_decode_mb_complex(h);
2635 else hl_decode_mb_simple(h);
2636}
2637
2143b118 2638static void pic_as_field(Picture *pic, const int parity){
11cc1d8c
JD
2639 int i;
2640 for (i = 0; i < 4; ++i) {
2143b118 2641 if (parity == PICT_BOTTOM_FIELD)
11cc1d8c 2642 pic->data[i] += pic->linesize[i];
2143b118 2643 pic->reference = parity;
11cc1d8c
JD
2644 pic->linesize[i] *= 2;
2645 }
2879c75f 2646 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
11cc1d8c
JD
2647}
2648
2649static int split_field_copy(Picture *dest, Picture *src,
2650 int parity, int id_add){
2651 int match = !!(src->reference & parity);
2652
2653 if (match) {
2654 *dest = *src;
d4f7d838 2655 if(parity != PICT_FRAME){
b3e93fd4
MN
2656 pic_as_field(dest, parity);
2657 dest->pic_id *= 2;
2658 dest->pic_id += id_add;
d4f7d838 2659 }
11cc1d8c
JD
2660 }
2661
2662 return match;
2663}
2664
d4f7d838
MN
2665static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2666 int i[2]={0};
2667 int index=0;
11cc1d8c 2668
d4f7d838
MN
2669 while(i[0]<len || i[1]<len){
2670 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2671 i[0]++;
2672 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2673 i[1]++;
2674 if(i[0] < len){
2675 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2676 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2677 }
2678 if(i[1] < len){
2679 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2680 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
11cc1d8c
JD
2681 }
2682 }
2683
d4f7d838 2684 return index;
11cc1d8c
JD
2685}
2686
d4f7d838
MN
2687static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2688 int i, best_poc;
2689 int out_i= 0;
11cc1d8c 2690
d4f7d838
MN
2691 for(;;){
2692 best_poc= dir ? INT_MIN : INT_MAX;
11cc1d8c 2693
d4f7d838
MN
2694 for(i=0; i<len; i++){
2695 const int poc= src[i]->poc;
2696 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2697 best_poc= poc;
2698 sorted[out_i]= src[i];
2699 }
2700 }
2701 if(best_poc == (dir ? INT_MIN : INT_MAX))
2702 break;
2703 limit= sorted[out_i++]->poc - dir;
2704 }
2705 return out_i;
11cc1d8c
JD
2706}
2707
bd91fee3 2708/**
0da71265
MN
2709 * fills the default_ref_list.
2710 */
2711static int fill_default_ref_list(H264Context *h){
2712 MpegEncContext * const s = &h->s;
d4f7d838 2713 int i, len;
115329f1 2714
9f5c1037 2715 if(h->slice_type_nos==FF_B_TYPE){
d4f7d838
MN
2716 Picture *sorted[32];
2717 int cur_poc, list;
2718 int lens[2];
11cc1d8c 2719
d4f7d838
MN
2720 if(FIELD_PICTURE)
2721 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2722 else
2723 cur_poc= s->current_picture_ptr->poc;
086acdd5 2724
d4f7d838
MN
2725 for(list= 0; list<2; list++){
2726 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2727 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2728 assert(len<=32);
2729 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2730 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2731 assert(len<=32);
086acdd5 2732
d4f7d838
MN
2733 if(len < h->ref_count[list])
2734 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2735 lens[list]= len;
086acdd5
JD
2736 }
2737
d4f7d838
MN
2738 if(lens[0] == lens[1] && lens[1] > 1){
2739 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2740 if(i == lens[0])
2741 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
086acdd5 2742 }
086acdd5 2743 }else{
d4f7d838
MN
2744 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2745 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2746 assert(len <= 32);
2747 if(len < h->ref_count[0])
2748 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
0da71265 2749 }
827c91bf
LLL
2750#ifdef TRACE
2751 for (i=0; i<h->ref_count[0]; i++) {
a9c9a240 2752 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
827c91bf 2753 }
9f5c1037 2754 if(h->slice_type_nos==FF_B_TYPE){
827c91bf 2755 for (i=0; i<h->ref_count[1]; i++) {
ffbc5e04 2756 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
827c91bf
LLL
2757 }
2758 }
2759#endif
0da71265
MN
2760 return 0;
2761}
2762
827c91bf
LLL
2763static void print_short_term(H264Context *h);
2764static void print_long_term(H264Context *h);
2765
949da388
JD
2766/**
2767 * Extract structure information about the picture described by pic_num in
2768 * the current decoding context (frame or field). Note that pic_num is
2769 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2770 * @param pic_num picture number for which to extract structure information
2771 * @param structure one of PICT_XXX describing structure of picture
2772 * with pic_num
2773 * @return frame number (short term) or long term index of picture
2774 * described by pic_num
2775 */
2776static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2777 MpegEncContext * const s = &h->s;
2778
2779 *structure = s->picture_structure;
2780 if(FIELD_PICTURE){
2781 if (!(pic_num & 1))
2782 /* opposite field */
2783 *structure ^= PICT_FRAME;
2784 pic_num >>= 1;
2785 }
2786
2787 return pic_num;
2788}
2789
0da71265
MN
/**
 * Parse the reference picture list reordering syntax from s->gb and build
 * h->ref_list[] for every list in use.
 *
 * Each list starts as a copy of the default list. If the reordering flag is
 * set, commands are read until reordering_of_pic_nums_idc == 3:
 *   - idc 0/1: short term picture, addressed by subtracting/adding
 *     abs_diff_pic_num from the running prediction pred;
 *   - idc 2:   long term picture, addressed by long_term_pic_idx.
 * The selected picture is inserted at position index and the following
 * entries are shifted down. Finally any entry without data[0] is replaced by
 * the current picture.
 *
 * @return 0 on success, -1 on a reference count overflow, an
 *         abs_diff_pic_num or long_term_pic_idx overflow, or an illegal idc
 */
2790static int decode_ref_pic_list_reordering(H264Context *h){
2791 MpegEncContext * const s = &h->s;
949da388 2792 int list, index, pic_structure;
115329f1 2793
827c91bf
LLL
2794 print_short_term(h);
2795 print_long_term(h);
115329f1 2796
3425501d 2797 for(list=0; list<h->list_count; list++){
0da71265
MN
2798 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2799
2800 if(get_bits1(&s->gb)){
2801 int pred= h->curr_pic_num;
0da71265
MN
2802
2803 for(index=0; ; index++){
88e7a4d1
MN
2804 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2805 unsigned int pic_id;
0da71265 2806 int i;
2f944356 2807 Picture *ref = NULL;
115329f1
DB
2808
2809 if(reordering_of_pic_nums_idc==3)
0bc42cad 2810 break;
115329f1 2811
0da71265 2812 if(index >= h->ref_count[list]){
9b879566 2813 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
0da71265
MN
2814 return -1;
2815 }
115329f1 2816
0da71265
MN
2817 if(reordering_of_pic_nums_idc<3){
2818 if(reordering_of_pic_nums_idc<2){
88e7a4d1 2819 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
949da388 2820 int frame_num;
0da71265 2821
03d3cab8 2822 if(abs_diff_pic_num > h->max_pic_num){
9b879566 2823 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
0da71265
MN
2824 return -1;
2825 }
2826
/* The mask below wraps pred into [0, max_pic_num) only if max_pic_num is a
 * power of two - presumably guaranteed by how it is derived; verify. */
2827 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2828 else pred+= abs_diff_pic_num;
2829 pred &= h->max_pic_num - 1;
115329f1 2830
949da388
JD
2831 frame_num = pic_num_extract(h, pred, &pic_structure);
2832
/* Search the short term list from the end; i stays >= 0 only when a picture
 * with this frame_num and a matching reference parity was found. */
0d175622
MN
2833 for(i= h->short_ref_count-1; i>=0; i--){
2834 ref = h->short_ref[i];
949da388 2835 assert(ref->reference);
0d175622 2836 assert(!ref->long_ref);
6edac8e1 2837 if(
af8c5e08
MN
2838 ref->frame_num == frame_num &&
2839 (ref->reference & pic_structure)
6edac8e1 2840 )
0da71265
MN
2841 break;
2842 }
0d175622 2843 if(i>=0)
949da388 2844 ref->pic_id= pred;
0da71265 2845 }else{
949da388 2846 int long_idx;
0da71265 2847 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
949da388
JD
2848
2849 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2850
2851 if(long_idx>31){
88e7a4d1
MN
2852 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2853 return -1;
2854 }
949da388
JD
2855 ref = h->long_ref[long_idx];
2856 assert(!(ref && !ref->reference));
af8c5e08 2857 if(ref && (ref->reference & pic_structure)){
ac658be5 2858 ref->pic_id= pic_id;
ac658be5
FOL
2859 assert(ref->long_ref);
2860 i=0;
2861 }else{
2862 i=-1;
2863 }
0da71265
MN
2864 }
2865
/* i < 0 means no usable picture was found: log it and zero the entry.
 * Otherwise find where the picture currently sits in the list (or stop at
 * the last slot), shift the entries in between down by one and insert it at
 * position index. */
0d315f28 2866 if (i < 0) {
9b879566 2867 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
0da71265 2868 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
0d175622
MN
2869 } else {
2870 for(i=index; i+1<h->ref_count[list]; i++){
2871 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2872 break;
21be92bf
MN
2873 }
2874 for(; i > index; i--){
2875 h->ref_list[list][i]= h->ref_list[list][i-1];
2876 }
0d175622 2877 h->ref_list[list][index]= *ref;
949da388 2878 if (FIELD_PICTURE){
2143b118 2879 pic_as_field(&h->ref_list[list][index], pic_structure);
949da388 2880 }
0da71265 2881 }
0bc42cad 2882 }else{
9b879566 2883 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
0da71265
MN
2884 return -1;
2885 }
2886 }
2887 }
0da71265 2888 }
/* Replace every entry that has no picture data by the current picture so
 * later code never sees an empty reference. */
3425501d 2889 for(list=0; list<h->list_count; list++){
6ab87211 2890 for(index= 0; index < h->ref_count[list]; index++){
79b5c776
MN
2891 if(!h->ref_list[list][index].data[0]){
2892 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2893 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2894 }
6ab87211 2895 }
6ab87211 2896 }
115329f1 2897
115329f1 2898 return 0;
0da71265
MN
2899}
2900
91c58c94 2901static void fill_mbaff_ref_list(H264Context *h){
5d18eaad 2902 int list, i, j;
3425501d 2903 for(list=0; list<2; list++){ //FIXME try list_count
5d18eaad
LM
2904 for(i=0; i<h->ref_count[list]; i++){
2905 Picture *frame = &h->ref_list[list][i];
2906 Picture *field = &h->ref_list[list][16+2*i];
2907 field[0] = *frame;
2908 for(j=0; j<3; j++)
2909 field[0].linesize[j] <<= 1;
2143b118 2910 field[0].reference = PICT_TOP_FIELD;
078f42dd 2911 field[0].poc= field[0].field_poc[0];
5d18eaad
LM
2912 field[1] = field[0];
2913 for(j=0; j<3; j++)
2914 field[1].data[j] += frame->linesize[j];
2143b118 2915 field[1].reference = PICT_BOTTOM_FIELD;
078f42dd 2916 field[1].poc= field[1].field_poc[1];
5d18eaad
LM
2917
2918 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2919 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2920 for(j=0; j<2; j++){
2921 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2922 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2923 }
2924 }
2925 }
2926 for(j=0; j<h->ref_count[1]; j++){
2927 for(i=0; i<h->ref_count[0]; i++)
2928 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2929 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2930 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2931 }
2932}
2933
0da71265
MN
2934static int pred_weight_table(H264Context *h){
2935 MpegEncContext * const s = &h->s;
2936 int list, i;
9f2d1b4f 2937 int luma_def, chroma_def;
115329f1 2938
9f2d1b4f
LM
2939 h->use_weight= 0;
2940 h->use_weight_chroma= 0;
0da71265
MN
2941 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2942 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2943 luma_def = 1<<h->luma_log2_weight_denom;
2944 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2945
2946 for(list=0; list<2; list++){
2947 for(i=0; i<h->ref_count[list]; i++){
2948 int luma_weight_flag, chroma_weight_flag;
115329f1 2949
0da71265
MN
2950 luma_weight_flag= get_bits1(&s->gb);
2951 if(luma_weight_flag){
2952 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2953 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f
LM
2954 if( h->luma_weight[list][i] != luma_def
2955 || h->luma_offset[list][i] != 0)
2956 h->use_weight= 1;
2957 }else{
2958 h->luma_weight[list][i]= luma_def;
2959 h->luma_offset[list][i]= 0;
0da71265
MN
2960 }
2961
0af6967e 2962 if(CHROMA){
fef744d4
MN
2963 chroma_weight_flag= get_bits1(&s->gb);
2964 if(chroma_weight_flag){
2965 int j;
2966 for(j=0; j<2; j++){
2967 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2968 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2969 if( h->chroma_weight[list][i][j] != chroma_def
2970 || h->chroma_offset[list][i][j] != 0)
2971 h->use_weight_chroma= 1;
2972 }
2973 }else{
2974 int j;
2975 for(j=0; j<2; j++){
2976 h->chroma_weight[list][i][j]= chroma_def;
2977 h->chroma_offset[list][i][j]= 0;
2978 }
0da71265
MN
2979 }
2980 }
2981 }
9f5c1037 2982 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 2983 }
9f2d1b4f 2984 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
2985 return 0;
2986}
2987
9f2d1b4f
LM
2988static void implicit_weight_table(H264Context *h){
2989 MpegEncContext * const s = &h->s;
9f2d1b4f
LM
2990 int ref0, ref1;
2991 int cur_poc = s->current_picture_ptr->poc;
2992
2993 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2994 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2995 h->use_weight= 0;
2996 h->use_weight_chroma= 0;
2997 return;
2998 }
2999
3000 h->use_weight= 2;
3001 h->use_weight_chroma= 2;
3002 h->luma_log2_weight_denom= 5;
3003 h->chroma_log2_weight_denom= 5;
3004
9f2d1b4f
LM
3005 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3006 int poc0 = h->ref_list[0][ref0].poc;
3007 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 3008 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 3009 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 3010 if(td){
f66e4f5f 3011 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 3012 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 3013 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
3014 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3015 h->implicit_weight[ref0][ref1] = 32;
3016 else
3017 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3018 }else
3019 h->implicit_weight[ref0][ref1] = 32;
3020 }
3021 }
3022}
3023
8fd57a66
JD
3024/**
3025 * Mark a picture as no longer needed for reference. The refmask
3026 * argument allows unreferencing of individual fields or the whole frame.
3027 * If the picture becomes entirely unreferenced, but is being held for
3028 * display purposes, it is marked as such.
3029 * @param refmask mask of fields to unreference; the mask is bitwise
3030 * anded with the reference marking of pic
3031 * @return non-zero if pic becomes entirely unreferenced (except possibly
3032 * for display purposes) zero if one of the fields remains in
3033 * reference
3034 */
3035static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
4e4d983e 3036 int i;
8fd57a66
JD
3037 if (pic->reference &= refmask) {
3038 return 0;
3039 } else {
79f4494a
MN
3040 for(i = 0; h->delayed_pic[i]; i++)
3041 if(pic == h->delayed_pic[i]){
3042 pic->reference=DELAYED_PIC_REF;
3043 break;
3044 }
8fd57a66
JD
3045 return 1;
3046 }
4e4d983e
LM
3047}
3048
0da71265 3049/**
5175b937 3050 * instantaneous decoder refresh.
0da71265
MN
3051 */
3052static void idr(H264Context *h){
4e4d983e 3053 int i;
0da71265 3054
dc032f33 3055 for(i=0; i<16; i++){
9c0e4624 3056 remove_long(h, i, 0);
0da71265 3057 }
849b9cef 3058 assert(h->long_ref_count==0);
0da71265
MN
3059
3060 for(i=0; i<h->short_ref_count; i++){
8fd57a66 3061 unreference_pic(h, h->short_ref[i], 0);
0da71265
MN
3062 h->short_ref[i]= NULL;
3063 }
3064 h->short_ref_count=0;
a149c1a5 3065 h->prev_frame_num= 0;
80f8e035
MN
3066 h->prev_frame_num_offset= 0;
3067 h->prev_poc_msb=
3068 h->prev_poc_lsb= 0;
0da71265
MN
3069}
3070
7c33ad19
LM
3071/* forget old pics after a seek */
3072static void flush_dpb(AVCodecContext *avctx){
3073 H264Context *h= avctx->priv_data;
3074 int i;
64b9d48f 3075 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
3076 if(h->delayed_pic[i])
3077 h->delayed_pic[i]->reference= 0;
7c33ad19 3078 h->delayed_pic[i]= NULL;
285b570f 3079 }
df8a7dff 3080 h->outputed_poc= INT_MIN;
7c33ad19 3081 idr(h);
ca159196
MR
3082 if(h->s.current_picture_ptr)
3083 h->s.current_picture_ptr->reference= 0;
12d96de3 3084 h->s.first_field= 0;
e240f898 3085 ff_mpeg_flush(avctx);
7c33ad19
LM
3086}
3087
0da71265 3088/**
47e112f8
JD
3089 * Find a Picture in the short term reference list by frame number.
3090 * @param frame_num frame number to search for
3091 * @param idx the index into h->short_ref where returned picture is found
3092 * undefined if no picture found.
3093 * @return pointer to the found picture, or NULL if no pic with the provided
3094 * frame number is found
0da71265 3095 */
47e112f8 3096static Picture * find_short(H264Context *h, int frame_num, int *idx){
1924f3ce 3097 MpegEncContext * const s = &h->s;
0da71265 3098 int i;
115329f1 3099
0da71265
MN
3100 for(i=0; i<h->short_ref_count; i++){
3101 Picture *pic= h->short_ref[i];
1924f3ce 3102 if(s->avctx->debug&FF_DEBUG_MMCO)
9b879566 3103 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
47e112f8
JD
3104 if(pic->frame_num == frame_num) {
3105 *idx = i;
0da71265
MN
3106 return pic;
3107 }
3108 }
3109 return NULL;
3110}
3111
3112/**
47e112f8
JD
3113 * Remove a picture from the short term reference list by its index in
3114 * that list. This does no checking on the provided index; it is assumed
3115 * to be valid. Other list entries are shifted down.
3116 * @param i index into h->short_ref of picture to remove.
3117 */
3118static void remove_short_at_index(H264Context *h, int i){
e1f15d38 3119 assert(i >= 0 && i < h->short_ref_count);
47e112f8
JD
3120 h->short_ref[i]= NULL;
3121 if (--h->short_ref_count)
3122 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3123}
3124
3125/**
3126 *
3127 * @return the removed picture or NULL if an error occurs
3128 */
d9e32422 3129static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
47e112f8
JD
3130 MpegEncContext * const s = &h->s;
3131 Picture *pic;
3132 int i;
3133
3134 if(s->avctx->debug&FF_DEBUG_MMCO)
3135 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3136
3137 pic = find_short(h, frame_num, &i);
d9e32422
MN
3138 if (pic){
3139 if(unreference_pic(h, pic, ref_mask))
47e112f8 3140 remove_short_at_index(h, i);
d9e32422 3141 }
47e112f8
JD
3142
3143 return pic;
3144}
3145
3146/**
24231e4c 3147 * Remove a picture from the long term reference list by its index in
1cea5d0d 3148 * that list.
3b66c4c5 3149 * @return the removed picture or NULL if an error occurs
0da71265 3150 */
9c0e4624 3151static Picture * remove_long(H264Context *h, int i, int ref_mask){
0da71265
MN
3152 Picture *pic;
3153
0da71265 3154 pic= h->long_ref[i];
1cea5d0d 3155 if (pic){
9c0e4624
MN
3156 if(unreference_pic(h, pic, ref_mask)){
3157 assert(h->long_ref[i]->long_ref == 1);
3158 h->long_ref[i]->long_ref= 0;
3159 h->long_ref[i]= NULL;
3160 h->long_ref_count--;
3161 }
1cea5d0d 3162 }
0da71265
MN
3163
3164 return pic;
3165}
3166
3167/**
827c91bf
LLL
3168 * print short term list
3169 */
3170static void print_short_term(H264Context *h) {
3171 uint32_t i;
3172 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3173 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3174 for(i=0; i<h->short_ref_count; i++){
3175 Picture *pic= h->short_ref[i];
3176 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3177 }
3178 }
3179}
3180
3181/**
3182 * print long term list
3183 */
3184static void print_long_term(H264Context *h) {
3185 uint32_t i;
3186 if(h->s.avctx->