Split all the reference picture handling off h264.c.
[libav.git] / libavcodec / h264.c
CommitLineData
0da71265
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
0da71265
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
0da71265 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
0da71265
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265 20 */
115329f1 21
0da71265 22/**
bad5537e 23 * @file libavcodec/h264.c
0da71265
MN
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
40e5d31b 28#include "internal.h"
0da71265
MN
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
26b4fe82 32#include "h264.h"
0da71265 33#include "h264data.h"
26b4fe82 34#include "h264_parser.h"
0da71265 35#include "golomb.h"
199436b9 36#include "mathops.h"
626464fb 37#include "rectangle.h"
369122dd 38#include "vdpau_internal.h"
0da71265 39
e5017ab8 40#include "cabac.h"
b250f9c6 41#if ARCH_X86
a6493a8f 42#include "x86/h264_i386.h"
52cb7981 43#endif
e5017ab8 44
2848ce84 45//#undef NDEBUG
0da71265
MN
46#include <assert.h>
47
0da71265 48static VLC coeff_token_vlc[4];
910e3668
AC
49static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
50static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
51
0da71265 52static VLC chroma_dc_coeff_token_vlc;
910e3668
AC
53static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
54static const int chroma_dc_coeff_token_vlc_table_size = 256;
0da71265
MN
55
56static VLC total_zeros_vlc[15];
910e3668
AC
57static VLC_TYPE total_zeros_vlc_tables[15][512][2];
58static const int total_zeros_vlc_tables_size = 512;
59
0da71265 60static VLC chroma_dc_total_zeros_vlc[3];
910e3668
AC
61static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
62static const int chroma_dc_total_zeros_vlc_tables_size = 8;
0da71265
MN
63
64static VLC run_vlc[6];
910e3668
AC
65static VLC_TYPE run_vlc_tables[6][8][2];
66static const int run_vlc_tables_size = 8;
67
0da71265 68static VLC run7_vlc;
910e3668
AC
69static VLC_TYPE run7_vlc_table[96][2];
70static const int run7_vlc_table_size = 96;
0da71265 71
8b82a956
MN
/* Forward declarations of the SVQ3 IDCT helpers; their definitions are not
 * part of this section of the file. */
72static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
73static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
74
/** Lookup table: rem6[i] == i % 6 for every i in [0, 51]. */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
78
/** Lookup table: div6[i] == i / 6 for every i in [0, 51]. */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
82
/**
 * Block index sets used by fill_caches() to read data from the left
 * neighbour(s). Entries 0..3 index the luma-related per-block arrays, entries
 * 4..7 are the indices used for the remaining non_zero_count positions.
 * Row 0 is the default; row 3 is selected when the current MB is field coded
 * and the left pair is not; rows 2 and 1 are selected (2 - bottom) when the
 * current MB is frame coded and the left pair is field coded.
 */
static const uint8_t left_block_options[4][8]={
    { 0, 1, 2, 3,   7, 10, 8, 11 },
    { 2, 2, 3, 3,   8, 11, 8, 11 },
    { 0, 0, 1, 1,   7, 10, 7, 10 },
    { 0, 2, 0, 2,   7, 10, 7, 10 },
};
acd8d10f 89
8140955d
MN
/* cavlc_level_tab holds 7 sub-tables of (1 << LEVEL_TAB_BITS) entries, each
 * entry being a pair of int8_t values. It has no initializer here.
 * NOTE(review): presumably filled at init time by code outside this section
 * - confirm. */
90#define LEVEL_TAB_BITS 8
91static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
92
/**
 * fills the neighbour caches of the H264Context for the macroblock at h->mb_xy.
 *
 * The function first derives the indices of the top, top-left, top-right and
 * left neighbour macroblocks (with MBAFF specific adjustments), stores
 * top_mb_xy / left_mb_xy in the context and then loads, from those
 * neighbours, the intra sample availability masks, intra4x4 prediction modes,
 * non-zero counts, CBP values, motion vectors, reference indices, motion
 * vector differences and direct flags into the corresponding *_cache fields.
 *
 * @param h           decoder context, read and updated in place
 * @param mb_type     type of the current macroblock
 * @param for_deblock if non-zero, neighbours are accepted whenever their
 *                    slice_table entry is below 0xFFFF (instead of having to
 *                    match h->slice_num), the intra prediction part is
 *                    skipped and the diagonal neighbours, mvd and direct
 *                    caches are not loaded
 */
70abb407 93static void fill_caches(H264Context *h, int mb_type, int for_deblock){
0da71265 94 MpegEncContext * const s = &h->s;
64514ee8 95 const int mb_xy= h->mb_xy;
0da71265
MN
96 int topleft_xy, top_xy, topright_xy, left_xy[2];
97 int topleft_type, top_type, topright_type, left_type[2];
89818988 98 const uint8_t * left_block;
02f7695b 99 int topleft_partition= -1;
0da71265
MN
100 int i;
101
36e097bc
JD
102 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
103
/* Early exit for the deblocking call when not in MBAFF and either this is
 * slice number 1 or the top neighbour is in the same slice.
 * NOTE(review): presumably the caches filled while decoding are still usable
 * in that case - confirm. */
717b1733 104 //FIXME deblocking could skip the intra and nnz parts.
36e097bc 105 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
e2e5894a
LM
106 return;
107
2cab6401
DB
108 /* Wow, what a mess, why didn't they simplify the interlacing & intra
109 * stuff, I can't imagine that these complex rules are worth it. */
115329f1 110
/* Default (non-MBAFF) neighbour positions; both left halves refer to the
 * macroblock directly to the left and left_block_options[0] is used. */
6867a90b
LLL
111 topleft_xy = top_xy - 1;
112 topright_xy= top_xy + 1;
113 left_xy[1] = left_xy[0] = mb_xy-1;
143d7f14 114 left_block = left_block_options[0];
/* MBAFF: neighbours are located relative to the macroblock pair and the
 * indices are corrected depending on whether the current macroblock and each
 * neighbouring pair are field (interlaced) or frame coded. */
5d18eaad 115 if(FRAME_MBAFF){
6867a90b
LLL
116 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
117 const int top_pair_xy = pair_xy - s->mb_stride;
118 const int topleft_pair_xy = top_pair_xy - 1;
119 const int topright_pair_xy = top_pair_xy + 1;
6f3c50f2
MN
120 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
121 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
122 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
123 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
124 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
6867a90b 125 const int bottom = (s->mb_y & 1);
6f3c50f2 126 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
60c6ba7a 127
6f3c50f2 128 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
6867a90b
LLL
129 top_xy -= s->mb_stride;
130 }
6f3c50f2 131 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
6867a90b 132 topleft_xy -= s->mb_stride;
6f3c50f2 133 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
02f7695b 134 topleft_xy += s->mb_stride;
1412060e 135 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
02f7695b 136 topleft_partition = 0;
6867a90b 137 }
6f3c50f2 138 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
6867a90b
LLL
139 topright_xy -= s->mb_stride;
140 }
6f3c50f2 141 if (left_mb_field_flag != curr_mb_field_flag) {
6867a90b 142 left_xy[1] = left_xy[0] = pair_xy - 1;
6f3c50f2
MN
143 if (curr_mb_field_flag) {
144 left_xy[1] += s->mb_stride;
145 left_block = left_block_options[3];
146 } else {
03a035e0 147 left_block= left_block_options[2 - bottom];
6867a90b
LLL
148 }
149 }
0da71265
MN
150 }
151
/* Store the neighbour positions in the context. */
826de46e
LLL
152 h->top_mb_xy = top_xy;
153 h->left_mb_xy[0] = left_xy[0];
154 h->left_mb_xy[1] = left_xy[1];
/* Deblocking: a neighbour counts as present when its slice_table entry is
 * below 0xFFFF, whichever slice it belongs to; the diagonal neighbours are
 * treated as absent. */
6ba71fc4 155 if(for_deblock){
717b1733
LM
156 topleft_type = 0;
157 topright_type = 0;
b735aeea
MN
158 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
159 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
160 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
5d18eaad 161
e248cb60 162 if(MB_MBAFF && !IS_INTRA(mb_type)){
5d18eaad 163 int list;
3425501d 164 for(list=0; list<h->list_count; list++){
e248cb60
MN
165 //These values were changed for ease of performing MC, we need to change them back
166 //FIXME maybe we can make MC and loop filter use the same values or prevent
167 //the MC code from changing ref_cache and rather use a temporary array.
5d18eaad 168 if(USES_LIST(mb_type,list)){
191e8ca7 169 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
5d18eaad 170 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
beca9a28 171 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
172 ref += h->b8_stride;
173 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
beca9a28 174 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
5d18eaad
LM
175 }
176 }
177 }
46f2f05f
MN
178 }else{
/* Decoding: a neighbour is only usable when it belongs to the current
 * slice (slice_table entry equals h->slice_num); otherwise its type is 0. */
179 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
180 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
181 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
182 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
0da71265
MN
184
185 if(IS_INTRA(mb_type)){
/* With constrained_intra_pred the mask is IS_INTRA(-1), so a neighbour type
 * only passes the "& type_mask" tests below if IS_INTRA's bits are set in it;
 * otherwise the mask is -1 and any non-zero type passes. */
faa7e394 186 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
115329f1
DB
187 h->topleft_samples_available=
188 h->top_samples_available=
0da71265
MN
189 h->left_samples_available= 0xFFFF;
190 h->topright_samples_available= 0xEEEA;
191
faa7e394 192 if(!(top_type & type_mask)){
0da71265
MN
193 h->topleft_samples_available= 0xB3FF;
194 h->top_samples_available= 0x33FF;
195 h->topright_samples_available= 0x26EA;
196 }
d1d10e91
MN
197 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
198 if(IS_INTERLACED(mb_type)){
faa7e394 199 if(!(left_type[0] & type_mask)){
d1d10e91
MN
200 h->topleft_samples_available&= 0xDFFF;
201 h->left_samples_available&= 0x5FFF;
202 }
faa7e394 203 if(!(left_type[1] & type_mask)){
d1d10e91
MN
204 h->topleft_samples_available&= 0xFF5F;
205 h->left_samples_available&= 0xFF5F;
206 }
207 }else{
208 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
209 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
210 assert(left_xy[0] == left_xy[1]);
faa7e394 211 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
d1d10e91
MN
212 h->topleft_samples_available&= 0xDF5F;
213 h->left_samples_available&= 0x5F5F;
214 }
215 }
216 }else{
faa7e394 217 if(!(left_type[0] & type_mask)){
0da71265
MN
218 h->topleft_samples_available&= 0xDF5F;
219 h->left_samples_available&= 0x5F5F;
220 }
221 }
115329f1 222
faa7e394 223 if(!(topleft_type & type_mask))
0da71265 224 h->topleft_samples_available&= 0x7FFF;
115329f1 225
faa7e394 226 if(!(topright_type & type_mask))
0da71265 227 h->topright_samples_available&= 0xFBFF;
115329f1 228
/* Intra 4x4: load the prediction modes of the top row and left column.
 * A neighbour that is not intra4x4 contributes -1 when it fails the
 * type_mask test and 2 otherwise. */
0da71265
MN
229 if(IS_INTRA4x4(mb_type)){
230 if(IS_INTRA4x4(top_type)){
231 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
232 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
233 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
234 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
235 }else{
236 int pred;
faa7e394 237 if(!(top_type & type_mask))
0da71265 238 pred= -1;
6fbcaaa0
LLL
239 else{
240 pred= 2;
0da71265
MN
241 }
242 h->intra4x4_pred_mode_cache[4+8*0]=
243 h->intra4x4_pred_mode_cache[5+8*0]=
244 h->intra4x4_pred_mode_cache[6+8*0]=
245 h->intra4x4_pred_mode_cache[7+8*0]= pred;
246 }
247 for(i=0; i<2; i++){
248 if(IS_INTRA4x4(left_type[i])){
249 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
250 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
251 }else{
252 int pred;
faa7e394 253 if(!(left_type[i] & type_mask))
0da71265 254 pred= -1;
6fbcaaa0
LLL
255 else{
256 pred= 2;
0da71265
MN
257 }
258 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
259 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
260 }
261 }
262 }
263 }
29671011 264 }
115329f1
DB
265
266
0da71265 267/*
115329f1
DB
2680 . T T. T T T T
2691 L . .L . . . .
2702 L . .L . . . .
2713 . T TL . . . .
2724 L . .L . . . .
2735 L . .. . . . .
0da71265 274*/
/* Non-zero coefficient counts of the top neighbour; when it is missing the
 * entries are set to 0 for CABAC non-intra macroblocks and to 64 otherwise. */
1412060e 275//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
0da71265 276 if(top_type){
6867a90b
LLL
277 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
278 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
279 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
53c05b1e 280 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
115329f1 281
6867a90b 282 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
53c05b1e 283 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
115329f1 284
6867a90b 285 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
53c05b1e 286 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
115329f1 287
0da71265 288 }else{
115329f1 289 h->non_zero_count_cache[4+8*0]=
0da71265
MN
290 h->non_zero_count_cache[5+8*0]=
291 h->non_zero_count_cache[6+8*0]=
292 h->non_zero_count_cache[7+8*0]=
115329f1 293
0da71265
MN
294 h->non_zero_count_cache[1+8*0]=
295 h->non_zero_count_cache[2+8*0]=
115329f1 296
0da71265 297 h->non_zero_count_cache[1+8*3]=
3981c385 298 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
115329f1 299
0da71265 300 }
826de46e 301
/* Same for the two left halves, using the block indices selected in left_block. */
6867a90b
LLL
302 for (i=0; i<2; i++) {
303 if(left_type[i]){
304 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
305 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
306 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
307 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
6867a90b 308 }else{
115329f1
DB
309 h->non_zero_count_cache[3+8*1 + 2*8*i]=
310 h->non_zero_count_cache[3+8*2 + 2*8*i]=
311 h->non_zero_count_cache[0+8*1 + 8*i]=
6867a90b 312 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
826de46e
LLL
313 }
314 }
315
/* CABAC only: coded block patterns of the top and left neighbours. A missing
 * neighbour yields 0x1C0 for an intra macroblock and 0 otherwise. */
316 if( h->pps.cabac ) {
317 // top_cbp
318 if(top_type) {
319 h->top_cbp = h->cbp_table[top_xy];
320 } else if(IS_INTRA(mb_type)) {
321 h->top_cbp = 0x1C0;
322 } else {
323 h->top_cbp = 0;
324 }
325 // left_cbp
326 if (left_type[0]) {
327 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
328 } else if(IS_INTRA(mb_type)) {
329 h->left_cbp = 0x1C0;
330 } else {
331 h->left_cbp = 0;
332 }
333 if (left_type[0]) {
334 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
335 }
336 if (left_type[1]) {
337 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
6867a90b 338 }
0da71265 339 }
6867a90b 340
0da71265 341#if 1
/* Inter / direct macroblocks: load motion vectors and reference indices of
 * the neighbours for every list. A neighbour that does not use the list gets
 * a zero vector and LIST_NOT_USED (neighbour present) or PART_NOT_AVAILABLE
 * (neighbour missing) as reference. */
e2e5894a 342 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
0da71265 343 int list;
3425501d 344 for(list=0; list<h->list_count; list++){
e2e5894a 345 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
0da71265
MN
346 /*if(!h->mv_cache_clean[list]){
347 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
348 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
349 h->mv_cache_clean[list]= 1;
350 }*/
5ad984c9 351 continue;
0da71265
MN
352 }
353 h->mv_cache_clean[list]= 0;
115329f1 354
53b19144 355 if(USES_LIST(top_type, list)){
0da71265
MN
356 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
357 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
358 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
359 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
360 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
361 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
362 h->ref_cache[list][scan8[0] + 0 - 1*8]=
363 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
364 h->ref_cache[list][scan8[0] + 2 - 1*8]=
365 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
366 }else{
115329f1
DB
367 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
368 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
369 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
0da71265
MN
370 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
371 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
372 }
373
4672503d
LM
374 for(i=0; i<2; i++){
375 int cache_idx = scan8[0] - 1 + i*2*8;
376 if(USES_LIST(left_type[i], list)){
377 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
378 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
379 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
380 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
381 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
382 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
383 }else{
384 *(uint32_t*)h->mv_cache [list][cache_idx ]=
385 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
386 h->ref_cache[list][cache_idx ]=
387 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
388 }
0da71265
MN
389 }
390
/* Everything below (diagonal neighbours, mvd, direct flags, MBAFF remapping)
 * is skipped for deblocking and for non-MBAFF temporal direct macroblocks. */
0281d325 391 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
46f2f05f
MN
392 continue;
393
53b19144 394 if(USES_LIST(topleft_type, list)){
02f7695b
LM
395 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
396 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
e2e5894a
LM
397 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
398 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
399 }else{
400 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
401 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
402 }
115329f1 403
53b19144 404 if(USES_LIST(topright_type, list)){
e2e5894a
LM
405 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
406 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
407 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
408 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
409 }else{
410 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
411 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
412 }
e2e5894a 413
ae08a563 414 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
717b1733 415 continue;
115329f1
DB
416
417 h->ref_cache[list][scan8[5 ]+1] =
418 h->ref_cache[list][scan8[7 ]+1] =
3b66c4c5 419 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
115329f1 420 h->ref_cache[list][scan8[4 ]] =
0da71265
MN
421 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
422 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
423 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
3b66c4c5 424 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
0da71265
MN
425 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
426 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
9e528114
LA
427
428 if( h->pps.cabac ) {
429 /* XXX beurk, Load mvd */
53b19144 430 if(USES_LIST(top_type, list)){
9e528114
LA
431 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
432 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
433 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
434 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
435 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
436 }else{
115329f1
DB
437 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
438 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
439 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
9e528114
LA
440 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
441 }
53b19144 442 if(USES_LIST(left_type[0], list)){
9e528114
LA
443 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
444 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
445 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
446 }else{
447 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
448 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
449 }
53b19144 450 if(USES_LIST(left_type[1], list)){
9e528114
LA
451 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
454 }else{
455 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
457 }
458 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
459 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
3b66c4c5 460 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
9e528114
LA
461 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
462 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
5ad984c9 463
/* B slices: clear the direct flags of the current macroblock and load those
 * of the top and left neighbours. */
9f5c1037 464 if(h->slice_type_nos == FF_B_TYPE){
5ad984c9
LM
465 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
466
467 if(IS_DIRECT(top_type)){
468 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
469 }else if(IS_8X8(top_type)){
470 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
471 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
472 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
473 }else{
474 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
475 }
115329f1 476
5d18eaad
LM
477 if(IS_DIRECT(left_type[0]))
478 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
479 else if(IS_8X8(left_type[0]))
480 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
481 else
482 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
483
484 if(IS_DIRECT(left_type[1]))
5ad984c9 485 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
5d18eaad
LM
486 else if(IS_8X8(left_type[1]))
487 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
488 else
5ad984c9 489 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
5d18eaad
LM
490 }
491 }
492
/* MBAFF: rescale cached neighbour data whose field/frame coding differs from
 * the current macroblock. For a field macroblock, frame neighbours get their
 * reference index doubled and vertical mv/mvd halved; for a frame macroblock,
 * field neighbours get the reference index halved and vertical mv/mvd doubled.
 * Only entries with a reference index >= 0 are touched. */
493 if(FRAME_MBAFF){
494#define MAP_MVS\
495 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
496 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
497 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
498 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
499 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
500 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
501 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
502 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
503 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
504 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
505 if(MB_FIELD){
506#define MAP_F2F(idx, mb_type)\
507 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
508 h->ref_cache[list][idx] <<= 1;\
509 h->mv_cache[list][idx][1] /= 2;\
510 h->mvd_cache[list][idx][1] /= 2;\
511 }
512 MAP_MVS
513#undef MAP_F2F
514 }else{
515#define MAP_F2F(idx, mb_type)\
516 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
517 h->ref_cache[list][idx] >>= 1;\
518 h->mv_cache[list][idx][1] <<= 1;\
519 h->mvd_cache[list][idx][1] <<= 1;\
5ad984c9 520 }
5d18eaad
LM
521 MAP_MVS
522#undef MAP_F2F
5ad984c9 523 }
9e528114 524 }
0da71265 525 }
0da71265
MN
526 }
527#endif
43efd19a
LM
528
/* Number (0..2) of top/left neighbours for which IS_8x8DCT is set. */
529 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
0da71265
MN
530}
531
903d58f6 532void ff_h264_write_back_intra_pred_mode(H264Context *h){
64514ee8 533 const int mb_xy= h->mb_xy;
0da71265
MN
534
535 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
536 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
537 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
538 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
539 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
540 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
541 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
542}
543
544/**
545 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
546 */
903d58f6 547int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
0da71265
MN
548 MpegEncContext * const s = &h->s;
549 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
550 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1 551
43ff0714 552 if(mode > 6U) {
5175b937 553 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83 554 return -1;
5175b937 555 }
115329f1 556
0da71265
MN
557 if(!(h->top_samples_available&0x8000)){
558 mode= top[ mode ];
559 if(mode<0){
9b879566 560 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
MN
561 return -1;
562 }
563 }
115329f1 564
d1d10e91 565 if((h->left_samples_available&0x8080) != 0x8080){
0da71265 566 mode= left[ mode ];
d1d10e91
MN
567 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
568 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
569 }
0da71265 570 if(mode<0){
9b879566 571 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265 572 return -1;
115329f1 573 }
0da71265
MN
574 }
575
576 return mode;
577}
578
579/**
580 * gets the predicted intra4x4 prediction mode.
581 */
582static inline int pred_intra_mode(H264Context *h, int n){
583 const int index8= scan8[n];
584 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
585 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
586 const int min= FFMIN(left, top);
587
a9c9a240 588 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
0da71265
MN
589
590 if(min<0) return DC_PRED;
591 else return min;
592}
593
594static inline void write_back_non_zero_count(H264Context *h){
64514ee8 595 const int mb_xy= h->mb_xy;
0da71265 596
6867a90b
LLL
597 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
598 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
599 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
53c05b1e 600 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
6867a90b
LLL
601 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
602 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
603 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
115329f1 604
6867a90b 605 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
53c05b1e 606 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
6867a90b 607 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
53c05b1e 608
6867a90b 609 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
53c05b1e 610 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
6867a90b 611 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
0da71265
MN
612}
613
614/**
1412060e 615 * gets the predicted number of non-zero coefficients.
0da71265
MN
616 * @param n block index
617 */
618static inline int pred_non_zero_count(H264Context *h, int n){
619 const int index8= scan8[n];
620 const int left= h->non_zero_count_cache[index8 - 1];
621 const int top = h->non_zero_count_cache[index8 - 8];
622 int i= left + top;
115329f1 623
0da71265
MN
624 if(i<64) i= (i+1)>>1;
625
a9c9a240 626 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
0da71265
MN
627
628 return i&31;
629}
630
0da71265
MN
631/**
632 * gets the directionally predicted 16x8 MV.
633 * @param n the block index
634 * @param mx the x component of the predicted motion vector
635 * @param my the y component of the predicted motion vector
636 */
637static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
638 if(n==0){
639 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
640 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
641
a9c9a240 642 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
115329f1 643
0da71265
MN
644 if(top_ref == ref){
645 *mx= B[0];
646 *my= B[1];
647 return;
648 }
649 }else{
650 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
651 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
115329f1 652
a9c9a240 653 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
654
655 if(left_ref == ref){
656 *mx= A[0];
657 *my= A[1];
658 return;
659 }
660 }
661
662 //RARE
663 pred_motion(h, n, 4, list, ref, mx, my);
664}
665
666/**
667 * gets the directionally predicted 8x16 MV.
668 * @param n the block index
669 * @param mx the x component of the predicted motion vector
670 * @param my the y component of the predicted motion vector
671 */
672static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
0da71265
MN
673 if(n==0){
674 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
675 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
115329f1 676
a9c9a240 677 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265
MN
678
679 if(left_ref == ref){
680 *mx= A[0];
681 *my= A[1];
682 return;
683 }
684 }else{
1924f3ce
MN
685 const int16_t * C;
686 int diagonal_ref;
687
688 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
115329f1 689
a9c9a240 690 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
0da71265 691
115329f1 692 if(diagonal_ref == ref){
0da71265
MN
693 *mx= C[0];
694 *my= C[1];
695 return;
696 }
0da71265
MN
697 }
698
699 //RARE
700 pred_motion(h, n, 2, list, ref, mx, my);
701}
702
703static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
0da71265
MN
704 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
705 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
706
a9c9a240 707 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
0da71265
MN
708
709 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
62ea19c0
MN
710 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
711 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
115329f1 712
0da71265
MN
713 *mx = *my = 0;
714 return;
715 }
115329f1 716
0da71265
MN
717 pred_motion(h, 0, 4, 0, 0, mx, my);
718
719 return;
720}
721
722static inline void write_back_motion(H264Context *h, int mb_type){
723 MpegEncContext * const s = &h->s;
0da71265
MN
724 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
725 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
726 int list;
727
2ea39252
LM
728 if(!USES_LIST(mb_type, 0))
729 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
730
3425501d 731 for(list=0; list<h->list_count; list++){
0da71265 732 int y;
53b19144 733 if(!USES_LIST(mb_type, list))
5ad984c9 734 continue;
115329f1 735
0da71265
MN
736 for(y=0; y<4; y++){
737 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
738 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
739 }
9e528114 740 if( h->pps.cabac ) {
e6e77eb6
LM
741 if(IS_SKIP(mb_type))
742 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
743 else
9e528114
LA
744 for(y=0; y<4; y++){
745 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
746 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
747 }
748 }
53b19144
LM
749
750 {
191e8ca7 751 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
53b19144
LM
752 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
753 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
754 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
755 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
0da71265
MN
756 }
757 }
115329f1 758
9f5c1037 759 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
5ad984c9 760 if(IS_8X8(mb_type)){
53b19144
LM
761 uint8_t *direct_table = &h->direct_table[b8_xy];
762 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
763 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
764 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
5ad984c9
LM
765 }
766 }
0da71265
MN
767}
768
1790a5e9 769const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
MN
770 int i, si, di;
771 uint8_t *dst;
24456882 772 int bufidx;
0da71265 773
bb270c08 774// src[0]&0x80; //forbidden bit
0da71265
MN
775 h->nal_ref_idc= src[0]>>5;
776 h->nal_unit_type= src[0]&0x1F;
777
778 src++; length--;
115329f1 779#if 0
0da71265
MN
780 for(i=0; i<length; i++)
781 printf("%2X ", src[i]);
782#endif
e08715d3 783
b250f9c6
AJ
784#if HAVE_FAST_UNALIGNED
785# if HAVE_FAST_64BIT
e08715d3
MN
786# define RS 7
787 for(i=0; i+1<length; i+=9){
3878be31 788 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
MN
789# else
790# define RS 3
791 for(i=0; i+1<length; i+=5){
3878be31 792 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
MN
793# endif
794 continue;
795 if(i>0 && !src[i]) i--;
796 while(src[i]) i++;
797#else
798# define RS 0
0da71265
MN
799 for(i=0; i+1<length; i+=2){
800 if(src[i]) continue;
801 if(i>0 && src[i-1]==0) i--;
e08715d3 802#endif
0da71265
MN
803 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
804 if(src[i+2]!=3){
805 /* startcode, so we must be past the end */
806 length=i;
807 }
808 break;
809 }
abb27cfb 810 i-= RS;
0da71265
MN
811 }
812
813 if(i>=length-1){ //no escaped 0
814 *dst_length= length;
815 *consumed= length+1; //+1 for the header
115329f1 816 return src;
0da71265
MN
817 }
818
24456882 819 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
238ef6da 820 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
24456882 821 dst= h->rbsp_buffer[bufidx];
0da71265 822
ac658be5
FOL
823 if (dst == NULL){
824 return NULL;
825 }
826
3b66c4c5 827//printf("decoding esc\n");
593af7cd
MN
828 memcpy(dst, src, i);
829 si=di=i;
830 while(si+2<length){
0da71265 831 //remove escapes (very rare 1:2^22)
593af7cd
MN
832 if(src[si+2]>3){
833 dst[di++]= src[si++];
834 dst[di++]= src[si++];
835 }else if(src[si]==0 && src[si+1]==0){
0da71265
MN
836 if(src[si+2]==3){ //escape
837 dst[di++]= 0;
838 dst[di++]= 0;
839 si+=3;
c8470cc1 840 continue;
0da71265 841 }else //next start code
593af7cd 842 goto nsc;
0da71265
MN
843 }
844
845 dst[di++]= src[si++];
846 }
593af7cd
MN
847 while(si<length)
848 dst[di++]= src[si++];
849nsc:
0da71265 850
d4369630
AS
851 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
852
0da71265
MN
853 *dst_length= di;
854 *consumed= si + 1;//+1 for the header
90b5b51e 855//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
MN
856 return dst;
857}
858
1790a5e9 859int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
MN
860 int v= *src;
861 int r;
862
a9c9a240 863 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
MN
864
865 for(r=1; r<9; r++){
866 if(v&1) return r;
867 v>>=1;
868 }
869 return 0;
870}
871
872/**
1412060e 873 * IDCT transforms the 16 dc values and dequantizes them.
0da71265
MN
874 * @param qp quantization parameter
875 */
239ea04c 876static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
877#define stride 16
878 int i;
879 int temp[16]; //FIXME check if this is a good idea
880 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
881 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
882
883//memset(block, 64, 2*256);
884//return;
885 for(i=0; i<4; i++){
886 const int offset= y_offset[i];
887 const int z0= block[offset+stride*0] + block[offset+stride*4];
888 const int z1= block[offset+stride*0] - block[offset+stride*4];
889 const int z2= block[offset+stride*1] - block[offset+stride*5];
890 const int z3= block[offset+stride*1] + block[offset+stride*5];
891
892 temp[4*i+0]= z0+z3;
893 temp[4*i+1]= z1+z2;
894 temp[4*i+2]= z1-z2;
895 temp[4*i+3]= z0-z3;
896 }
897
898 for(i=0; i<4; i++){
899 const int offset= x_offset[i];
900 const int z0= temp[4*0+i] + temp[4*2+i];
901 const int z1= temp[4*0+i] - temp[4*2+i];
902 const int z2= temp[4*1+i] - temp[4*3+i];
903 const int z3= temp[4*1+i] + temp[4*3+i];
904
1412060e 905 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
239ea04c
LM
906 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
907 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
908 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
0da71265
MN
909 }
910}
911
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; it relies on the stride macro that is still defined at
 * this point of the file.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const DCTELEM *in= block + y_offset[i];
        const int sum04 = in[stride*0] + in[stride*4];
        const int dif04 = in[stride*0] - in[stride*4];
        const int dif15 = in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        temp[4*i+0]= sum04+sum15;
        temp[4*i+1]= dif04+dif15;
        temp[4*i+2]= dif04-dif15;
        temp[4*i+3]= sum04-sum15;
    }

    for(i=0; i<4; i++){
        DCTELEM *out= block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int dif02 = temp[4*0+i] - temp[4*2+i];
        const int dif13 = temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif

#undef xStride
#undef stride
954
239ea04c 955static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
0da71265
MN
956 const int stride= 16*2;
957 const int xStride= 16;
958 int a,b,c,d,e;
959
960 a= block[stride*0 + xStride*0];
961 b= block[stride*0 + xStride*1];
962 c= block[stride*1 + xStride*0];
963 d= block[stride*1 + xStride*1];
964
965 e= a-b;
966 a= a+b;
967 b= c-d;
968 c= c+d;
969
239ea04c
LM
970 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
971 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
972 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
973 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
0da71265
MN
974}
975
#if 0
/**
 * Disabled code: 2x2 butterfly over the four chroma DC values, in place
 * and without any scaling.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    const int v00= block[stride*0 + xStride*0];
    const int v01= block[stride*0 + xStride*1];
    const int v10= block[stride*1 + xStride*0];
    const int v11= block[stride*1 + xStride*1];
    const int top_sum = v00+v01;
    const int top_dif = v00-v01;
    const int bot_sum = v10+v11;
    const int bot_dif = v10-v11;

    block[stride*0 + xStride*0]= (top_sum+bot_sum);
    block[stride*0 + xStride*1]= (top_dif+bot_dif);
    block[stride*1 + xStride*0]= (top_sum-bot_sum);
    block[stride*1 + xStride*1]= (top_dif-bot_dif);
}
#endif
0da71265 998
0da71265
MN
999static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1000 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1001 int src_x_offset, int src_y_offset,
1002 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1003 MpegEncContext * const s = &h->s;
1004 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
5d18eaad 1005 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
0da71265 1006 const int luma_xy= (mx&3) + ((my&3)<<2);
5d18eaad
LM
1007 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1008 uint8_t * src_cb, * src_cr;
1009 int extra_width= h->emu_edge_width;
1010 int extra_height= h->emu_edge_height;
0da71265
MN
1011 int emu=0;
1012 const int full_mx= mx>>2;
1013 const int full_my= my>>2;
fbd312fd 1014 const int pic_width = 16*s->mb_width;
0d43dd8c 1015 const int pic_height = 16*s->mb_height >> MB_FIELD;
115329f1 1016
0da71265
MN
1017 if(mx&7) extra_width -= 3;
1018 if(my&7) extra_height -= 3;
115329f1
DB
1019
1020 if( full_mx < 0-extra_width
1021 || full_my < 0-extra_height
1022 || full_mx + 16/*FIXME*/ > pic_width + extra_width
fbd312fd 1023 || full_my + 16/*FIXME*/ > pic_height + extra_height){
5d18eaad
LM
1024 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1025 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
0da71265
MN
1026 emu=1;
1027 }
115329f1 1028
5d18eaad 1029 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
0da71265 1030 if(!square){
5d18eaad 1031 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
0da71265 1032 }
115329f1 1033
49fb20cb 1034 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
115329f1 1035
0d43dd8c 1036 if(MB_FIELD){
5d18eaad 1037 // chroma offset when predicting from a field of opposite parity
2143b118 1038 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
5d18eaad
LM
1039 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1040 }
1041 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1042 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1043
0da71265 1044 if(emu){
5d18eaad 1045 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1046 src_cb= s->edge_emu_buffer;
1047 }
5d18eaad 1048 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1049
1050 if(emu){
5d18eaad 1051 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
0da71265
MN
1052 src_cr= s->edge_emu_buffer;
1053 }
5d18eaad 1054 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
0da71265
MN
1055}
1056
9f2d1b4f 1057static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
0da71265
MN
1058 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1059 int x_offset, int y_offset,
1060 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1061 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1062 int list0, int list1){
1063 MpegEncContext * const s = &h->s;
1064 qpel_mc_func *qpix_op= qpix_put;
1065 h264_chroma_mc_func chroma_op= chroma_put;
115329f1 1066
5d18eaad
LM
1067 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1068 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1069 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
0da71265 1070 x_offset += 8*s->mb_x;
0d43dd8c 1071 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1072
0da71265 1073 if(list0){
1924f3ce 1074 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
0da71265
MN
1075 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1076 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1077 qpix_op, chroma_op);
1078
1079 qpix_op= qpix_avg;
1080 chroma_op= chroma_avg;
1081 }
1082
1083 if(list1){
1924f3ce 1084 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
0da71265
MN
1085 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1086 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1087 qpix_op, chroma_op);
1088 }
1089}
1090
9f2d1b4f
LM
1091static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1092 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1093 int x_offset, int y_offset,
1094 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1095 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1096 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1097 int list0, int list1){
1098 MpegEncContext * const s = &h->s;
1099
5d18eaad
LM
1100 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1101 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1102 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
9f2d1b4f 1103 x_offset += 8*s->mb_x;
0d43dd8c 1104 y_offset += 8*(s->mb_y >> MB_FIELD);
115329f1 1105
9f2d1b4f
LM
1106 if(list0 && list1){
1107 /* don't optimize for luma-only case, since B-frames usually
1108 * use implicit weights => chroma too. */
1109 uint8_t *tmp_cb = s->obmc_scratchpad;
5d18eaad
LM
1110 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1111 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
9f2d1b4f
LM
1112 int refn0 = h->ref_cache[0][ scan8[n] ];
1113 int refn1 = h->ref_cache[1][ scan8[n] ];
1114
1115 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1116 dest_y, dest_cb, dest_cr,
1117 x_offset, y_offset, qpix_put, chroma_put);
1118 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1119 tmp_y, tmp_cb, tmp_cr,
1120 x_offset, y_offset, qpix_put, chroma_put);
1121
1122 if(h->use_weight == 2){
1123 int weight0 = h->implicit_weight[refn0][refn1];
1124 int weight1 = 64 - weight0;
5d18eaad
LM
1125 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1126 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1127 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
9f2d1b4f 1128 }else{
5d18eaad 1129 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
115329f1 1130 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
e8b56208 1131 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
5d18eaad 1132 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1133 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
e8b56208 1134 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
5d18eaad 1135 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
115329f1 1136 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
e8b56208 1137 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
9f2d1b4f
LM
1138 }
1139 }else{
1140 int list = list1 ? 1 : 0;
1141 int refn = h->ref_cache[list][ scan8[n] ];
1142 Picture *ref= &h->ref_list[list][refn];
1143 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1144 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1145 qpix_put, chroma_put);
1146
5d18eaad 1147 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
9f2d1b4f
LM
1148 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1149 if(h->use_weight_chroma){
5d18eaad 1150 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f 1151 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
5d18eaad 1152 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
9f2d1b4f
LM
1153 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1154 }
1155 }
1156}
1157
1158static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1159 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1160 int x_offset, int y_offset,
1161 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1162 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
115329f1 1163 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
9f2d1b4f
LM
1164 int list0, int list1){
1165 if((h->use_weight==2 && list0 && list1
1166 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1167 || h->use_weight==1)
1168 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1169 x_offset, y_offset, qpix_put, chroma_put,
1170 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1171 else
1172 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1173 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1174}
1175
513fbd8e
LM
1176static inline void prefetch_motion(H264Context *h, int list){
1177 /* fetch pixels for estimated mv 4 macroblocks ahead
1178 * optimized for 64byte cache lines */
1179 MpegEncContext * const s = &h->s;
1180 const int refn = h->ref_cache[list][scan8[0]];
1181 if(refn >= 0){
1182 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1183 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1184 uint8_t **src= h->ref_list[list][refn].data;
5d18eaad 1185 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
513fbd8e
LM
1186 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1187 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1188 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1189 }
1190}
1191
0da71265
MN
1192static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1193 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
9f2d1b4f
LM
1194 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1195 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
0da71265 1196 MpegEncContext * const s = &h->s;
64514ee8 1197 const int mb_xy= h->mb_xy;
0da71265 1198 const int mb_type= s->current_picture.mb_type[mb_xy];
115329f1 1199
0da71265 1200 assert(IS_INTER(mb_type));
115329f1 1201
513fbd8e
LM
1202 prefetch_motion(h, 0);
1203
0da71265
MN
1204 if(IS_16X16(mb_type)){
1205 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1206 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
9f2d1b4f 1207 &weight_op[0], &weight_avg[0],
0da71265
MN
1208 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1209 }else if(IS_16X8(mb_type)){
1210 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1211 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1212 &weight_op[1], &weight_avg[1],
0da71265
MN
1213 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1214 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1215 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
9f2d1b4f 1216 &weight_op[1], &weight_avg[1],
0da71265
MN
1217 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1218 }else if(IS_8X16(mb_type)){
5d18eaad 1219 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
0da71265 1220 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1221 &weight_op[2], &weight_avg[2],
0da71265 1222 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
5d18eaad 1223 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
0da71265 1224 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1225 &weight_op[2], &weight_avg[2],
0da71265
MN
1226 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1227 }else{
1228 int i;
115329f1 1229
0da71265
MN
1230 assert(IS_8X8(mb_type));
1231
1232 for(i=0; i<4; i++){
1233 const int sub_mb_type= h->sub_mb_type[i];
1234 const int n= 4*i;
1235 int x_offset= (i&1)<<2;
1236 int y_offset= (i&2)<<1;
1237
1238 if(IS_SUB_8X8(sub_mb_type)){
1239 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1240 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
9f2d1b4f 1241 &weight_op[3], &weight_avg[3],
0da71265
MN
1242 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1243 }else if(IS_SUB_8X4(sub_mb_type)){
1244 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1245 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1246 &weight_op[4], &weight_avg[4],
0da71265
MN
1247 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1248 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1249 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
9f2d1b4f 1250 &weight_op[4], &weight_avg[4],
0da71265
MN
1251 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1252 }else if(IS_SUB_4X8(sub_mb_type)){
5d18eaad 1253 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
0da71265 1254 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1255 &weight_op[5], &weight_avg[5],
0da71265 1256 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
5d18eaad 1257 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
0da71265 1258 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1259 &weight_op[5], &weight_avg[5],
0da71265
MN
1260 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1261 }else{
1262 int j;
1263 assert(IS_SUB_4X4(sub_mb_type));
1264 for(j=0; j<4; j++){
1265 int sub_x_offset= x_offset + 2*(j&1);
1266 int sub_y_offset= y_offset + (j&2);
1267 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1268 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
9f2d1b4f 1269 &weight_op[6], &weight_avg[6],
0da71265
MN
1270 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1271 }
1272 }
1273 }
1274 }
513fbd8e
LM
1275
1276 prefetch_motion(h, 1);
0da71265
MN
1277}
1278
8140955d
MN
1279static av_cold void init_cavlc_level_tab(void){
1280 int suffix_length, mask;
1281 unsigned int i;
1282
1283 for(suffix_length=0; suffix_length<7; suffix_length++){
1284 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1285 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1286 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1287
1288 mask= -(level_code&1);
1289 level_code= (((2+level_code)>>1) ^ mask) - mask;
1290 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1291 cavlc_level_tab[suffix_length][i][0]= level_code;
1292 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1293 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1294 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1295 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1296 }else{
1297 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1298 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1299 }
1300 }
1301 }
1302}
1303
98a6fff9 1304static av_cold void decode_init_vlc(void){
0da71265
MN
1305 static int done = 0;
1306
1307 if (!done) {
1308 int i;
910e3668 1309 int offset;
0da71265
MN
1310 done = 1;
1311
910e3668
AC
1312 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1313 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
115329f1 1314 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
0da71265 1315 &chroma_dc_coeff_token_len [0], 1, 1,
910e3668
AC
1316 &chroma_dc_coeff_token_bits[0], 1, 1,
1317 INIT_VLC_USE_NEW_STATIC);
0da71265 1318
910e3668 1319 offset = 0;
0da71265 1320 for(i=0; i<4; i++){
910e3668
AC
1321 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1322 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
115329f1 1323 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
0da71265 1324 &coeff_token_len [i][0], 1, 1,
910e3668
AC
1325 &coeff_token_bits[i][0], 1, 1,
1326 INIT_VLC_USE_NEW_STATIC);
1327 offset += coeff_token_vlc_tables_size[i];
0da71265 1328 }
910e3668
AC
1329 /*
1330 * This is a one time safety check to make sure that
1331 * the packed static coeff_token_vlc table sizes
1332 * were initialized correctly.
1333 */
37d3e066 1334 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
0da71265
MN
1335
1336 for(i=0; i<3; i++){
910e3668
AC
1337 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1338 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1339 init_vlc(&chroma_dc_total_zeros_vlc[i],
1340 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
0da71265 1341 &chroma_dc_total_zeros_len [i][0], 1, 1,
910e3668
AC
1342 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1343 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1344 }
1345 for(i=0; i<15; i++){
910e3668
AC
1346 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1347 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1348 init_vlc(&total_zeros_vlc[i],
1349 TOTAL_ZEROS_VLC_BITS, 16,
0da71265 1350 &total_zeros_len [i][0], 1, 1,
910e3668
AC
1351 &total_zeros_bits[i][0], 1, 1,
1352 INIT_VLC_USE_NEW_STATIC);
0da71265
MN
1353 }
1354
1355 for(i=0; i<6; i++){
910e3668
AC
1356 run_vlc[i].table = run_vlc_tables[i];
1357 run_vlc[i].table_allocated = run_vlc_tables_size;
1358 init_vlc(&run_vlc[i],
1359 RUN_VLC_BITS, 7,
0da71265 1360 &run_len [i][0], 1, 1,
910e3668
AC
1361 &run_bits[i][0], 1, 1,
1362 INIT_VLC_USE_NEW_STATIC);
0da71265 1363 }
910e3668
AC
1364 run7_vlc.table = run7_vlc_table,
1365 run7_vlc.table_allocated = run7_vlc_table_size;
115329f1 1366 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
0da71265 1367 &run_len [6][0], 1, 1,
910e3668
AC
1368 &run_bits[6][0], 1, 1,
1369 INIT_VLC_USE_NEW_STATIC);
8140955d
MN
1370
1371 init_cavlc_level_tab();
0da71265
MN
1372 }
1373}
1374
0da71265 1375static void free_tables(H264Context *h){
7978debd 1376 int i;
afebe2f7 1377 H264Context *hx;
0da71265 1378 av_freep(&h->intra4x4_pred_mode);
e5017ab8
LA
1379 av_freep(&h->chroma_pred_mode_table);
1380 av_freep(&h->cbp_table);
9e528114
LA
1381 av_freep(&h->mvd_table[0]);
1382 av_freep(&h->mvd_table[1]);
5ad984c9 1383 av_freep(&h->direct_table);
0da71265
MN
1384 av_freep(&h->non_zero_count);
1385 av_freep(&h->slice_table_base);
1386 h->slice_table= NULL;
e5017ab8 1387
0da71265
MN
1388 av_freep(&h->mb2b_xy);
1389 av_freep(&h->mb2b8_xy);
9f2d1b4f 1390
6752dd5a 1391 for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
1392 hx = h->thread_context[i];
1393 if(!hx) continue;
1394 av_freep(&hx->top_borders[1]);
1395 av_freep(&hx->top_borders[0]);
1396 av_freep(&hx->s.obmc_scratchpad);
d2d5e067
AS
1397 av_freep(&hx->rbsp_buffer[1]);
1398 av_freep(&hx->rbsp_buffer[0]);
eda4ea4e
MS
1399 hx->rbsp_buffer_size[0] = 0;
1400 hx->rbsp_buffer_size[1] = 0;
d2d5e067 1401 if (i) av_freep(&h->thread_context[i]);
afebe2f7 1402 }
0da71265
MN
1403}
1404
239ea04c
LM
1405static void init_dequant8_coeff_table(H264Context *h){
1406 int i,q,x;
548a1c8a 1407 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
239ea04c
LM
1408 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1409 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1410
1411 for(i=0; i<2; i++ ){
1412 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1413 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1414 break;
1415 }
1416
1417 for(q=0; q<52; q++){
d9ec210b
DP
1418 int shift = div6[q];
1419 int idx = rem6[q];
239ea04c 1420 for(x=0; x<64; x++)
548a1c8a
LM
1421 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1422 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1423 h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
LM
1424 }
1425 }
1426}
1427
1428static void init_dequant4_coeff_table(H264Context *h){
1429 int i,j,q,x;
ab2e3e2c 1430 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
239ea04c
LM
1431 for(i=0; i<6; i++ ){
1432 h->dequant4_coeff[i] = h->dequant4_buffer[i];
1433 for(j=0; j<i; j++){
1434 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1435 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1436 break;
1437 }
1438 }
1439 if(j<i)
1440 continue;
1441
1442 for(q=0; q<52; q++){
d9ec210b
DP
1443 int shift = div6[q] + 2;
1444 int idx = rem6[q];
239ea04c 1445 for(x=0; x<16; x++)
ab2e3e2c
LM
1446 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
1447 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
LM
1448 h->pps.scaling_matrix4[i][x]) << shift;
1449 }
1450 }
1451}
1452
1453static void init_dequant_tables(H264Context *h){
1454 int i,x;
1455 init_dequant4_coeff_table(h);
1456 if(h->pps.transform_8x8_mode)
1457 init_dequant8_coeff_table(h);
1458 if(h->sps.transform_bypass){
1459 for(i=0; i<6; i++)
1460 for(x=0; x<16; x++)
1461 h->dequant4_coeff[i][0][x] = 1<<6;
1462 if(h->pps.transform_8x8_mode)
1463 for(i=0; i<2; i++)
1464 for(x=0; x<64; x++)
1465 h->dequant8_coeff[i][0][x] = 1<<6;
1466 }
1467}
1468
1469
903d58f6 1470int ff_h264_alloc_tables(H264Context *h){
0da71265 1471 MpegEncContext * const s = &h->s;
7bc9090a 1472 const int big_mb_num= s->mb_stride * (s->mb_height+1);
239ea04c 1473 int x,y;
0da71265 1474
d31dbec3 1475 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
e5017ab8 1476
d31dbec3
RP
1477 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t), fail)
1478 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
1479 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265 1480
d31dbec3
RP
1481 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
1482 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
1483 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
1484 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
e5017ab8 1485
b735aeea 1486 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
5d18eaad 1487 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265 1488
d31dbec3
RP
1489 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
1490 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
MN
1491 for(y=0; y<s->mb_height; y++){
1492 for(x=0; x<s->mb_width; x++){
7bc9090a 1493 const int mb_xy= x + y*s->mb_stride;
0da71265
MN
1494 const int b_xy = 4*x + 4*y*h->b_stride;
1495 const int b8_xy= 2*x + 2*y*h->b8_stride;
115329f1 1496
0da71265
MN
1497 h->mb2b_xy [mb_xy]= b_xy;
1498 h->mb2b8_xy[mb_xy]= b8_xy;
1499 }
1500 }
9f2d1b4f 1501
9c6221ae
GV
1502 s->obmc_scratchpad = NULL;
1503
56edbd81
LM
1504 if(!h->dequant4_coeff[0])
1505 init_dequant_tables(h);
1506
0da71265
MN
1507 return 0;
1508fail:
1509 free_tables(h);
1510 return -1;
1511}
1512
afebe2f7
1513/**
1514 * Mimic alloc_tables(), but for every context thread.
1515 */
1516static void clone_tables(H264Context *dst, H264Context *src){
1517 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
1518 dst->non_zero_count = src->non_zero_count;
1519 dst->slice_table = src->slice_table;
1520 dst->cbp_table = src->cbp_table;
1521 dst->mb2b_xy = src->mb2b_xy;
1522 dst->mb2b8_xy = src->mb2b8_xy;
1523 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
1524 dst->mvd_table[0] = src->mvd_table[0];
1525 dst->mvd_table[1] = src->mvd_table[1];
1526 dst->direct_table = src->direct_table;
1527
afebe2f7
1528 dst->s.obmc_scratchpad = NULL;
1529 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
afebe2f7
1530}
1531
1532/**
1533 * Init context
1534 * Allocate buffers which are not shared amongst multiple threads.
1535 */
1536static int context_init(H264Context *h){
d31dbec3
RP
1537 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
1538 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
afebe2f7 1539
afebe2f7
1540 return 0;
1541fail:
1542 return -1; // free_tables will clean up for us
1543}
1544
98a6fff9 1545static av_cold void common_init(H264Context *h){
0da71265 1546 MpegEncContext * const s = &h->s;
0da71265
MN
1547
1548 s->width = s->avctx->width;
1549 s->height = s->avctx->height;
1550 s->codec_id= s->avctx->codec->id;
115329f1 1551
c92a30bb 1552 ff_h264_pred_init(&h->hpc, s->codec_id);
0da71265 1553
239ea04c 1554 h->dequant_coeff_pps= -1;
9a41c2c7 1555 s->unrestricted_mv=1;
0da71265 1556 s->decode=1; //FIXME
56edbd81 1557
a5805aa9
MN
1558 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
1559
56edbd81
LM
1560 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
1561 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
MN
1562}
1563
903d58f6 1564av_cold int ff_h264_decode_init(AVCodecContext *avctx){
0da71265
MN
1565 H264Context *h= avctx->priv_data;
1566 MpegEncContext * const s = &h->s;
1567
3edcacde 1568 MPV_decode_defaults(s);
115329f1 1569
0da71265
MN
1570 s->avctx = avctx;
1571 common_init(h);
1572
1573 s->out_format = FMT_H264;
1574 s->workaround_bugs= avctx->workaround_bugs;
1575
1576 // set defaults
0da71265 1577// s->decode_mb= ff_h263_decode_mb;
9a5a05d0 1578 s->quarter_sample = 1;
47cd974a 1579 if(!avctx->has_b_frames)
0da71265 1580 s->low_delay= 1;
7a9dba3c 1581
580a7465 1582 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
0da71265 1583
c2212338 1584 decode_init_vlc();
115329f1 1585
26165f99
MR
1586 if(avctx->extradata_size > 0 && avctx->extradata &&
1587 *(char *)avctx->extradata == 1){
4770b1b4
RT
1588 h->is_avc = 1;
1589 h->got_avcC = 0;
26165f99
MR
1590 } else {
1591 h->is_avc = 0;
4770b1b4
RT
1592 }
1593
afebe2f7 1594 h->thread_context[0] = h;
18c7be65 1595 h->outputed_poc = INT_MIN;
e4b8f1fa 1596 h->prev_poc_msb= 1<<16;
9c095463 1597 ff_h264_reset_sei(h);
efd8c1f6
MN
1598 if(avctx->codec_id == CODEC_ID_H264){
1599 if(avctx->ticks_per_frame == 1){
1600 s->avctx->time_base.den *=2;
1601 }
19df37a8 1602 avctx->ticks_per_frame = 2;
efd8c1f6 1603 }
0da71265
MN
1604 return 0;
1605}
1606
903d58f6 1607int ff_h264_frame_start(H264Context *h){
0da71265
MN
1608 MpegEncContext * const s = &h->s;
1609 int i;
1610
af8aa846
MN
1611 if(MPV_frame_start(s, s->avctx) < 0)
1612 return -1;
0da71265 1613 ff_er_frame_start(s);
3a22d7fa
JD
1614 /*
1615 * MPV_frame_start uses pict_type to derive key_frame.
1616 * This is incorrect for H.264; IDR markings must be used.
1412060e 1617 * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
JD
1618 * See decode_nal_units().
1619 */
1620 s->current_picture_ptr->key_frame= 0;
c173a088 1621 s->current_picture_ptr->mmco_reset= 0;
0da71265
MN
1622
1623 assert(s->linesize && s->uvlinesize);
1624
1625 for(i=0; i<16; i++){
1626 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
6867a90b 1627 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
1628 }
1629 for(i=0; i<4; i++){
1630 h->block_offset[16+i]=
1631 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
6867a90b
LLL
1632 h->block_offset[24+16+i]=
1633 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
MN
1634 }
1635
934b0821
LM
1636 /* can't be in alloc_tables because linesize isn't known there.
1637 * FIXME: redo bipred weight to not require extra buffer? */
afebe2f7
1638 for(i = 0; i < s->avctx->thread_count; i++)
1639 if(!h->thread_context[i]->s.obmc_scratchpad)
1640 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
5d18eaad
LM
1641
1642 /* some macroblocks will be accessed before they're available */
afebe2f7 1643 if(FRAME_MBAFF || s->avctx->thread_count > 1)
b735aeea 1644 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821 1645
0da71265 1646// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2 1647
1412060e 1648 // We mark the current picture as non-reference after allocating it, so
28bb9eb2
MN
1649 // that if we break out due to an error it can be released automatically
1650 // in the next MPV_frame_start().
1651 // SVQ3 as well as most other codecs have only last/next/current and thus
1652 // get released even with set reference, besides SVQ3 and others do not
1653 // mark frames as reference later "naturally".
1654 if(s->codec_id != CODEC_ID_SVQ3)
1655 s->current_picture_ptr->reference= 0;
357282c6
MN
1656
1657 s->current_picture_ptr->field_poc[0]=
1658 s->current_picture_ptr->field_poc[1]= INT_MAX;
5118c6c7 1659 assert(s->current_picture_ptr->long_ref==0);
357282c6 1660
af8aa846 1661 return 0;
0da71265
MN
1662}
1663
93cc10fa 1664static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
53c05b1e
MN
1665 MpegEncContext * const s = &h->s;
1666 int i;
5f7f9719
MN
1667 int step = 1;
1668 int offset = 1;
1669 int uvoffset= 1;
1670 int top_idx = 1;
1671 int skiplast= 0;
115329f1 1672
53c05b1e
MN
1673 src_y -= linesize;
1674 src_cb -= uvlinesize;
1675 src_cr -= uvlinesize;
1676
5f7f9719
MN
1677 if(!simple && FRAME_MBAFF){
1678 if(s->mb_y&1){
1679 offset = MB_MBAFF ? 1 : 17;
1680 uvoffset= MB_MBAFF ? 1 : 9;
1681 if(!MB_MBAFF){
1682 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
1683 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
49fb20cb 1684 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
1685 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
1686 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
1687 }
1688 }
1689 }else{
1690 if(!MB_MBAFF){
1691 h->left_border[0]= h->top_borders[0][s->mb_x][15];
49fb20cb 1692 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
1693 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
1694 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
1695 }
1696 skiplast= 1;
1697 }
1698 offset =
1699 uvoffset=
1700 top_idx = MB_MBAFF ? 0 : 1;
1701 }
1702 step= MB_MBAFF ? 2 : 1;
1703 }
1704
3b66c4c5 1705 // There are two lines saved, the line above the the top macroblock of a pair,
6867a90b 1706 // and the line above the bottom macroblock
5f7f9719
MN
1707 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
1708 for(i=1; i<17 - skiplast; i++){
1709 h->left_border[offset+i*step]= src_y[15+i* linesize];
53c05b1e 1710 }
115329f1 1711
5f7f9719
MN
1712 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
1713 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
53c05b1e 1714
49fb20cb 1715 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
5f7f9719
MN
1716 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
1717 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
1718 for(i=1; i<9 - skiplast; i++){
1719 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
1720 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
53c05b1e 1721 }
5f7f9719
MN
1722 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
1723 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
53c05b1e
MN
1724 }
1725}
1726
93cc10fa 1727static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
53c05b1e
MN
1728 MpegEncContext * const s = &h->s;
1729 int temp8, i;
1730 uint64_t temp64;
b69378e2
1731 int deblock_left;
1732 int deblock_top;
1733 int mb_xy;
5f7f9719
MN
1734 int step = 1;
1735 int offset = 1;
1736 int uvoffset= 1;
1737 int top_idx = 1;
1738
1739 if(!simple && FRAME_MBAFF){
1740 if(s->mb_y&1){
1741 offset = MB_MBAFF ? 1 : 17;
1742 uvoffset= MB_MBAFF ? 1 : 9;
1743 }else{
1744 offset =
1745 uvoffset=
1746 top_idx = MB_MBAFF ? 0 : 1;
1747 }
1748 step= MB_MBAFF ? 2 : 1;
1749 }
b69378e2
1750
1751 if(h->deblocking_filter == 2) {
64514ee8 1752 mb_xy = h->mb_xy;
b69378e2
1753 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
1754 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
1755 } else {
1756 deblock_left = (s->mb_x > 0);
6c805007 1757 deblock_top = (s->mb_y > !!MB_FIELD);
b69378e2 1758 }
53c05b1e
MN
1759
1760 src_y -= linesize + 1;
1761 src_cb -= uvlinesize + 1;
1762 src_cr -= uvlinesize + 1;
1763
1764#define XCHG(a,b,t,xchg)\
1765t= a;\
1766if(xchg)\
1767 a= b;\
1768b= t;
d89dc06a
LM
1769
1770 if(deblock_left){
5f7f9719
MN
1771 for(i = !deblock_top; i<16; i++){
1772 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
d89dc06a 1773 }
5f7f9719 1774 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
d89dc06a
LM
1775 }
1776
1777 if(deblock_top){
5f7f9719
MN
1778 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
1779 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
cad4368a 1780 if(s->mb_x+1 < s->mb_width){
5f7f9719 1781 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
43efd19a 1782 }
53c05b1e 1783 }
53c05b1e 1784
49fb20cb 1785 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
d89dc06a 1786 if(deblock_left){
5f7f9719
MN
1787 for(i = !deblock_top; i<8; i++){
1788 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
1789 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
d89dc06a 1790 }
5f7f9719
MN
1791 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
1792 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
d89dc06a
LM
1793 }
1794 if(deblock_top){
5f7f9719
MN
1795 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
1796 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
53c05b1e 1797 }
53c05b1e
MN
1798 }
1799}
1800
/**
 * Reconstruct the current macroblock (h->mb_xy) into s->current_picture.
 *
 * The steps are, in order: intra prediction or inter motion compensation,
 * adding the luma and chroma residuals stored in h->mb, clearing h->mb and,
 * if h->deblocking_filter is set, saving the macroblock borders and running
 * the loop filter.
 *
 * @param h      decoder context
 * @param simple when nonzero, the checks guarded by "!simple" are compiled
 *               out: field/MBAFF macroblocks, intra PCM and transform
 *               bypass; is_h264 is then always true and the gray-flag test
 *               is skipped. Callers pass a constant so that two
 *               specialized copies of this always-inline function exist.
 */
5a6a6cc7 1801static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
0da71265
MN
1802 MpegEncContext * const s = &h->s;
1803 const int mb_x= s->mb_x;
1804 const int mb_y= s->mb_y;
64514ee8 1805 const int mb_xy= h->mb_xy;
0da71265
MN
1806 const int mb_type= s->current_picture.mb_type[mb_xy];
1807 uint8_t *dest_y, *dest_cb, *dest_cr;
1808 int linesize, uvlinesize /*dct_offset*/;
1809 int i;
6867a90b 1810 int *block_offset = &h->block_offset[0];
41e4055b 1811 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed 1812 /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb 1813 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca 1814 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
ef9d1d15 1815 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265 1816
/* Top-left sample of this macroblock: 16 luma and 8 chroma samples per
 * macroblock step, horizontally and vertically. */
6120a343
MN
1817 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1818 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1819 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
0da71265 1820
a957c27b
LM
1821 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1822 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1823
/* Field macroblock: double the line strides and switch to the second half
 * of block_offset (entries 24..); for odd mb_y the destination is moved up
 * by 15 luma / 7 chroma lines. */
bd91fee3 1824 if (!simple && MB_FIELD) {
5d18eaad
LM
1825 linesize = h->mb_linesize = s->linesize * 2;
1826 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
6867a90b 1827 block_offset = &h->block_offset[24];
1412060e 1828 if(mb_y&1){ //FIXME move out of this function?
0da71265 1829 dest_y -= s->linesize*15;
6867a90b
LLL
1830 dest_cb-= s->uvlinesize*7;
1831 dest_cr-= s->uvlinesize*7;
0da71265 1832 }
/* In MBAFF frames the cached reference indices of every used list are
 * rewritten to (16+ref)^(mb_y&1). */
5d18eaad
LM
1833 if(FRAME_MBAFF) {
1834 int list;
3425501d 1835 for(list=0; list<h->list_count; list++){
5d18eaad
LM
1836 if(!USES_LIST(mb_type, list))
1837 continue;
1838 if(IS_16X16(mb_type)){
1839 int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c 1840 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
LM
1841 }else{
1842 for(i=0; i<16; i+=4){
5d18eaad
LM
1843 int ref = h->ref_cache[list][scan8[i]];
1844 if(ref >= 0)
1710856c 1845 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
LM
1846 }
1847 }
1848 }
1849 }
0da71265 1850 } else {
5d18eaad
LM
1851 linesize = h->mb_linesize = s->linesize;
1852 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
MN
1853// dct_offset = s->linesize * 16;
1854 }
115329f1 1855
/* Intra PCM: h->mb is copied to the picture with memcpy(), 16 luma rows
 * followed by 8 rows for each chroma plane. */
bd91fee3 1856 if (!simple && IS_INTRA_PCM(mb_type)) {
c1708e8d
MN
1857 for (i=0; i<16; i++) {
1858 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
6fbcaaa0 1859 }
c1708e8d
MN
1860 for (i=0; i<8; i++) {
1861 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1862 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
6fbcaaa0 1863 }
e7e09b49
LLL
1864 } else {
1865 if(IS_INTRA(mb_type)){
/* When deblocking is enabled, the border samples are exchanged with the
 * saved copies (see xchg_mb_border) around intra prediction; the matching
 * call with xchg=0 follows the prediction code below. */
5f7f9719 1866 if(h->deblocking_filter)
93cc10fa 1867 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
53c05b1e 1868
49fb20cb 1869 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
KS
1870 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1871 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49 1872 }
0da71265 1873
/* Intra 4x4 / 8x8: every block is predicted and its residual added right
 * away, one block after the other. */
e7e09b49 1874 if(IS_INTRA4x4(mb_type)){
bd91fee3 1875 if(simple || !s->encoding){
43efd19a 1876 if(IS_8x8DCT(mb_type)){
1eb96035
MN
1877 if(transform_bypass){
1878 idct_dc_add =
1879 idct_add = s->dsp.add_pixels8;
dae006d7 1880 }else{
1eb96035
MN
1881 idct_dc_add = s->dsp.h264_idct8_dc_add;
1882 idct_add = s->dsp.h264_idct8_add;
1883 }
43efd19a
LM
1884 for(i=0; i<16; i+=4){
1885 uint8_t * const ptr= dest_y + block_offset[i];
1886 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
41e4055b
MN
1887 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1888 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1889 }else{
ac0623b2
MN
1890 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1891 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1892 (h->topright_samples_available<<i)&0x4000, linesize);
1893 if(nnz){
1894 if(nnz == 1 && h->mb[i*16])
1895 idct_dc_add(ptr, h->mb + i*16, linesize);
1896 else
1897 idct_add (ptr, h->mb + i*16, linesize);
1898 }
41e4055b 1899 }
43efd19a 1900 }
1eb96035
MN
1901 }else{
1902 if(transform_bypass){
1903 idct_dc_add =
1904 idct_add = s->dsp.add_pixels4;
1905 }else{
1906 idct_dc_add = s->dsp.h264_idct_dc_add;
1907 idct_add = s->dsp.h264_idct_add;
1908 }
aebb5d6d
MN
1909 for(i=0; i<16; i++){
1910 uint8_t * const ptr= dest_y + block_offset[i];
1911 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
e7e09b49 1912
aebb5d6d
MN
1913 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1914 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1915 }else{
1916 uint8_t *topright;
1917 int nnz, tr;
/* Only these two prediction modes are given a top-right pointer; if those
 * samples are not available, the sample at ptr[3 - linesize] is replicated
 * four times into the local tr and used instead. */
1918 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1919 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1920 assert(mb_y || linesize <= block_offset[i]);
1921 if(!topright_avail){
1922 tr= ptr[3 - linesize]*0x01010101;
1923 topright= (uint8_t*) &tr;
1924 }else
1925 topright= ptr + 4 - linesize;
ac0623b2 1926 }else
aebb5d6d
MN
1927 topright= NULL;
1928
1929 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1930 nnz = h->non_zero_count_cache[ scan8[i] ];
1931 if(nnz){
1932 if(is_h264){
1933 if(nnz == 1 && h->mb[i*16])
1934 idct_dc_add(ptr, h->mb + i*16, linesize);
1935 else
1936 idct_add (ptr, h->mb + i*16, linesize);
1937 }else
1938 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
1939 }
ac0623b2 1940 }
41e4055b 1941 }
8b82a956 1942 }
0da71265 1943 }
e7e09b49 1944 }else{
/* Intra 16x16: predict the whole luma block, then process the luma DC
 * coefficients; the residual itself is added further down. */
c92a30bb 1945 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
bd91fee3 1946 if(is_h264){
36940eca 1947 if(!transform_bypass)
93f0c0a4 1948 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
36940eca 1949 }else
e7e09b49 1950 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
0da71265 1951 }
5f7f9719 1952 if(h->deblocking_filter)
93cc10fa 1953 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* Non-intra macroblocks of an H.264 stream: motion compensation. */
bd91fee3 1954 }else if(is_h264){
e7e09b49 1955 hl_motion(h, dest_y, dest_cb, dest_cr,
2833fc46
LM
1956 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1957 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
e7e09b49 1958 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
0da71265 1959 }
e7e09b49
LLL
1960
1961
/* Luma residual for everything except intra 4x4 macroblocks, whose
 * residual was already added block by block above. */
1962 if(!IS_INTRA4x4(mb_type)){
bd91fee3 1963 if(is_h264){
ef9d1d15 1964 if(IS_INTRA16x16(mb_type)){
2fd1f0e0
MN
1965 if(transform_bypass){
1966 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
0a8ca22f
MN
1967 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1968 }else{
1969 for(i=0; i<16; i++){
1970 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1eb96035 1971 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1972 }
2fd1f0e0
MN
1973 }
1974 }else{
1975 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
41e4055b 1976 }
49c084a7 1977 }else if(h->cbp&15){
2fd1f0e0 1978 if(transform_bypass){
0a8ca22f 1979 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1eb96035 1980 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
0a8ca22f 1981 for(i=0; i<16; i+=di){
62bc966f 1982 if(h->non_zero_count_cache[ scan8[i] ]){
ef9d1d15 1983 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
0a8ca22f 1984 }
ef9d1d15 1985 }
2fd1f0e0
MN
1986 }else{
1987 if(IS_8x8DCT(mb_type)){
1988 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1989 }else{
1990 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1991 }
1992 }
4704097a 1993 }
e7e09b49
LLL
1994 }else{
1995 for(i=0; i<16; i++){
1996 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
6867a90b 1997 uint8_t * const ptr= dest_y + block_offset[i];
e7e09b49
LLL
1998 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
1999 }
4704097a 2000 }
0da71265
MN
2001 }
2002 }
0da71265 2003
/* Chroma residual: blocks 16..19 go to dest_cb and 20..23 to dest_cr,
 * selected by (i&4)>>2. Skipped when h->cbp has no chroma bits set
 * (mask 0x30) or when the gray flag is active. */
49fb20cb 2004 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
LM
2005 uint8_t *dest[2] = {dest_cb, dest_cr};
2006 if(transform_bypass){
96465b90
MN
2007 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2008 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2009 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2010 }else{
c25ac15a 2011 idct_add = s->dsp.add_pixels4;
96465b90
MN
2012 for(i=16; i<16+8; i++){
2013 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2014 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2015 }
2016 }
ef9d1d15 2017 }else{
4691a77d
2018 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2019 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
aebb5d6d 2020 if(is_h264){
c25ac15a
MN
2021 idct_add = s->dsp.h264_idct_add;
2022 idct_dc_add = s->dsp.h264_idct_dc_add;
ac0623b2
MN
2023 for(i=16; i<16+8; i++){
2024 if(h->non_zero_count_cache[ scan8[i] ])
2025 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2026 else if(h->mb[i*16])
2027 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2028 }
aebb5d6d
MN
2029 }else{
2030 for(i=16; i<16+8; i++){
2031 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2032 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
889fce8e 2033 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
aebb5d6d 2034 }
e7e09b49 2035 }
4704097a 2036 }
0da71265
MN
2037 }
2038 }
2039 }
/* Clear the coefficient buffer if this macroblock may have used it. */
c212fb0c
MN
2040 if(h->cbp || IS_INTRA(mb_type))
2041 s->dsp.clear_blocks(h->mb);
2042
/* Deblocking: save the borders first (see backup_mb_border), refill the
 * caches, recompute both chroma QPs from the stored qscale and run the
 * generic filter for MBAFF frames or the fast one otherwise. */
53c05b1e 2043 if(h->deblocking_filter) {
5f7f9719
MN
2044 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2045 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2046 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2047 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
bd91fee3 2048 if (!simple && FRAME_MBAFF) {
082cf971 2049 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2050 } else {
082cf971 2051 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
6ba71fc4 2052 }
53c05b1e 2053 }
0da71265
MN
2054}
2055
0da71265 2056/**
bd91fee3
AS
2057 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2058 */
2059static void hl_decode_mb_simple(H264Context *h){
2060 hl_decode_mb_internal(h, 1);
2061}
2062
2063/**
2064 * Process a macroblock; this handles edge cases, such as interlacing.
2065 */
2066static void av_noinline hl_decode_mb_complex(H264Context *h){
2067 hl_decode_mb_internal(h, 0);
2068}
2069
903d58f6 2070void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3 2071 MpegEncContext * const s = &h->s;
64514ee8 2072 const int mb_xy= h->mb_xy;
bd91fee3 2073 const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb 2074 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3 2075
bd91fee3
AS
2076 if (is_complex)
2077 hl_decode_mb_complex(h);
2078 else hl_decode_mb_simple(h);
2079}
2080
0da71265
MN
2081static int pred_weight_table(H264Context *h){
2082 MpegEncContext * const s = &h->s;
2083 int list, i;
9f2d1b4f 2084 int luma_def, chroma_def;
115329f1 2085
9f2d1b4f
LM
2086 h->use_weight= 0;
2087 h->use_weight_chroma= 0;
0da71265
MN
2088 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2089 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
LM
2090 luma_def = 1<<h->luma_log2_weight_denom;
2091 chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
MN
2092
2093 for(list=0; list<2; list++){
cb99c652
GB
2094 h->luma_weight_flag[list] = 0;
2095 h->chroma_weight_flag[list] = 0;
0da71265
MN
2096 for(i=0; i<h->ref_count[list]; i++){
2097 int luma_weight_flag, chroma_weight_flag;
115329f1 2098
0da71265
MN
2099 luma_weight_flag= get_bits1(&s->gb);
2100 if(luma_weight_flag){
2101 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2102 h->luma_offset[list][i]= get_se_golomb(&s->gb);
9f2d1b4f 2103 if( h->luma_weight[list][i] != luma_def
cb99c652 2104 || h->luma_offset[list][i] != 0) {
9f2d1b4f 2105 h->use_weight= 1;
cb99c652
GB
2106 h->luma_weight_flag[list]= 1;
2107 }
9f2d1b4f
LM
2108 }else{
2109 h->luma_weight[list][i]= luma_def;
2110 h->luma_offset[list][i]= 0;
0da71265
MN
2111 }
2112
0af6967e 2113 if(CHROMA){
fef744d4
MN
2114 chroma_weight_flag= get_bits1(&s->gb);
2115 if(chroma_weight_flag){
2116 int j;
2117 for(j=0; j<2; j++){
2118 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2119 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2120 if( h->chroma_weight[list][i][j] != chroma_def
cb99c652 2121 || h->chroma_offset[list][i][j] != 0) {
fef744d4 2122 h->use_weight_chroma= 1;
cb99c652
GB
2123 h->chroma_weight_flag[list]= 1;
2124 }
fef744d4
MN
2125 }
2126 }else{
2127 int j;
2128 for(j=0; j<2; j++){
2129 h->chroma_weight[list][i][j]= chroma_def;
2130 h->chroma_offset[list][i][j]= 0;
2131 }
0da71265
MN
2132 }
2133 }
2134 }
9f5c1037 2135 if(h->slice_type_nos != FF_B_TYPE) break;
0da71265 2136 }
9f2d1b4f 2137 h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
MN
2138 return 0;
2139}
2140
9f2d1b4f
LM
2141static void implicit_weight_table(H264Context *h){
2142 MpegEncContext * const s = &h->s;
cb99c652 2143 int ref0, ref1, i;
9f2d1b4f
LM
2144 int cur_poc = s->current_picture_ptr->poc;
2145
ce09f927
GB
2146 for (i = 0; i < 2; i++) {
2147 h->luma_weight_flag[i] = 0;
2148 h->chroma_weight_flag[i] = 0;
2149 }
2150
9f2d1b4f
LM
2151 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2152 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2153 h->use_weight= 0;
2154 h->use_weight_chroma= 0;
2155 return;
2156 }
2157
2158 h->use_weight= 2;
2159 h->use_weight_chroma= 2;
2160 h->luma_log2_weight_denom= 5;
2161 h->chroma_log2_weight_denom= 5;
2162
9f2d1b4f
LM
2163 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
2164 int poc0 = h->ref_list[0][ref0].poc;
2165 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
738386a5 2166 int poc1 = h->ref_list[1][ref1].poc;
f66e4f5f 2167 int td = av_clip(poc1 - poc0, -128, 127);
9f2d1b4f 2168 if(td){
f66e4f5f 2169 int tb = av_clip(cur_poc - poc0, -128, 127);
c26abfa5 2170 int tx = (16384 + (FFABS(td) >> 1)) / td;
f66e4f5f 2171 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
9f2d1b4f
LM
2172 if(dist_scale_factor < -64 || dist_scale_factor > 128)
2173 h->implicit_weight[ref0][ref1] = 32;
2174 else
2175 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
2176 }else
2177 h->implicit_weight[ref0][ref1] = 32;
2178 }
2179 }
2180}
2181
8fd57a66 2182/**
5175b937 2183 * instantaneous decoder refresh.
0da71265
MN
2184 */
2185static void idr(H264Context *h){
ea6f00c4 2186 ff_h264_remove_all_refs(h);
a149c1a5 2187 h->prev_frame_num= 0;
80f8e035
MN
2188 h->prev_frame_num_offset= 0;
2189 h->prev_poc_msb=
2190 h->prev_poc_lsb= 0;
0da71265
MN
2191}
2192
7c33ad19
LM
2193/* forget old pics after a seek */
2194static void flush_dpb(AVCodecContext *avctx){
2195 H264Context *h= avctx->priv_data;
2196 int i;
64b9d48f 2197 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
285b570f
LM
2198 if(h->delayed_pic[i])
2199 h->delayed_pic[i]->reference= 0;
7c33ad19 2200 h->delayed_pic[i]= NULL;
285b570f 2201 }
df8a7dff 2202 h->outputed_poc= INT_MIN;
b19d493f 2203 h->prev_interlaced_frame = 1;
7c33ad19 2204 idr(h);
ca159196
MR
2205 if(h->s.current_picture_ptr)
2206 h->s.current_picture_ptr->reference= 0;
12d96de3 2207 h->s.first_field= 0;
9c095463 2208 ff_h264_reset_sei(h);
e240f898 2209 ff_mpeg_flush(avctx);
7c33ad19
LM
2210}
2211
0da71265
MN
2212static int init_poc(H264Context *h){
2213 MpegEncContext * const s = &h->s;
2214 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
2215 int field_poc[2];
357282c6 2216 Picture *cur = s->current_picture_ptr;
0da71265 2217
b78a6baa 2218 h->frame_num_offset= h->prev_frame_num_offset;
5710b371 2219 if(h->frame_num < h->prev_frame_num)
b78a6baa 2220 h->frame_num_offset += max_frame_num;
0da71265
MN
2221
2222 if(h->sps.poc_type==0){
2223 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
2224
2225 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
2226 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
2227 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
2228 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
2229 else
2230 h->poc_msb = h->prev_poc_msb;
2231//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1 2232 field_poc[0] =
0da71265 2233 field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1 2234 if(s->picture_structure == PICT_FRAME)
0da71265
MN
2235 field_poc[1] += h->delta_poc_bottom;
2236 }else if(h->sps.poc_type==1){
2237 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
2238 int i;
2239
2240 if(h->sps.poc_cycle_length != 0)
2241 abs_frame_num = h->frame_num_offset + h->frame_num;
2242 else
2243 abs_frame_num = 0;
2244
2245 if(h->nal_ref_idc==0 && abs_frame_num > 0)
2246 abs_frame_num--;
115329f1 2247
0da71265
MN
2248 expected_delta_per_poc_cycle = 0;
2249 for(i=0; i < h->sps.poc_cycle_length; i++)
2250 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
2251
2252 if(abs_frame_num > 0){
2253 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
2254 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
2255
2256 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
2257 for(i = 0; i <= frame_num_in_poc_cycle; i++)
2258 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
2259 } else
2260 expectedpoc = 0;
2261
115329f1 2262 if(h->nal_ref_idc == 0)
0da71265 2263 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1 2264
0da71265
MN
2265 field_poc[0] = expectedpoc + h->delta_poc[0];
2266 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
2267
2268 if(s->picture_structure == PICT_FRAME)
2269 field_poc[1] += h->delta_poc[1];
2270 }else{
b78a6baa 2271 int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371 2272
b78a6baa
MN
2273 if(!h->nal_ref_idc)
2274 poc--;
5710b371 2275
0da71265
MN
2276 field_poc[0]= poc;
2277 field_poc[1]= poc;
2278 }
115329f1 2279
357282c6 2280 if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265 2281 s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6 2282 if(s->picture_structure != PICT_TOP_FIELD)
0da71265 2283 s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6 2284 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
MN
2285
2286 return 0;
2287}
2288
b41c1db3
2289
2290/**
2291 * initialize scan tables
2292 */
2293static void init_scan_tables(H264Context *h){
2294 MpegEncContext * const s = &h->s;
2295 int i;
2296 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
2297 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
2298 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
2299 }else{
2300 for(i=0; i<16; i++){
2301#define T(x) (x>>2) | ((x<<2) & 0xF)
2302 h->zigzag_scan[i] = T(zigzag_scan[i]);
2303 h-> field_scan[i] = T( field_scan[i]);
2304#undef T
2305 }
2306 }
2307 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
45beb850 2308 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
b41c1db3
2309 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
2310 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
2311 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
2312 }else{
2313 for(i=0; i<64; i++){
2314#define T(x) (x>>3) | ((x&7)<<3)
45beb850 2315 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
b41c1db3
2316 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
2317 h->field_scan8x8[i] = T(field_scan8x8[i]);
2318 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
2319#undef T
2320 }
2321 }
2322 if(h->sps.transform_bypass){ //FIXME same ugly
2323 h->zigzag_scan_q0 = zigzag_scan;
45beb850 2324 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
b41c1db3
2325 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
2326 h->field_scan_q0 = field_scan;
2327 h->field_scan8x8_q0 = field_scan8x8;
2328 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
2329 }else{
2330 h->zigzag_scan_q0 = h->zigzag_scan;
2331 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
2332 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
2333 h->field_scan_q0 = h->field_scan;
2334 h->field_scan8x8_q0 = h->field_scan8x8;
2335 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
2336 }
2337}
afebe2f7 2338
256299d3
MN
2339static void field_end(H264Context *h){
2340 MpegEncContext * const s = &h->s;
2341 AVCodecContext * const avctx= s->avctx;
2342 s->mb_y= 0;
2343
2344 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
2345 s->current_picture_ptr->pict_type= s->pict_type;
2346
2347 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2348 ff_vdpau_h264_set_reference_frames(s);
2349
2350 if(!s->dropable) {
ea6f00c4 2351 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
256299d3
MN
2352 h->prev_poc_msb= h->poc_msb;
2353 h->prev_poc_lsb= h->poc_lsb;
2354 }
2355 h->prev_frame_num_offset= h->frame_num_offset;
2356 h->prev_frame_num= h->frame_num;
2357
2358 if (avctx->hwaccel) {
2359 if (avctx->hwaccel->end_frame(avctx) < 0)
2360 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
2361 }
2362
2363 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2364 ff_vdpau_h264_picture_complete(s);
2365
2366 /*
2367 * FIXME: Error handling code does not seem to support interlaced
2368 * when slices span multiple rows
2369 * The ff_er_add_slice calls don't work right for bottom
2370 * fields; they cause massive erroneous error concealing
2371 * Error marking covers both fields (top and bottom).
2372 * This causes a mismatched s->error_count
2373 * and a bad error table. Further, the error count goes to
2374 * INT_MAX when called for bottom field, because mb_y is
2375 * past end by one (callers fault) and resync_mb_y != 0
2376 * causes problems for the first MB line, too.
2377 */
2378 if (!FIELD_PICTURE)
2379 ff_er_frame_end(s);
2380
2381 MPV_frame_end(s);
d225a1e2
MN
2382
2383 h->current_slice=0;
256299d3
MN
2384}
2385
afebe2f7
2386/**
2387 * Replicates H264 "master" context to thread contexts.
2388 */
2389static void clone_slice(H264Context *dst, H264Context *src)
2390{
2391 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
2392 dst->s.current_picture_ptr = src->s.current_picture_ptr;
2393 dst->s.current_picture = src->s.current_picture;
2394 dst->s.linesize = src->s.linesize;
2395 dst->s.uvlinesize = src->s.uvlinesize;
12d96de3 2396 dst->s.first_field = src->s.first_field;
afebe2f7
2397
2398 dst->prev_poc_msb = src->prev_poc_msb;
2399 dst->prev_poc_lsb = src->prev_poc_lsb;
2400 dst->prev_frame_num_offset = src->prev_frame_num_offset;
2401 dst->prev_frame_num = src->prev_frame_num;
2402 dst->short_ref_count = src->short_ref_count;
2403
2404 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
2405 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
2406 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
2407 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
50c21814
2408
2409 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
2410 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
afebe2f7
2411}
2412
0da71265
MN
2413/**
2414 * decodes a slice header.
9c852bcf 2415 * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
2416 *
2417 * @param h h264context
2418 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
2419 *
d9526386 2420 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265 2421 */
afebe2f7 2422static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265 2423 MpegEncContext * const s = &h->s;
12d96de3 2424 MpegEncContext * const s0 = &h0->s;
88e7a4d1 2425 unsigned int first_mb_in_slice;
ac658be5 2426 unsigned int pps_id;
0da71265 2427 int num_ref_idx_active_override_flag;
41f5c62f 2428 unsigned int slice_type, tmp, i, j;
0bf79634 2429 int default_ref_list_done = 0;
12d96de3 2430 int last_pic_structure;
0da71265 2431
2f944356 2432 s->dropable= h->nal_ref_idc == 0;
0da71265 2433
cf653d08
JD
2434 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
2435 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
2436 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
2437 }else{
2438 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
2439 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
2440 }
2441
0da71265
MN
2442 first_mb_in_slice= get_ue_golomb(&s->gb);
2443
d225a1e2
MN
2444 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
2445 if(h0->current_slice && FIELD_PICTURE){
2446 field_end(h);
2447 }
2448
afebe2f7 2449 h0->current_slice = 0;
12d96de3 2450 if (!s0->first_field)
f6e3c460 2451 s->current_picture_ptr= NULL;
66a4b2c1
MN
2452 }
2453
9963b332 2454 slice_type= get_ue_golomb_31(&s->gb);
0bf79634 2455 if(slice_type > 9){
9b879566 2456 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937 2457 return -1;
0da71265 2458 }
0bf79634
LLL
2459 if(slice_type > 4){
2460 slice_type -= 5;
0da71265
MN
2461 h->slice_type_fixed=1;
2462 }else
2463 h->slice_type_fixed=0;
115329f1 2464
ee2a957f 2465 slice_type= golomb_to_pict_type[ slice_type ];
9701840b 2466 if (slice_type == FF_I_TYPE
afebe2f7 2467 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
LLL
2468 default_ref_list_done = 1;
2469 }
2470 h->slice_type= slice_type;
e3e6f18f 2471 h->slice_type_nos= slice_type & 3;
0bf79634 2472
1412060e 2473 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
9701840b 2474 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
33e00731
JD
2475 av_log(h->s.avctx, AV_LOG_ERROR,
2476 "B picture before any references, skipping\n");
2477 return -1;
2478 }
115329f1 2479
0da71265 2480 pps_id= get_ue_golomb(&s->gb);
ac658be5 2481 if(pps_id>=MAX_PPS_COUNT){
9b879566 2482 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
MN
2483 return -1;
2484 }
afebe2f7 2485 if(!h0->pps_buffers[pps_id]) {
a0f80050 2486 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
MN
2487 return -1;
2488 }
afebe2f7 2489 h->pps= *h0->pps_buffers[pps_id];
8b92b792 2490
afebe2f7 2491 if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050 2492 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
MN
2493 return -1;
2494 }
afebe2f7 2495 h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c 2496
50c21814 2497 if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857 2498 h->dequant_coeff_pps = pps_id;
239ea04c
LM
2499 init_dequant_tables(h);
2500 }
115329f1 2501
0da71265 2502 s->mb_width= h->sps.mb_width;
6867a90b 2503 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1 2504
bf4665ee
DM
2505 h->b_stride= s->mb_width*4;
2506 h->b8_stride= s->mb_width*2;
0da71265 2507
faf3dfb9 2508 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
0da71265 2509 if(h->sps.frame_mbs_only_flag)
faf3dfb9 2510 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
0da71265 2511 else
faf3dfb9 2512 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
115329f1
DB
2513
2514 if (s->context_initialized
5ff85f1d 2515 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
afebe2f7
2516 if(h != h0)
2517 return -1; // width / height changed during parallelized decoding
0da71265 2518 free_tables(h);
ff7f75e1 2519 flush_dpb(s->avctx);
0da71265
MN
2520 MPV_common_end(s);
2521 }
2522 if (!s->context_initialized) {
afebe2f7
2523 if(h != h0)
2524 return -1; // we cant (re-)initialize context during parallel decoding
f3bdc3da
RD
2525
2526 avcodec_set_dimensions(s->avctx, s->width, s->height);
2527 s->avctx->sample_aspect_ratio= h->sps.sar;
2528 if(!s->avctx->sample_aspect_ratio.den)
2529 s->avctx->sample_aspect_ratio.den = 1;
2530
c4dffe7e
DC
2531 if(h->sps.video_signal_type_present_flag){
2532 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
2533 if(h->sps.colour_description_present_flag){
2534 s->avctx->color_primaries = h->sps.color_primaries;
2535 s->avctx->color_trc = h->sps.color_trc;
2536 s->avctx->colorspace = h->sps.colorspace;
2537 }
2538 }
2539
f3bdc3da
RD
2540 if(h->sps.timing_info_present_flag){
2541 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
2542 if(h->x264_build > 0 && h->x264_build < 44)
2543 s->avctx->time_base.den *= 2;
2544 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
2545 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
2546 }
2547 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
2548 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
2549
0da71265
MN
2550 if (MPV_common_init(s) < 0)
2551 return -1;
12d96de3 2552 s->first_field = 0;
b19d493f 2553 h->prev_interlaced_frame = 1;
115329f1 2554
b41c1db3 2555 init_scan_tables(h);
903d58f6 2556 ff_h264_alloc_tables(h);
0da71265 2557
afebe2f7
2558 for(i = 1; i < s->avctx->thread_count; i++) {
2559 H264Context *c;
2560 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
79db7ac6 2561 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
afebe2f7
2562 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
2563 c->sps = h->sps;
2564 c->pps = h->pps;
2565 init_scan_tables(c);
2566 clone_tables(c, h);
2567 }
2568
2569 for(i = 0; i < s->avctx->thread_count; i++)
2570 if(context_init(h->thread_context[i]) < 0)
2571 return -1;
0da71265
MN
2572 }
2573
0da71265
MN
2574 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
2575
5d18eaad 2576 h->mb_mbaff = 0;
6ba71fc4 2577 h->mb_aff_frame = 0;
12d96de3 2578 last_pic_structure = s0->picture_structure;
0da71265
MN
2579 if(h->sps.frame_mbs_only_flag){
2580 s->picture_structure= PICT_FRAME;
2581 }else{
6ba71fc4 2582 if(get_bits1(&s->gb)) { //field_pic_flag
0da71265 2583 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4 2584 } else {
0da71265 2585 s->picture_structure= PICT_FRAME;
6ba71fc4 2586 h->mb_aff_frame = h->sps.mb_aff;
6867a90b 2587 }
0da71265 2588 }
44e9dcf1 2589 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
JD
2590
2591 if(h0->current_slice == 0){
26b86e47
MN
2592 while(h->frame_num != h->prev_frame_num &&
2593 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
2594 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6 2595 if (ff_h264_frame_start(h) < 0)
66e6038c 2596 return -1;
26b86e47
MN
2597 h->prev_frame_num++;
2598 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
2599 s->current_picture_ptr->frame_num= h->prev_frame_num;
ea6f00c4 2600 ff_h264_execute_ref_pic_marking(h, NULL, 0);
26b86e47
MN
2601 }
2602
12d96de3
JD
2603 /* See if we have a decoded first field looking for a pair... */
2604 if (s0->first_field) {
2605 assert(s0->current_picture_ptr);
2606 assert(s0->current_picture_ptr->data[0]);
2607 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
2608
2609 /* figure out if we have a complementary field pair */
2610 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
2611 /*
2612 * Previous field is unmatched. Don't display it, but let it
2613 * remain for reference if marked as such.
2614 */
2615 s0->current_picture_ptr = NULL;
2616 s0->first_field = FIELD_PICTURE;
2617
2618 } else {
2619 if (h->nal_ref_idc &&
2620 s0->current_picture_ptr->reference &&
2621 s0->current_picture_ptr->frame_num != h->frame_num) {
2622 /*
2623 * This and previous field were reference, but had
2624 * different frame_nums. Consider this field first in
2625 * pair. Throw away previous field except for reference
2626 * purposes.
2627 */
2628 s0->first_field = 1;
2629 s0->current_picture_ptr = NULL;
2630
2631 } else {
2632 /* Second field in complementary pair */
2633 s0->first_field = 0;
2634 }
2635 }
2636
2637 } else {
2638 /* Frame or first field in a potentially complementary pair */
2639 assert(!s0->current_picture_ptr);
2640 s0->first_field = FIELD_PICTURE;
2641 }
2642
903d58f6 2643 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
12d96de3 2644 s0->first_field = 0;
2ddcf84b 2645 return -1;
12d96de3 2646 }
2ddcf84b
JD
2647 }
2648 if(h != h0)
2649 clone_slice(h, h0);
2650
2651 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
2652
88e7a4d1 2653 assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f 2654 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
MN
2655 first_mb_in_slice >= s->mb_num){
2656 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
MN
2657 return -1;
2658 }
88e7a4d1 2659 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
JD
2660 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
2661 if (s->picture_structure == PICT_BOTTOM_FIELD)
2662 s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1 2663 assert(s->mb_y < s->mb_height);
115329f1 2664
0da71265
MN
2665 if(s->picture_structure==PICT_FRAME){
2666 h->curr_pic_num= h->frame_num;
2667 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
2668 }else{
f57e2af6 2669 h->curr_pic_num= 2*h->frame_num + 1;
0da71265
MN
2670 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
2671 }
115329f1 2672
0da71265 2673 if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b 2674 get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265 2675 }
115329f1 2676
0da71265
MN
2677 if(h->sps.poc_type==0){
2678 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1 2679
0da71265
MN
2680 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
2681 h->delta_poc_bottom= get_se_golomb(&s->gb);
2682 }
2683 }
115329f1 2684
0da71265
MN
2685 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
2686 h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1 2687
0da71265
MN
2688 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
2689 h->delta_poc[1]= get_se_golomb(&s->gb);
2690 }
115329f1 2691
0da71265 2692 init_poc(h);
115329f1 2693
0da71265
MN
2694 if(h->pps.redundant_pic_cnt_present){
2695 h->redundant_pic_count= get_ue_golomb(&s->gb);
2696 }
2697
1412060e 2698 //set defaults, might be overridden a few lines later
0da71265
MN
2699 h->ref_count[0]= h->pps.ref_count[0];
2700 h->ref_count[1]= h->pps.ref_count[1];
2701
e3e6f18f 2702 if(h->slice_type_nos != FF_I_TYPE){
9f5c1037 2703 if(h->slice_type_nos == FF_B_TYPE){
0da71265
MN
2704 h->direct_spatial_mv_pred= get_bits1(&s->gb);
2705 }
2706 num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1 2707
0da71265
MN
2708 if(num_ref_idx_active_override_flag){
2709 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
9f5c1037 2710 if(h->slice_type_nos==FF_B_TYPE)
0da71265
MN
2711 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
2712
187696fa 2713 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
9b879566 2714 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
88e7a4d1 2715 h->ref_count[0]= h->ref_count[1]= 1;
0da71265
MN
2716 return -1;
2717 }
2718 }
9f5c1037 2719 if(h->slice_type_nos == FF_B_TYPE)
187696fa
MN
2720 h->list_count= 2;
2721 else
2722 h->list_count= 1;
2723 }else
2724 h->list_count= 0;
0da71265 2725
0bf79634 2726 if(!default_ref_list_done){
ea6f00c4 2727 ff_h264_fill_default_ref_list(h);
0da71265
MN
2728 }
2729
ea6f00c4 2730 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
806bb93f 2731 return -1;
0da71265 2732
07dff5c7
MN
2733 if(h->slice_type_nos!=FF_I_TYPE){
2734 s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163 2735 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
MN
2736 }
2737 if(h->slice_type_nos==FF_B_TYPE){
2738 s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163 2739 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
MN
2740 }
2741
932f396f 2742 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
9f5c1037 2743 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
0da71265 2744 pred_weight_table(h);
9f5c1037 2745 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
9f2d1b4f 2746 implicit_weight_table(h);
cb99c652 2747 else {
9f2d1b4f 2748 h->use_weight = 0;
cb99c652
GB
2749 for (i = 0; i < 2; i++) {
2750 h->luma_weight_flag[i] = 0;
2751 h->chroma_weight_flag[i] = 0;
2752 }
2753 }
115329f1 2754
2ddcf84b 2755 if(h->nal_ref_idc)
ea6f00c4 2756 ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265 2757
5d18eaad 2758 if(FRAME_MBAFF)
ea6f00c4 2759 ff_h264_fill_mbaff_ref_list(h);
5d18eaad 2760
8f56e219 2761 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
943f69a6
MN
2762 ff_h264_direct_dist_scale_factor(h);
2763 ff_h264_direct_ref_list_init(h);
8f56e219 2764
e3e6f18f 2765 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
9963b332 2766 tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2767 if(tmp > 2){
2768 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
2769 return -1;
2770 }
2771 h->cabac_init_idc= tmp;
2772 }
e5017ab8
LA
2773
2774 h->last_qscale_diff = 0;
88e7a4d1
MN
2775 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2776 if(tmp>51){
2777 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
MN
2778 return -1;
2779 }
88e7a4d1 2780 s->qscale= tmp;
4691a77d
2781 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2782 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265 2783 //FIXME qscale / qp ... stuff
9701840b 2784 if(h->slice_type == FF_SP_TYPE){
1df1df0b 2785 get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265 2786 }
9701840b 2787 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
1df1df0b 2788 get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
MN
2789 }
2790
53c05b1e 2791 h->deblocking_filter = 1;
3ebc7e04
MN
2792 h->slice_alpha_c0_offset = 0;
2793 h->slice_beta_offset = 0;
0da71265 2794 if( h->pps.deblocking_filter_parameters_present ) {
9963b332 2795 tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
MN
2796 if(tmp > 2){
2797 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2798 return -1;
2799 }
2800 h->deblocking_filter= tmp;
115329f1 2801 if(h->deblocking_filter < 2)
53c05b1e
MN
2802 h->deblocking_filter^= 1; // 1<->0
2803
2804 if( h->deblocking_filter ) {
980a82b7
MN
2805 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
2806 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
0da71265 2807 }
980a82b7 2808 }
afebe2f7 2809
61858a76 2810 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4b30289e 2811 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
9f5c1037 2812 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
61858a76
RD
2813 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2814 h->deblocking_filter= 0;
2815
afebe2f7 2816 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
2817 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2818 /* Cheat slightly for speed:
5d81d641 2819 Do not bother to deblock across slices. */
ec970c21
2820 h->deblocking_filter = 2;
2821 } else {
7ae94d52
2822 h0->max_contexts = 1;
2823 if(!h0->single_decode_warning) {
2824 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2825 h0->single_decode_warning = 1;
2826 }
2827 if(h != h0)
2828 return 1; // deblocking switched inside frame
ec970c21 2829 }
afebe2f7
2830 }
2831
0da71265
MN
2832#if 0 //FMO
2833 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2834 slice_group_change_cycle= get_bits(&s->gb, ?);
2835#endif
2836
afebe2f7
2837 h0->last_slice_type = slice_type;
2838 h->slice_num = ++h0->current_slice;
b735aeea
MN
2839 if(h->slice_num >= MAX_SLICES){
2840 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2841 }
5175b937 2842
c32867b5 2843 for(j=0; j<2; j++){
b735aeea 2844 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
c32867b5
MN
2845 ref2frm[0]=
2846 ref2frm[1]= -1;
d50cdd82 2847 for(i=0; i<16; i++)
c32867b5
MN
2848 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
2849 +(h->ref_list[j][i].reference&3);
d50cdd82
MN
2850 ref2frm[18+0]=
2851 ref2frm[18+1]= -1;
2852 for(i=16; i<48; i++)
2853 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
2854 +(h->ref_list[j][i].reference&3);
c32867b5
MN
2855 }
2856
5d18eaad 2857 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
8a11a969 2858 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad 2859
802e9146
MN
2860 s->avctx->refs= h->sps.ref_frame_count;
2861
0da71265 2862 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87 2863 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
LLL
2864 h->slice_num,
2865 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1 2866 first_mb_in_slice,
49573a87 2867 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
MN
2868 pps_id, h->frame_num,
2869 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2870 h->ref_count[0], h->ref_count[1],
2871 s->qscale,
048bfeeb 2872 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
9f2d1b4f 2873 h->use_weight,
4806b922
MN
2874 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2875 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
MN
2876 );
2877 }
2878
2879 return 0;
2880}
2881
75dd6938
LA
2882int ff_h264_get_slice_type(H264Context *h)
2883{
2884 switch (h->slice_type) {
2885 case FF_P_TYPE: return 0;
2886 case FF_B_TYPE: return 1;
2887 case FF_I_TYPE: return 2;
2888 case FF_SP_TYPE: return 3;
2889 case FF_SI_TYPE: return 4;
2890 default: return -1;
2891 }
2892}
2893
0da71265
MN
2894/**
2895 *
2896 */
2897static inline int get_level_prefix(GetBitContext *gb){
2898 unsigned int buf;
2899 int log;
115329f1 2900
0da71265
MN
2901 OPEN_READER(re, gb);
2902 UPDATE_CACHE(re, gb);
2903 buf=GET_CACHE(re, gb);
115329f1 2904
0da71265
MN
2905 log= 32 - av_log2(buf);
2906#ifdef TRACE
2907 print_bin(buf>>(32-log), log);
0fa8158d 2908 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
0da71265
MN
2909#endif
2910
2911 LAST_SKIP_BITS(re, gb, log);
2912 CLOSE_READER(re, gb);
2913
2914 return log-1;
2915}
2916
43efd19a 2917static inline int get_dct8x8_allowed(H264Context *h){
66c07ca9
MN
2918 if(h->sps.direct_8x8_inference_flag)
2919 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
2920 else
2921 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
43efd19a
LM
2922}
2923
0da71265
MN
2924/**
2925 * decodes a residual block.
2926 * @param n block index
2927 * @param scantable scantable
2928 * @param max_coeff number of coefficients in the block
d9526386 2929 * @return <0 if an error occurred
0da71265 2930 */
239ea04c 2931static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
0da71265 2932 MpegEncContext * const s = &h->s;
0da71265 2933 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
ca3b0d27 2934 int level[16];
2935 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
0da71265
MN
2936
2937 //FIXME put trailing_onex into the context
2938
2939 if(n == CHROMA_DC_BLOCK_INDEX){
2940 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
2941 total_coeff= coeff_token>>2;
115329f1 2942 }else{
0da71265
MN
2943 if(n == LUMA_DC_BLOCK_INDEX){
2944 total_coeff= pred_non_zero_count(h, 0);
2945 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
2946 total_coeff= coeff_token>>2;
2947 }else{
2948 total_coeff= pred_non_zero_count(h, n);
2949 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
2950 total_coeff= coeff_token>>2;
2951 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
2952 }
2953 }
2954
2955 //FIXME set last_non_zero?
2956
2957 if(total_coeff==0)
2958 return 0;
08eb8fcd
MN
2959 if(total_coeff > (unsigned)max_coeff) {
2960 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
a083f53c
AC
2961 return -1;
2962 }
115329f1 2963
0da71265 2964 trailing_ones= coeff_token&3;
a9c9a240 2965 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
0da71265 2966 assert(total_coeff<=16);
115329f1 2967
04824298
MN
2968 i = show_bits(gb, 3);
2969 skip_bits(gb, trailing_ones);
2970 level[0] = 1-((i&4)>>1);
2971 level[1] = 1-((i&2) );
2972 level[2] = 1-((i&1)<<1);
0da71265 2973
93445d16 2974 if(trailing_ones<total_coeff) {
d42fc4a8 2975 int mask, prefix;
ca3b0d27 2976 int suffix_length = total_coeff > 10 && trailing_ones < 3;
d42fc4a8
MN
2977 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
2978 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
2979
2980 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
2981 if(level_code >= 100){
2982 prefix= level_code - 100;
2983 if(prefix == LEVEL_TAB_BITS)
2984 prefix += get_level_prefix(gb);
0da71265 2985
2f2893ce
MN
2986 //first coefficient has suffix_length equal to 0 or 1
2987 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
2988 if(suffix_length)
09a64ee6 2989 level_code= (prefix<<1) + get_bits1(gb); //part
2f2893ce 2990 else
09a64ee6 2991 level_code= prefix; //part
2f2893ce
MN
2992 }else if(prefix==14){
2993 if(suffix_length)
09a64ee6 2994 level_code= (prefix<<1) + get_bits1(gb); //part
2f2893ce
MN
2995 else
2996 level_code= prefix + get_bits(gb, 4); //part
2997 }else{
09a64ee6 2998 level_code= 30 + get_bits(gb, prefix-3); //part
2f2893ce
MN
2999 if(prefix>=16)
3000 level_code += (1<<(prefix-3))-4096;
3001 }
0da71265 3002
2f2893ce 3003 if(trailing_ones < 3) level_code += 2;
0da71265 3004
2f2893ce
MN
3005 suffix_length = 2;
3006 mask= -(level_code&1);
3007 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
d42fc4a8
MN
3008 }else{
3009 if(trailing_ones < 3) level_code += (level_code>>31)|1;
3010
3011 suffix_length = 1;
3012 if(level_code + 3U > 6U)
3013 suffix_length++;
3014 level[trailing_ones]= level_code;
3015 }
ca3b0d27 3016
3017 //remaining coefficients have suffix_length > 0
93445d16 3018 for(i=trailing_ones+1;i<total_coeff;i++) {
8140955d
MN
3019 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
3020 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
3021 level_code= cavlc_level_tab[suffix_length][bitsi][0];
3022
3023 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
3024 if(level_code >= 100){
3025 prefix= level_code - 100;
3026 if(prefix == LEVEL_TAB_BITS){
3027 prefix += get_level_prefix(gb);
3028 }
2d76bf39
MN
3029 if(prefix<15){
3030 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
3031 }else{
3032 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
3033 if(prefix>=16)
3034 level_code += (1<<(prefix-3))-4096;
3035 }
3036 mask= -(level_code&1);
8140955d
MN
3037 level_code= (((2+level_code)>>1) ^ mask) - mask;
3038 }
3039 level[i]= level_code;
3040
3041 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
ca3b0d27 3042 suffix_length++;
3043 }
0da71265
MN
3044 }
3045
3046 if(total_coeff == max_coeff)
3047 zeros_left=0;
3048 else{
3049 if(n == CHROMA_DC_BLOCK_INDEX)
3050 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
3051 else
3052 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
3053 }
0da71265 3054
ca3b0d27 3055 coeff_num = zeros_left + total_coeff - 1;
3056 j = scantable[coeff_num];
0da71265 3057 if(n > 24){
ca3b0d27 3058 block[j] = level[0];
3059 for(i=1;i<total_coeff;i++) {
3060 if(zeros_left <= 0)
3061 run_before = 0;
3062 else if(zeros_left < 7){
3063 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
3064 }else{
3065 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
3066 }
3067 zeros_left -= run_before;
3068 coeff_num -= 1 + run_before;
0da71265
MN
3069 j= scantable[ coeff_num ];
3070
3071 block[j]= level[i];
3072 }
3073 }else{
239ea04c 3074 block[j] = (level[0] * qmul[j] + 32)>>6;
ca3b0d27 3075 for(i=1;i<total_coeff;i++) {
3076 if(zeros_left <= 0)
3077 run_before = 0;
3078 else if(zeros_left < 7){
3079 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
3080 }else{
3081 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
3082 }
3083 zeros_left -= run_before;
3084 coeff_num -= 1 + run_before;
0da71265
MN
3085 j= scantable[ coeff_num ];
3086
239ea04c 3087 block[j]= (level[i] * qmul[j] + 32)>>6;
0da71265
MN
3088 }
3089 }
ca3b0d27 3090
3091 if(zeros_left<0){
3092 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
3093 return -1;
3094 }
3095
0da71265
MN
3096 return 0;
3097}
3098
5d18eaad
LM
3099static void predict_field_decoding_flag(H264Context *h){
3100 MpegEncContext * const s = &h->s;
64514ee8 3101 const int mb_xy= h->mb_xy;
5d18eaad
LM
3102 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
3103 ? s->current_picture.mb_type[mb_xy-1]
3104 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
3105 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
3106 : 0;
3107 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
3108}
3109
0da71265 3110/**
5ad984c9
LM
3111 * decodes a P_SKIP or B_SKIP macroblock
3112 */
3113static void decode_mb_skip(H264Context *h){
3114 MpegEncContext * const s = &h->s;
64514ee8 3115 const int mb_xy= h->mb_xy;
d2cc7468 3116 int mb_type=0;
115329f1 3117
5ad984c9
LM
3118 memset(h->non_zero_count[mb_xy], 0, 16);
3119 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
3120
5d18eaad 3121 if(MB_FIELD)
6ba71fc4 3122 mb_type|= MB_TYPE_INTERLACED;
d2cc7468 3123
9f5c1037 3124 if( h->slice_type_nos == FF_B_TYPE )
5ad984c9
LM
3125 {
3126 // just for fill_caches. pred_direct_motion will set the real mb_type
d00eac6c 3127 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5ad984c9 3128
46f2f05f 3129 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
943f69a6 3130 ff_h264_pred_direct_motion(h, &mb_type);
e6e77eb6 3131 mb_type|= MB_TYPE_SKIP;
5ad984c9
LM
3132 }
3133 else
3134 {
3135 int mx, my;
d2cc7468 3136 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5ad984c9 3137
46f2f05f 3138 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5ad984c9
LM
3139 pred_pskip_motion(h, &mx, &my);
3140 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
3141 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5ad984c9
LM
3142 }
3143
3144 write_back_motion(h, mb_type);
e6e77eb6 3145 s->current_picture.mb_type[mb_xy]= mb_type;
5ad984c9
LM
3146 s->current_picture.qscale_table[mb_xy]= s->qscale;
3147 h->slice_table[ mb_xy ]= h->slice_num;
160d679c 3148 h->prev_mb_skipped= 1;
5ad984c9
LM
3149<