Fix third pixel motion compensation
[libav.git] / libavcodec / svq3.c
CommitLineData
8b82a956
MN
1/*
2 * Copyright (c) 2003 The FFmpeg Project.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 *
19 * How to use this decoder:
20 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
89a79364
MM
21 * have stsd atoms to describe media trak properties. A stsd atom for a
22 * video trak contains 1 or more ImageDescription atoms. These atoms begin
23 * with the 4-byte length of the atom followed by the codec fourcc. Some
24 * decoders need information in this atom to operate correctly. Such
25 * is the case with SVQ3. In order to get the best use out of this decoder,
26 * the calling app must make the SVQ3 ImageDescription atom available
8b82a956
MN
27 * via the AVCodecContext's extradata[_size] field:
28 *
89a79364
MM
29 * AVCodecContext.extradata = pointer to ImageDescription, first characters
30 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
31 * AVCodecContext.extradata_size = size of ImageDescription atom memory
32 * buffer (which will be the same as the ImageDescription atom size field
33 * from the QT file, minus 4 bytes since the length is missing)
34 *
35 * You will know you have these parameters passed correctly when the decoder
36 * correctly decodes this file:
37 * ftp://ftp.mplayerhq.hu/MPlayer/samples/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
8b82a956
MN
38 *
39 */
40
41/**
42 * @file svq3.c
43 * svq3 decoder.
44 */
45
94d44f45
MN
/* Motion vector precision modes; svq3_decode_mb() picks one per inter macroblock. */
#define FULLPEL_MODE  1   /* whole-sample vectors */
#define HALFPEL_MODE  2   /* half-sample vectors  */
#define THIRDPEL_MODE 3   /* third-sample vectors */
49
f7a8c179
MN
/* dual scan (from some older h264 draft)
 o-->o-->o   o
         |  /|
 o   o   o / o
 | / |   |/  |
 o   o   o   o
   /
 o-->o-->o-->o
*/
8b82a956
MN
/*
 * Coefficient scan order used for block type 2 in svq3_decode_block().
 * Every entry is a position x + 4*y inside a 4x4 block; the (x,y) pairs
 * are spelled out next to each row.
 */
static const uint8_t svq3_scan[16] = {
   0,  1,  2,  6,   /* (0,0) (1,0) (2,0) (2,1) */
  10,  3,  7, 11,   /* (2,2) (3,0) (3,1) (3,2) */
   4,  8,  5,  9,   /* (0,1) (0,2) (1,1) (1,2) */
  12, 13, 14, 15,   /* (0,3) (1,3) (2,3) (3,3) */
};
65
/*
 * Maps the golomb code read for a pair of intra 4x4 blocks (0..24) to two
 * indices; svq3_decode_mb() uses them as the last subscript of svq3_pred_1
 * for the first and the second block of the pair respectively.
 */
static const uint8_t svq3_pred_0[25][2] = {
  { 0, 0 }, { 1, 0 }, { 0, 1 }, { 0, 2 }, { 1, 1 },   /* codes  0.. 4 */
  { 2, 0 }, { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },   /* codes  5.. 9 */
  { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },   /* codes 10..14 */
  { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 }, { 2, 4 },   /* codes 15..19 */
  { 3, 3 }, { 4, 2 }, { 4, 3 }, { 3, 4 }, { 4, 4 }    /* codes 20..24 */
};
77
/*
 * Intra 4x4 prediction mode lookup.  svq3_decode_mb() subscripts it as
 * [top mode + 1][left mode + 1][rank from svq3_pred_0]; an entry of -1 is
 * rejected there as an invalid combination.
 */
static const int8_t svq3_pred_1[6][6][5] = {
  { /* first subscript 0 */
    { 2,-1,-1,-1,-1 },
    { 2, 1,-1,-1,-1 },
    { 1, 2,-1,-1,-1 },
    { 2, 1,-1,-1,-1 },
    { 1, 2,-1,-1,-1 },
    { 1, 2,-1,-1,-1 },
  },
  { /* first subscript 1 */
    { 0, 2,-1,-1,-1 },
    { 0, 2, 1, 4, 3 },
    { 0, 1, 2, 4, 3 },
    { 0, 2, 1, 4, 3 },
    { 2, 0, 1, 3, 4 },
    { 0, 4, 2, 1, 3 },
  },
  { /* first subscript 2 */
    { 2, 0,-1,-1,-1 },
    { 2, 1, 0, 4, 3 },
    { 1, 2, 4, 0, 3 },
    { 2, 1, 0, 4, 3 },
    { 2, 1, 4, 3, 0 },
    { 1, 2, 4, 0, 3 },
  },
  { /* first subscript 3 */
    { 2, 0,-1,-1,-1 },
    { 2, 0, 1, 4, 3 },
    { 1, 2, 0, 4, 3 },
    { 2, 1, 0, 4, 3 },
    { 2, 1, 3, 4, 0 },
    { 2, 4, 1, 0, 3 },
  },
  { /* first subscript 4 */
    { 0, 2,-1,-1,-1 },
    { 0, 2, 1, 3, 4 },
    { 1, 2, 3, 0, 4 },
    { 2, 0, 1, 3, 4 },
    { 2, 1, 3, 0, 4 },
    { 2, 0, 4, 3, 1 },
  },
  { /* first subscript 5 */
    { 0, 2,-1,-1,-1 },
    { 0, 2, 4, 1, 3 },
    { 1, 4, 2, 0, 3 },
    { 4, 2, 0, 1, 3 },
    { 2, 0, 1, 4, 3 },
    { 4, 2, 1, 0, 3 },
  },
};
92
/*
 * (run, level) pairs for the 15 shortest coefficient codes; code 0 ends the
 * block and is never looked up.  svq3_decode_block() selects the first table
 * when its "intra" selector is 0 and the second when it is 1.
 */
static const struct { uint8_t run; uint8_t level; } svq3_dct_tables[2][16] = {
  { /* selector 0 */
    { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 },
    { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
    { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 },
    { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 }
  },
  { /* selector 1 */
    { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 },
    { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
    { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 },
    { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 }
  }
};
99
/*
 * Dequantisation multipliers, indexed by the quantiser (0..31).  The IDCT
 * routines multiply by the entry and then shift right by 20 bits.
 */
static const uint32_t svq3_dequant_coeff[32] = {
    3881,   4351,   4890,   5481,   /* qp  0.. 3 */
    6154,   6914,   7761,   8718,   /* qp  4.. 7 */
    9781,  10987,  12339,  13828,   /* qp  8..11 */
   15523,  17435,  19561,  21873,   /* qp 12..15 */
   24552,  27656,  30847,  34870,   /* qp 16..19 */
   38807,  43747,  49103,  54683,   /* qp 20..23 */
   61694,  68745,  77615,  89113,   /* qp 24..27 */
  100253, 109366, 126635, 141533    /* qp 28..31 */
};
106
107
108static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
109 const int qmul= svq3_dequant_coeff[qp];
110#define stride 16
111 int i;
112 int temp[16];
113 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
114 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
115
116 for(i=0; i<4; i++){
117 const int offset= y_offset[i];
118 const int z0= 13*(block[offset+stride*0] + block[offset+stride*4]);
119 const int z1= 13*(block[offset+stride*0] - block[offset+stride*4]);
120 const int z2= 7* block[offset+stride*1] - 17*block[offset+stride*5];
121 const int z3= 17* block[offset+stride*1] + 7*block[offset+stride*5];
122
123 temp[4*i+0]= z0+z3;
124 temp[4*i+1]= z1+z2;
125 temp[4*i+2]= z1-z2;
126 temp[4*i+3]= z0-z3;
127 }
128
129 for(i=0; i<4; i++){
130 const int offset= x_offset[i];
131 const int z0= 13*(temp[4*0+i] + temp[4*2+i]);
132 const int z1= 13*(temp[4*0+i] - temp[4*2+i]);
133 const int z2= 7* temp[4*1+i] - 17*temp[4*3+i];
134 const int z3= 17* temp[4*1+i] + 7*temp[4*3+i];
135
136 block[stride*0 +offset]= ((z0 + z3)*qmul + 0x80000)>>20;
137 block[stride*2 +offset]= ((z1 + z2)*qmul + 0x80000)>>20;
138 block[stride*8 +offset]= ((z1 - z2)*qmul + 0x80000)>>20;
139 block[stride*10+offset]= ((z0 - z3)*qmul + 0x80000)>>20;
140 }
141}
142#undef stride
143
144static void svq3_add_idct_c (uint8_t *dst, DCTELEM *block, int stride, int qp, int dc){
145 const int qmul= svq3_dequant_coeff[qp];
146 int i;
147 uint8_t *cm = cropTbl + MAX_NEG_CROP;
148
149 if (dc) {
150 dc = 13*13*((dc == 1) ? 1538*block[0] : ((qmul*(block[0] >> 3)) / 2));
151 block[0] = 0;
152 }
153
154 for (i=0; i < 4; i++) {
155 const int z0= 13*(block[0 + 4*i] + block[2 + 4*i]);
156 const int z1= 13*(block[0 + 4*i] - block[2 + 4*i]);
157 const int z2= 7* block[1 + 4*i] - 17*block[3 + 4*i];
158 const int z3= 17* block[1 + 4*i] + 7*block[3 + 4*i];
159
160 block[0 + 4*i]= z0 + z3;
161 block[1 + 4*i]= z1 + z2;
162 block[2 + 4*i]= z1 - z2;
163 block[3 + 4*i]= z0 - z3;
164 }
165
166 for (i=0; i < 4; i++) {
167 const int z0= 13*(block[i + 4*0] + block[i + 4*2]);
168 const int z1= 13*(block[i + 4*0] - block[i + 4*2]);
169 const int z2= 7* block[i + 4*1] - 17*block[i + 4*3];
170 const int z3= 17* block[i + 4*1] + 7*block[i + 4*3];
171 const int rr= (dc + 0x80000);
172
173 dst[i + stride*0]= cm[ dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ];
174 dst[i + stride*1]= cm[ dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ];
175 dst[i + stride*2]= cm[ dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ];
176 dst[i + stride*3]= cm[ dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ];
177 }
178}
179
180static void pred4x4_down_left_svq3_c(uint8_t *src, uint8_t *topright, int stride){
181 LOAD_TOP_EDGE
182 LOAD_LEFT_EDGE
183 const __attribute__((unused)) int unu0= t0;
184 const __attribute__((unused)) int unu1= l0;
185
186 src[0+0*stride]=(l1 + t1)>>1;
187 src[1+0*stride]=
188 src[0+1*stride]=(l2 + t2)>>1;
189 src[2+0*stride]=
190 src[1+1*stride]=
191 src[0+2*stride]=
192 src[3+0*stride]=
193 src[2+1*stride]=
194 src[1+2*stride]=
195 src[0+3*stride]=
196 src[3+1*stride]=
197 src[2+2*stride]=
198 src[1+3*stride]=
199 src[3+2*stride]=
200 src[2+3*stride]=
201 src[3+3*stride]=(l3 + t3)>>1;
4cfbf61b 202}
8b82a956
MN
203
/**
 * 16x16 plane intra predictor used by SVQ3: forwards to the shared
 * pred16x16_plane_compat_c() helper with its third argument set to 1.
 */
static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
  pred16x16_plane_compat_c(src, stride, 1);
}
207
208static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
209 int index, const int type) {
210
211 static const uint8_t *const scan_patterns[4] =
212 { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
213
214 int run, level, sign, vlc, limit;
215 const int intra = (3 * type) >> 2;
216 const uint8_t *const scan = scan_patterns[type];
217
218 for (limit=(16 >> intra); index < 16; index=limit, limit+=8) {
219 for (; (vlc = svq3_get_ue_golomb (gb)) != 0; index++) {
220
221 if (vlc == INVALID_VLC)
222 return -1;
223
224 sign = (vlc & 0x1) - 1;
225 vlc = (vlc + 1) >> 1;
226
227 if (type == 3) {
228 if (vlc < 3) {
229 run = 0;
230 level = vlc;
231 } else if (vlc < 4) {
232 run = 1;
233 level = 1;
234 } else {
235 run = (vlc & 0x3);
236 level = ((vlc + 9) >> 2) - run;
237 }
238 } else {
239 if (vlc < 16) {
240 run = svq3_dct_tables[intra][vlc].run;
241 level = svq3_dct_tables[intra][vlc].level;
242 } else if (intra) {
243 run = (vlc & 0x7);
244 level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
245 } else {
246 run = (vlc & 0xF);
247 level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
248 }
249 }
250
251 if ((index += run) >= limit)
252 return -1;
253
254 block[scan[index]] = (level ^ sign) - sign;
255 }
256
257 if (type != 2) {
258 break;
259 }
260 }
261
262 return 0;
263}
264
da3b9756
MM
265static inline void svq3_mc_dir_part (MpegEncContext *s,
266 int x, int y, int width, int height,
267 int mx, int my, int dxy,
268 int thirdpel, int dir, int avg) {
269
270 const Picture *pic = (dir == 0) ? &s->last_picture : &s->next_picture;
8b82a956
MN
271 uint8_t *src, *dest;
272 int i, emu = 0;
669ac79c 273 int blocksize= 2 - (width>>3); //16->0, 8->1, 4->2
8b82a956 274
94d44f45
MN
275 mx += x;
276 my += y;
277
4c701ac8
MN
278 if (mx < 0 || mx >= (s->h_edge_pos - width - 1) ||
279 my < 0 || my >= (s->v_edge_pos - height - 1)) {
8b82a956
MN
280
281 if ((s->flags & CODEC_FLAG_EMU_EDGE)) {
282 emu = 1;
283 }
284
4c701ac8
MN
285 mx = clip (mx, -16, (s->h_edge_pos - width + 15));
286 my = clip (my, -16, (s->v_edge_pos - height + 15));
8b82a956
MN
287 }
288
289 /* form component predictions */
290 dest = s->current_picture.data[0] + x + y*s->linesize;
da3b9756 291 src = pic->data[0] + mx + my*s->linesize;
8b82a956
MN
292
293 if (emu) {
a7d3e772 294 ff_emulated_edge_mc (s->edge_emu_buffer, src, s->linesize, (width + 1), (height + 1),
4c701ac8 295 mx, my, s->h_edge_pos, s->v_edge_pos);
8b82a956
MN
296 src = s->edge_emu_buffer;
297 }
669ac79c 298 if(thirdpel)
da3b9756 299 (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->linesize, width, height);
669ac79c 300 else
da3b9756 301 (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->linesize, height);
8b82a956
MN
302
303 if (!(s->flags & CODEC_FLAG_GRAY)) {
304 mx = (mx + (mx < (int) x)) >> 1;
305 my = (my + (my < (int) y)) >> 1;
306 width = (width >> 1);
307 height = (height >> 1);
669ac79c 308 blocksize++;
8b82a956
MN
309
310 for (i=1; i < 3; i++) {
311 dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize;
da3b9756 312 src = pic->data[i] + mx + my*s->uvlinesize;
8b82a956
MN
313
314 if (emu) {
a7d3e772 315 ff_emulated_edge_mc (s->edge_emu_buffer, src, s->uvlinesize, (width + 1), (height + 1),
4c701ac8 316 mx, my, (s->h_edge_pos >> 1), (s->v_edge_pos >> 1));
a7d3e772 317 src = s->edge_emu_buffer;
8b82a956 318 }
669ac79c 319 if(thirdpel)
da3b9756 320 (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->uvlinesize, width, height);
669ac79c 321 else
da3b9756 322 (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->uvlinesize, height);
8b82a956
MN
323 }
324 }
325}
326
327static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
328 int cbp, dir, mode, mx, my, dx, dy, x, y, part_width, part_height;
329 int i, j, k, l, m;
330 uint32_t vlc;
331 int8_t *top, *left;
332 MpegEncContext *const s = (MpegEncContext *) h;
333 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
334 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
335
336 h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
337 h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
338 h->topright_samples_available = 0xFFFF;
339
340 if (mb_type == 0) { /* SKIP */
da3b9756
MM
341 if (s->pict_type == P_TYPE) {
342 svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 0, 0);
343
344 cbp = 0;
345 mb_type = MB_TYPE_SKIP;
346 } else {
347 for (dir=0; dir < 2; dir++) {
348 for (i=0; i < 4; i++) {
349 for (j=0; j < 4; j++) {
350 int dxy;
351 x = 16*s->mb_x + 4*j;
352 y = 16*s->mb_y + 4*i;
353
354 mx = 2*s->next_picture.motion_val[0][b_xy + j + i*h->b_stride][0];
355 my = 2*s->next_picture.motion_val[0][b_xy + j + i*h->b_stride][1];
356
357 if (dir == 0) {
358 mx = (mx * h->frame_num_offset) / h->prev_frame_num_offset;
359 my = (my * h->frame_num_offset) / h->prev_frame_num_offset;
360 } else {
361 mx = (mx * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset;
362 my = (my * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset;
363 }
364
365 mx = ((unsigned)(mx + 3 + 0x6000))/6 - 0x1000;
366 my = ((unsigned)(my + 3 + 0x6000))/6 - 0x1000;
367 dxy= (mx&1) + 2*(my&1);
368
369 /* update mv_cache */
370 s->current_picture.motion_val[dir][b_xy + j + i*h->b_stride][0] = 3*mx;
371 s->current_picture.motion_val[dir][b_xy + j + i*h->b_stride][1] = 3*my;
372
373 svq3_mc_dir_part (s, x, y, 4, 4, mx>>1, my>>1, dxy, 0, dir, (dir == 1));
374 }
375 }
376 }
8b82a956 377
da3b9756
MM
378 if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48)
379 return -1;
380
381 cbp = golomb_to_inter_cbp[vlc];
382 mb_type = MB_TYPE_16x16;
383 }
8b82a956 384 } else if (mb_type < 8) { /* INTER */
da3b9756
MM
385 int dir0, dir1;
386
8b82a956 387 if (h->thirdpel_flag && h->halfpel_flag == !get_bits (&s->gb, 1)) {
94d44f45 388 mode = THIRDPEL_MODE;
8b82a956 389 } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits (&s->gb, 1)) {
94d44f45 390 mode = HALFPEL_MODE;
8b82a956 391 } else {
94d44f45 392 mode = FULLPEL_MODE;
8b82a956
MN
393 }
394
395 /* fill caches */
da3b9756 396 /* note ref_cache should contain here:
f7a8c179
MN
397 ????????
398 ???11111
399 N??11111
400 N??11111
401 N??11111
402 N
403 */
404
da3b9756
MM
405 for (m=0; m < 2; m++) {
406 if (s->mb_x > 0 && h->intra4x4_pred_mode[mb_xy - 1][0] != -1) {
407 for (i=0; i < 4; i++) {
408 *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - 1 + i*h->b_stride];
409 }
410 } else {
411 for (i=0; i < 4; i++) {
412 *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = 0;
413 }
8b82a956 414 }
da3b9756
MM
415 if (s->mb_y > 0) {
416 memcpy (h->mv_cache[m][scan8[0] - 1*8], s->current_picture.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
417 memset (&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1, 4);
418
419 if (s->mb_x < (s->mb_width - 1)) {
420 *(uint32_t *) h->mv_cache[m][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride + 4];
421 h->ref_cache[m][scan8[0] + 4 - 1*8] =
422 (h->intra4x4_pred_mode[mb_xy - s->mb_stride + 1][0] == -1 ||
423 h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1;
424 }else
425 h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
426 if (s->mb_x > 0) {
427 *(uint32_t *) h->mv_cache[0][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride - 1];
428 h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] == -1) ? PART_NOT_AVAILABLE : 1;
429 }else
430 h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
f7a8c179 431 }else
da3b9756
MM
432 memset (&h->ref_cache[m][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
433
434 if (s->pict_type != B_TYPE)
435 break;
436 }
8b82a956
MN
437
438 /* decode motion vector(s) and form prediction(s) */
da3b9756
MM
439 if (s->pict_type == P_TYPE) {
440 part_width = ((mb_type & 5) == 5) ? 4 : 8 << (mb_type & 1);
441 part_height = 16 >> ((unsigned) mb_type / 3);
442 dir0 = 0;
443 dir1 = 0;
444 } else { /* B_TYPE */
445 part_width = 16;
446 part_height = 16;
447 dir0 = (mb_type == 2) ? 1 : 0;
448 dir1 = (mb_type == 1) ? 0 : 1;
449 }
8b82a956 450
da3b9756 451 for (dir=dir0; dir <= dir1; dir++) {
8b82a956
MN
452 for (i=0; i < 16; i+=part_height) {
453 for (j=0; j < 16; j+=part_width) {
da3b9756
MM
454 int avg=(dir == 1 && dir0 != dir1);
455 int dxy;
8b82a956
MN
456 x = 16*s->mb_x + j;
457 y = 16*s->mb_y + i;
458 k = ((j>>2)&1) + ((i>>1)&2) + ((j>>1)&4) + (i&8);
459
da3b9756 460 pred_motion (h, k, (part_width >> 2), dir, 1, &mx, &my);
8b82a956
MN
461
462 /* clip motion vector prediction to frame border */
4c701ac8
MN
463 mx = clip (mx, -6*x, 6*(s->h_edge_pos - part_width - x));
464 my = clip (my, -6*y, 6*(s->v_edge_pos - part_height - y));
8b82a956
MN
465
466 /* get motion vector differential */
467 dy = svq3_get_se_golomb (&s->gb);
468 dx = svq3_get_se_golomb (&s->gb);
469
470 if (dx == INVALID_VLC || dy == INVALID_VLC) {
471 return -1;
472 }
8b82a956 473 /* compute motion vector */
94d44f45
MN
474 if (mode == THIRDPEL_MODE) {
475 int fx, fy;
476 mx = ((mx + 1)>>1) + dx;
477 my = ((my + 1)>>1) + dy;
478 fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
479 fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
669ac79c 480 dxy= (mx - 3*fx) + 4*(my - 3*fy);
94d44f45 481
da3b9756 482 svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy, 1, dir, avg);
94d44f45
MN
483 mx += mx;
484 my += my;
485 } else if (mode == HALFPEL_MODE) {
486 mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
487 my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
669ac79c 488 dxy= (mx&1) + 2*(my&1);
94d44f45 489
da3b9756 490 svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0, dir, avg);
94d44f45
MN
491 mx *= 3;
492 my *= 3;
493 } else {
494 assert(mode == FULLPEL_MODE);
495 mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
496 my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
497
da3b9756 498 svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0, 0, dir, avg);
94d44f45
MN
499 mx *= 6;
500 my *= 6;
8b82a956
MN
501 }
502
94d44f45 503 /* update mv_cache */
da3b9756 504 fill_rectangle(h->mv_cache[dir][scan8[k]], part_width>>2, part_height>>2, 8, pack16to32(mx,my), 4);
8b82a956
MN
505 }
506 }
da3b9756 507 }
8b82a956 508
da3b9756
MM
509 /* write back or clear motion vectors */
510 if (s->pict_type == P_TYPE || mb_type != 2) {
511 for (i=0; i < 4; i++) {
512 memcpy (s->current_picture.motion_val[0][b_xy + i*h->b_stride], h->mv_cache[0][scan8[0] + 8*i], 4*2*sizeof(int16_t));
513 }
514 } else {
515 for (i=0; i < 4; i++) {
516 memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
517 }
518 }
519 if (s->pict_type == B_TYPE) {
520 if (mb_type != 1) {
521 for (i=0; i < 4; i++) {
522 memcpy (s->current_picture.motion_val[1][b_xy + i*h->b_stride], h->mv_cache[1][scan8[0] + 8*i], 4*2*sizeof(int16_t));
523 }
524 } else {
525 for (i=0; i < 4; i++) {
526 memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
527 }
528 }
8b82a956
MN
529 }
530
531 if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48)
532 return -1;
533
534 cbp = golomb_to_inter_cbp[vlc];
535 mb_type = MB_TYPE_16x16;
da3b9756 536 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
8b82a956
MN
537 memset (h->intra4x4_pred_mode_cache, -1, 8*5*sizeof(int8_t));
538
da3b9756
MM
539 if (mb_type == 8) {
540 if (s->mb_x > 0) {
541 for (i=0; i < 4; i++) {
542 h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
543 }
544 if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
545 h->left_samples_available = 0x5F5F;
546 }
547 }
548 if (s->mb_y > 0) {
549 h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
550 h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
551 h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
552 h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
553
554 if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
555 h->top_samples_available = 0x33FF;
556 }
8b82a956 557 }
8b82a956 558
da3b9756
MM
559 /* decode prediction codes for luma blocks */
560 for (i=0; i < 16; i+=2) {
561 vlc = svq3_get_ue_golomb (&s->gb);
8b82a956 562
da3b9756
MM
563 if (vlc >= 25)
564 return -1;
8b82a956 565
da3b9756
MM
566 left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
567 top = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
8b82a956 568
da3b9756
MM
569 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
570 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
8b82a956 571
da3b9756
MM
572 if (left[1] == -1 || left[2] == -1)
573 return -1;
574 }
575 } else {
576 /* DC_128_PRED block type */
577 for (i=0; i < 4; i++) {
578 memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_PRED, 4);
579 }
8b82a956
MN
580 }
581
582 write_back_intra_pred_mode (h);
da3b9756
MM
583
584 if (mb_type == 8) {
585 check_intra4x4_pred_mode (h);
586
587 h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
588 h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
589 } else {
590 for (i=0; i < 4; i++) {
591 memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_128_PRED, 4);
592 }
593
594 h->top_samples_available = 0x33FF;
595 h->left_samples_available = 0x5F5F;
596 }
8b82a956
MN
597
598 if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48)
599 return -1;
600
601 cbp = golomb_to_intra4x4_cbp[vlc];
602 mb_type = MB_TYPE_INTRA4x4;
603 } else { /* INTRA16x16 */
604 dir = i_mb_type_info[mb_type - 8].pred_mode;
605 dir = (dir >> 1) ^ 3*(dir & 1) ^ 1;
606
607 if ((h->intra16x16_pred_mode = check_intra_pred_mode (h, dir)) == -1)
608 return -1;
609
610 cbp = i_mb_type_info[mb_type - 8].cbp;
611 mb_type = MB_TYPE_INTRA16x16;
612 }
613
614 if (!IS_INTER(mb_type) && s->pict_type != I_TYPE) {
615 for (i=0; i < 4; i++) {
616 memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
617 }
da3b9756
MM
618 if (s->pict_type == B_TYPE) {
619 for (i=0; i < 4; i++) {
620 memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
621 }
622 }
8b82a956
MN
623 }
624 if (!IS_INTRA4x4(mb_type)) {
625 memset (h->intra4x4_pred_mode[mb_xy], DC_PRED, 8);
626 }
627 if (!IS_SKIP(mb_type)) {
f7a8c179
MN
628 memset (h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
629 s->dsp.clear_blocks(h->mb);
8b82a956
MN
630 }
631
632 if (IS_INTRA16x16(mb_type) || (s->pict_type != I_TYPE && s->adaptive_quant && cbp)) {
633 s->qscale += svq3_get_se_golomb (&s->gb);
634
635 if (s->qscale > 31)
636 return -1;
637 }
638 if (IS_INTRA16x16(mb_type)) {
639 if (svq3_decode_block (&s->gb, h->mb, 0, 0))
640 return -1;
641 }
642
643 if (!IS_SKIP(mb_type) && cbp) {
644 l = IS_INTRA16x16(mb_type) ? 1 : 0;
645 m = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
646
647 for (i=0; i < 4; i++) {
648 if ((cbp & (1 << i))) {
649 for (j=0; j < 4; j++) {
650 k = l ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
651 h->non_zero_count_cache[ scan8[k] ] = 1;
652
653 if (svq3_decode_block (&s->gb, &h->mb[16*k], l, m))
654 return -1;
655 }
656 }
657 }
658
659 if ((cbp & 0x30)) {
660 for (i=0; i < 2; ++i) {
661 if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3))
662 return -1;
663 }
664
665 if ((cbp & 0x20)) {
666 for (i=0; i < 8; i++) {
667 h->non_zero_count_cache[ scan8[16+i] ] = 1;
668
669 if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1))
670 return -1;
671 }
672 }
673 }
674 }
675
676 s->current_picture.mb_type[mb_xy] = mb_type;
677
678 if (IS_INTRA(mb_type)) {
679 h->chroma_pred_mode = check_intra_pred_mode (h, DC_PRED8x8);
680 }
681
682 return 0;
683}
684
da3b9756
MM
685static int svq3_decode_slice_header (H264Context *h) {
686 MpegEncContext *const s = (MpegEncContext *) h;
687 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
688 int i, header;
689
690 header = get_bits (&s->gb, 8);
691
692 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
693 /* TODO: what? */
694 fprintf (stderr, "unsupported slice header (%02X)\n", header);
695 return -1;
696 } else {
697 int length = (header >> 5) & 3;
698
699 h->next_slice_index = s->gb.index + 8*show_bits (&s->gb, 8*length) + 8*length;
700
701 if (h->next_slice_index > s->gb.size_in_bits)
702 return -1;
703
704 s->gb.size_in_bits = h->next_slice_index - 8*(length - 1);
705 s->gb.index += 8;
706
707 if (length > 0) {
708 memcpy ((uint8_t *) &s->gb.buffer[s->gb.index >> 3],
709 &s->gb.buffer[s->gb.size_in_bits >> 3], (length - 1));
710 }
711 }
712
713 if ((i = svq3_get_ue_golomb (&s->gb)) == INVALID_VLC || i >= 3)
714 return -1;
715
716 h->slice_type = golomb_to_pict_type[i];
717
718 if ((header & 0x9F) == 2) {
719 i = (s->mb_num < 64) ? 6 : (1 + av_log2 (s->mb_num - 1));
720 s->mb_skip_run = get_bits (&s->gb, i) - (s->mb_x + (s->mb_y * s->mb_width));
721 } else {
722 get_bits1 (&s->gb);
723 s->mb_skip_run = 0;
724 }
725
726 h->slice_num = get_bits (&s->gb, 8);
727 s->qscale = get_bits (&s->gb, 5);
728 s->adaptive_quant = get_bits1 (&s->gb);
729
730 /* unknown fields */
731 get_bits1 (&s->gb);
732
733 if (h->unknown_svq3_flag) {
734 get_bits1 (&s->gb);
735 }
736
737 get_bits1 (&s->gb);
738 get_bits (&s->gb, 2);
739
740 while (get_bits1 (&s->gb)) {
741 get_bits (&s->gb, 8);
742 }
743
744 /* reset intra predictors and invalidate motion vector references */
745 if (s->mb_x > 0) {
746 memset (h->intra4x4_pred_mode[mb_xy - 1], -1, 4*sizeof(int8_t));
747 memset (h->intra4x4_pred_mode[mb_xy - s->mb_x], -1, 8*sizeof(int8_t)*s->mb_x);
748 }
749 if (s->mb_y > 0) {
750 memset (h->intra4x4_pred_mode[mb_xy - s->mb_stride], -1, 8*sizeof(int8_t)*(s->mb_width - s->mb_x));
751
752 if (s->mb_x > 0) {
753 h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] = -1;
754 }
755 }
756
757 return 0;
758}
759
8b82a956
MN
760static int svq3_decode_frame (AVCodecContext *avctx,
761 void *data, int *data_size,
762 uint8_t *buf, int buf_size) {
763 MpegEncContext *const s = avctx->priv_data;
764 H264Context *const h = avctx->priv_data;
da3b9756
MM
765 int m, mb_type;
766
767 *data_size = 0;
8b82a956
MN
768
769 s->flags = avctx->flags;
4c701ac8 770
8b82a956 771 if (!s->context_initialized) {
4c701ac8
MN
772 s->width = avctx->width;
773 s->height = avctx->height;
8b82a956
MN
774 h->pred4x4[DIAG_DOWN_LEFT_PRED] = pred4x4_down_left_svq3_c;
775 h->pred16x16[PLANE_PRED8x8] = pred16x16_plane_svq3_c;
776 h->halfpel_flag = 1;
777 h->thirdpel_flag = 1;
da3b9756 778 h->unknown_svq3_flag = 0;
8b82a956
MN
779 h->chroma_qp = 4;
780
781 if (MPV_common_init (s) < 0)
782 return -1;
783
4c701ac8
MN
784 h->b_stride = 4*s->mb_width;
785
8b82a956 786 alloc_tables (h);
8b82a956 787
da3b9756
MM
788 if (avctx->extradata && avctx->extradata_size >= 0x64
789 && !memcmp (avctx->extradata, "SVQ3", 4)) {
790
791 GetBitContext gb;
8b82a956 792
da3b9756
MM
793 init_get_bits (&gb, (uint8_t *) avctx->extradata + 0x62,
794 8*(avctx->extradata_size - 0x62));
8b82a956 795
da3b9756
MM
796 /* 'frame size code' and optional 'width, height' */
797 if (get_bits (&gb, 3) == 7) {
798 get_bits (&gb, 12);
799 get_bits (&gb, 12);
8b82a956
MN
800 }
801
da3b9756
MM
802 h->halfpel_flag = get_bits1 (&gb);
803 h->thirdpel_flag = get_bits1 (&gb);
8b82a956 804
da3b9756
MM
805 /* unknown fields */
806 get_bits1 (&gb);
807 get_bits1 (&gb);
808 get_bits1 (&gb);
809 get_bits1 (&gb);
8b82a956 810
da3b9756 811 s->low_delay = get_bits1 (&gb);
8b82a956 812
da3b9756
MM
813 /* unknown field */
814 get_bits1 (&gb);
8b82a956 815
da3b9756
MM
816 while (get_bits1 (&gb)) {
817 get_bits (&gb, 8);
818 }
8b82a956 819
da3b9756
MM
820 h->unknown_svq3_flag = get_bits1 (&gb);
821 avctx->has_b_frames = !s->low_delay;
822 }
823 }
8b82a956 824
da3b9756
MM
825 /* special case for last picture */
826 if (buf_size == 0) {
827 if (s->next_picture_ptr && !s->low_delay) {
828 *(AVFrame *) data = *(AVFrame *) &s->next_picture;
829 *data_size = sizeof(AVFrame);
830 }
831 return 0;
832 }
8b82a956 833
da3b9756 834 init_get_bits (&s->gb, buf, 8*buf_size);
8b82a956 835
da3b9756 836 s->mb_x = s->mb_y = 0;
8b82a956 837
da3b9756
MM
838 if (svq3_decode_slice_header (h))
839 return -1;
8b82a956 840
da3b9756
MM
841 s->pict_type = h->slice_type;
842 s->picture_number = h->slice_num;
8b82a956 843
4704097a
MN
844 if(avctx->debug&FF_DEBUG_PICT_INFO){
845 printf("%c hpel:%d, tpel:%d aqp:%d qp:%d\n",
d8085ea7 846 av_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
4704097a
MN
847 s->adaptive_quant, s->qscale
848 );
849 }
8b82a956 850
da3b9756
MM
851 /* for hurry_up==5 */
852 s->current_picture.pict_type = s->pict_type;
853 s->current_picture.key_frame = (s->pict_type == I_TYPE);
854
855 /* skip b frames if we dont have reference frames */
856 if (s->last_picture_ptr == NULL && s->pict_type == B_TYPE) return 0;
857 /* skip b frames if we are in a hurry */
858 if (avctx->hurry_up && s->pict_type == B_TYPE) return 0;
859 /* skip everything if we are in a hurry >= 5 */
860 if (avctx->hurry_up >= 5) return 0;
861
862 if (s->next_p_frame_damaged) {
863 if (s->pict_type == B_TYPE)
864 return 0;
865 else
866 s->next_p_frame_damaged = 0;
867 }
8b82a956
MN
868
869 frame_start (h);
870
da3b9756
MM
871 if (s->pict_type == B_TYPE) {
872 h->frame_num_offset = (h->slice_num - h->prev_frame_num);
873
874 if (h->frame_num_offset < 0) {
875 h->frame_num_offset += 256;
876 }
877 if (h->frame_num_offset == 0 || h->frame_num_offset >= h->prev_frame_num_offset) {
878 printf ("error in B-frame picture id\n");
879 return -1;
880 }
881 } else {
882 h->prev_frame_num = h->frame_num;
883 h->frame_num = h->slice_num;
884 h->prev_frame_num_offset = (h->frame_num - h->prev_frame_num);
885
886 if (h->prev_frame_num_offset < 0) {
887 h->prev_frame_num_offset += 256;
888 }
889 }
890
891 for(m=0; m<2; m++){
892 int i;
893 for(i=0; i<4; i++){
894 int j;
895 for(j=-1; j<4; j++)
896 h->ref_cache[m][scan8[0] + 8*i + j]= 1;
897 h->ref_cache[m][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE;
898 }
f7a8c179
MN
899 }
900
8b82a956
MN
901 for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
902 for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
da3b9756
MM
903
904 if ( (s->gb.index + 7) >= s->gb.size_in_bits &&
905 ((s->gb.index & 7) == 0 || show_bits (&s->gb, (-s->gb.index & 7)) == 0)) {
906
907 s->gb.index = h->next_slice_index;
908 s->gb.size_in_bits = 8*buf_size;
909
910 if (svq3_decode_slice_header (h))
911 return -1;
912
913 /* TODO: support s->mb_skip_run */
914 }
915
916 mb_type = svq3_get_ue_golomb (&s->gb);
8b82a956
MN
917
918 if (s->pict_type == I_TYPE) {
919 mb_type += 8;
da3b9756
MM
920 } else if (s->pict_type == B_TYPE && mb_type >= 4) {
921 mb_type += 4;
8b82a956 922 }
da3b9756 923 if (mb_type > 33 || svq3_decode_mb (h, mb_type)) {
8b82a956
MN
924 fprintf (stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
925 return -1;
926 }
927
928 if (mb_type != 0) {
929 hl_decode_mb (h);
930 }
931 }
4c701ac8
MN
932
933 ff_draw_horiz_band(s, 16*s->mb_y, 16);
8b82a956
MN
934 }
935
8b82a956 936 MPV_frame_end(s);
da3b9756
MM
937
938 if (s->pict_type == B_TYPE || s->low_delay) {
939 *(AVFrame *) data = *(AVFrame *) &s->current_picture;
940 } else {
941 *(AVFrame *) data = *(AVFrame *) &s->last_picture;
942 }
943
944 avctx->frame_number = s->picture_number - 1;
945
946 /* dont output the last pic after seeking */
947 if (s->last_picture_ptr || s->low_delay) {
948 *data_size = sizeof(AVFrame);
949 }
950
8b82a956
MN
951 return buf_size;
952}
953
954
955AVCodec svq3_decoder = {
956 "svq3",
957 CODEC_TYPE_VIDEO,
958 CODEC_ID_SVQ3,
959 sizeof(H264Context),
960 decode_init,
961 NULL,
962 decode_end,
963 svq3_decode_frame,
4c701ac8 964 CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
8b82a956 965};