optimize
[libav.git] / libavcodec / svq3.c
CommitLineData
8b82a956
MN
/*
 * Copyright (c) 2003 The FFmpeg Project.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *
 * How to use this decoder:
 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
 * have stsd atoms to describe media trak properties. A stsd atom for a
 * video trak contains 1 or more ImageDescription atoms. These atoms begin
 * with the 4-byte length of the atom followed by the codec fourcc. Some
 * decoders need information in this atom to operate correctly. Such
 * is the case with SVQ3. In order to get the best use out of this decoder,
 * the calling app must make the SVQ3 ImageDescription atom available
 * via the AVCodecContext's extradata[_size] field:
 *
 * AVCodecContext.extradata = pointer to ImageDescription, first characters
 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
 * AVCodecContext.extradata_size = size of ImageDescription atom memory
 * buffer (which will be the same as the ImageDescription atom size field
 * from the QT file, minus 4 bytes since the length is missing)
 *
 * You will know you have these parameters passed correctly when the decoder
 * correctly decodes this file:
 *  ftp://ftp.mplayerhq.hu/MPlayer/samples/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
 *
 */
40
/**
 * @file svq3.c
 * svq3 decoder.
 */
45
94d44f45
MN
/* Motion vector precision modes selected per inter macroblock in
 * svq3_decode_mb(). */
#define FULLPEL_MODE  1
#define HALFPEL_MODE  2
#define THIRDPEL_MODE 3
49
f7a8c179
MN
/* dual scan (from some older h264 draft)
 o-->o-->o   o
         |  /|
 o   o   o / o
 | / |   |/  |
 o   o   o   o
   /
 o-->o-->o-->o
*/
8b82a956
MN
/**
 * Coefficient scan order for a 4x4 block, selected as scan pattern 2 in
 * svq3_decode_block(). Each entry is written as x + y*4, i.e. the position
 * inside the 16-entry block that the n-th decoded coefficient is stored at.
 */
static const uint8_t svq3_scan[16] = {
    0+0*4, 1+0*4, 2+0*4, 2+1*4,
    2+2*4, 3+0*4, 3+1*4, 3+2*4,
    0+1*4, 0+2*4, 1+1*4, 1+2*4,
    0+3*4, 1+3*4, 2+3*4, 3+3*4,
};
65
/**
 * Maps one intra 4x4 prediction code (0..24) to a pair of indices, one for
 * each of the two luma blocks it covers. svq3_decode_mb() uses each index to
 * pick an entry from the innermost dimension of svq3_pred_1.
 */
static const uint8_t svq3_pred_0[25][2] = {
    { 0, 0 },
    { 1, 0 }, { 0, 1 },
    { 0, 2 }, { 1, 1 }, { 2, 0 },
    { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
    { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
    { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
    { 2, 4 }, { 3, 3 }, { 4, 2 },
    { 4, 3 }, { 3, 4 },
    { 4, 4 }
};
77
/**
 * Intra 4x4 prediction mode lookup, indexed as
 * [top mode + 1][left mode + 1][index from svq3_pred_0].
 * The "+ 1" lets a neighbour mode of -1 select row/column 0.
 * An entry of -1 is treated as an error by svq3_decode_mb().
 */
static const int8_t svq3_pred_1[6][6][5] = {
    { { 2,-1,-1,-1,-1 }, { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 },
      { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 }, { 1, 2,-1,-1,-1 } },
    { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
      { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
    { { 2, 0,-1,-1,-1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
      { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
    { { 2, 0,-1,-1,-1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
      { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
    { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
      { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
    { { 0, 2,-1,-1,-1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
      { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
};
92
/**
 * (run, level) pairs for coefficient codes below 16, used by
 * svq3_decode_block(). The first index is that function's "intra" selector
 * (0 or 1); the second is the halved code value.
 */
static const struct { uint8_t run; uint8_t level; } svq3_dct_tables[2][16] = {
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
      { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
      { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
};
99
/**
 * Dequantisation multipliers indexed by quantiser (qp), 32 entries.
 * Callers must keep qp within 0..31.
 */
static const uint32_t svq3_dequant_coeff[32] = {
     3881,  4351,  4890,  5481,  6154,  6914,  7761,  8718,
     9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
    24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
    61694, 68745, 77615, 89113,100253,109366,126635,141533
};
106
107
108static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
109 const int qmul= svq3_dequant_coeff[qp];
110#define stride 16
111 int i;
112 int temp[16];
113 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
114 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
115
116 for(i=0; i<4; i++){
117 const int offset= y_offset[i];
118 const int z0= 13*(block[offset+stride*0] + block[offset+stride*4]);
119 const int z1= 13*(block[offset+stride*0] - block[offset+stride*4]);
120 const int z2= 7* block[offset+stride*1] - 17*block[offset+stride*5];
121 const int z3= 17* block[offset+stride*1] + 7*block[offset+stride*5];
122
123 temp[4*i+0]= z0+z3;
124 temp[4*i+1]= z1+z2;
125 temp[4*i+2]= z1-z2;
126 temp[4*i+3]= z0-z3;
127 }
128
129 for(i=0; i<4; i++){
130 const int offset= x_offset[i];
131 const int z0= 13*(temp[4*0+i] + temp[4*2+i]);
132 const int z1= 13*(temp[4*0+i] - temp[4*2+i]);
133 const int z2= 7* temp[4*1+i] - 17*temp[4*3+i];
134 const int z3= 17* temp[4*1+i] + 7*temp[4*3+i];
135
136 block[stride*0 +offset]= ((z0 + z3)*qmul + 0x80000)>>20;
137 block[stride*2 +offset]= ((z1 + z2)*qmul + 0x80000)>>20;
138 block[stride*8 +offset]= ((z1 - z2)*qmul + 0x80000)>>20;
139 block[stride*10+offset]= ((z0 - z3)*qmul + 0x80000)>>20;
140 }
141}
142#undef stride
143
144static void svq3_add_idct_c (uint8_t *dst, DCTELEM *block, int stride, int qp, int dc){
145 const int qmul= svq3_dequant_coeff[qp];
146 int i;
147 uint8_t *cm = cropTbl + MAX_NEG_CROP;
148
149 if (dc) {
150 dc = 13*13*((dc == 1) ? 1538*block[0] : ((qmul*(block[0] >> 3)) / 2));
151 block[0] = 0;
152 }
153
154 for (i=0; i < 4; i++) {
155 const int z0= 13*(block[0 + 4*i] + block[2 + 4*i]);
156 const int z1= 13*(block[0 + 4*i] - block[2 + 4*i]);
157 const int z2= 7* block[1 + 4*i] - 17*block[3 + 4*i];
158 const int z3= 17* block[1 + 4*i] + 7*block[3 + 4*i];
159
160 block[0 + 4*i]= z0 + z3;
161 block[1 + 4*i]= z1 + z2;
162 block[2 + 4*i]= z1 - z2;
163 block[3 + 4*i]= z0 - z3;
164 }
165
166 for (i=0; i < 4; i++) {
167 const int z0= 13*(block[i + 4*0] + block[i + 4*2]);
168 const int z1= 13*(block[i + 4*0] - block[i + 4*2]);
169 const int z2= 7* block[i + 4*1] - 17*block[i + 4*3];
170 const int z3= 17* block[i + 4*1] + 7*block[i + 4*3];
171 const int rr= (dc + 0x80000);
172
173 dst[i + stride*0]= cm[ dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ];
174 dst[i + stride*1]= cm[ dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ];
175 dst[i + stride*2]= cm[ dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ];
176 dst[i + stride*3]= cm[ dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ];
177 }
178}
179
180static void pred4x4_down_left_svq3_c(uint8_t *src, uint8_t *topright, int stride){
181 LOAD_TOP_EDGE
182 LOAD_LEFT_EDGE
183 const __attribute__((unused)) int unu0= t0;
184 const __attribute__((unused)) int unu1= l0;
185
186 src[0+0*stride]=(l1 + t1)>>1;
187 src[1+0*stride]=
188 src[0+1*stride]=(l2 + t2)>>1;
189 src[2+0*stride]=
190 src[1+1*stride]=
191 src[0+2*stride]=
192 src[3+0*stride]=
193 src[2+1*stride]=
194 src[1+2*stride]=
195 src[0+3*stride]=
196 src[3+1*stride]=
197 src[2+2*stride]=
198 src[1+3*stride]=
199 src[3+2*stride]=
200 src[2+3*stride]=
201 src[3+3*stride]=(l3 + t3)>>1;
202};
203
/**
 * SVQ3 variant of the 16x16 plane intra predictor: forwards to
 * pred16x16_plane_compat_c() with its third argument set to 1.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between rows
 */
static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 1);
}
207
208static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
209 int index, const int type) {
210
211 static const uint8_t *const scan_patterns[4] =
212 { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
213
214 int run, level, sign, vlc, limit;
215 const int intra = (3 * type) >> 2;
216 const uint8_t *const scan = scan_patterns[type];
217
218 for (limit=(16 >> intra); index < 16; index=limit, limit+=8) {
219 for (; (vlc = svq3_get_ue_golomb (gb)) != 0; index++) {
220
221 if (vlc == INVALID_VLC)
222 return -1;
223
224 sign = (vlc & 0x1) - 1;
225 vlc = (vlc + 1) >> 1;
226
227 if (type == 3) {
228 if (vlc < 3) {
229 run = 0;
230 level = vlc;
231 } else if (vlc < 4) {
232 run = 1;
233 level = 1;
234 } else {
235 run = (vlc & 0x3);
236 level = ((vlc + 9) >> 2) - run;
237 }
238 } else {
239 if (vlc < 16) {
240 run = svq3_dct_tables[intra][vlc].run;
241 level = svq3_dct_tables[intra][vlc].level;
242 } else if (intra) {
243 run = (vlc & 0x7);
244 level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
245 } else {
246 run = (vlc & 0xF);
247 level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
248 }
249 }
250
251 if ((index += run) >= limit)
252 return -1;
253
254 block[scan[index]] = (level ^ sign) - sign;
255 }
256
257 if (type != 2) {
258 break;
259 }
260 }
261
262 return 0;
263}
264
265static void sixpel_mc_put (MpegEncContext *s,
266 uint8_t *src, uint8_t *dst, int stride,
267 int dxy, int width, int height) {
268 int i, j;
269
270 switch (dxy) {
271 case 6*0+0:
272 for (i=0; i < height; i++) {
273 memcpy (dst, src, width);
274 src += stride;
275 dst += stride;
276 }
277 break;
278 case 6*0+2:
279 for (i=0; i < height; i++) {
280 for (j=0; j < width; j++) {
281 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
282 }
283 src += stride;
284 dst += stride;
285 }
286 break;
287 case 6*0+3:
288 for (i=0; i < height; i++) {
289 for (j=0; j < width; j++) {
290 dst[j] = (src[j] + src[j+1] + 1) >> 1;
291 }
292 src += stride;
293 dst += stride;
294 }
295 break;
296 case 6*0+4:
297 for (i=0; i < height; i++) {
298 for (j=0; j < width; j++) {
299 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
300 }
301 src += stride;
302 dst += stride;
303 }
304 break;
305 case 6*2+0:
306 for (i=0; i < height; i++) {
307 for (j=0; j < width; j++) {
308 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
309 }
310 src += stride;
311 dst += stride;
312 }
313 break;
314 case 6*2+2:
315 for (i=0; i < height; i++) {
316 for (j=0; j < width; j++) {
317 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
318 }
319 src += stride;
320 dst += stride;
321 }
322 break;
323 case 6*2+4:
324 for (i=0; i < height; i++) {
325 for (j=0; j < width; j++) {
326 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
327 }
328 src += stride;
329 dst += stride;
330 }
331 break;
332 case 6*3+0:
333 for (i=0; i < height; i++) {
334 for (j=0; j < width; j++) {
335 dst[j] = (src[j] + src[j+stride]+1) >> 1;
336 }
337 src += stride;
338 dst += stride;
339 }
340 break;
341 case 6*3+3:
342 for (i=0; i < height; i++) {
343 for (j=0; j < width; j++) {
344 dst[j] = (src[j] + src[j+1] + src[j+stride] + src[j+stride+1] + 2) >> 2;
345 }
346 src += stride;
347 dst += stride;
348 }
349 break;
350 case 6*4+0:
351 for (i=0; i < height; i++) {
352 for (j=0; j < width; j++) {
353 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
354 }
355 src += stride;
356 dst += stride;
357 }
358 break;
359 case 6*4+2:
360 for (i=0; i < height; i++) {
361 for (j=0; j < width; j++) {
362 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
363 }
364 src += stride;
365 dst += stride;
366 }
367 break;
368 case 6*4+4:
369 for (i=0; i < height; i++) {
370 for (j=0; j < width; j++) {
371 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
372 }
373 src += stride;
374 dst += stride;
375 }
376 break;
377 }
378}
379
380static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
94d44f45 381 int width, int height, int mx, int my, int dxy) {
8b82a956
MN
382 uint8_t *src, *dest;
383 int i, emu = 0;
8b82a956 384
94d44f45
MN
385 mx += x;
386 my += y;
387
8b82a956
MN
388 if (mx < 0 || mx >= (s->width - width - 1) ||
389 my < 0 || my >= (s->height - height - 1)) {
390
391 if ((s->flags & CODEC_FLAG_EMU_EDGE)) {
392 emu = 1;
393 }
394
395 mx = clip (mx, -16, (s->width - width + 15));
396 my = clip (my, -16, (s->height - height + 15));
397 }
398
399 /* form component predictions */
400 dest = s->current_picture.data[0] + x + y*s->linesize;
401 src = s->last_picture.data[0] + mx + my*s->linesize;
402
403 if (emu) {
404 ff_emulated_edge_mc (s, src, s->linesize, (width + 1), (height + 1),
405 mx, my, s->width, s->height);
406 src = s->edge_emu_buffer;
407 }
408 sixpel_mc_put (s, src, dest, s->linesize, dxy, width, height);
409
410 if (!(s->flags & CODEC_FLAG_GRAY)) {
411 mx = (mx + (mx < (int) x)) >> 1;
412 my = (my + (my < (int) y)) >> 1;
413 width = (width >> 1);
414 height = (height >> 1);
415
416 for (i=1; i < 3; i++) {
417 dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize;
418 src = s->last_picture.data[i] + mx + my*s->uvlinesize;
419
420 if (emu) {
421 ff_emulated_edge_mc (s, src, s->uvlinesize, (width + 1), (height + 1),
422 mx, my, (s->width >> 1), (s->height >> 1));
423 src = s->edge_emu_buffer;
424 }
425 sixpel_mc_put (s, src, dest, s->uvlinesize, dxy, width, height);
426 }
427 }
428}
429
430static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
431 int cbp, dir, mode, mx, my, dx, dy, x, y, part_width, part_height;
432 int i, j, k, l, m;
433 uint32_t vlc;
434 int8_t *top, *left;
435 MpegEncContext *const s = (MpegEncContext *) h;
436 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
437 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
438
439 h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
440 h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
441 h->topright_samples_available = 0xFFFF;
442
443 if (mb_type == 0) { /* SKIP */
94d44f45 444 svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0);
8b82a956
MN
445
446 cbp = 0;
447 mb_type = MB_TYPE_SKIP;
448 } else if (mb_type < 8) { /* INTER */
449 if (h->thirdpel_flag && h->halfpel_flag == !get_bits (&s->gb, 1)) {
94d44f45 450 mode = THIRDPEL_MODE;
8b82a956 451 } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits (&s->gb, 1)) {
94d44f45 452 mode = HALFPEL_MODE;
8b82a956 453 } else {
94d44f45 454 mode = FULLPEL_MODE;
8b82a956
MN
455 }
456
457 /* fill caches */
f7a8c179
MN
458 /* note ref_cache[0] should contain here:
459 ????????
460 ???11111
461 N??11111
462 N??11111
463 N??11111
464 N
465 */
466
8b82a956
MN
467 if (s->mb_x > 0) {
468 for (i=0; i < 4; i++) {
469 *(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - 1 + i*h->b_stride];
8b82a956
MN
470 }
471 } else {
472 for (i=0; i < 4; i++) {
473 *(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = 0;
8b82a956
MN
474 }
475 }
476 if (s->mb_y > 0) {
477 memcpy (h->mv_cache[0][scan8[0] - 1*8], s->current_picture.motion_val[0][b_xy - h->b_stride], 4*2*sizeof(int16_t));
478 memset (&h->ref_cache[0][scan8[0] - 1*8], 1, 4);
479
480 if (s->mb_x < (s->mb_width - 1)) {
481 *(uint32_t *) h->mv_cache[0][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride + 4];
482 h->ref_cache[0][scan8[0] + 4 - 1*8] = 1;
f7a8c179
MN
483 }else
484 h->ref_cache[0][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
8b82a956
MN
485 if (s->mb_x > 0) {
486 *(uint32_t *) h->mv_cache[0][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride - 1];
487 h->ref_cache[0][scan8[0] - 1 - 1*8] = 1;
f7a8c179
MN
488 }else
489 h->ref_cache[0][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
490 }else
491 memset (&h->ref_cache[0][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
8b82a956
MN
492
493 /* decode motion vector(s) and form prediction(s) */
494 part_width = ((mb_type & 5) == 5) ? 4 : 8 << (mb_type & 1);
495 part_height = 16 >> ((unsigned) mb_type / 3);
496
497 for (i=0; i < 16; i+=part_height) {
498 for (j=0; j < 16; j+=part_width) {
94d44f45 499 int dxy;
8b82a956
MN
500 x = 16*s->mb_x + j;
501 y = 16*s->mb_y + i;
502 k = ((j>>2)&1) + ((i>>1)&2) + ((j>>1)&4) + (i&8);
503
504 pred_motion (h, k, (part_width >> 2), 0, 1, &mx, &my);
505
506 /* clip motion vector prediction to frame border */
507 mx = clip (mx, -6*x, 6*(s->width - part_width - x));
508 my = clip (my, -6*y, 6*(s->height - part_height - y));
509
510 /* get motion vector differential */
511 dy = svq3_get_se_golomb (&s->gb);
512 dx = svq3_get_se_golomb (&s->gb);
513
514 if (dx == INVALID_VLC || dy == INVALID_VLC) {
515 return -1;
516 }
8b82a956 517 /* compute motion vector */
94d44f45
MN
518 if (mode == THIRDPEL_MODE) {
519 int fx, fy;
520 mx = ((mx + 1)>>1) + dx;
521 my = ((my + 1)>>1) + dy;
522 fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
523 fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
524 dxy= 2*(mx - 3*fx) + 2*6*(my - 3*fy);
525
526 svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy);
527 mx += mx;
528 my += my;
529 } else if (mode == HALFPEL_MODE) {
530 mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
531 my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
532 dxy= 3*(mx&1) + 6*3*(my&1);
533
534 svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy);
535 mx *= 3;
536 my *= 3;
537 } else {
538 assert(mode == FULLPEL_MODE);
539 mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
540 my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
541
542 svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0);
543 mx *= 6;
544 my *= 6;
8b82a956
MN
545 }
546
94d44f45 547 /* update mv_cache */
f7a8c179 548 fill_rectangle(h->mv_cache[0][scan8[k]], part_width>>2, part_height>>2, 8, (mx&0xFFFF)+(my<<16), 4);
8b82a956
MN
549 }
550 }
551
552 for (i=0; i < 4; i++) {
553 memcpy (s->current_picture.motion_val[0][b_xy + i*h->b_stride], h->mv_cache[0][scan8[0] + 8*i], 4*2*sizeof(int16_t));
554 }
555
556 if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48)
557 return -1;
558
559 cbp = golomb_to_inter_cbp[vlc];
560 mb_type = MB_TYPE_16x16;
561 } else if (mb_type == 8) { /* INTRA4x4 */
562 memset (h->intra4x4_pred_mode_cache, -1, 8*5*sizeof(int8_t));
563
564 if (s->mb_x > 0) {
565 for (i=0; i < 4; i++) {
566 h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
567 }
568 }
569 if (s->mb_y > 0) {
570 h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
571 h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
572 h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
573 h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
574 }
575
576 /* decode prediction codes for luma blocks */
577 for (i=0; i < 16; i+=2) {
578 vlc = svq3_get_ue_golomb (&s->gb);
579
580 if (vlc >= 25)
581 return -1;
582
583 left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
584 top = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
585
586 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
587 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
588
589 if (left[1] == -1 || left[2] == -1)
590 return -1;
591 }
592
593 write_back_intra_pred_mode (h);
594 check_intra4x4_pred_mode (h);
595
596 if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48)
597 return -1;
598
599 cbp = golomb_to_intra4x4_cbp[vlc];
600 mb_type = MB_TYPE_INTRA4x4;
601 } else { /* INTRA16x16 */
602 dir = i_mb_type_info[mb_type - 8].pred_mode;
603 dir = (dir >> 1) ^ 3*(dir & 1) ^ 1;
604
605 if ((h->intra16x16_pred_mode = check_intra_pred_mode (h, dir)) == -1)
606 return -1;
607
608 cbp = i_mb_type_info[mb_type - 8].cbp;
609 mb_type = MB_TYPE_INTRA16x16;
610 }
611
612 if (!IS_INTER(mb_type) && s->pict_type != I_TYPE) {
613 for (i=0; i < 4; i++) {
614 memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
615 }
616 }
617 if (!IS_INTRA4x4(mb_type)) {
618 memset (h->intra4x4_pred_mode[mb_xy], DC_PRED, 8);
619 }
620 if (!IS_SKIP(mb_type)) {
f7a8c179
MN
621 memset (h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
622 s->dsp.clear_blocks(h->mb);
8b82a956
MN
623 }
624
625 if (IS_INTRA16x16(mb_type) || (s->pict_type != I_TYPE && s->adaptive_quant && cbp)) {
626 s->qscale += svq3_get_se_golomb (&s->gb);
627
628 if (s->qscale > 31)
629 return -1;
630 }
631 if (IS_INTRA16x16(mb_type)) {
632 if (svq3_decode_block (&s->gb, h->mb, 0, 0))
633 return -1;
634 }
635
636 if (!IS_SKIP(mb_type) && cbp) {
637 l = IS_INTRA16x16(mb_type) ? 1 : 0;
638 m = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
639
640 for (i=0; i < 4; i++) {
641 if ((cbp & (1 << i))) {
642 for (j=0; j < 4; j++) {
643 k = l ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
644 h->non_zero_count_cache[ scan8[k] ] = 1;
645
646 if (svq3_decode_block (&s->gb, &h->mb[16*k], l, m))
647 return -1;
648 }
649 }
650 }
651
652 if ((cbp & 0x30)) {
653 for (i=0; i < 2; ++i) {
654 if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3))
655 return -1;
656 }
657
658 if ((cbp & 0x20)) {
659 for (i=0; i < 8; i++) {
660 h->non_zero_count_cache[ scan8[16+i] ] = 1;
661
662 if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1))
663 return -1;
664 }
665 }
666 }
667 }
668
669 s->current_picture.mb_type[mb_xy] = mb_type;
670
671 if (IS_INTRA(mb_type)) {
672 h->chroma_pred_mode = check_intra_pred_mode (h, DC_PRED8x8);
673 }
674
675 return 0;
676}
677
678static int svq3_decode_frame (AVCodecContext *avctx,
679 void *data, int *data_size,
680 uint8_t *buf, int buf_size) {
681 MpegEncContext *const s = avctx->priv_data;
682 H264Context *const h = avctx->priv_data;
683 int i;
684
685 s->flags = avctx->flags;
686
687 if (!s->context_initialized) {
688 s->width = (avctx->width + 15) & ~15;
689 s->height = (avctx->height + 15) & ~15;
690 h->b_stride = (s->width >> 2);
691 h->pred4x4[DIAG_DOWN_LEFT_PRED] = pred4x4_down_left_svq3_c;
692 h->pred16x16[PLANE_PRED8x8] = pred16x16_plane_svq3_c;
693 h->halfpel_flag = 1;
694 h->thirdpel_flag = 1;
695 h->chroma_qp = 4;
696
697 if (MPV_common_init (s) < 0)
698 return -1;
699
700 alloc_tables (h);
701 }
89a79364
MM
702 if (avctx->extradata && avctx->extradata_size >= 0x63
703 && !memcmp (avctx->extradata, "SVQ3", 4)) {
8b82a956 704
89a79364 705 uint8_t *stsd = (uint8_t *) avctx->extradata + 0x62;
8b82a956 706
89a79364 707 if ((*stsd >> 5) != 7 || avctx->extradata_size >= 0x66) {
8b82a956
MN
708
709 if ((*stsd >> 5) == 7) {
710 stsd += 3; /* skip width, height (12 bits each) */
711 }
712
713 h->halfpel_flag = (*stsd >> 4) & 1;
714 h->thirdpel_flag = (*stsd >> 3) & 1;
715 }
716 }
717
718 if ((buf[0] & 0x9F) != 1) {
719 /* TODO: what? */
720 fprintf (stderr, "unsupported header (%02X)\n", buf[0]);
721 return -1;
722 } else {
723 int length = (buf[0] >> 5) & 3;
724 int offset = 0;
725
726 for (i=0; i < length; i++) {
727 offset = (offset << 8) | buf[i + 1];
728 }
729
730 if (buf_size < (offset + length + 1) || length == 0)
731 return -1;
732
733 memcpy (&buf[2], &buf[offset + 2], (length - 1));
734 }
735
736 init_get_bits (&s->gb, &buf[2], 8*(buf_size - 2));
737
738 if ((i = svq3_get_ue_golomb (&s->gb)) == INVALID_VLC || i >= 3)
739 return -1;
740
741 s->pict_type = golomb_to_pict_type[i];
742
743 /* unknown fields */
744 get_bits (&s->gb, 1);
745 get_bits (&s->gb, 8);
746
747 s->qscale = get_bits (&s->gb, 5);
748 s->adaptive_quant = get_bits (&s->gb, 1);
749
750 /* unknown fields */
751 get_bits (&s->gb, 1);
752 get_bits (&s->gb, 1);
753 get_bits (&s->gb, 2);
754
755 while (get_bits (&s->gb, 1)) {
756 get_bits (&s->gb, 8);
757 }
4704097a
MN
758
759 if(avctx->debug&FF_DEBUG_PICT_INFO){
760 printf("%c hpel:%d, tpel:%d aqp:%d qp:%d\n",
d8085ea7 761 av_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
4704097a
MN
762 s->adaptive_quant, s->qscale
763 );
764 }
8b82a956
MN
765
766 /* B-frames are not supported */
767 if (s->pict_type == B_TYPE/* && avctx->hurry_up*/)
768 return buf_size;
769
770 frame_start (h);
771
f7a8c179
MN
772 for(i=0; i<4; i++){
773 int j;
774 for(j=-1; j<4; j++)
775 h->ref_cache[0][scan8[0] + 8*i + j]= 1;
776 h->ref_cache[0][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE;
777 }
778
8b82a956
MN
779 for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
780 for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
781 int mb_type = svq3_get_ue_golomb (&s->gb);
782
783 if (s->pict_type == I_TYPE) {
784 mb_type += 8;
785 }
786 if (mb_type > 32 || svq3_decode_mb (h, mb_type)) {
787 fprintf (stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
788 return -1;
789 }
790
791 if (mb_type != 0) {
792 hl_decode_mb (h);
793 }
794 }
795 }
796
797 *(AVFrame *) data = *(AVFrame *) &s->current_picture;
798 *data_size = sizeof(AVFrame);
799
800 MPV_frame_end(s);
801
802 return buf_size;
803}
804
805
806AVCodec svq3_decoder = {
807 "svq3",
808 CODEC_TYPE_VIDEO,
809 CODEC_ID_SVQ3,
810 sizeof(H264Context),
811 decode_init,
812 NULL,
813 decode_end,
814 svq3_decode_frame,
815 CODEC_CAP_DR1,
816};