1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains 1 or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
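/*
 * A caller-side sketch, for illustration only (nothing below is part of this
 * decoder; "imd" and "imd_size" are hypothetical names assumed to come from
 * the demuxer, only the AVCodecContext fields and libavutil calls are real):
 *
 *     // imd points at the ImageDescription payload, starting with the
 *     // 'S','V','Q','3' fourcc (the leading 4-byte atom length stripped);
 *     // imd_size is the size of that payload.
 *     avctx->extradata = av_mallocz(imd_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, imd, imd_size);
 *     avctx->extradata_size = imd_size;
 */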
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * svq3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 uint8_t *slice_buf;
83 int slice_size;
84 int halfpel_flag;
85 int thirdpel_flag;
86 int unknown_flag;
87 uint32_t watermark_key;
88 int adaptive_quant;
89 int next_p_frame_damaged;
90 int h_edge_pos;
91 int v_edge_pos;
92 int last_frame_output;
93
94 int mb_x, mb_y;
95 int mb_xy;
96
97 int chroma_pred_mode;
98 int intra16x16_pred_mode;
99
100 int8_t intra4x4_pred_mode_cache[5 * 8];
101 int8_t (*intra4x4_pred_mode);
102
103 unsigned int top_samples_available;
104 unsigned int topright_samples_available;
105 unsigned int left_samples_available;
106 } SVQ3Context;
107
108 #define FULLPEL_MODE 1
109 #define HALFPEL_MODE 2
110 #define THIRDPEL_MODE 3
111 #define PREDICT_MODE 4
112
113 /* dual scan (from some older h264 draft)
114  * o-->o-->o   o
115  *         |  /|
116  * o   o   o / o
117  * | / |   |/  |
118  * o   o   o   o
119  *   /
120  * o-->o-->o-->o
121  */
122 static const uint8_t svq3_scan[16] = {
123 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
124 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
125 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
126 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
127 };
128
129 static const uint8_t luma_dc_zigzag_scan[16] = {
130 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
131 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
132 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
133 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
134 };
135
136 static const uint8_t svq3_pred_0[25][2] = {
137 { 0, 0 },
138 { 1, 0 }, { 0, 1 },
139 { 0, 2 }, { 1, 1 }, { 2, 0 },
140 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
141 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
142 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
143 { 2, 4 }, { 3, 3 }, { 4, 2 },
144 { 4, 3 }, { 3, 4 },
145 { 4, 4 }
146 };
147
148 static const int8_t svq3_pred_1[6][6][5] = {
149 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
150 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
151 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
152 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
153 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
154 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
155 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
156 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
157 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
158 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
159 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
160 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
161 };
162
163 static const struct {
164 uint8_t run;
165 uint8_t level;
166 } svq3_dct_tables[2][16] = {
167 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
168 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
169 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
170 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
171 };
172
173 static const uint32_t svq3_dequant_coeff[32] = {
174 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
175 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
176 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
177 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
178 };
179
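/* Dequantize the 16 luma DC coefficients and apply SVQ3's 13/7/17 integer
 * inverse transform, scattering the results into the DC positions of the
 * 16 luma blocks in output. */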
180 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
181 {
182 const int qmul = svq3_dequant_coeff[qp];
183 #define stride 16
184 int i;
185 int temp[16];
186 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
187
188 for (i = 0; i < 4; i++) {
189 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
190 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
191 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
192 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
193
194 temp[4 * i + 0] = z0 + z3;
195 temp[4 * i + 1] = z1 + z2;
196 temp[4 * i + 2] = z1 - z2;
197 temp[4 * i + 3] = z0 - z3;
198 }
199
200 for (i = 0; i < 4; i++) {
201 const int offset = x_offset[i];
202 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
203 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
204 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
205 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
206
207 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
208 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
209 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
210 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
211 }
212 }
213 #undef stride
214
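/* Dequantize one 4x4 coefficient block, apply the SVQ3 inverse transform
 * and add the result to the destination pixels; a nonzero dc argument
 * selects the special DC scaling used for intra 16x16 luma and chroma. */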
215 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
216 int stride, int qp, int dc)
217 {
218 const int qmul = svq3_dequant_coeff[qp];
219 int i;
220
221 if (dc) {
222 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
223 : qmul * (block[0] >> 3) / 2);
224 block[0] = 0;
225 }
226
227 for (i = 0; i < 4; i++) {
228 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
229 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
230 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
231 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
232
233 block[0 + 4 * i] = z0 + z3;
234 block[1 + 4 * i] = z1 + z2;
235 block[2 + 4 * i] = z1 - z2;
236 block[3 + 4 * i] = z0 - z3;
237 }
238
239 for (i = 0; i < 4; i++) {
240 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
241 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
242 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
243 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
244 const int rr = (dc + 0x80000);
245
246 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
247 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
248 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
249 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
250 }
251
252 memset(block, 0, 16 * sizeof(int16_t));
253 }
254
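/* Parse the run/level VLC data of one coefficient block, starting at scan
 * position index; type selects the scan pattern and code tables.
 * Returns 0 on success or -1 if a run overflows the block. */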
255 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
256 int index, const int type)
257 {
258 static const uint8_t *const scan_patterns[4] = {
259 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
260 };
261
262 int run, level, limit;
263 unsigned vlc;
264 const int intra = 3 * type >> 2;
265 const uint8_t *const scan = scan_patterns[type];
266
267 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
268 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
269 int sign = (vlc & 1) ? 0 : -1;
270 vlc = vlc + 1 >> 1;
271
272 if (type == 3) {
273 if (vlc < 3) {
274 run = 0;
275 level = vlc;
276 } else if (vlc < 4) {
277 run = 1;
278 level = 1;
279 } else {
280 run = vlc & 0x3;
281 level = (vlc + 9 >> 2) - run;
282 }
283 } else {
284 if (vlc < 16) {
285 run = svq3_dct_tables[intra][vlc].run;
286 level = svq3_dct_tables[intra][vlc].level;
287 } else if (intra) {
288 run = vlc & 0x7;
289 level = (vlc >> 3) +
290 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
291 } else {
292 run = vlc & 0xF;
293 level = (vlc >> 4) +
294 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
295 }
296 }
297
298 if ((index += run) >= limit)
299 return -1;
300
301 block[scan[index]] = (level ^ sign) - sign;
302 }
303
304 if (type != 2) {
305 break;
306 }
307 }
308
309 return 0;
310 }
311
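/* Motion-compensate one partition: copy (or average, for bidirectional
 * prediction) the fullpel/halfpel/thirdpel interpolated luma and chroma
 * prediction from the selected reference picture into the current frame,
 * using edge emulation when the source block crosses the frame border. */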
312 static inline void svq3_mc_dir_part(SVQ3Context *s,
313 int x, int y, int width, int height,
314 int mx, int my, int dxy,
315 int thirdpel, int dir, int avg)
316 {
317 H264Context *h = &s->h;
318 H264SliceContext *sl = &h->slice_ctx[0];
319 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
320 uint8_t *src, *dest;
321 int i, emu = 0;
322 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
323
324 mx += x;
325 my += y;
326
327 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
328 my < 0 || my >= s->v_edge_pos - height - 1) {
329 emu = 1;
330 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
331 my = av_clip(my, -16, s->v_edge_pos - height + 15);
332 }
333
334 /* form component predictions */
335 dest = h->cur_pic.f->data[0] + x + y * sl->linesize;
336 src = pic->f->data[0] + mx + my * sl->linesize;
337
338 if (emu) {
339 s->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
340 sl->linesize, sl->linesize,
341 width + 1, height + 1,
342 mx, my, s->h_edge_pos, s->v_edge_pos);
343 src = sl->edge_emu_buffer;
344 }
345 if (thirdpel)
346 (avg ? s->tdsp.avg_tpel_pixels_tab
347 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, sl->linesize,
348 width, height);
349 else
350 (avg ? s->hdsp.avg_pixels_tab
351 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, sl->linesize,
352 height);
353
354 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
355 mx = mx + (mx < (int) x) >> 1;
356 my = my + (my < (int) y) >> 1;
357 width = width >> 1;
358 height = height >> 1;
359 blocksize++;
360
361 for (i = 1; i < 3; i++) {
362 dest = h->cur_pic.f->data[i] + (x >> 1) + (y >> 1) * sl->uvlinesize;
363 src = pic->f->data[i] + mx + my * sl->uvlinesize;
364
365 if (emu) {
366 s->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
367 sl->uvlinesize, sl->uvlinesize,
368 width + 1, height + 1,
369 mx, my, (s->h_edge_pos >> 1),
370 s->v_edge_pos >> 1);
371 src = sl->edge_emu_buffer;
372 }
373 if (thirdpel)
374 (avg ? s->tdsp.avg_tpel_pixels_tab
375 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
376 sl->uvlinesize,
377 width, height);
378 else
379 (avg ? s->hdsp.avg_pixels_tab
380 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
381 sl->uvlinesize,
382 height);
383 }
384 }
385 }
386
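/* Decode and apply the motion vectors of every partition in the current
 * macroblock: predict each vector, read the optional differential, clip to
 * the frame, call svq3_mc_dir_part() and update the motion vector caches. */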
387 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
388 int dir, int avg)
389 {
390 int i, j, k, mx, my, dx, dy, x, y;
391 H264Context *h = &s->h;
392 H264SliceContext *sl = &h->slice_ctx[0];
393 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
394 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
395 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
396 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
397 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
398
399 for (i = 0; i < 16; i += part_height)
400 for (j = 0; j < 16; j += part_width) {
401 const int b_xy = (4 * s->mb_x + (j >> 2)) +
402 (4 * s->mb_y + (i >> 2)) * h->b_stride;
403 int dxy;
404 x = 16 * s->mb_x + j;
405 y = 16 * s->mb_y + i;
406 k = (j >> 2 & 1) + (i >> 1 & 2) +
407 (j >> 1 & 4) + (i & 8);
408
409 if (mode != PREDICT_MODE) {
410 pred_motion(h, sl, k, part_width >> 2, dir, 1, &mx, &my);
411 } else {
412 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
413 my = s->next_pic->motion_val[0][b_xy][1] << 1;
414
415 if (dir == 0) {
416 mx = mx * h->frame_num_offset /
417 h->prev_frame_num_offset + 1 >> 1;
418 my = my * h->frame_num_offset /
419 h->prev_frame_num_offset + 1 >> 1;
420 } else {
421 mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
422 h->prev_frame_num_offset + 1 >> 1;
423 my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
424 h->prev_frame_num_offset + 1 >> 1;
425 }
426 }
427
428 /* clip motion vector prediction to frame border */
429 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
430 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
431
432 /* get (optional) motion vector differential */
433 if (mode == PREDICT_MODE) {
434 dx = dy = 0;
435 } else {
436 dy = svq3_get_se_golomb(&h->gb);
437 dx = svq3_get_se_golomb(&h->gb);
438
439 if (dx == INVALID_VLC || dy == INVALID_VLC) {
440 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
441 return -1;
442 }
443 }
444
445 /* compute motion vector */
446 if (mode == THIRDPEL_MODE) {
447 int fx, fy;
448 mx = (mx + 1 >> 1) + dx;
449 my = (my + 1 >> 1) + dy;
450 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
451 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
452 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
453
454 svq3_mc_dir_part(s, x, y, part_width, part_height,
455 fx, fy, dxy, 1, dir, avg);
456 mx += mx;
457 my += my;
458 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
459 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
460 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
461 dxy = (mx & 1) + 2 * (my & 1);
462
463 svq3_mc_dir_part(s, x, y, part_width, part_height,
464 mx >> 1, my >> 1, dxy, 0, dir, avg);
465 mx *= 3;
466 my *= 3;
467 } else {
468 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
469 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
470
471 svq3_mc_dir_part(s, x, y, part_width, part_height,
472 mx, my, 0, 0, dir, avg);
473 mx *= 6;
474 my *= 6;
475 }
476
477 /* update mv_cache */
478 if (mode != PREDICT_MODE) {
479 int32_t mv = pack16to32(mx, my);
480
481 if (part_height == 8 && i < 8) {
482 AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 * 8], mv);
483
484 if (part_width == 8 && j < 8)
485 AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
486 }
487 if (part_width == 8 && j < 8)
488 AV_WN32A(sl->mv_cache[dir][scan8[k] + 1], mv);
489 if (part_width == 4 || part_height == 4)
490 AV_WN32A(sl->mv_cache[dir][scan8[k]], mv);
491 }
492
493 /* write back motion vectors */
494 fill_rectangle(h->cur_pic.motion_val[dir][b_xy],
495 part_width >> 2, part_height >> 2, h->b_stride,
496 pack16to32(mx, my), 4);
497 }
498
499 return 0;
500 }
501
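/* Add the luma residual to the already predicted macroblock (non-intra4x4
 * case; the intra4x4 path adds its residual per 4x4 block during
 * prediction). */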
502 static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
503 int mb_type, const int *block_offset,
504 int linesize, uint8_t *dest_y)
505 {
506 int i;
507 if (!IS_INTRA4x4(mb_type)) {
508 for (i = 0; i < 16; i++)
509 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
510 uint8_t *const ptr = dest_y + block_offset[i];
511 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize,
512 sl->qscale, IS_INTRA(mb_type) ? 1 : 0);
513 }
514 }
515 }
516
517 static av_always_inline int dctcoef_get(int16_t *mb, int index)
518 {
519 return AV_RN16A(mb + index);
520 }
521
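/* Perform luma intra prediction for the current macroblock, either per 4x4
 * block (adding the residual immediately) or as one 16x16 prediction
 * followed by the luma DC dequantization and transform. */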
522 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
523 const H264Context *h,
524 H264SliceContext *sl,
525 int mb_type,
526 const int *block_offset,
527 int linesize,
528 uint8_t *dest_y)
529 {
530 int i;
531 int qscale = sl->qscale;
532
533 if (IS_INTRA4x4(mb_type)) {
534 for (i = 0; i < 16; i++) {
535 uint8_t *const ptr = dest_y + block_offset[i];
536 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
537
538 uint8_t *topright;
539 int nnz, tr;
540 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
541 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
542 assert(s->mb_y || linesize <= block_offset[i]);
543 if (!topright_avail) {
544 tr = ptr[3 - linesize] * 0x01010101u;
545 topright = (uint8_t *)&tr;
546 } else
547 topright = ptr + 4 - linesize;
548 } else
549 topright = NULL;
550
551 s->hpc.pred4x4[dir](ptr, topright, linesize);
552 nnz = sl->non_zero_count_cache[scan8[i]];
553 if (nnz) {
554 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize, qscale, 0);
555 }
556 }
557 } else {
558 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
559 svq3_luma_dc_dequant_idct_c(sl->mb, sl->mb_luma_dc[0], qscale);
560 }
561 }
562
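/* Reconstruct the current macroblock: chroma and luma intra prediction
 * (for intra macroblocks), the luma residual and the chroma DC/AC
 * residuals. */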
563 static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext *sl)
564 {
565 const int mb_x = s->mb_x;
566 const int mb_y = s->mb_y;
567 const int mb_xy = s->mb_xy;
568 const int mb_type = h->cur_pic.mb_type[mb_xy];
569 uint8_t *dest_y, *dest_cb, *dest_cr;
570 int linesize, uvlinesize;
571 int i, j;
572 const int *block_offset = &h->block_offset[0];
573 const int block_h = 16 >> h->chroma_y_shift;
574
575 dest_y = h->cur_pic.f->data[0] + (mb_x + mb_y * sl->linesize) * 16;
576 dest_cb = h->cur_pic.f->data[1] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
577 dest_cr = h->cur_pic.f->data[2] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
578
579 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * sl->linesize + 64, sl->linesize, 4);
580 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * sl->uvlinesize + 64, dest_cr - dest_cb, 2);
581
582 h->list_counts[mb_xy] = sl->list_count;
583
584 linesize = sl->mb_linesize = sl->linesize;
585 uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;
586
587 if (IS_INTRA(mb_type)) {
588 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
589 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
590
591 hl_decode_mb_predict_luma(s, h, sl, mb_type, block_offset, linesize, dest_y);
592 }
593
594 hl_decode_mb_idct_luma(h, sl, mb_type, block_offset, linesize, dest_y);
595
596 if (sl->cbp & 0x30) {
597 uint8_t *dest[2] = { dest_cb, dest_cr };
598 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 1,
599 h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][sl->chroma_qp[0]][0]);
600 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 2,
601 h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][sl->chroma_qp[1]][0]);
602 for (j = 1; j < 3; j++) {
603 for (i = j * 16; i < j * 16 + 4; i++)
604 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
605 uint8_t *const ptr = dest[j - 1] + block_offset[i];
606 svq3_add_idct_c(ptr, sl->mb + i * 16,
607 uvlinesize, ff_h264_chroma_qp[0][sl->qscale + 12] - 12, 2);
608 }
609 }
610 }
611 }
612
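/* Parse one macroblock: depending on mb_type this handles SKIP, INTER
 * (fullpel/halfpel/thirdpel motion), INTRA4x4 or INTRA16x16 coding, then
 * reads the coded block pattern, an optional qscale delta and the residual
 * coefficient blocks. Returns 0 on success, a negative value on error. */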
613 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
614 {
615 H264Context *h = &s->h;
616 H264SliceContext *sl = &h->slice_ctx[0];
617 int i, j, k, m, dir, mode;
618 int cbp = 0;
619 uint32_t vlc;
620 int8_t *top, *left;
621 const int mb_xy = s->mb_xy;
622 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * h->b_stride;
623
624 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
625 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
626 s->topright_samples_available = 0xFFFF;
627
628 if (mb_type == 0) { /* SKIP */
629 if (h->pict_type == AV_PICTURE_TYPE_P ||
630 s->next_pic->mb_type[mb_xy] == -1) {
631 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
632 0, 0, 0, 0, 0, 0);
633
634 if (h->pict_type == AV_PICTURE_TYPE_B)
635 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
636 0, 0, 0, 0, 1, 1);
637
638 mb_type = MB_TYPE_SKIP;
639 } else {
640 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
641 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
642 return -1;
643 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
644 return -1;
645
646 mb_type = MB_TYPE_16x16;
647 }
648 } else if (mb_type < 8) { /* INTER */
649 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
650 mode = THIRDPEL_MODE;
651 else if (s->halfpel_flag &&
652 s->thirdpel_flag == !get_bits1(&h->gb))
653 mode = HALFPEL_MODE;
654 else
655 mode = FULLPEL_MODE;
656
657 /* fill caches */
658 /* note ref_cache should contain here:
659 * ????????
660 * ???11111
661 * N??11111
662 * N??11111
663 * N??11111
664 */
665
666 for (m = 0; m < 2; m++) {
667 if (s->mb_x > 0 && s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
668 for (i = 0; i < 4; i++)
669 AV_COPY32(sl->mv_cache[m][scan8[0] - 1 + i * 8],
670 h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
671 } else {
672 for (i = 0; i < 4; i++)
673 AV_ZERO32(sl->mv_cache[m][scan8[0] - 1 + i * 8]);
674 }
675 if (s->mb_y > 0) {
676 memcpy(sl->mv_cache[m][scan8[0] - 1 * 8],
677 h->cur_pic.motion_val[m][b_xy - h->b_stride],
678 4 * 2 * sizeof(int16_t));
679 memset(&sl->ref_cache[m][scan8[0] - 1 * 8],
680 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
681
682 if (s->mb_x < h->mb_width - 1) {
683 AV_COPY32(sl->mv_cache[m][scan8[0] + 4 - 1 * 8],
684 h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
685 sl->ref_cache[m][scan8[0] + 4 - 1 * 8] =
686 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
687 s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
688 } else
689 sl->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
690 if (s->mb_x > 0) {
691 AV_COPY32(sl->mv_cache[m][scan8[0] - 1 - 1 * 8],
692 h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
693 sl->ref_cache[m][scan8[0] - 1 - 1 * 8] =
694 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
695 } else
696 sl->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
697 } else
698 memset(&sl->ref_cache[m][scan8[0] - 1 * 8 - 1],
699 PART_NOT_AVAILABLE, 8);
700
701 if (h->pict_type != AV_PICTURE_TYPE_B)
702 break;
703 }
704
705 /* decode motion vector(s) and form prediction(s) */
706 if (h->pict_type == AV_PICTURE_TYPE_P) {
707 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
708 return -1;
709 } else { /* AV_PICTURE_TYPE_B */
710 if (mb_type != 2) {
711 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
712 return -1;
713 } else {
714 for (i = 0; i < 4; i++)
715 memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
716 0, 4 * 2 * sizeof(int16_t));
717 }
718 if (mb_type != 1) {
719 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
720 return -1;
721 } else {
722 for (i = 0; i < 4; i++)
723 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
724 0, 4 * 2 * sizeof(int16_t));
725 }
726 }
727
728 mb_type = MB_TYPE_16x16;
729 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
730 int8_t *i4x4 = s->intra4x4_pred_mode + h->mb2br_xy[s->mb_xy];
731 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
732
733 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
734
735 if (mb_type == 8) {
736 if (s->mb_x > 0) {
737 for (i = 0; i < 4; i++)
738 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
739 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
740 s->left_samples_available = 0x5F5F;
741 }
742 if (s->mb_y > 0) {
743 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
744 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
745 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
746 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
747
748 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
749 s->top_samples_available = 0x33FF;
750 }
751
752 /* decode prediction codes for luma blocks */
753 for (i = 0; i < 16; i += 2) {
754 vlc = svq3_get_ue_golomb(&h->gb);
755
756 if (vlc >= 25) {
757 av_log(h->avctx, AV_LOG_ERROR,
758 "luma prediction:%"PRIu32"\n", vlc);
759 return -1;
760 }
761
762 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
763 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
764
765 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
766 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
767
768 if (left[1] == -1 || left[2] == -1) {
769 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
770 return -1;
771 }
772 }
773 } else { /* mb_type == 33, DC_128_PRED block type */
774 for (i = 0; i < 4; i++)
775 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
776 }
777
778 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
779 i4x4[4] = i4x4_cache[7 + 8 * 3];
780 i4x4[5] = i4x4_cache[7 + 8 * 2];
781 i4x4[6] = i4x4_cache[7 + 8 * 1];
782
783 if (mb_type == 8) {
784 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
785 h->avctx, s->top_samples_available,
786 s->left_samples_available);
787
788 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
789 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
790 } else {
791 for (i = 0; i < 4; i++)
792 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
793
794 s->top_samples_available = 0x33FF;
795 s->left_samples_available = 0x5F5F;
796 }
797
798 mb_type = MB_TYPE_INTRA4x4;
799 } else { /* INTRA16x16 */
800 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
801 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
802
803 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
804 s->left_samples_available, dir, 0)) < 0) {
805 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
806 return s->intra16x16_pred_mode;
807 }
808
809 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
810 mb_type = MB_TYPE_INTRA16x16;
811 }
812
813 if (!IS_INTER(mb_type) && h->pict_type != AV_PICTURE_TYPE_I) {
814 for (i = 0; i < 4; i++)
815 memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
816 0, 4 * 2 * sizeof(int16_t));
817 if (h->pict_type == AV_PICTURE_TYPE_B) {
818 for (i = 0; i < 4; i++)
819 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
820 0, 4 * 2 * sizeof(int16_t));
821 }
822 }
823 if (!IS_INTRA4x4(mb_type)) {
824 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
825 }
826 if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
827 memset(sl->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
828 }
829
830 if (!IS_INTRA16x16(mb_type) &&
831 (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B)) {
832 if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
833 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
834 return -1;
835 }
836
837 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
838 : ff_h264_golomb_to_inter_cbp[vlc];
839 }
840 if (IS_INTRA16x16(mb_type) ||
841 (h->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
842 sl->qscale += svq3_get_se_golomb(&h->gb);
843
844 if (sl->qscale > 31u) {
845 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", sl->qscale);
846 return -1;
847 }
848 }
849 if (IS_INTRA16x16(mb_type)) {
850 AV_ZERO128(sl->mb_luma_dc[0] + 0);
851 AV_ZERO128(sl->mb_luma_dc[0] + 8);
852 if (svq3_decode_block(&h->gb, sl->mb_luma_dc[0], 0, 1)) {
853 av_log(h->avctx, AV_LOG_ERROR,
854 "error while decoding intra luma dc\n");
855 return -1;
856 }
857 }
858
859 if (cbp) {
860 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
861 const int type = ((sl->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
862
863 for (i = 0; i < 4; i++)
864 if ((cbp & (1 << i))) {
865 for (j = 0; j < 4; j++) {
866 k = index ? (1 * (j & 1) + 2 * (i & 1) +
867 2 * (j & 2) + 4 * (i & 2))
868 : (4 * i + j);
869 sl->non_zero_count_cache[scan8[k]] = 1;
870
871 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], index, type)) {
872 av_log(h->avctx, AV_LOG_ERROR,
873 "error while decoding block\n");
874 return -1;
875 }
876 }
877 }
878
879 if ((cbp & 0x30)) {
880 for (i = 1; i < 3; ++i)
881 if (svq3_decode_block(&h->gb, &sl->mb[16 * 16 * i], 0, 3)) {
882 av_log(h->avctx, AV_LOG_ERROR,
883 "error while decoding chroma dc block\n");
884 return -1;
885 }
886
887 if ((cbp & 0x20)) {
888 for (i = 1; i < 3; i++) {
889 for (j = 0; j < 4; j++) {
890 k = 16 * i + j;
891 sl->non_zero_count_cache[scan8[k]] = 1;
892
893 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], 1, 1)) {
894 av_log(h->avctx, AV_LOG_ERROR,
895 "error while decoding chroma ac block\n");
896 return -1;
897 }
898 }
899 }
900 }
901 }
902 }
903
904 sl->cbp = cbp;
905 h->cur_pic.mb_type[mb_xy] = mb_type;
906
907 if (IS_INTRA(mb_type))
908 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
909 s->left_samples_available, DC_PRED8x8, 1);
910
911 return 0;
912 }
913
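/* Parse an SVQ3 slice header: copy the slice payload into its own buffer
 * (applying the watermark key to the scrambled bytes when present), read
 * the slice type, skip run, qscale and various flags, and reset the intra
 * prediction and motion vector state at the slice boundary. */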
914 static int svq3_decode_slice_header(AVCodecContext *avctx)
915 {
916 SVQ3Context *s = avctx->priv_data;
917 H264Context *h = &s->h;
918 H264SliceContext *sl = &h->slice_ctx[0];
919 const int mb_xy = s->mb_xy;
920 int i, header;
921 unsigned slice_id;
922
923 header = get_bits(&s->gb, 8);
924
925 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
926 /* TODO: what? */
927 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
928 return -1;
929 } else {
930 int slice_bits, slice_bytes, slice_length;
931 int length = header >> 5 & 3;
932
933 slice_length = show_bits(&s->gb, 8 * length);
934 slice_bits = slice_length * 8;
935 slice_bytes = slice_length + length - 1;
936
937 if (slice_bytes > get_bits_left(&s->gb)) {
938 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
939 return -1;
940 }
941
942 skip_bits(&s->gb, 8);
943
944 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
945 if (!s->slice_buf)
946 return AVERROR(ENOMEM);
947
948 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
949
950 init_get_bits(&h->gb, s->slice_buf, slice_bits);
951
952 if (s->watermark_key) {
953 uint32_t header = AV_RL32(&h->gb.buffer[1]);
954 AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
955 }
956 if (length > 0) {
957 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
958 }
959 skip_bits_long(&s->gb, slice_bytes * 8);
960 }
961
962 if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
963 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
964 return -1;
965 }
966
967 sl->slice_type = ff_h264_golomb_to_pict_type[slice_id];
968
969 if ((header & 0x9F) == 2) {
970 i = (h->mb_num < 64) ? 6 : (1 + av_log2(h->mb_num - 1));
971 sl->mb_skip_run = get_bits(&h->gb, i) -
972 (s->mb_y * h->mb_width + s->mb_x);
973 } else {
974 skip_bits1(&h->gb);
975 sl->mb_skip_run = 0;
976 }
977
978 sl->slice_num = get_bits(&h->gb, 8);
979 sl->qscale = get_bits(&h->gb, 5);
980 s->adaptive_quant = get_bits1(&h->gb);
981
982 /* unknown fields */
983 skip_bits1(&h->gb);
984
985 if (s->unknown_flag)
986 skip_bits1(&h->gb);
987
988 skip_bits1(&h->gb);
989 skip_bits(&h->gb, 2);
990
991 while (get_bits1(&h->gb))
992 skip_bits(&h->gb, 8);
993
994 /* reset intra predictors and invalidate motion vector references */
995 if (s->mb_x > 0) {
996 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
997 -1, 4 * sizeof(int8_t));
998 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - s->mb_x],
999 -1, 8 * sizeof(int8_t) * s->mb_x);
1000 }
1001 if (s->mb_y > 0) {
1002 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
1003 -1, 8 * sizeof(int8_t) * (h->mb_width - s->mb_x));
1004
1005 if (s->mb_x > 0)
1006 s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
1007 }
1008
1009 return 0;
1010 }
1011
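/* Decoder init: allocate the three picture structs, set up the embedded
 * H.264 context and DSP helpers, and parse the "SEQH" header found in the
 * extradata (frame size, halfpel/thirdpel flags, low delay, and the
 * optional zlib-compressed watermark used to derive the descrambling key). */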
1012 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1013 {
1014 SVQ3Context *s = avctx->priv_data;
1015 H264Context *h = &s->h;
1016 H264SliceContext *sl;
1017 int m;
1018 unsigned char *extradata;
1019 unsigned char *extradata_end;
1020 unsigned int size;
1021 int marker_found = 0;
1022
1023 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1024 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1025 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1026 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1027 av_freep(&s->cur_pic);
1028 av_freep(&s->last_pic);
1029 av_freep(&s->next_pic);
1030 return AVERROR(ENOMEM);
1031 }
1032
1033 s->cur_pic->f = av_frame_alloc();
1034 s->last_pic->f = av_frame_alloc();
1035 s->next_pic->f = av_frame_alloc();
1036 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1037 return AVERROR(ENOMEM);
1038
1039 if (ff_h264_decode_init(avctx) < 0)
1040 return -1;
1041
1042 // we will overwrite it later during decoding
1043 av_frame_free(&h->cur_pic.f);
1044
1045 ff_h264dsp_init(&s->h264dsp, 8, 1);
1046 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1047 ff_videodsp_init(&s->vdsp, 8);
1048
1049 memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1050 memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
1051
1052 h->sps.bit_depth_luma = 8;
1053 h->chroma_format_idc = 1;
1054
1055 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1056 ff_tpeldsp_init(&s->tdsp);
1057
1058 sl = h->slice_ctx;
1059
1060 h->flags = avctx->flags;
1061 sl->is_complex = 1;
1062 h->picture_structure = PICT_FRAME;
1063 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1064 avctx->color_range = AVCOL_RANGE_JPEG;
1065
1066 h->slice_ctx[0].chroma_qp[0] = h->slice_ctx[0].chroma_qp[1] = 4;
1067 h->chroma_x_shift = h->chroma_y_shift = 1;
1068
1069 s->halfpel_flag = 1;
1070 s->thirdpel_flag = 1;
1071 s->unknown_flag = 0;
1072
1073 /* prowl for the "SEQH" marker in the extradata */
1074 extradata = (unsigned char *)avctx->extradata;
1075 extradata_end = avctx->extradata + avctx->extradata_size;
1076 if (extradata) {
1077 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1078 if (!memcmp(extradata, "SEQH", 4)) {
1079 marker_found = 1;
1080 break;
1081 }
1082 extradata++;
1083 }
1084 }
1085
1086 /* if a match was found, parse the extra data */
1087 if (marker_found) {
1088 GetBitContext gb;
1089 int frame_size_code;
1090
1091 size = AV_RB32(&extradata[4]);
1092 if (size > extradata_end - extradata - 8)
1093 return AVERROR_INVALIDDATA;
1094 init_get_bits(&gb, extradata + 8, size * 8);
1095
1096 /* 'frame size code' and optional 'width, height' */
1097 frame_size_code = get_bits(&gb, 3);
1098 switch (frame_size_code) {
1099 case 0:
1100 avctx->width = 160;
1101 avctx->height = 120;
1102 break;
1103 case 1:
1104 avctx->width = 128;
1105 avctx->height = 96;
1106 break;
1107 case 2:
1108 avctx->width = 176;
1109 avctx->height = 144;
1110 break;
1111 case 3:
1112 avctx->width = 352;
1113 avctx->height = 288;
1114 break;
1115 case 4:
1116 avctx->width = 704;
1117 avctx->height = 576;
1118 break;
1119 case 5:
1120 avctx->width = 240;
1121 avctx->height = 180;
1122 break;
1123 case 6:
1124 avctx->width = 320;
1125 avctx->height = 240;
1126 break;
1127 case 7:
1128 avctx->width = get_bits(&gb, 12);
1129 avctx->height = get_bits(&gb, 12);
1130 break;
1131 }
1132
1133 s->halfpel_flag = get_bits1(&gb);
1134 s->thirdpel_flag = get_bits1(&gb);
1135
1136 /* unknown fields */
1137 skip_bits1(&gb);
1138 skip_bits1(&gb);
1139 skip_bits1(&gb);
1140 skip_bits1(&gb);
1141
1142 h->low_delay = get_bits1(&gb);
1143
1144 /* unknown field */
1145 skip_bits1(&gb);
1146
1147 while (get_bits1(&gb))
1148 skip_bits(&gb, 8);
1149
1150 s->unknown_flag = get_bits1(&gb);
1151 avctx->has_b_frames = !h->low_delay;
1152 if (s->unknown_flag) {
1153 #if CONFIG_ZLIB
1154 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1155 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1156 int u1 = svq3_get_ue_golomb(&gb);
1157 int u2 = get_bits(&gb, 8);
1158 int u3 = get_bits(&gb, 2);
1159 int u4 = svq3_get_ue_golomb(&gb);
1160 unsigned long buf_len = watermark_width *
1161 watermark_height * 4;
1162 int offset = get_bits_count(&gb) + 7 >> 3;
1163 uint8_t *buf;
1164
1165 if (watermark_height > 0 &&
1166 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1167 return -1;
1168
1169 buf = av_malloc(buf_len);
1170 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1171 watermark_width, watermark_height);
1172 av_log(avctx, AV_LOG_DEBUG,
1173 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1174 u1, u2, u3, u4, offset);
1175 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1176 size - offset) != Z_OK) {
1177 av_log(avctx, AV_LOG_ERROR,
1178 "could not uncompress watermark logo\n");
1179 av_free(buf);
1180 return -1;
1181 }
1182 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1183 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1184 av_log(avctx, AV_LOG_DEBUG,
1185 "watermark key %#"PRIx32"\n", s->watermark_key);
1186 av_free(buf);
1187 #else
1188 av_log(avctx, AV_LOG_ERROR,
1189 "this svq3 file contains watermark which need zlib support compiled in\n");
1190 return -1;
1191 #endif
1192 }
1193 }
1194
1195 h->width = avctx->width;
1196 h->height = avctx->height;
1197 h->mb_width = (h->width + 15) / 16;
1198 h->mb_height = (h->height + 15) / 16;
1199 h->mb_stride = h->mb_width + 1;
1200 h->mb_num = h->mb_width * h->mb_height;
1201 h->b_stride = 4 * h->mb_width;
1202 s->h_edge_pos = h->mb_width * 16;
1203 s->v_edge_pos = h->mb_height * 16;
1204
1205 s->intra4x4_pred_mode = av_mallocz(h->mb_stride * 2 * 8);
1206 if (!s->intra4x4_pred_mode)
1207 return AVERROR(ENOMEM);
1208
1209 if (ff_h264_alloc_tables(h) < 0) {
1210 av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
1211 return AVERROR(ENOMEM);
1212 }
1213
1214 return 0;
1215 }
1216
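/* Release the per-picture side data buffers and the frame. */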
1217 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1218 {
1219 int i;
1220 for (i = 0; i < 2; i++) {
1221 av_buffer_unref(&pic->motion_val_buf[i]);
1222 av_buffer_unref(&pic->ref_index_buf[i]);
1223 }
1224 av_buffer_unref(&pic->mb_type_buf);
1225
1226 av_frame_unref(pic->f);
1227 }
1228
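/* Allocate the per-picture side data (mb_type, motion vectors, reference
 * indices) on first use and acquire a frame buffer for the picture. */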
1229 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1230 {
1231 SVQ3Context *s = avctx->priv_data;
1232 H264Context *h = &s->h;
1233 H264SliceContext *sl = &h->slice_ctx[0];
1234 const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1;
1235 const int mb_array_size = h->mb_stride * h->mb_height;
1236 const int b4_stride = h->mb_width * 4 + 1;
1237 const int b4_array_size = b4_stride * h->mb_height * 4;
1238 int ret;
1239
1240 if (!pic->motion_val_buf[0]) {
1241 int i;
1242
1243 pic->mb_type_buf = av_buffer_allocz((big_mb_num + h->mb_stride) * sizeof(uint32_t));
1244 if (!pic->mb_type_buf)
1245 return AVERROR(ENOMEM);
1246 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
1247
1248 for (i = 0; i < 2; i++) {
1249 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1250 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1251 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1252 ret = AVERROR(ENOMEM);
1253 goto fail;
1254 }
1255
1256 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1257 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1258 }
1259 }
1260 pic->reference = !(h->pict_type == AV_PICTURE_TYPE_B);
1261
1262 ret = ff_get_buffer(avctx, pic->f,
1263 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1264 if (ret < 0)
1265 goto fail;
1266
1267 if (!sl->edge_emu_buffer) {
1268 sl->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1269 if (!sl->edge_emu_buffer)
1270 return AVERROR(ENOMEM);
1271 }
1272
1273 sl->linesize = pic->f->linesize[0];
1274 sl->uvlinesize = pic->f->linesize[1];
1275
1276 return 0;
1277 fail:
1278 free_picture(avctx, pic);
1279 return ret;
1280 }
1281
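/* Decode one packet: parse the slice header(s), set up the current, last
 * and next pictures, decode every macroblock and output either the current
 * frame (B-frames or low delay) or the delayed reference frame. */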
1282 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1283 int *got_frame, AVPacket *avpkt)
1284 {
1285 const uint8_t *buf = avpkt->data;
1286 SVQ3Context *s = avctx->priv_data;
1287 H264Context *h = &s->h;
1288 H264SliceContext *sl = &h->slice_ctx[0];
1289 int buf_size = avpkt->size;
1290 int ret, m, i;
1291
1292 /* special case for last picture */
1293 if (buf_size == 0) {
1294 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1295 ret = av_frame_ref(data, s->next_pic->f);
1296 if (ret < 0)
1297 return ret;
1298 s->last_frame_output = 1;
1299 *got_frame = 1;
1300 }
1301 return 0;
1302 }
1303
1304 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1305 if (ret < 0)
1306 return ret;
1307
1308 s->mb_x = s->mb_y = s->mb_xy = 0;
1309
1310 if (svq3_decode_slice_header(avctx))
1311 return -1;
1312
1313 h->pict_type = sl->slice_type;
1314
1315 if (h->pict_type != AV_PICTURE_TYPE_B)
1316 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1317
1318 av_frame_unref(s->cur_pic->f);
1319
1320 /* for skipping the frame */
1321 s->cur_pic->f->pict_type = h->pict_type;
1322 s->cur_pic->f->key_frame = (h->pict_type == AV_PICTURE_TYPE_I);
1323
1324 ret = get_buffer(avctx, s->cur_pic);
1325 if (ret < 0)
1326 return ret;
1327
1328 h->cur_pic_ptr = s->cur_pic;
1329 h->cur_pic = *s->cur_pic;
1330
1331 for (i = 0; i < 16; i++) {
1332 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1333 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1334 }
1335 for (i = 0; i < 16; i++) {
1336 h->block_offset[16 + i] =
1337 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1338 h->block_offset[48 + 16 + i] =
1339 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1340 }
1341
1342 if (h->pict_type != AV_PICTURE_TYPE_I) {
1343 if (!s->last_pic->f->data[0]) {
1344 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1345 ret = get_buffer(avctx, s->last_pic);
1346 if (ret < 0)
1347 return ret;
1348 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1349 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1350 s->last_pic->f->linesize[1]);
1351 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1352 s->last_pic->f->linesize[2]);
1353 }
1354
1355 if (h->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1356 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1357 ret = get_buffer(avctx, s->next_pic);
1358 if (ret < 0)
1359 return ret;
1360 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1361 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1362 s->next_pic->f->linesize[1]);
1363 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1364 s->next_pic->f->linesize[2]);
1365 }
1366 }
1367
1368 if (avctx->debug & FF_DEBUG_PICT_INFO)
1369 av_log(h->avctx, AV_LOG_DEBUG,
1370 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1371 av_get_picture_type_char(h->pict_type),
1372 s->halfpel_flag, s->thirdpel_flag,
1373 s->adaptive_quant, h->slice_ctx[0].qscale, sl->slice_num);
1374
1375 if (avctx->skip_frame >= AVDISCARD_NONREF && h->pict_type == AV_PICTURE_TYPE_B ||
1376 avctx->skip_frame >= AVDISCARD_NONKEY && h->pict_type != AV_PICTURE_TYPE_I ||
1377 avctx->skip_frame >= AVDISCARD_ALL)
1378 return 0;
1379
1380 if (s->next_p_frame_damaged) {
1381 if (h->pict_type == AV_PICTURE_TYPE_B)
1382 return 0;
1383 else
1384 s->next_p_frame_damaged = 0;
1385 }
1386
1387 if (h->pict_type == AV_PICTURE_TYPE_B) {
1388 h->frame_num_offset = sl->slice_num - h->prev_frame_num;
1389
1390 if (h->frame_num_offset < 0)
1391 h->frame_num_offset += 256;
1392 if (h->frame_num_offset == 0 ||
1393 h->frame_num_offset >= h->prev_frame_num_offset) {
1394 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1395 return -1;
1396 }
1397 } else {
1398 h->prev_frame_num = h->frame_num;
1399 h->frame_num = sl->slice_num;
1400 h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
1401
1402 if (h->prev_frame_num_offset < 0)
1403 h->prev_frame_num_offset += 256;
1404 }
1405
1406 for (m = 0; m < 2; m++) {
1407 int i;
1408 for (i = 0; i < 4; i++) {
1409 int j;
1410 for (j = -1; j < 4; j++)
1411 sl->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1412 if (i < 3)
1413 sl->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1414 }
1415 }
1416
1417 for (s->mb_y = 0; s->mb_y < h->mb_height; s->mb_y++) {
1418 for (s->mb_x = 0; s->mb_x < h->mb_width; s->mb_x++) {
1419 unsigned mb_type;
1420 s->mb_xy = s->mb_x + s->mb_y * h->mb_stride;
1421
1422 if ((get_bits_left(&h->gb)) <= 7) {
1423 if (((get_bits_count(&h->gb) & 7) == 0 ||
1424 show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
1425
1426 if (svq3_decode_slice_header(avctx))
1427 return -1;
1428 }
1429 /* TODO: support s->mb_skip_run */
1430 }
1431
1432 mb_type = svq3_get_ue_golomb(&h->gb);
1433
1434 if (h->pict_type == AV_PICTURE_TYPE_I)
1435 mb_type += 8;
1436 else if (h->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1437 mb_type += 4;
1438 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1439 av_log(h->avctx, AV_LOG_ERROR,
1440 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1441 return -1;
1442 }
1443
1444 if (mb_type != 0)
1445 hl_decode_mb(s, h, &h->slice_ctx[0]);
1446
1447 if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1448 h->cur_pic.mb_type[s->mb_x + s->mb_y * h->mb_stride] =
1449 (h->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1450 }
1451
1452 ff_draw_horiz_band(avctx, s->cur_pic->f,
1453 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1454 16 * s->mb_y, 16, h->picture_structure, 0,
1455 h->low_delay);
1456 }
1457
1458 if (h->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1459 ret = av_frame_ref(data, s->cur_pic->f);
1460 else if (s->last_pic->f->data[0])
1461 ret = av_frame_ref(data, s->last_pic->f);
1462 if (ret < 0)
1463 return ret;
1464
1465 /* Do not output the last pic after seeking. */
1466 if (s->last_pic->f->data[0] || h->low_delay)
1467 *got_frame = 1;
1468
1469 if (h->pict_type != AV_PICTURE_TYPE_B) {
1470 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1471 } else {
1472 av_frame_unref(s->cur_pic->f);
1473 }
1474
1475 return buf_size;
1476 }
1477
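/* Free all pictures, the slice buffer, the intra prediction modes and the
 * embedded H.264 context. */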
1478 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1479 {
1480 SVQ3Context *s = avctx->priv_data;
1481 H264Context *h = &s->h;
1482
1483 free_picture(avctx, s->cur_pic);
1484 free_picture(avctx, s->next_pic);
1485 free_picture(avctx, s->last_pic);
1486 av_frame_free(&s->cur_pic->f);
1487 av_frame_free(&s->next_pic->f);
1488 av_frame_free(&s->last_pic->f);
1489 av_freep(&s->cur_pic);
1490 av_freep(&s->next_pic);
1491 av_freep(&s->last_pic);
1492 av_freep(&s->slice_buf);
1493 av_freep(&s->intra4x4_pred_mode);
1494
1495 memset(&h->cur_pic, 0, sizeof(h->cur_pic));
1496
1497 ff_h264_free_context(h);
1498
1499 return 0;
1500 }
1501
1502 AVCodec ff_svq3_decoder = {
1503 .name = "svq3",
1504 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1505 .type = AVMEDIA_TYPE_VIDEO,
1506 .id = AV_CODEC_ID_SVQ3,
1507 .priv_data_size = sizeof(SVQ3Context),
1508 .init = svq3_decode_init,
1509 .close = svq3_decode_end,
1510 .decode = svq3_decode_frame,
1511 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1512 AV_CODEC_CAP_DR1 |
1513 AV_CODEC_CAP_DELAY,
1514 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1515 AV_PIX_FMT_NONE},
1516 };