svq3: move edge_emu_buffer to the SVQ3Context
[libav.git] / libavcodec / svq3.c
1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains 1 or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
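/*
 * A minimal caller-side sketch of the setup described above. This is an
 * illustration only; `image_desc` and `image_desc_size` are hypothetical
 * names for the ImageDescription payload (starting at 'SVQ3') that the
 * application has already extracted from the stsd atom:
 *
 *     avctx->extradata = av_mallocz(image_desc_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, image_desc, image_desc_size);
 *     avctx->extradata_size = image_desc_size;
 */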
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * svq3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 uint8_t *slice_buf;
83 int slice_size;
84 int halfpel_flag;
85 int thirdpel_flag;
86 int unknown_flag;
87 uint32_t watermark_key;
88 int adaptive_quant;
89 int next_p_frame_damaged;
90 int h_edge_pos;
91 int v_edge_pos;
92 int last_frame_output;
93
94 int mb_x, mb_y;
95 int mb_xy;
96
97 int chroma_pred_mode;
98 int intra16x16_pred_mode;
99
100 int8_t intra4x4_pred_mode_cache[5 * 8];
101 int8_t (*intra4x4_pred_mode);
102
103 unsigned int top_samples_available;
104 unsigned int topright_samples_available;
105 unsigned int left_samples_available;
106
107 uint8_t *edge_emu_buffer; // scratch buffer for MC edge emulation; allocated in get_buffer(), freed in svq3_decode_end()
108 } SVQ3Context;
109
110 #define FULLPEL_MODE 1
111 #define HALFPEL_MODE 2
112 #define THIRDPEL_MODE 3
113 #define PREDICT_MODE 4
114
115 /* dual scan (from some older h264 draft)
116 * o-->o-->o   o
117 *         |  /|
118 * o   o   o / o
119 * | / |   |/  |
120 * o   o   o   o
121 *   /
122 * o-->o-->o-->o
123 */
124 static const uint8_t svq3_scan[16] = {
125 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
126 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
127 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
128 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
129 };
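/*
 * Illustration only (not used by the decoder): each svq3_scan[] entry encodes
 * a position in the 4x4 block as x + 4 * y. For example, svq3_scan[3] ==
 * 2 + 1 * 4, i.e. the fourth coefficient along the dual scan lands at
 * column 2, row 1 -- the first downward step in the diagram above.
 */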
130
131 static const uint8_t luma_dc_zigzag_scan[16] = {
132 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
133 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
134 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
135 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
136 };
137
138 static const uint8_t svq3_pred_0[25][2] = {
139 { 0, 0 },
140 { 1, 0 }, { 0, 1 },
141 { 0, 2 }, { 1, 1 }, { 2, 0 },
142 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
143 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
144 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
145 { 2, 4 }, { 3, 3 }, { 4, 2 },
146 { 4, 3 }, { 3, 4 },
147 { 4, 4 }
148 };
149
150 static const int8_t svq3_pred_1[6][6][5] = {
151 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
152 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
153 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
154 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
155 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
156 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
157 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
158 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
159 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
160 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
161 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
162 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
163 };
164
165 static const struct {
166 uint8_t run;
167 uint8_t level;
168 } svq3_dct_tables[2][16] = {
169 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
170 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
171 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
172 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
173 };
174
175 static const uint32_t svq3_dequant_coeff[32] = {
176 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
177 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
178 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
179 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
180 };
181
182 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
183 {
184 const int qmul = svq3_dequant_coeff[qp];
185 #define stride 16
186 int i;
187 int temp[16];
188 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
189
190 for (i = 0; i < 4; i++) {
191 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
192 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
193 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
194 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
195
196 temp[4 * i + 0] = z0 + z3;
197 temp[4 * i + 1] = z1 + z2;
198 temp[4 * i + 2] = z1 - z2;
199 temp[4 * i + 3] = z0 - z3;
200 }
201
202 for (i = 0; i < 4; i++) {
203 const int offset = x_offset[i];
204 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
205 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
206 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
207 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
208
209 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
210 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
211 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
212 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
213 }
214 }
215 #undef stride
216
217 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
218 int stride, int qp, int dc)
219 {
220 const int qmul = svq3_dequant_coeff[qp];
221 int i;
222
223 if (dc) {
224 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
225 : qmul * (block[0] >> 3) / 2);
226 block[0] = 0;
227 }
228
229 for (i = 0; i < 4; i++) {
230 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
231 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
232 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
233 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
234
235 block[0 + 4 * i] = z0 + z3;
236 block[1 + 4 * i] = z1 + z2;
237 block[2 + 4 * i] = z1 - z2;
238 block[3 + 4 * i] = z0 - z3;
239 }
240
241 for (i = 0; i < 4; i++) {
242 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
243 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
244 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
245 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
246 const int rr = (dc + 0x80000);
247
248 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
249 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
250 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
251 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
252 }
253
254 memset(block, 0, 16 * sizeof(int16_t));
255 }
256
257 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
258 int index, const int type)
259 {
260 static const uint8_t *const scan_patterns[4] = {
261 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
262 };
263
264 int run, level, limit;
265 unsigned vlc;
266 const int intra = 3 * type >> 2;
267 const uint8_t *const scan = scan_patterns[type];
268
269 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
270 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
271 int sign = (vlc & 1) ? 0 : -1;
272 vlc = vlc + 1 >> 1;
273
274 if (type == 3) {
275 if (vlc < 3) {
276 run = 0;
277 level = vlc;
278 } else if (vlc < 4) {
279 run = 1;
280 level = 1;
281 } else {
282 run = vlc & 0x3;
283 level = (vlc + 9 >> 2) - run;
284 }
285 } else {
286 if (vlc < 16) {
287 run = svq3_dct_tables[intra][vlc].run;
288 level = svq3_dct_tables[intra][vlc].level;
289 } else if (intra) {
290 run = vlc & 0x7;
291 level = (vlc >> 3) +
292 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
293 } else {
294 run = vlc & 0xF;
295 level = (vlc >> 4) +
296 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
297 }
298 }
299
300 if ((index += run) >= limit)
301 return -1;
302
303 block[scan[index]] = (level ^ sign) - sign;
304 }
305
306 if (type != 2) {
307 break;
308 }
309 }
310
311 return 0;
312 }
313
314 static inline void svq3_mc_dir_part(SVQ3Context *s,
315 int x, int y, int width, int height,
316 int mx, int my, int dxy,
317 int thirdpel, int dir, int avg)
318 {
319 H264Context *h = &s->h;
320 H264SliceContext *sl = &h->slice_ctx[0];
321 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
322 uint8_t *src, *dest;
323 int i, emu = 0;
324 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
325
326 mx += x;
327 my += y;
328
329 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
330 my < 0 || my >= s->v_edge_pos - height - 1) {
331 emu = 1;
332 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
333 my = av_clip(my, -16, s->v_edge_pos - height + 15);
334 }
335
336 /* form component predictions */
337 dest = h->cur_pic.f->data[0] + x + y * sl->linesize;
338 src = pic->f->data[0] + mx + my * sl->linesize;
339
340 if (emu) {
341 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
342 sl->linesize, sl->linesize,
343 width + 1, height + 1,
344 mx, my, s->h_edge_pos, s->v_edge_pos);
345 src = s->edge_emu_buffer;
346 }
347 if (thirdpel)
348 (avg ? s->tdsp.avg_tpel_pixels_tab
349 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, sl->linesize,
350 width, height);
351 else
352 (avg ? s->hdsp.avg_pixels_tab
353 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, sl->linesize,
354 height);
355
356 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
357 mx = mx + (mx < (int) x) >> 1;
358 my = my + (my < (int) y) >> 1;
359 width = width >> 1;
360 height = height >> 1;
361 blocksize++;
362
363 for (i = 1; i < 3; i++) {
364 dest = h->cur_pic.f->data[i] + (x >> 1) + (y >> 1) * sl->uvlinesize;
365 src = pic->f->data[i] + mx + my * sl->uvlinesize;
366
367 if (emu) {
368 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
369 sl->uvlinesize, sl->uvlinesize,
370 width + 1, height + 1,
371 mx, my, (s->h_edge_pos >> 1),
372 s->v_edge_pos >> 1);
373 src = s->edge_emu_buffer;
374 }
375 if (thirdpel)
376 (avg ? s->tdsp.avg_tpel_pixels_tab
377 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
378 sl->uvlinesize,
379 width, height);
380 else
381 (avg ? s->hdsp.avg_pixels_tab
382 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
383 sl->uvlinesize,
384 height);
385 }
386 }
387 }
388
389 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
390 int dir, int avg)
391 {
392 int i, j, k, mx, my, dx, dy, x, y;
393 H264Context *h = &s->h;
394 H264SliceContext *sl = &h->slice_ctx[0];
395 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
396 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
397 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
398 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
399 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
400
401 for (i = 0; i < 16; i += part_height)
402 for (j = 0; j < 16; j += part_width) {
403 const int b_xy = (4 * s->mb_x + (j >> 2)) +
404 (4 * s->mb_y + (i >> 2)) * h->b_stride;
405 int dxy;
406 x = 16 * s->mb_x + j;
407 y = 16 * s->mb_y + i;
408 k = (j >> 2 & 1) + (i >> 1 & 2) +
409 (j >> 1 & 4) + (i & 8);
410
411 if (mode != PREDICT_MODE) {
412 pred_motion(h, sl, k, part_width >> 2, dir, 1, &mx, &my);
413 } else {
414 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
415 my = s->next_pic->motion_val[0][b_xy][1] << 1;
416
417 if (dir == 0) {
418 mx = mx * h->frame_num_offset /
419 h->prev_frame_num_offset + 1 >> 1;
420 my = my * h->frame_num_offset /
421 h->prev_frame_num_offset + 1 >> 1;
422 } else {
423 mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
424 h->prev_frame_num_offset + 1 >> 1;
425 my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
426 h->prev_frame_num_offset + 1 >> 1;
427 }
428 }
429
430 /* clip motion vector prediction to frame border */
431 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
432 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
433
434 /* get (optional) motion vector differential */
435 if (mode == PREDICT_MODE) {
436 dx = dy = 0;
437 } else {
438 dy = svq3_get_se_golomb(&h->gb);
439 dx = svq3_get_se_golomb(&h->gb);
440
441 if (dx == INVALID_VLC || dy == INVALID_VLC) {
442 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
443 return -1;
444 }
445 }
446
447 /* compute motion vector */
448 if (mode == THIRDPEL_MODE) {
449 int fx, fy;
450 mx = (mx + 1 >> 1) + dx;
451 my = (my + 1 >> 1) + dy;
452 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
453 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
454 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
455
456 svq3_mc_dir_part(s, x, y, part_width, part_height,
457 fx, fy, dxy, 1, dir, avg);
458 mx += mx;
459 my += my;
460 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
461 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
462 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
463 dxy = (mx & 1) + 2 * (my & 1);
464
465 svq3_mc_dir_part(s, x, y, part_width, part_height,
466 mx >> 1, my >> 1, dxy, 0, dir, avg);
467 mx *= 3;
468 my *= 3;
469 } else {
470 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
471 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
472
473 svq3_mc_dir_part(s, x, y, part_width, part_height,
474 mx, my, 0, 0, dir, avg);
475 mx *= 6;
476 my *= 6;
477 }
478
479 /* update mv_cache */
480 if (mode != PREDICT_MODE) {
481 int32_t mv = pack16to32(mx, my);
482
483 if (part_height == 8 && i < 8) {
484 AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 * 8], mv);
485
486 if (part_width == 8 && j < 8)
487 AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
488 }
489 if (part_width == 8 && j < 8)
490 AV_WN32A(sl->mv_cache[dir][scan8[k] + 1], mv);
491 if (part_width == 4 || part_height == 4)
492 AV_WN32A(sl->mv_cache[dir][scan8[k]], mv);
493 }
494
495 /* write back motion vectors */
496 fill_rectangle(h->cur_pic.motion_val[dir][b_xy],
497 part_width >> 2, part_height >> 2, h->b_stride,
498 pack16to32(mx, my), 4);
499 }
500
501 return 0;
502 }
503
504 static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
505 int mb_type, const int *block_offset,
506 int linesize, uint8_t *dest_y)
507 {
508 int i;
509 if (!IS_INTRA4x4(mb_type)) {
510 for (i = 0; i < 16; i++)
511 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
512 uint8_t *const ptr = dest_y + block_offset[i];
513 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize,
514 sl->qscale, IS_INTRA(mb_type) ? 1 : 0);
515 }
516 }
517 }
518
519 static av_always_inline int dctcoef_get(int16_t *mb, int index)
520 {
521 return AV_RN16A(mb + index);
522 }
523
524 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
525 const H264Context *h,
526 H264SliceContext *sl,
527 int mb_type,
528 const int *block_offset,
529 int linesize,
530 uint8_t *dest_y)
531 {
532 int i;
533 int qscale = sl->qscale;
534
535 if (IS_INTRA4x4(mb_type)) {
536 for (i = 0; i < 16; i++) {
537 uint8_t *const ptr = dest_y + block_offset[i];
538 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
539
540 uint8_t *topright;
541 int nnz, tr;
542 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
543 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
544 assert(s->mb_y || linesize <= block_offset[i]);
545 if (!topright_avail) {
546 tr = ptr[3 - linesize] * 0x01010101u;
547 topright = (uint8_t *)&tr;
548 } else
549 topright = ptr + 4 - linesize;
550 } else
551 topright = NULL;
552
553 s->hpc.pred4x4[dir](ptr, topright, linesize);
554 nnz = sl->non_zero_count_cache[scan8[i]];
555 if (nnz) {
556 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize, qscale, 0);
557 }
558 }
559 } else {
560 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
561 svq3_luma_dc_dequant_idct_c(sl->mb, sl->mb_luma_dc[0], qscale);
562 }
563 }
564
565 static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext *sl)
566 {
567 const int mb_x = s->mb_x;
568 const int mb_y = s->mb_y;
569 const int mb_xy = s->mb_xy;
570 const int mb_type = h->cur_pic.mb_type[mb_xy];
571 uint8_t *dest_y, *dest_cb, *dest_cr;
572 int linesize, uvlinesize;
573 int i, j;
574 const int *block_offset = &h->block_offset[0];
575 const int block_h = 16 >> h->chroma_y_shift;
576
577 dest_y = h->cur_pic.f->data[0] + (mb_x + mb_y * sl->linesize) * 16;
578 dest_cb = h->cur_pic.f->data[1] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
579 dest_cr = h->cur_pic.f->data[2] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
580
581 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * sl->linesize + 64, sl->linesize, 4);
582 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * sl->uvlinesize + 64, dest_cr - dest_cb, 2);
583
584 h->list_counts[mb_xy] = sl->list_count;
585
586 linesize = sl->mb_linesize = sl->linesize;
587 uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;
588
589 if (IS_INTRA(mb_type)) {
590 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
591 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
592
593 hl_decode_mb_predict_luma(s, h, sl, mb_type, block_offset, linesize, dest_y);
594 }
595
596 hl_decode_mb_idct_luma(h, sl, mb_type, block_offset, linesize, dest_y);
597
598 if (sl->cbp & 0x30) {
599 uint8_t *dest[2] = { dest_cb, dest_cr };
600 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 1,
601 h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][sl->chroma_qp[0]][0]);
602 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 2,
603 h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][sl->chroma_qp[1]][0]);
604 for (j = 1; j < 3; j++) {
605 for (i = j * 16; i < j * 16 + 4; i++)
606 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
607 uint8_t *const ptr = dest[j - 1] + block_offset[i];
608 svq3_add_idct_c(ptr, sl->mb + i * 16,
609 uvlinesize, ff_h264_chroma_qp[0][sl->qscale + 12] - 12, 2);
610 }
611 }
612 }
613 }
614
615 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
616 {
617 H264Context *h = &s->h;
618 H264SliceContext *sl = &h->slice_ctx[0];
619 int i, j, k, m, dir, mode;
620 int cbp = 0;
621 uint32_t vlc;
622 int8_t *top, *left;
623 const int mb_xy = s->mb_xy;
624 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * h->b_stride;
625
626 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
627 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
628 s->topright_samples_available = 0xFFFF;
629
630 if (mb_type == 0) { /* SKIP */
631 if (h->pict_type == AV_PICTURE_TYPE_P ||
632 s->next_pic->mb_type[mb_xy] == -1) {
633 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
634 0, 0, 0, 0, 0, 0);
635
636 if (h->pict_type == AV_PICTURE_TYPE_B)
637 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
638 0, 0, 0, 0, 1, 1);
639
640 mb_type = MB_TYPE_SKIP;
641 } else {
642 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
643 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
644 return -1;
645 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
646 return -1;
647
648 mb_type = MB_TYPE_16x16;
649 }
650 } else if (mb_type < 8) { /* INTER */
651 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
652 mode = THIRDPEL_MODE;
653 else if (s->halfpel_flag &&
654 s->thirdpel_flag == !get_bits1(&h->gb))
655 mode = HALFPEL_MODE;
656 else
657 mode = FULLPEL_MODE;
658
659 /* fill caches */
660 /* note ref_cache should contain here:
661 * ????????
662 * ???11111
663 * N??11111
664 * N??11111
665 * N??11111
666 */
667
668 for (m = 0; m < 2; m++) {
669 if (s->mb_x > 0 && s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
670 for (i = 0; i < 4; i++)
671 AV_COPY32(sl->mv_cache[m][scan8[0] - 1 + i * 8],
672 h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
673 } else {
674 for (i = 0; i < 4; i++)
675 AV_ZERO32(sl->mv_cache[m][scan8[0] - 1 + i * 8]);
676 }
677 if (s->mb_y > 0) {
678 memcpy(sl->mv_cache[m][scan8[0] - 1 * 8],
679 h->cur_pic.motion_val[m][b_xy - h->b_stride],
680 4 * 2 * sizeof(int16_t));
681 memset(&sl->ref_cache[m][scan8[0] - 1 * 8],
682 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
683
684 if (s->mb_x < h->mb_width - 1) {
685 AV_COPY32(sl->mv_cache[m][scan8[0] + 4 - 1 * 8],
686 h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
687 sl->ref_cache[m][scan8[0] + 4 - 1 * 8] =
688 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
689 s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
690 } else
691 sl->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
692 if (s->mb_x > 0) {
693 AV_COPY32(sl->mv_cache[m][scan8[0] - 1 - 1 * 8],
694 h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
695 sl->ref_cache[m][scan8[0] - 1 - 1 * 8] =
696 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
697 } else
698 sl->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
699 } else
700 memset(&sl->ref_cache[m][scan8[0] - 1 * 8 - 1],
701 PART_NOT_AVAILABLE, 8);
702
703 if (h->pict_type != AV_PICTURE_TYPE_B)
704 break;
705 }
706
707 /* decode motion vector(s) and form prediction(s) */
708 if (h->pict_type == AV_PICTURE_TYPE_P) {
709 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
710 return -1;
711 } else { /* AV_PICTURE_TYPE_B */
712 if (mb_type != 2) {
713 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
714 return -1;
715 } else {
716 for (i = 0; i < 4; i++)
717 memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
718 0, 4 * 2 * sizeof(int16_t));
719 }
720 if (mb_type != 1) {
721 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
722 return -1;
723 } else {
724 for (i = 0; i < 4; i++)
725 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
726 0, 4 * 2 * sizeof(int16_t));
727 }
728 }
729
730 mb_type = MB_TYPE_16x16;
731 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
732 int8_t *i4x4 = s->intra4x4_pred_mode + h->mb2br_xy[s->mb_xy];
733 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
734
735 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
736
737 if (mb_type == 8) {
738 if (s->mb_x > 0) {
739 for (i = 0; i < 4; i++)
740 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
741 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
742 s->left_samples_available = 0x5F5F;
743 }
744 if (s->mb_y > 0) {
745 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
746 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
747 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
748 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
749
750 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
751 s->top_samples_available = 0x33FF;
752 }
753
754 /* decode prediction codes for luma blocks */
755 for (i = 0; i < 16; i += 2) {
756 vlc = svq3_get_ue_golomb(&h->gb);
757
758 if (vlc >= 25) {
759 av_log(h->avctx, AV_LOG_ERROR,
760 "luma prediction:%"PRIu32"\n", vlc);
761 return -1;
762 }
763
764 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
765 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
766
767 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
768 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
769
770 if (left[1] == -1 || left[2] == -1) {
771 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
772 return -1;
773 }
774 }
775 } else { /* mb_type == 33, DC_128_PRED block type */
776 for (i = 0; i < 4; i++)
777 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
778 }
779
780 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
781 i4x4[4] = i4x4_cache[7 + 8 * 3];
782 i4x4[5] = i4x4_cache[7 + 8 * 2];
783 i4x4[6] = i4x4_cache[7 + 8 * 1];
784
785 if (mb_type == 8) {
786 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
787 h->avctx, s->top_samples_available,
788 s->left_samples_available);
789
790 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
791 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
792 } else {
793 for (i = 0; i < 4; i++)
794 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
795
796 s->top_samples_available = 0x33FF;
797 s->left_samples_available = 0x5F5F;
798 }
799
800 mb_type = MB_TYPE_INTRA4x4;
801 } else { /* INTRA16x16 */
802 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
803 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
804
805 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
806 s->left_samples_available, dir, 0)) < 0) {
807 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
808 return s->intra16x16_pred_mode;
809 }
810
811 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
812 mb_type = MB_TYPE_INTRA16x16;
813 }
814
815 if (!IS_INTER(mb_type) && h->pict_type != AV_PICTURE_TYPE_I) {
816 for (i = 0; i < 4; i++)
817 memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
818 0, 4 * 2 * sizeof(int16_t));
819 if (h->pict_type == AV_PICTURE_TYPE_B) {
820 for (i = 0; i < 4; i++)
821 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
822 0, 4 * 2 * sizeof(int16_t));
823 }
824 }
825 if (!IS_INTRA4x4(mb_type)) {
826 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
827 }
828 if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
829 memset(sl->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
830 }
831
832 if (!IS_INTRA16x16(mb_type) &&
833 (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B)) {
834 if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
835 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
836 return -1;
837 }
838
839 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
840 : ff_h264_golomb_to_inter_cbp[vlc];
841 }
842 if (IS_INTRA16x16(mb_type) ||
843 (h->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
844 sl->qscale += svq3_get_se_golomb(&h->gb);
845
846 if (sl->qscale > 31u) {
847 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", sl->qscale);
848 return -1;
849 }
850 }
851 if (IS_INTRA16x16(mb_type)) {
852 AV_ZERO128(sl->mb_luma_dc[0] + 0);
853 AV_ZERO128(sl->mb_luma_dc[0] + 8);
854 if (svq3_decode_block(&h->gb, sl->mb_luma_dc[0], 0, 1)) {
855 av_log(h->avctx, AV_LOG_ERROR,
856 "error while decoding intra luma dc\n");
857 return -1;
858 }
859 }
860
861 if (cbp) {
862 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
863 const int type = ((sl->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
864
865 for (i = 0; i < 4; i++)
866 if ((cbp & (1 << i))) {
867 for (j = 0; j < 4; j++) {
868 k = index ? (1 * (j & 1) + 2 * (i & 1) +
869 2 * (j & 2) + 4 * (i & 2))
870 : (4 * i + j);
871 sl->non_zero_count_cache[scan8[k]] = 1;
872
873 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], index, type)) {
874 av_log(h->avctx, AV_LOG_ERROR,
875 "error while decoding block\n");
876 return -1;
877 }
878 }
879 }
880
881 if ((cbp & 0x30)) {
882 for (i = 1; i < 3; ++i)
883 if (svq3_decode_block(&h->gb, &sl->mb[16 * 16 * i], 0, 3)) {
884 av_log(h->avctx, AV_LOG_ERROR,
885 "error while decoding chroma dc block\n");
886 return -1;
887 }
888
889 if ((cbp & 0x20)) {
890 for (i = 1; i < 3; i++) {
891 for (j = 0; j < 4; j++) {
892 k = 16 * i + j;
893 sl->non_zero_count_cache[scan8[k]] = 1;
894
895 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], 1, 1)) {
896 av_log(h->avctx, AV_LOG_ERROR,
897 "error while decoding chroma ac block\n");
898 return -1;
899 }
900 }
901 }
902 }
903 }
904 }
905
906 sl->cbp = cbp;
907 h->cur_pic.mb_type[mb_xy] = mb_type;
908
909 if (IS_INTRA(mb_type))
910 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
911 s->left_samples_available, DC_PRED8x8, 1);
912
913 return 0;
914 }
915
916 static int svq3_decode_slice_header(AVCodecContext *avctx)
917 {
918 SVQ3Context *s = avctx->priv_data;
919 H264Context *h = &s->h;
920 H264SliceContext *sl = &h->slice_ctx[0];
921 const int mb_xy = s->mb_xy;
922 int i, header;
923 unsigned slice_id;
924
925 header = get_bits(&s->gb, 8);
926
927 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
928 /* TODO: what? */
929 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
930 return -1;
931 } else {
932 int slice_bits, slice_bytes, slice_length;
933 int length = header >> 5 & 3;
934
935 slice_length = show_bits(&s->gb, 8 * length);
936 slice_bits = slice_length * 8;
937 slice_bytes = slice_length + length - 1;
938
939 if (slice_bytes > get_bits_left(&s->gb)) {
940 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
941 return -1;
942 }
943
944 skip_bits(&s->gb, 8);
945
946 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
947 if (!s->slice_buf)
948 return AVERROR(ENOMEM);
949
950 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
951
952 init_get_bits(&h->gb, s->slice_buf, slice_bits);
953
954 if (s->watermark_key) {
955 uint32_t header = AV_RL32(&h->gb.buffer[1]);
956 AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
957 }
958 if (length > 0) {
959 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
960 }
961 skip_bits_long(&s->gb, slice_bytes * 8);
962 }
963
964 if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
965 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
966 return -1;
967 }
968
969 sl->slice_type = ff_h264_golomb_to_pict_type[slice_id];
970
971 if ((header & 0x9F) == 2) {
972 i = (h->mb_num < 64) ? 6 : (1 + av_log2(h->mb_num - 1));
973 sl->mb_skip_run = get_bits(&h->gb, i) -
974 (s->mb_y * h->mb_width + s->mb_x);
975 } else {
976 skip_bits1(&h->gb);
977 sl->mb_skip_run = 0;
978 }
979
980 sl->slice_num = get_bits(&h->gb, 8);
981 sl->qscale = get_bits(&h->gb, 5);
982 s->adaptive_quant = get_bits1(&h->gb);
983
984 /* unknown fields */
985 skip_bits1(&h->gb);
986
987 if (s->unknown_flag)
988 skip_bits1(&h->gb);
989
990 skip_bits1(&h->gb);
991 skip_bits(&h->gb, 2);
992
993 while (get_bits1(&h->gb))
994 skip_bits(&h->gb, 8);
995
996 /* reset intra predictors and invalidate motion vector references */
997 if (s->mb_x > 0) {
998 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
999 -1, 4 * sizeof(int8_t));
1000 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - s->mb_x],
1001 -1, 8 * sizeof(int8_t) * s->mb_x);
1002 }
1003 if (s->mb_y > 0) {
1004 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
1005 -1, 8 * sizeof(int8_t) * (h->mb_width - s->mb_x));
1006
1007 if (s->mb_x > 0)
1008 s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
1009 }
1010
1011 return 0;
1012 }
1013
1014 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1015 {
1016 SVQ3Context *s = avctx->priv_data;
1017 H264Context *h = &s->h;
1018 H264SliceContext *sl;
1019 int m;
1020 unsigned char *extradata;
1021 unsigned char *extradata_end;
1022 unsigned int size;
1023 int marker_found = 0;
1024
1025 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1026 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1027 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1028 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1029 av_freep(&s->cur_pic);
1030 av_freep(&s->last_pic);
1031 av_freep(&s->next_pic);
1032 return AVERROR(ENOMEM);
1033 }
1034
1035 s->cur_pic->f = av_frame_alloc();
1036 s->last_pic->f = av_frame_alloc();
1037 s->next_pic->f = av_frame_alloc();
1038 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1039 return AVERROR(ENOMEM);
1040
1041 if (ff_h264_decode_init(avctx) < 0)
1042 return -1;
1043
1044 // we will overwrite it later during decoding
1045 av_frame_free(&h->cur_pic.f);
1046
1047 ff_h264dsp_init(&s->h264dsp, 8, 1);
1048 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1049 ff_videodsp_init(&s->vdsp, 8);
1050
1051 memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1052 memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
1053
1054 h->sps.bit_depth_luma = 8;
1055 h->chroma_format_idc = 1;
1056
1057 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1058 ff_tpeldsp_init(&s->tdsp);
1059
1060 sl = h->slice_ctx;
1061
1062 h->flags = avctx->flags;
1063 sl->is_complex = 1;
1064 h->picture_structure = PICT_FRAME;
1065 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1066 avctx->color_range = AVCOL_RANGE_JPEG;
1067
1068 h->slice_ctx[0].chroma_qp[0] = h->slice_ctx[0].chroma_qp[1] = 4;
1069 h->chroma_x_shift = h->chroma_y_shift = 1;
1070
1071 s->halfpel_flag = 1;
1072 s->thirdpel_flag = 1;
1073 s->unknown_flag = 0;
1074
1075 /* prowl for the "SEQH" marker in the extradata */
1076 extradata = (unsigned char *)avctx->extradata;
1077 extradata_end = avctx->extradata + avctx->extradata_size;
1078 if (extradata) {
1079 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1080 if (!memcmp(extradata, "SEQH", 4)) {
1081 marker_found = 1;
1082 break;
1083 }
1084 extradata++;
1085 }
1086 }
1087
1088 /* if a match was found, parse the extra data */
1089 if (marker_found) {
1090 GetBitContext gb;
1091 int frame_size_code;
1092
1093 size = AV_RB32(&extradata[4]);
1094 if (size > extradata_end - extradata - 8)
1095 return AVERROR_INVALIDDATA;
1096 init_get_bits(&gb, extradata + 8, size * 8);
1097
1098 /* 'frame size code' and optional 'width, height' */
1099 frame_size_code = get_bits(&gb, 3);
1100 switch (frame_size_code) {
1101 case 0:
1102 avctx->width = 160;
1103 avctx->height = 120;
1104 break;
1105 case 1:
1106 avctx->width = 128;
1107 avctx->height = 96;
1108 break;
1109 case 2:
1110 avctx->width = 176;
1111 avctx->height = 144;
1112 break;
1113 case 3:
1114 avctx->width = 352;
1115 avctx->height = 288;
1116 break;
1117 case 4:
1118 avctx->width = 704;
1119 avctx->height = 576;
1120 break;
1121 case 5:
1122 avctx->width = 240;
1123 avctx->height = 180;
1124 break;
1125 case 6:
1126 avctx->width = 320;
1127 avctx->height = 240;
1128 break;
1129 case 7:
1130 avctx->width = get_bits(&gb, 12);
1131 avctx->height = get_bits(&gb, 12);
1132 break;
1133 }
1134
1135 s->halfpel_flag = get_bits1(&gb);
1136 s->thirdpel_flag = get_bits1(&gb);
1137
1138 /* unknown fields */
1139 skip_bits1(&gb);
1140 skip_bits1(&gb);
1141 skip_bits1(&gb);
1142 skip_bits1(&gb);
1143
1144 h->low_delay = get_bits1(&gb);
1145
1146 /* unknown field */
1147 skip_bits1(&gb);
1148
1149 while (get_bits1(&gb))
1150 skip_bits(&gb, 8);
1151
1152 s->unknown_flag = get_bits1(&gb);
1153 avctx->has_b_frames = !h->low_delay;
1154 if (s->unknown_flag) {
1155 #if CONFIG_ZLIB
1156 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1157 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1158 int u1 = svq3_get_ue_golomb(&gb);
1159 int u2 = get_bits(&gb, 8);
1160 int u3 = get_bits(&gb, 2);
1161 int u4 = svq3_get_ue_golomb(&gb);
1162 unsigned long buf_len = watermark_width *
1163 watermark_height * 4;
1164 int offset = get_bits_count(&gb) + 7 >> 3;
1165 uint8_t *buf;
1166
1167 if (watermark_height > 0 &&
1168 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1169 return -1;
1170
1171 buf = av_malloc(buf_len);
1172 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1173 watermark_width, watermark_height);
1174 av_log(avctx, AV_LOG_DEBUG,
1175 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1176 u1, u2, u3, u4, offset);
1177 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1178 size - offset) != Z_OK) {
1179 av_log(avctx, AV_LOG_ERROR,
1180 "could not uncompress watermark logo\n");
1181 av_free(buf);
1182 return -1;
1183 }
1184 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1185 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1186 av_log(avctx, AV_LOG_DEBUG,
1187 "watermark key %#"PRIx32"\n", s->watermark_key);
1188 av_free(buf);
1189 #else
1190 av_log(avctx, AV_LOG_ERROR,
1191 "this svq3 file contains watermark which need zlib support compiled in\n");
1192 return -1;
1193 #endif
1194 }
1195 }
1196
1197 h->width = avctx->width;
1198 h->height = avctx->height;
1199 h->mb_width = (h->width + 15) / 16;
1200 h->mb_height = (h->height + 15) / 16;
1201 h->mb_stride = h->mb_width + 1;
1202 h->mb_num = h->mb_width * h->mb_height;
1203 h->b_stride = 4 * h->mb_width;
1204 s->h_edge_pos = h->mb_width * 16;
1205 s->v_edge_pos = h->mb_height * 16;
1206
1207 s->intra4x4_pred_mode = av_mallocz(h->mb_stride * 2 * 8);
1208 if (!s->intra4x4_pred_mode)
1209 return AVERROR(ENOMEM);
1210
1211 if (ff_h264_alloc_tables(h) < 0) {
1212 av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
1213 return AVERROR(ENOMEM);
1214 }
1215
1216 return 0;
1217 }
1218
1219 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1220 {
1221 int i;
1222 for (i = 0; i < 2; i++) {
1223 av_buffer_unref(&pic->motion_val_buf[i]);
1224 av_buffer_unref(&pic->ref_index_buf[i]);
1225 }
1226 av_buffer_unref(&pic->mb_type_buf);
1227
1228 av_frame_unref(pic->f);
1229 }
1230
1231 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1232 {
1233 SVQ3Context *s = avctx->priv_data;
1234 H264Context *h = &s->h;
1235 H264SliceContext *sl = &h->slice_ctx[0];
1236 const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1;
1237 const int mb_array_size = h->mb_stride * h->mb_height;
1238 const int b4_stride = h->mb_width * 4 + 1;
1239 const int b4_array_size = b4_stride * h->mb_height * 4;
1240 int ret;
1241
1242 if (!pic->motion_val_buf[0]) {
1243 int i;
1244
1245 pic->mb_type_buf = av_buffer_allocz((big_mb_num + h->mb_stride) * sizeof(uint32_t));
1246 if (!pic->mb_type_buf)
1247 return AVERROR(ENOMEM);
1248 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
1249
1250 for (i = 0; i < 2; i++) {
1251 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1252 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1253 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1254 ret = AVERROR(ENOMEM);
1255 goto fail;
1256 }
1257
1258 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1259 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1260 }
1261 }
1262 pic->reference = !(h->pict_type == AV_PICTURE_TYPE_B);
1263
1264 ret = ff_get_buffer(avctx, pic->f,
1265 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1266 if (ret < 0)
1267 goto fail;
1268
1269 if (!s->edge_emu_buffer) {
1270 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1271 if (!s->edge_emu_buffer)
1272 return AVERROR(ENOMEM);
1273 }
1274
1275 sl->linesize = pic->f->linesize[0];
1276 sl->uvlinesize = pic->f->linesize[1];
1277
1278 return 0;
1279 fail:
1280 free_picture(avctx, pic);
1281 return ret;
1282 }
1283
1284 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1285 int *got_frame, AVPacket *avpkt)
1286 {
1287 const uint8_t *buf = avpkt->data;
1288 SVQ3Context *s = avctx->priv_data;
1289 H264Context *h = &s->h;
1290 H264SliceContext *sl = &h->slice_ctx[0];
1291 int buf_size = avpkt->size;
1292 int ret, m, i;
1293
1294 /* special case for last picture */
1295 if (buf_size == 0) {
1296 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1297 ret = av_frame_ref(data, s->next_pic->f);
1298 if (ret < 0)
1299 return ret;
1300 s->last_frame_output = 1;
1301 *got_frame = 1;
1302 }
1303 return 0;
1304 }
1305
1306 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1307 if (ret < 0)
1308 return ret;
1309
1310 s->mb_x = s->mb_y = s->mb_xy = 0;
1311
1312 if (svq3_decode_slice_header(avctx))
1313 return -1;
1314
1315 h->pict_type = sl->slice_type;
1316
1317 if (h->pict_type != AV_PICTURE_TYPE_B)
1318 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1319
1320 av_frame_unref(s->cur_pic->f);
1321
1322 /* for skipping the frame */
1323 s->cur_pic->f->pict_type = h->pict_type;
1324 s->cur_pic->f->key_frame = (h->pict_type == AV_PICTURE_TYPE_I);
1325
1326 ret = get_buffer(avctx, s->cur_pic);
1327 if (ret < 0)
1328 return ret;
1329
1330 h->cur_pic_ptr = s->cur_pic;
1331 h->cur_pic = *s->cur_pic;
1332
1333 for (i = 0; i < 16; i++) {
1334 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1335 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1336 }
1337 for (i = 0; i < 16; i++) {
1338 h->block_offset[16 + i] =
1339 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1340 h->block_offset[48 + 16 + i] =
1341 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1342 }
1343
1344 if (h->pict_type != AV_PICTURE_TYPE_I) {
1345 if (!s->last_pic->f->data[0]) {
1346 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1347 ret = get_buffer(avctx, s->last_pic);
1348 if (ret < 0)
1349 return ret;
1350 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1351 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1352 s->last_pic->f->linesize[1]);
1353 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1354 s->last_pic->f->linesize[2]);
1355 }
1356
1357 if (h->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1358 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1359 ret = get_buffer(avctx, s->next_pic);
1360 if (ret < 0)
1361 return ret;
1362 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1363 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1364 s->next_pic->f->linesize[1]);
1365 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1366 s->next_pic->f->linesize[2]);
1367 }
1368 }
1369
1370 if (avctx->debug & FF_DEBUG_PICT_INFO)
1371 av_log(h->avctx, AV_LOG_DEBUG,
1372 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1373 av_get_picture_type_char(h->pict_type),
1374 s->halfpel_flag, s->thirdpel_flag,
1375 s->adaptive_quant, h->slice_ctx[0].qscale, sl->slice_num);
1376
1377 if (avctx->skip_frame >= AVDISCARD_NONREF && h->pict_type == AV_PICTURE_TYPE_B ||
1378 avctx->skip_frame >= AVDISCARD_NONKEY && h->pict_type != AV_PICTURE_TYPE_I ||
1379 avctx->skip_frame >= AVDISCARD_ALL)
1380 return 0;
1381
1382 if (s->next_p_frame_damaged) {
1383 if (h->pict_type == AV_PICTURE_TYPE_B)
1384 return 0;
1385 else
1386 s->next_p_frame_damaged = 0;
1387 }
1388
1389 if (h->pict_type == AV_PICTURE_TYPE_B) {
1390 h->frame_num_offset = sl->slice_num - h->prev_frame_num;
1391
1392 if (h->frame_num_offset < 0)
1393 h->frame_num_offset += 256;
1394 if (h->frame_num_offset == 0 ||
1395 h->frame_num_offset >= h->prev_frame_num_offset) {
1396 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1397 return -1;
1398 }
1399 } else {
1400 h->prev_frame_num = h->frame_num;
1401 h->frame_num = sl->slice_num;
1402 h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
1403
1404 if (h->prev_frame_num_offset < 0)
1405 h->prev_frame_num_offset += 256;
1406 }
1407
1408 for (m = 0; m < 2; m++) {
1409 int i;
1410 for (i = 0; i < 4; i++) {
1411 int j;
1412 for (j = -1; j < 4; j++)
1413 sl->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1414 if (i < 3)
1415 sl->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1416 }
1417 }
1418
1419 for (s->mb_y = 0; s->mb_y < h->mb_height; s->mb_y++) {
1420 for (s->mb_x = 0; s->mb_x < h->mb_width; s->mb_x++) {
1421 unsigned mb_type;
1422 s->mb_xy = s->mb_x + s->mb_y * h->mb_stride;
1423
1424 if ((get_bits_left(&h->gb)) <= 7) {
1425 if (((get_bits_count(&h->gb) & 7) == 0 ||
1426 show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
1427
1428 if (svq3_decode_slice_header(avctx))
1429 return -1;
1430 }
1431 /* TODO: support s->mb_skip_run */
1432 }
1433
1434 mb_type = svq3_get_ue_golomb(&h->gb);
1435
1436 if (h->pict_type == AV_PICTURE_TYPE_I)
1437 mb_type += 8;
1438 else if (h->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1439 mb_type += 4;
1440 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1441 av_log(h->avctx, AV_LOG_ERROR,
1442 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1443 return -1;
1444 }
1445
1446 if (mb_type != 0)
1447 hl_decode_mb(s, h, &h->slice_ctx[0]);
1448
1449 if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1450 h->cur_pic.mb_type[s->mb_x + s->mb_y * h->mb_stride] =
1451 (h->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1452 }
1453
1454 ff_draw_horiz_band(avctx, s->cur_pic->f,
1455 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1456 16 * s->mb_y, 16, h->picture_structure, 0,
1457 h->low_delay);
1458 }
1459
1460 if (h->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1461 ret = av_frame_ref(data, s->cur_pic->f);
1462 else if (s->last_pic->f->data[0])
1463 ret = av_frame_ref(data, s->last_pic->f);
1464 if (ret < 0)
1465 return ret;
1466
1467 /* Do not output the last pic after seeking. */
1468 if (s->last_pic->f->data[0] || h->low_delay)
1469 *got_frame = 1;
1470
1471 if (h->pict_type != AV_PICTURE_TYPE_B) {
1472 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1473 } else {
1474 av_frame_unref(s->cur_pic->f);
1475 }
1476
1477 return buf_size;
1478 }
1479
1480 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1481 {
1482 SVQ3Context *s = avctx->priv_data;
1483 H264Context *h = &s->h;
1484
1485 free_picture(avctx, s->cur_pic);
1486 free_picture(avctx, s->next_pic);
1487 free_picture(avctx, s->last_pic);
1488 av_frame_free(&s->cur_pic->f);
1489 av_frame_free(&s->next_pic->f);
1490 av_frame_free(&s->last_pic->f);
1491 av_freep(&s->cur_pic);
1492 av_freep(&s->next_pic);
1493 av_freep(&s->last_pic);
1494 av_freep(&s->slice_buf);
1495 av_freep(&s->intra4x4_pred_mode);
1496 av_freep(&s->edge_emu_buffer);
1497
1498 memset(&h->cur_pic, 0, sizeof(h->cur_pic));
1499
1500 ff_h264_free_context(h);
1501
1502 return 0;
1503 }
1504
1505 AVCodec ff_svq3_decoder = {
1506 .name = "svq3",
1507 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1508 .type = AVMEDIA_TYPE_VIDEO,
1509 .id = AV_CODEC_ID_SVQ3,
1510 .priv_data_size = sizeof(SVQ3Context),
1511 .init = svq3_decode_init,
1512 .close = svq3_decode_end,
1513 .decode = svq3_decode_frame,
1514 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1515 AV_CODEC_CAP_DR1 |
1516 AV_CODEC_CAP_DELAY,
1517 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1518 AV_PIX_FMT_NONE},
1519 };