1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple QuickTime files. QuickTime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains one or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
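/*
 * A minimal caller-side sketch (not part of this file): assuming the demuxer
 * has already located the ImageDescription atom, that the hypothetical
 * pointer "imagedesc" points just past its 4-byte length field (i.e. at the
 * 'SVQ3' fourcc) and that "imagedesc_size" holds the remaining byte count,
 * the setup described above could look like:
 *
 *     avctx->extradata = av_mallocz(imagedesc_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, imagedesc, imagedesc_size);
 *     avctx->extradata_size = imagedesc_size;
 */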
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "golomb.h"
51 #include "hpeldsp.h"
52 #include "mathops.h"
53 #include "rectangle.h"
54 #include "tpeldsp.h"
55
56 #if CONFIG_ZLIB
57 #include <zlib.h>
58 #endif
59
60 #include "svq1.h"
61
62 /**
63 * @file
64 * SVQ3 decoder.
65 */
66
67 typedef struct SVQ3Context {
68 AVCodecContext *avctx;
69
70 H264DSPContext h264dsp;
71 H264PredContext hpc;
72 HpelDSPContext hdsp;
73 TpelDSPContext tdsp;
74 VideoDSPContext vdsp;
75
76 H264Picture *cur_pic;
77 H264Picture *next_pic;
78 H264Picture *last_pic;
79 GetBitContext gb;
80 GetBitContext gb_slice;
81 uint8_t *slice_buf;
82 int slice_size;
83 int halfpel_flag;
84 int thirdpel_flag;
85 int unknown_flag;
86 uint32_t watermark_key;
87 int adaptive_quant;
88 int next_p_frame_damaged;
89 int h_edge_pos;
90 int v_edge_pos;
91 int last_frame_output;
92 int slice_num;
93 int qscale;
94 int cbp;
95 int frame_num;
96 int frame_num_offset;
97 int prev_frame_num_offset;
98 int prev_frame_num;
99
100 enum AVPictureType pict_type;
101 int low_delay;
102
103 int mb_x, mb_y;
104 int mb_xy;
105 int mb_width, mb_height;
106 int mb_stride, mb_num;
107 int b_stride;
108
109 uint32_t *mb2br_xy;
110
111 int chroma_pred_mode;
112 int intra16x16_pred_mode;
113
114 int8_t intra4x4_pred_mode_cache[5 * 8];
115 int8_t (*intra4x4_pred_mode);
116
117 unsigned int top_samples_available;
118 unsigned int topright_samples_available;
119 unsigned int left_samples_available;
120
121 uint8_t *edge_emu_buffer;
122
123 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
124 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
125 DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
126 DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
127 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
128 uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
129 int block_offset[2 * (16 * 3)];
130 } SVQ3Context;
131
132 #define FULLPEL_MODE 1
133 #define HALFPEL_MODE 2
134 #define THIRDPEL_MODE 3
135 #define PREDICT_MODE 4
136
137 /* dual scan (from some older h264 draft)
138 *  o-->o-->o   o
139 *           |  /|
140 *  o   o   o / o
141 *  | / |   |/ |
142 *  o   o   o   o
143 *    /
144 *  o-->o-->o-->o
145 */
146 static const uint8_t svq3_scan[16] = {
147 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
148 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
149 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
150 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
151 };
152
153 static const uint8_t luma_dc_zigzag_scan[16] = {
154 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
155 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
156 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
157 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
158 };
159
160 static const uint8_t svq3_pred_0[25][2] = {
161 { 0, 0 },
162 { 1, 0 }, { 0, 1 },
163 { 0, 2 }, { 1, 1 }, { 2, 0 },
164 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
165 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
166 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
167 { 2, 4 }, { 3, 3 }, { 4, 2 },
168 { 4, 3 }, { 3, 4 },
169 { 4, 4 }
170 };
171
172 static const int8_t svq3_pred_1[6][6][5] = {
173 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
174 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
175 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
176 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
177 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
178 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
179 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
180 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
181 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
182 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
183 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
184 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
185 };
186
187 static const struct {
188 uint8_t run;
189 uint8_t level;
190 } svq3_dct_tables[2][16] = {
191 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
192 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
193 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
194 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
195 };
196
197 static const uint32_t svq3_dequant_coeff[32] = {
198 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
199 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
200 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
201 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
202 };
203
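/*
 * Inverse transform + dequantization for the 16 luma DC coefficients of an
 * intra 16x16 macroblock, using SVQ3's 13/17/7 integer kernel. Each result is
 * rounded as (v * qmul + 0x80000) >> 20 and scattered into the DC position of
 * the corresponding 4x4 block of the output macroblock buffer (hence the
 * x_offset/stride indexing).
 */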
204 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
205 {
206 const int qmul = svq3_dequant_coeff[qp];
207 #define stride 16
208 int i;
209 int temp[16];
210 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
211
212 for (i = 0; i < 4; i++) {
213 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
214 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
215 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
216 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
217
218 temp[4 * i + 0] = z0 + z3;
219 temp[4 * i + 1] = z1 + z2;
220 temp[4 * i + 2] = z1 - z2;
221 temp[4 * i + 3] = z0 - z3;
222 }
223
224 for (i = 0; i < 4; i++) {
225 const int offset = x_offset[i];
226 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
227 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
228 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
229 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
230
231 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
232 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
233 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
234 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
235 }
236 }
237 #undef stride
238
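/*
 * Inverse 4x4 transform (same 13/17/7 kernel) with dequantization folded in;
 * the result is added to dst with clipping to 8 bits. A nonzero dc selects a
 * separately handled DC term: dc == 1 for the intra luma DC already processed
 * by svq3_luma_dc_dequant_idct_c(), dc == 2 for the chroma DC path.
 */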
239 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
240 int stride, int qp, int dc)
241 {
242 const int qmul = svq3_dequant_coeff[qp];
243 int i;
244
245 if (dc) {
246 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
247 : qmul * (block[0] >> 3) / 2);
248 block[0] = 0;
249 }
250
251 for (i = 0; i < 4; i++) {
252 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
253 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
254 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
255 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
256
257 block[0 + 4 * i] = z0 + z3;
258 block[1 + 4 * i] = z1 + z2;
259 block[2 + 4 * i] = z1 - z2;
260 block[3 + 4 * i] = z0 - z3;
261 }
262
263 for (i = 0; i < 4; i++) {
264 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
265 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
266 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
267 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
268 const int rr = (dc + 0x80000);
269
270 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
271 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
272 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
273 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
274 }
275
276 memset(block, 0, 16 * sizeof(int16_t));
277 }
278
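/*
 * Decode Exp-Golomb coded (run, level) pairs into block[]. The scan order is
 * selected by type: 0 = luma DC zigzag, 1 = zigzag, 2 = SVQ3 dual scan,
 * 3 = chroma DC. Levels carry their sign folded into the VLC; a run that
 * steps past the current coefficient limit is treated as an error (-1).
 */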
279 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
280 int index, const int type)
281 {
282 static const uint8_t *const scan_patterns[4] = {
283 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
284 };
285
286 int run, level, limit;
287 unsigned vlc;
288 const int intra = 3 * type >> 2;
289 const uint8_t *const scan = scan_patterns[type];
290
291 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
292 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
293 int sign = (vlc & 1) ? 0 : -1;
294 vlc = vlc + 1 >> 1;
295
296 if (type == 3) {
297 if (vlc < 3) {
298 run = 0;
299 level = vlc;
300 } else if (vlc < 4) {
301 run = 1;
302 level = 1;
303 } else {
304 run = vlc & 0x3;
305 level = (vlc + 9 >> 2) - run;
306 }
307 } else {
308 if (vlc < 16) {
309 run = svq3_dct_tables[intra][vlc].run;
310 level = svq3_dct_tables[intra][vlc].level;
311 } else if (intra) {
312 run = vlc & 0x7;
313 level = (vlc >> 3) +
314 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
315 } else {
316 run = vlc & 0xF;
317 level = (vlc >> 4) +
318 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
319 }
320 }
321
322 if ((index += run) >= limit)
323 return -1;
324
325 block[scan[index]] = (level ^ sign) - sign;
326 }
327
328 if (type != 2) {
329 break;
330 }
331 }
332
333 return 0;
334 }
335
336 static av_always_inline int
337 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
338 int i, int list, int part_width)
339 {
340 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
341
342 if (topright_ref != PART_NOT_AVAILABLE) {
343 *C = s->mv_cache[list][i - 8 + part_width];
344 return topright_ref;
345 } else {
346 *C = s->mv_cache[list][i - 8 - 1];
347 return s->ref_cache[list][i - 8 - 1];
348 }
349 }
350
351 /**
352 * Get the predicted MV.
353 * @param n the block index
354 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
355 * @param mx the x component of the predicted motion vector
356 * @param my the y component of the predicted motion vector
357 */
358 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
359 int part_width, int list,
360 int ref, int *const mx, int *const my)
361 {
362 const int index8 = scan8[n];
363 const int top_ref = s->ref_cache[list][index8 - 8];
364 const int left_ref = s->ref_cache[list][index8 - 1];
365 const int16_t *const A = s->mv_cache[list][index8 - 1];
366 const int16_t *const B = s->mv_cache[list][index8 - 8];
367 const int16_t *C;
368 int diagonal_ref, match_count;
369
370 /* mv_cache
371 * B . . A T T T T
372 * U . . L . . , .
373 * U . . L . . . .
374 * U . . L . . , .
375 * . . . L . . . .
376 */
377
378 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
379 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
380 if (match_count > 1) { //most common
381 *mx = mid_pred(A[0], B[0], C[0]);
382 *my = mid_pred(A[1], B[1], C[1]);
383 } else if (match_count == 1) {
384 if (left_ref == ref) {
385 *mx = A[0];
386 *my = A[1];
387 } else if (top_ref == ref) {
388 *mx = B[0];
389 *my = B[1];
390 } else {
391 *mx = C[0];
392 *my = C[1];
393 }
394 } else {
395 if (top_ref == PART_NOT_AVAILABLE &&
396 diagonal_ref == PART_NOT_AVAILABLE &&
397 left_ref != PART_NOT_AVAILABLE) {
398 *mx = A[0];
399 *my = A[1];
400 } else {
401 *mx = mid_pred(A[0], B[0], C[0]);
402 *my = mid_pred(A[1], B[1], C[1]);
403 }
404 }
405 }
406
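/*
 * Motion compensation for one partition. dir selects the reference picture
 * (0 = last_pic, 1 = next_pic), thirdpel selects the tpel vs. hpel
 * interpolation tables, avg selects put vs. avg (used for bi-prediction), and
 * references reaching outside the frame go through emulated_edge_mc().
 */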
407 static inline void svq3_mc_dir_part(SVQ3Context *s,
408 int x, int y, int width, int height,
409 int mx, int my, int dxy,
410 int thirdpel, int dir, int avg)
411 {
412 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
413 uint8_t *src, *dest;
414 int i, emu = 0;
415 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
416 int linesize = s->cur_pic->f->linesize[0];
417 int uvlinesize = s->cur_pic->f->linesize[1];
418
419 mx += x;
420 my += y;
421
422 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
423 my < 0 || my >= s->v_edge_pos - height - 1) {
424 emu = 1;
425 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
426 my = av_clip(my, -16, s->v_edge_pos - height + 15);
427 }
428
429 /* form component predictions */
430 dest = s->cur_pic->f->data[0] + x + y * linesize;
431 src = pic->f->data[0] + mx + my * linesize;
432
433 if (emu) {
434 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
435 linesize, linesize,
436 width + 1, height + 1,
437 mx, my, s->h_edge_pos, s->v_edge_pos);
438 src = s->edge_emu_buffer;
439 }
440 if (thirdpel)
441 (avg ? s->tdsp.avg_tpel_pixels_tab
442 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
443 width, height);
444 else
445 (avg ? s->hdsp.avg_pixels_tab
446 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
447 height);
448
449 if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
450 mx = mx + (mx < (int) x) >> 1;
451 my = my + (my < (int) y) >> 1;
452 width = width >> 1;
453 height = height >> 1;
454 blocksize++;
455
456 for (i = 1; i < 3; i++) {
457 dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
458 src = pic->f->data[i] + mx + my * uvlinesize;
459
460 if (emu) {
461 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
462 uvlinesize, uvlinesize,
463 width + 1, height + 1,
464 mx, my, (s->h_edge_pos >> 1),
465 s->v_edge_pos >> 1);
466 src = s->edge_emu_buffer;
467 }
468 if (thirdpel)
469 (avg ? s->tdsp.avg_tpel_pixels_tab
470 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
471 uvlinesize,
472 width, height);
473 else
474 (avg ? s->hdsp.avg_pixels_tab
475 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
476 uvlinesize,
477 height);
478 }
479 }
480 }
481
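/*
 * Decode and apply motion for every partition of the current macroblock.
 * mode is FULLPEL/HALFPEL/THIRDPEL (predictor from svq3_pred_motion() plus a
 * signed Exp-Golomb differential) or PREDICT_MODE (direct-like prediction
 * scaled from next_pic's motion by the frame-number distance, with no
 * differential). The vectors written back to mv_cache and motion_val are
 * kept in sixth-pel units.
 */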
482 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
483 int dir, int avg)
484 {
485 int i, j, k, mx, my, dx, dy, x, y;
486 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
487 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
488 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
489 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
490 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
491
492 for (i = 0; i < 16; i += part_height)
493 for (j = 0; j < 16; j += part_width) {
494 const int b_xy = (4 * s->mb_x + (j >> 2)) +
495 (4 * s->mb_y + (i >> 2)) * s->b_stride;
496 int dxy;
497 x = 16 * s->mb_x + j;
498 y = 16 * s->mb_y + i;
499 k = (j >> 2 & 1) + (i >> 1 & 2) +
500 (j >> 1 & 4) + (i & 8);
501
502 if (mode != PREDICT_MODE) {
503 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
504 } else {
505 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
506 my = s->next_pic->motion_val[0][b_xy][1] << 1;
507
508 if (dir == 0) {
509 mx = mx * s->frame_num_offset /
510 s->prev_frame_num_offset + 1 >> 1;
511 my = my * s->frame_num_offset /
512 s->prev_frame_num_offset + 1 >> 1;
513 } else {
514 mx = mx * (s->frame_num_offset - s->prev_frame_num_offset) /
515 s->prev_frame_num_offset + 1 >> 1;
516 my = my * (s->frame_num_offset - s->prev_frame_num_offset) /
517 s->prev_frame_num_offset + 1 >> 1;
518 }
519 }
520
521 /* clip motion vector prediction to frame border */
522 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
523 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
524
525 /* get (optional) motion vector differential */
526 if (mode == PREDICT_MODE) {
527 dx = dy = 0;
528 } else {
529 dy = svq3_get_se_golomb(&s->gb_slice);
530 dx = svq3_get_se_golomb(&s->gb_slice);
531
532 if (dx == INVALID_VLC || dy == INVALID_VLC) {
533 av_log(s->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
534 return -1;
535 }
536 }
537
538 /* compute motion vector */
539 if (mode == THIRDPEL_MODE) {
540 int fx, fy;
541 mx = (mx + 1 >> 1) + dx;
542 my = (my + 1 >> 1) + dy;
543 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
544 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
545 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
546
547 svq3_mc_dir_part(s, x, y, part_width, part_height,
548 fx, fy, dxy, 1, dir, avg);
549 mx += mx;
550 my += my;
551 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
552 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
553 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
554 dxy = (mx & 1) + 2 * (my & 1);
555
556 svq3_mc_dir_part(s, x, y, part_width, part_height,
557 mx >> 1, my >> 1, dxy, 0, dir, avg);
558 mx *= 3;
559 my *= 3;
560 } else {
561 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
562 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
563
564 svq3_mc_dir_part(s, x, y, part_width, part_height,
565 mx, my, 0, 0, dir, avg);
566 mx *= 6;
567 my *= 6;
568 }
569
570 /* update mv_cache */
571 if (mode != PREDICT_MODE) {
572 int32_t mv = pack16to32(mx, my);
573
574 if (part_height == 8 && i < 8) {
575 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
576
577 if (part_width == 8 && j < 8)
578 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
579 }
580 if (part_width == 8 && j < 8)
581 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
582 if (part_width == 4 || part_height == 4)
583 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
584 }
585
586 /* write back motion vectors */
587 fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
588 part_width >> 2, part_height >> 2, s->b_stride,
589 pack16to32(mx, my), 4);
590 }
591
592 return 0;
593 }
594
595 static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
596 int mb_type, const int *block_offset,
597 int linesize, uint8_t *dest_y)
598 {
599 int i;
600 if (!IS_INTRA4x4(mb_type)) {
601 for (i = 0; i < 16; i++)
602 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
603 uint8_t *const ptr = dest_y + block_offset[i];
604 svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
605 s->qscale, IS_INTRA(mb_type) ? 1 : 0);
606 }
607 }
608 }
609
610 static av_always_inline int dctcoef_get(int16_t *mb, int index)
611 {
612 return AV_RN16A(mb + index);
613 }
614
615 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
616 int mb_type,
617 const int *block_offset,
618 int linesize,
619 uint8_t *dest_y)
620 {
621 int i;
622 int qscale = s->qscale;
623
624 if (IS_INTRA4x4(mb_type)) {
625 for (i = 0; i < 16; i++) {
626 uint8_t *const ptr = dest_y + block_offset[i];
627 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
628
629 uint8_t *topright;
630 int nnz, tr;
631 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
632 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
633 assert(s->mb_y || linesize <= block_offset[i]);
634 if (!topright_avail) {
635 tr = ptr[3 - linesize] * 0x01010101u;
636 topright = (uint8_t *)&tr;
637 } else
638 topright = ptr + 4 - linesize;
639 } else
640 topright = NULL;
641
642 s->hpc.pred4x4[dir](ptr, topright, linesize);
643 nnz = s->non_zero_count_cache[scan8[i]];
644 if (nnz) {
645 svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
646 }
647 }
648 } else {
649 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
650 svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
651 }
652 }
653
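/*
 * Reconstruct the current macroblock into the frame buffer: chroma intra
 * prediction and luma prediction/IDCT for intra blocks, the luma residual
 * IDCT for inter blocks, and finally the chroma DC dequant/IDCT plus chroma
 * AC blocks when the coded block pattern has chroma bits set (cbp & 0x30).
 */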
654 static void hl_decode_mb(SVQ3Context *s)
655 {
656 const int mb_x = s->mb_x;
657 const int mb_y = s->mb_y;
658 const int mb_xy = s->mb_xy;
659 const int mb_type = s->cur_pic->mb_type[mb_xy];
660 uint8_t *dest_y, *dest_cb, *dest_cr;
661 int linesize, uvlinesize;
662 int i, j;
663 const int *block_offset = &s->block_offset[0];
664 const int block_h = 16 >> 1;
665
666 linesize = s->cur_pic->f->linesize[0];
667 uvlinesize = s->cur_pic->f->linesize[1];
668
669 dest_y = s->cur_pic->f->data[0] + (mb_x + mb_y * linesize) * 16;
670 dest_cb = s->cur_pic->f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
671 dest_cr = s->cur_pic->f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;
672
673 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
674 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);
675
676 if (IS_INTRA(mb_type)) {
677 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
678 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
679
680 hl_decode_mb_predict_luma(s, mb_type, block_offset, linesize, dest_y);
681 }
682
683 hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
684
685 if (s->cbp & 0x30) {
686 uint8_t *dest[2] = { dest_cb, dest_cr };
687 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
688 s->dequant4_coeff[4][0]);
689 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
690 s->dequant4_coeff[4][0]);
691 for (j = 1; j < 3; j++) {
692 for (i = j * 16; i < j * 16 + 4; i++)
693 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
694 uint8_t *const ptr = dest[j - 1] + block_offset[i];
695 svq3_add_idct_c(ptr, s->mb + i * 16,
696 uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
697 }
698 }
699 }
700 }
701
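/*
 * Decode one macroblock of the given SVQ3 type: 0 is skip/direct, 1-7 are
 * inter types (with fullpel/halfpel/thirdpel MV resolution signalled per MB),
 * 8 and 33 are intra 4x4 (33 forcing DC_128 prediction), and the remaining
 * values map to intra 16x16 modes. Fills the prediction and motion caches
 * and leaves the residual levels in s->mb for hl_decode_mb().
 */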
702 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
703 {
704 int i, j, k, m, dir, mode;
705 int cbp = 0;
706 uint32_t vlc;
707 int8_t *top, *left;
708 const int mb_xy = s->mb_xy;
709 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
710
711 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
712 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
713 s->topright_samples_available = 0xFFFF;
714
715 if (mb_type == 0) { /* SKIP */
716 if (s->pict_type == AV_PICTURE_TYPE_P ||
717 s->next_pic->mb_type[mb_xy] == -1) {
718 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
719 0, 0, 0, 0, 0, 0);
720
721 if (s->pict_type == AV_PICTURE_TYPE_B)
722 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
723 0, 0, 0, 0, 1, 1);
724
725 mb_type = MB_TYPE_SKIP;
726 } else {
727 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
728 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
729 return -1;
730 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
731 return -1;
732
733 mb_type = MB_TYPE_16x16;
734 }
735 } else if (mb_type < 8) { /* INTER */
736 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&s->gb_slice))
737 mode = THIRDPEL_MODE;
738 else if (s->halfpel_flag &&
739 s->thirdpel_flag == !get_bits1(&s->gb_slice))
740 mode = HALFPEL_MODE;
741 else
742 mode = FULLPEL_MODE;
743
744 /* fill caches */
745 /* note ref_cache should contain here:
746 * ????????
747 * ???11111
748 * N??11111
749 * N??11111
750 * N??11111
751 */
752
753 for (m = 0; m < 2; m++) {
754 if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
755 for (i = 0; i < 4; i++)
756 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
757 s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
758 } else {
759 for (i = 0; i < 4; i++)
760 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
761 }
762 if (s->mb_y > 0) {
763 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
764 s->cur_pic->motion_val[m][b_xy - s->b_stride],
765 4 * 2 * sizeof(int16_t));
766 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
767 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
768
769 if (s->mb_x < s->mb_width - 1) {
770 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
771 s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
772 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
773 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
774 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
775 } else
776 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
777 if (s->mb_x > 0) {
778 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
779 s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
780 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
781 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
782 } else
783 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
784 } else
785 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
786 PART_NOT_AVAILABLE, 8);
787
788 if (s->pict_type != AV_PICTURE_TYPE_B)
789 break;
790 }
791
792 /* decode motion vector(s) and form prediction(s) */
793 if (s->pict_type == AV_PICTURE_TYPE_P) {
794 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
795 return -1;
796 } else { /* AV_PICTURE_TYPE_B */
797 if (mb_type != 2) {
798 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
799 return -1;
800 } else {
801 for (i = 0; i < 4; i++)
802 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
803 0, 4 * 2 * sizeof(int16_t));
804 }
805 if (mb_type != 1) {
806 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
807 return -1;
808 } else {
809 for (i = 0; i < 4; i++)
810 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
811 0, 4 * 2 * sizeof(int16_t));
812 }
813 }
814
815 mb_type = MB_TYPE_16x16;
816 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
817 int8_t *i4x4 = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
818 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
819
820 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
821
822 if (mb_type == 8) {
823 if (s->mb_x > 0) {
824 for (i = 0; i < 4; i++)
825 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
826 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
827 s->left_samples_available = 0x5F5F;
828 }
829 if (s->mb_y > 0) {
830 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
831 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
832 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
833 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
834
835 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
836 s->top_samples_available = 0x33FF;
837 }
838
839 /* decode prediction codes for luma blocks */
840 for (i = 0; i < 16; i += 2) {
841 vlc = svq3_get_ue_golomb(&s->gb_slice);
842
843 if (vlc >= 25) {
844 av_log(s->avctx, AV_LOG_ERROR,
845 "luma prediction:%"PRIu32"\n", vlc);
846 return -1;
847 }
848
849 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
850 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
851
852 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
853 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
854
855 if (left[1] == -1 || left[2] == -1) {
856 av_log(s->avctx, AV_LOG_ERROR, "weird prediction\n");
857 return -1;
858 }
859 }
860 } else { /* mb_type == 33, DC_128_PRED block type */
861 for (i = 0; i < 4; i++)
862 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
863 }
864
865 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
866 i4x4[4] = i4x4_cache[7 + 8 * 3];
867 i4x4[5] = i4x4_cache[7 + 8 * 2];
868 i4x4[6] = i4x4_cache[7 + 8 * 1];
869
870 if (mb_type == 8) {
871 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
872 s->avctx, s->top_samples_available,
873 s->left_samples_available);
874
875 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
876 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
877 } else {
878 for (i = 0; i < 4; i++)
879 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
880
881 s->top_samples_available = 0x33FF;
882 s->left_samples_available = 0x5F5F;
883 }
884
885 mb_type = MB_TYPE_INTRA4x4;
886 } else { /* INTRA16x16 */
887 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
888 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
889
890 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
891 s->left_samples_available, dir, 0)) < 0) {
892 av_log(s->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
893 return s->intra16x16_pred_mode;
894 }
895
896 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
897 mb_type = MB_TYPE_INTRA16x16;
898 }
899
900 if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
901 for (i = 0; i < 4; i++)
902 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
903 0, 4 * 2 * sizeof(int16_t));
904 if (s->pict_type == AV_PICTURE_TYPE_B) {
905 for (i = 0; i < 4; i++)
906 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
907 0, 4 * 2 * sizeof(int16_t));
908 }
909 }
910 if (!IS_INTRA4x4(mb_type)) {
911 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
912 }
913 if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
914 memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
915 }
916
917 if (!IS_INTRA16x16(mb_type) &&
918 (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
919 if ((vlc = svq3_get_ue_golomb(&s->gb_slice)) >= 48) {
920 av_log(s->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
921 return -1;
922 }
923
924 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
925 : ff_h264_golomb_to_inter_cbp[vlc];
926 }
927 if (IS_INTRA16x16(mb_type) ||
928 (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
929 s->qscale += svq3_get_se_golomb(&s->gb_slice);
930
931 if (s->qscale > 31u) {
932 av_log(s->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
933 return -1;
934 }
935 }
936 if (IS_INTRA16x16(mb_type)) {
937 AV_ZERO128(s->mb_luma_dc[0] + 0);
938 AV_ZERO128(s->mb_luma_dc[0] + 8);
939 if (svq3_decode_block(&s->gb_slice, s->mb_luma_dc[0], 0, 1)) {
940 av_log(s->avctx, AV_LOG_ERROR,
941 "error while decoding intra luma dc\n");
942 return -1;
943 }
944 }
945
946 if (cbp) {
947 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
948 const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
949
950 for (i = 0; i < 4; i++)
951 if ((cbp & (1 << i))) {
952 for (j = 0; j < 4; j++) {
953 k = index ? (1 * (j & 1) + 2 * (i & 1) +
954 2 * (j & 2) + 4 * (i & 2))
955 : (4 * i + j);
956 s->non_zero_count_cache[scan8[k]] = 1;
957
958 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], index, type)) {
959 av_log(s->avctx, AV_LOG_ERROR,
960 "error while decoding block\n");
961 return -1;
962 }
963 }
964 }
965
966 if ((cbp & 0x30)) {
967 for (i = 1; i < 3; ++i)
968 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * 16 * i], 0, 3)) {
969 av_log(s->avctx, AV_LOG_ERROR,
970 "error while decoding chroma dc block\n");
971 return -1;
972 }
973
974 if ((cbp & 0x20)) {
975 for (i = 1; i < 3; i++) {
976 for (j = 0; j < 4; j++) {
977 k = 16 * i + j;
978 s->non_zero_count_cache[scan8[k]] = 1;
979
980 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], 1, 1)) {
981 av_log(s->avctx, AV_LOG_ERROR,
982 "error while decoding chroma ac block\n");
983 return -1;
984 }
985 }
986 }
987 }
988 }
989 }
990
991 s->cbp = cbp;
992 s->cur_pic->mb_type[mb_xy] = mb_type;
993
994 if (IS_INTRA(mb_type))
995 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
996 s->left_samples_available, DC_PRED8x8, 1);
997
998 return 0;
999 }
1000
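/*
 * Parse a slice header from s->gb. The first byte carries the slice start
 * code variant and the size of the following length field; the slice payload
 * is copied into slice_buf (with watermark_key XORed into part of it when a
 * watermark is present) and then read through gb_slice: slice id / picture
 * type, qscale, the adaptive quant flag and a few unknown fields. Finally the
 * intra prediction edges left of and above the current position are
 * invalidated.
 */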
1001 static int svq3_decode_slice_header(AVCodecContext *avctx)
1002 {
1003 SVQ3Context *s = avctx->priv_data;
1004 const int mb_xy = s->mb_xy;
1005 int i, header;
1006 unsigned slice_id;
1007
1008 header = get_bits(&s->gb, 8);
1009
1010 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1011 /* TODO: what? */
1012 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1013 return -1;
1014 } else {
1015 int slice_bits, slice_bytes, slice_length;
1016 int length = header >> 5 & 3;
1017
1018 slice_length = show_bits(&s->gb, 8 * length);
1019 slice_bits = slice_length * 8;
1020 slice_bytes = slice_length + length - 1;
1021
1022 if (slice_bytes > get_bits_left(&s->gb)) {
1023 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1024 return -1;
1025 }
1026
1027 skip_bits(&s->gb, 8);
1028
1029 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1030 if (!s->slice_buf)
1031 return AVERROR(ENOMEM);
1032
1033 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1034
1035 init_get_bits(&s->gb_slice, s->slice_buf, slice_bits);
1036
1037 if (s->watermark_key) {
1038 uint32_t header = AV_RL32(&s->gb_slice.buffer[1]);
1039 AV_WL32(&s->gb_slice.buffer[1], header ^ s->watermark_key);
1040 }
1041 if (length > 0) {
1042 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1043 }
1044 skip_bits_long(&s->gb, slice_bytes * 8);
1045 }
1046
1047 if ((slice_id = svq3_get_ue_golomb(&s->gb_slice)) >= 3) {
1048 av_log(s->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
1049 return -1;
1050 }
1051
1052 s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
1053
1054 if ((header & 0x9F) == 2) {
1055 i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1056 get_bits(&s->gb_slice, i);
1057 } else {
1058 skip_bits1(&s->gb_slice);
1059 }
1060
1061 s->slice_num = get_bits(&s->gb_slice, 8);
1062 s->qscale = get_bits(&s->gb_slice, 5);
1063 s->adaptive_quant = get_bits1(&s->gb_slice);
1064
1065 /* unknown fields */
1066 skip_bits1(&s->gb_slice);
1067
1068 if (s->unknown_flag)
1069 skip_bits1(&s->gb_slice);
1070
1071 skip_bits1(&s->gb_slice);
1072 skip_bits(&s->gb_slice, 2);
1073
1074 while (get_bits1(&s->gb_slice))
1075 skip_bits(&s->gb_slice, 8);
1076
1077 /* reset intra predictors and invalidate motion vector references */
1078 if (s->mb_x > 0) {
1079 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1080 -1, 4 * sizeof(int8_t));
1081 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1082 -1, 8 * sizeof(int8_t) * s->mb_x);
1083 }
1084 if (s->mb_y > 0) {
1085 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1086 -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1087
1088 if (s->mb_x > 0)
1089 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1090 }
1091
1092 return 0;
1093 }
1094
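/*
 * Precompute H.264-style 4x4 dequant factors for every QP. The SVQ3 residual
 * path uses its own svq3_dequant_coeff[] table; of this table only the DC
 * entry dequant4_coeff[4][0] is consumed, as the chroma DC scale in
 * hl_decode_mb().
 */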
1095 static void init_dequant4_coeff_table(SVQ3Context *s)
1096 {
1097 int q, x;
1098 const int max_qp = 51;
1099
1100 for (q = 0; q < max_qp + 1; q++) {
1101 int shift = ff_h264_quant_div6[q] + 2;
1102 int idx = ff_h264_quant_rem6[q];
1103 for (x = 0; x < 16; x++)
1104 s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1105 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1106 }
1107 }
1108
1109 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1110 {
1111 SVQ3Context *s = avctx->priv_data;
1112 int m, x, y;
1113 unsigned char *extradata;
1114 unsigned char *extradata_end;
1115 unsigned int size;
1116 int marker_found = 0;
1117
1118 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1119 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1120 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1121 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1122 av_freep(&s->cur_pic);
1123 av_freep(&s->last_pic);
1124 av_freep(&s->next_pic);
1125 return AVERROR(ENOMEM);
1126 }
1127
1128 s->cur_pic->f = av_frame_alloc();
1129 s->last_pic->f = av_frame_alloc();
1130 s->next_pic->f = av_frame_alloc();
1131 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1132 return AVERROR(ENOMEM);
1133
1134 ff_h264dsp_init(&s->h264dsp, 8, 1);
1135 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1136 ff_videodsp_init(&s->vdsp, 8);
1137
1138 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1139 ff_tpeldsp_init(&s->tdsp);
1140
1141 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1142 avctx->color_range = AVCOL_RANGE_JPEG;
1143
1144 s->avctx = avctx;
1145 s->halfpel_flag = 1;
1146 s->thirdpel_flag = 1;
1147 s->unknown_flag = 0;
1148
1149 /* prowl for the "SEQH" marker in the extradata */
1150 extradata = (unsigned char *)avctx->extradata;
1151 extradata_end = avctx->extradata + avctx->extradata_size;
1152 if (extradata) {
1153 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1154 if (!memcmp(extradata, "SEQH", 4)) {
1155 marker_found = 1;
1156 break;
1157 }
1158 extradata++;
1159 }
1160 }
1161
1162 /* if a match was found, parse the extra data */
1163 if (marker_found) {
1164 GetBitContext gb;
1165 int frame_size_code;
1166
1167 size = AV_RB32(&extradata[4]);
1168 if (size > extradata_end - extradata - 8)
1169 return AVERROR_INVALIDDATA;
1170 init_get_bits(&gb, extradata + 8, size * 8);
1171
1172 /* 'frame size code' and optional 'width, height' */
1173 frame_size_code = get_bits(&gb, 3);
1174 switch (frame_size_code) {
1175 case 0:
1176 avctx->width = 160;
1177 avctx->height = 120;
1178 break;
1179 case 1:
1180 avctx->width = 128;
1181 avctx->height = 96;
1182 break;
1183 case 2:
1184 avctx->width = 176;
1185 avctx->height = 144;
1186 break;
1187 case 3:
1188 avctx->width = 352;
1189 avctx->height = 288;
1190 break;
1191 case 4:
1192 avctx->width = 704;
1193 avctx->height = 576;
1194 break;
1195 case 5:
1196 avctx->width = 240;
1197 avctx->height = 180;
1198 break;
1199 case 6:
1200 avctx->width = 320;
1201 avctx->height = 240;
1202 break;
1203 case 7:
1204 avctx->width = get_bits(&gb, 12);
1205 avctx->height = get_bits(&gb, 12);
1206 break;
1207 }
1208
1209 s->halfpel_flag = get_bits1(&gb);
1210 s->thirdpel_flag = get_bits1(&gb);
1211
1212 /* unknown fields */
1213 skip_bits1(&gb);
1214 skip_bits1(&gb);
1215 skip_bits1(&gb);
1216 skip_bits1(&gb);
1217
1218 s->low_delay = get_bits1(&gb);
1219
1220 /* unknown field */
1221 skip_bits1(&gb);
1222
1223 while (get_bits1(&gb))
1224 skip_bits(&gb, 8);
1225
1226 s->unknown_flag = get_bits1(&gb);
1227 avctx->has_b_frames = !s->low_delay;
1228 if (s->unknown_flag) {
1229 #if CONFIG_ZLIB
1230 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1231 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1232 int u1 = svq3_get_ue_golomb(&gb);
1233 int u2 = get_bits(&gb, 8);
1234 int u3 = get_bits(&gb, 2);
1235 int u4 = svq3_get_ue_golomb(&gb);
1236 unsigned long buf_len = watermark_width *
1237 watermark_height * 4;
1238 int offset = get_bits_count(&gb) + 7 >> 3;
1239 uint8_t *buf;
1240
1241 if (watermark_height > 0 &&
1242 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1243 return -1;
1244
1245 buf = av_malloc(buf_len);
1246 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1247 watermark_width, watermark_height);
1248 av_log(avctx, AV_LOG_DEBUG,
1249 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1250 u1, u2, u3, u4, offset);
1251 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1252 size - offset) != Z_OK) {
1253 av_log(avctx, AV_LOG_ERROR,
1254 "could not uncompress watermark logo\n");
1255 av_free(buf);
1256 return -1;
1257 }
1258 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1259 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1260 av_log(avctx, AV_LOG_DEBUG,
1261 "watermark key %#"PRIx32"\n", s->watermark_key);
1262 av_free(buf);
1263 #else
1264 av_log(avctx, AV_LOG_ERROR,
1265 "this svq3 file contains watermark which need zlib support compiled in\n");
1266 return -1;
1267 #endif
1268 }
1269 }
1270
1271 s->mb_width = (avctx->width + 15) / 16;
1272 s->mb_height = (avctx->height + 15) / 16;
1273 s->mb_stride = s->mb_width + 1;
1274 s->mb_num = s->mb_width * s->mb_height;
1275 s->b_stride = 4 * s->mb_width;
1276 s->h_edge_pos = s->mb_width * 16;
1277 s->v_edge_pos = s->mb_height * 16;
1278
1279 s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1280 if (!s->intra4x4_pred_mode)
1281 return AVERROR(ENOMEM);
1282
1283 s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1284 sizeof(*s->mb2br_xy));
1285 if (!s->mb2br_xy)
1286 return AVERROR(ENOMEM);
1287
1288 for (y = 0; y < s->mb_height; y++)
1289 for (x = 0; x < s->mb_width; x++) {
1290 const int mb_xy = x + y * s->mb_stride;
1291
1292 s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1293 }
1294
1295 init_dequant4_coeff_table(s);
1296
1297 return 0;
1298 }
1299
1300 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1301 {
1302 int i;
1303 for (i = 0; i < 2; i++) {
1304 av_buffer_unref(&pic->motion_val_buf[i]);
1305 av_buffer_unref(&pic->ref_index_buf[i]);
1306 }
1307 av_buffer_unref(&pic->mb_type_buf);
1308
1309 av_frame_unref(pic->f);
1310 }
1311
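/*
 * Lazily allocate the per-picture metadata (mb_type, motion vectors, ref
 * indexes) on first use, then obtain a frame buffer from the generic
 * allocator; the shared edge_emu_buffer is also sized here once the luma
 * linesize is known.
 */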
1312 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1313 {
1314 SVQ3Context *s = avctx->priv_data;
1315 const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
1316 const int mb_array_size = s->mb_stride * s->mb_height;
1317 const int b4_stride = s->mb_width * 4 + 1;
1318 const int b4_array_size = b4_stride * s->mb_height * 4;
1319 int ret;
1320
1321 if (!pic->motion_val_buf[0]) {
1322 int i;
1323
1324 pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1325 if (!pic->mb_type_buf)
1326 return AVERROR(ENOMEM);
1327 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1328
1329 for (i = 0; i < 2; i++) {
1330 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1331 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1332 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1333 ret = AVERROR(ENOMEM);
1334 goto fail;
1335 }
1336
1337 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1338 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1339 }
1340 }
1341 pic->reference = !(s->pict_type == AV_PICTURE_TYPE_B);
1342
1343 ret = ff_get_buffer(avctx, pic->f,
1344 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1345 if (ret < 0)
1346 goto fail;
1347
1348 if (!s->edge_emu_buffer) {
1349 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1350 if (!s->edge_emu_buffer)
1351 return AVERROR(ENOMEM);
1352 }
1353
1354 return 0;
1355 fail:
1356 free_picture(avctx, pic);
1357 return ret;
1358 }
1359
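/*
 * Frame decoding entry point: an empty packet flushes the last delayed
 * picture; otherwise the slice header is parsed, the cur/last/next picture
 * slots are rotated, every macroblock of the frame is decoded, and either the
 * current picture (B-frames / low delay) or the previous one is returned to
 * the caller.
 */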
1360 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1361 int *got_frame, AVPacket *avpkt)
1362 {
1363 const uint8_t *buf = avpkt->data;
1364 SVQ3Context *s = avctx->priv_data;
1365 int buf_size = avpkt->size;
1366 int ret, m, i;
1367
1368 /* special case for last picture */
1369 if (buf_size == 0) {
1370 if (s->next_pic->f->data[0] && !s->low_delay && !s->last_frame_output) {
1371 ret = av_frame_ref(data, s->next_pic->f);
1372 if (ret < 0)
1373 return ret;
1374 s->last_frame_output = 1;
1375 *got_frame = 1;
1376 }
1377 return 0;
1378 }
1379
1380 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1381 if (ret < 0)
1382 return ret;
1383
1384 s->mb_x = s->mb_y = s->mb_xy = 0;
1385
1386 if (svq3_decode_slice_header(avctx))
1387 return -1;
1388
1389 if (s->pict_type != AV_PICTURE_TYPE_B)
1390 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1391
1392 av_frame_unref(s->cur_pic->f);
1393
1394 /* for skipping the frame */
1395 s->cur_pic->f->pict_type = s->pict_type;
1396 s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1397
1398 ret = get_buffer(avctx, s->cur_pic);
1399 if (ret < 0)
1400 return ret;
1401
1402 for (i = 0; i < 16; i++) {
1403 s->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1404 s->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1405 }
1406 for (i = 0; i < 16; i++) {
1407 s->block_offset[16 + i] =
1408 s->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1409 s->block_offset[48 + 16 + i] =
1410 s->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1411 }
1412
1413 if (s->pict_type != AV_PICTURE_TYPE_I) {
1414 if (!s->last_pic->f->data[0]) {
1415 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1416 ret = get_buffer(avctx, s->last_pic);
1417 if (ret < 0)
1418 return ret;
1419 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1420 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1421 s->last_pic->f->linesize[1]);
1422 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1423 s->last_pic->f->linesize[2]);
1424 }
1425
1426 if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1427 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1428 ret = get_buffer(avctx, s->next_pic);
1429 if (ret < 0)
1430 return ret;
1431 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1432 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1433 s->next_pic->f->linesize[1]);
1434 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1435 s->next_pic->f->linesize[2]);
1436 }
1437 }
1438
1439 if (avctx->debug & FF_DEBUG_PICT_INFO)
1440 av_log(s->avctx, AV_LOG_DEBUG,
1441 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1442 av_get_picture_type_char(s->pict_type),
1443 s->halfpel_flag, s->thirdpel_flag,
1444 s->adaptive_quant, s->qscale, s->slice_num);
1445
1446 if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1447 avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1448 avctx->skip_frame >= AVDISCARD_ALL)
1449 return 0;
1450
1451 if (s->next_p_frame_damaged) {
1452 if (s->pict_type == AV_PICTURE_TYPE_B)
1453 return 0;
1454 else
1455 s->next_p_frame_damaged = 0;
1456 }
1457
1458 if (s->pict_type == AV_PICTURE_TYPE_B) {
1459 s->frame_num_offset = s->slice_num - s->prev_frame_num;
1460
1461 if (s->frame_num_offset < 0)
1462 s->frame_num_offset += 256;
1463 if (s->frame_num_offset == 0 ||
1464 s->frame_num_offset >= s->prev_frame_num_offset) {
1465 av_log(s->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1466 return -1;
1467 }
1468 } else {
1469 s->prev_frame_num = s->frame_num;
1470 s->frame_num = s->slice_num;
1471 s->prev_frame_num_offset = s->frame_num - s->prev_frame_num;
1472
1473 if (s->prev_frame_num_offset < 0)
1474 s->prev_frame_num_offset += 256;
1475 }
1476
1477 for (m = 0; m < 2; m++) {
1478 int i;
1479 for (i = 0; i < 4; i++) {
1480 int j;
1481 for (j = -1; j < 4; j++)
1482 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1483 if (i < 3)
1484 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1485 }
1486 }
1487
1488 for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1489 for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1490 unsigned mb_type;
1491 s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1492
1493 if ((get_bits_left(&s->gb_slice)) <= 7) {
1494 if (((get_bits_count(&s->gb_slice) & 7) == 0 ||
1495 show_bits(&s->gb_slice, get_bits_left(&s->gb_slice) & 7) == 0)) {
1496
1497 if (svq3_decode_slice_header(avctx))
1498 return -1;
1499 }
1500 /* TODO: support s->mb_skip_run */
1501 }
1502
1503 mb_type = svq3_get_ue_golomb(&s->gb_slice);
1504
1505 if (s->pict_type == AV_PICTURE_TYPE_I)
1506 mb_type += 8;
1507 else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1508 mb_type += 4;
1509 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1510 av_log(s->avctx, AV_LOG_ERROR,
1511 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1512 return -1;
1513 }
1514
1515 if (mb_type != 0)
1516 hl_decode_mb(s);
1517
1518 if (s->pict_type != AV_PICTURE_TYPE_B && !s->low_delay)
1519 s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1520 (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1521 }
1522
1523 ff_draw_horiz_band(avctx, s->cur_pic->f,
1524 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1525 16 * s->mb_y, 16, PICT_FRAME, 0,
1526 s->low_delay);
1527 }
1528
1529 if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay)
1530 ret = av_frame_ref(data, s->cur_pic->f);
1531 else if (s->last_pic->f->data[0])
1532 ret = av_frame_ref(data, s->last_pic->f);
1533 if (ret < 0)
1534 return ret;
1535
1536 /* Do not output the last pic after seeking. */
1537 if (s->last_pic->f->data[0] || s->low_delay)
1538 *got_frame = 1;
1539
1540 if (s->pict_type != AV_PICTURE_TYPE_B) {
1541 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1542 } else {
1543 av_frame_unref(s->cur_pic->f);
1544 }
1545
1546 return buf_size;
1547 }
1548
1549 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1550 {
1551 SVQ3Context *s = avctx->priv_data;
1552
1553 free_picture(avctx, s->cur_pic);
1554 free_picture(avctx, s->next_pic);
1555 free_picture(avctx, s->last_pic);
1556 av_frame_free(&s->cur_pic->f);
1557 av_frame_free(&s->next_pic->f);
1558 av_frame_free(&s->last_pic->f);
1559 av_freep(&s->cur_pic);
1560 av_freep(&s->next_pic);
1561 av_freep(&s->last_pic);
1562 av_freep(&s->slice_buf);
1563 av_freep(&s->intra4x4_pred_mode);
1564 av_freep(&s->edge_emu_buffer);
1565 av_freep(&s->mb2br_xy);
1566
1567 return 0;
1568 }
1569
1570 AVCodec ff_svq3_decoder = {
1571 .name = "svq3",
1572 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1573 .type = AVMEDIA_TYPE_VIDEO,
1574 .id = AV_CODEC_ID_SVQ3,
1575 .priv_data_size = sizeof(SVQ3Context),
1576 .init = svq3_decode_init,
1577 .close = svq3_decode_end,
1578 .decode = svq3_decode_frame,
1579 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1580 AV_CODEC_CAP_DR1 |
1581 AV_CODEC_CAP_DELAY,
1582 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1583 AV_PIX_FMT_NONE},
1584 };