libavcodec/svq3.c
1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple QuickTime files. QuickTime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains one or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
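/*
 * A minimal sketch of that setup, assuming the caller has already located
 * the ImageDescription atom inside the stsd box; image_desc and
 * image_desc_size are illustrative names and point just past the 4-byte
 * atom length, i.e. at the 'SVQ3' fourcc:
 *
 *     avctx->extradata = av_mallocz(image_desc_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, image_desc, image_desc_size);
 *     avctx->extradata_size = image_desc_size;
 */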
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * svq3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 GetBitContext gb_slice;
83 uint8_t *slice_buf;
84 int slice_size;
85 int halfpel_flag;
86 int thirdpel_flag;
87 int unknown_flag;
88 uint32_t watermark_key;
89 int adaptive_quant;
90 int next_p_frame_damaged;
91 int h_edge_pos;
92 int v_edge_pos;
93 int last_frame_output;
94 int slice_num;
95 int qscale;
96 int cbp;
97 int frame_num;
98 int frame_num_offset;
99 int prev_frame_num_offset;
100 int prev_frame_num;
101
102 enum AVPictureType pict_type;
103
104 int mb_x, mb_y;
105 int mb_xy;
106 int mb_width, mb_height;
107 int mb_stride, mb_num;
108 int b_stride;
109
110 uint32_t *mb2br_xy;
111
112 int chroma_pred_mode;
113 int intra16x16_pred_mode;
114
115 int8_t intra4x4_pred_mode_cache[5 * 8];
116 int8_t (*intra4x4_pred_mode);
117
118 unsigned int top_samples_available;
119 unsigned int topright_samples_available;
120 unsigned int left_samples_available;
121
122 uint8_t *edge_emu_buffer;
123
124 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
125 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
126 DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
127 DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
128 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
129 uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
130 int block_offset[2 * (16 * 3)];
131 } SVQ3Context;
132
133 #define FULLPEL_MODE 1
134 #define HALFPEL_MODE 2
135 #define THIRDPEL_MODE 3
136 #define PREDICT_MODE 4
137
138 /* dual scan (from some older h264 draft)
139 * o-->o-->o   o
140 *         |  /|
141 * o   o   o / o
142 * | / |   |/  |
143 * o   o   o   o
144 *   /
145 * o-->o-->o-->o
146 */
147 static const uint8_t svq3_scan[16] = {
148 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
149 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
150 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
151 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
152 };
153
154 static const uint8_t luma_dc_zigzag_scan[16] = {
155 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
156 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
157 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
158 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
159 };
160
161 static const uint8_t svq3_pred_0[25][2] = {
162 { 0, 0 },
163 { 1, 0 }, { 0, 1 },
164 { 0, 2 }, { 1, 1 }, { 2, 0 },
165 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
166 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
167 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
168 { 2, 4 }, { 3, 3 }, { 4, 2 },
169 { 4, 3 }, { 3, 4 },
170 { 4, 4 }
171 };
172
173 static const int8_t svq3_pred_1[6][6][5] = {
174 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
175 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
176 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
177 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
178 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
179 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
180 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
181 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
182 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
183 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
184 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
185 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
186 };
187
188 static const struct {
189 uint8_t run;
190 uint8_t level;
191 } svq3_dct_tables[2][16] = {
192 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
193 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
194 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
195 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
196 };
197
198 static const uint32_t svq3_dequant_coeff[32] = {
199 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
200 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
201 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
202 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
203 };
204
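/**
 * Dequantize and inverse-transform the 4x4 block of luma DC coefficients.
 * SVQ3 uses a 13/17/7 integer approximation instead of the exact H.264
 * Hadamard transform; the results are written to the DC positions of the
 * sixteen 4x4 luma blocks in output[].
 */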
205 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
206 {
207 const int qmul = svq3_dequant_coeff[qp];
208 #define stride 16
209 int i;
210 int temp[16];
211 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
212
213 for (i = 0; i < 4; i++) {
214 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
215 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
216 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
217 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
218
219 temp[4 * i + 0] = z0 + z3;
220 temp[4 * i + 1] = z1 + z2;
221 temp[4 * i + 2] = z1 - z2;
222 temp[4 * i + 3] = z0 - z3;
223 }
224
225 for (i = 0; i < 4; i++) {
226 const int offset = x_offset[i];
227 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
228 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
229 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
230 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
231
232 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
233 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
234 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
235 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
236 }
237 }
238 #undef stride
239
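/**
 * Dequantize, inverse-transform and add one 4x4 residual block to dst.
 * A nonzero dc selects special DC handling: 1 means block[0] was already
 * processed by svq3_luma_dc_dequant_idct_c(), 2 means block[0] is a chroma
 * DC coefficient; in both cases it bypasses the row/column passes and is
 * added as a constant offset.
 */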
240 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
241 int stride, int qp, int dc)
242 {
243 const int qmul = svq3_dequant_coeff[qp];
244 int i;
245
246 if (dc) {
247 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
248 : qmul * (block[0] >> 3) / 2);
249 block[0] = 0;
250 }
251
252 for (i = 0; i < 4; i++) {
253 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
254 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
255 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
256 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
257
258 block[0 + 4 * i] = z0 + z3;
259 block[1 + 4 * i] = z1 + z2;
260 block[2 + 4 * i] = z1 - z2;
261 block[3 + 4 * i] = z0 - z3;
262 }
263
264 for (i = 0; i < 4; i++) {
265 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
266 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
267 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
268 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
269 const int rr = (dc + 0x80000);
270
271 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
272 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
273 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
274 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
275 }
276
277 memset(block, 0, 16 * sizeof(int16_t));
278 }
279
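/**
 * Decode the run/level coded coefficients of one block into block[].
 * type selects the scan pattern (see scan_patterns[]) and, via 3 * type >> 2,
 * which of the two svq3_dct_tables[] run/level tables is used.
 * Returns 0 on success, -1 if a run overshoots the block limit.
 */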
280 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
281 int index, const int type)
282 {
283 static const uint8_t *const scan_patterns[4] = {
284 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
285 };
286
287 int run, level, limit;
288 unsigned vlc;
289 const int intra = 3 * type >> 2;
290 const uint8_t *const scan = scan_patterns[type];
291
292 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
293 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
294 int sign = (vlc & 1) ? 0 : -1;
295 vlc = vlc + 1 >> 1;
296
297 if (type == 3) {
298 if (vlc < 3) {
299 run = 0;
300 level = vlc;
301 } else if (vlc < 4) {
302 run = 1;
303 level = 1;
304 } else {
305 run = vlc & 0x3;
306 level = (vlc + 9 >> 2) - run;
307 }
308 } else {
309 if (vlc < 16) {
310 run = svq3_dct_tables[intra][vlc].run;
311 level = svq3_dct_tables[intra][vlc].level;
312 } else if (intra) {
313 run = vlc & 0x7;
314 level = (vlc >> 3) +
315 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
316 } else {
317 run = vlc & 0xF;
318 level = (vlc >> 4) +
319 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
320 }
321 }
322
323 if ((index += run) >= limit)
324 return -1;
325
326 block[scan[index]] = (level ^ sign) - sign;
327 }
328
329 if (type != 2) {
330 break;
331 }
332 }
333
334 return 0;
335 }
336
337 static av_always_inline int
338 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
339 int i, int list, int part_width)
340 {
341 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
342
343 if (topright_ref != PART_NOT_AVAILABLE) {
344 *C = s->mv_cache[list][i - 8 + part_width];
345 return topright_ref;
346 } else {
347 *C = s->mv_cache[list][i - 8 - 1];
348 return s->ref_cache[list][i - 8 - 1];
349 }
350 }
351
352 /**
353 * Get the predicted MV.
354 * @param n the block index
355 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
356 * @param mx the x component of the predicted motion vector
357 * @param my the y component of the predicted motion vector
358 */
359 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
360 int part_width, int list,
361 int ref, int *const mx, int *const my)
362 {
363 const int index8 = scan8[n];
364 const int top_ref = s->ref_cache[list][index8 - 8];
365 const int left_ref = s->ref_cache[list][index8 - 1];
366 const int16_t *const A = s->mv_cache[list][index8 - 1];
367 const int16_t *const B = s->mv_cache[list][index8 - 8];
368 const int16_t *C;
369 int diagonal_ref, match_count;
370
371 /* mv_cache
372 * B . . A T T T T
373 * U . . L . . , .
374 * U . . L . . . .
375 * U . . L . . , .
376 * . . . L . . . .
377 */
378
379 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
380 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
381 if (match_count > 1) { //most common
382 *mx = mid_pred(A[0], B[0], C[0]);
383 *my = mid_pred(A[1], B[1], C[1]);
384 } else if (match_count == 1) {
385 if (left_ref == ref) {
386 *mx = A[0];
387 *my = A[1];
388 } else if (top_ref == ref) {
389 *mx = B[0];
390 *my = B[1];
391 } else {
392 *mx = C[0];
393 *my = C[1];
394 }
395 } else {
396 if (top_ref == PART_NOT_AVAILABLE &&
397 diagonal_ref == PART_NOT_AVAILABLE &&
398 left_ref != PART_NOT_AVAILABLE) {
399 *mx = A[0];
400 *my = A[1];
401 } else {
402 *mx = mid_pred(A[0], B[0], C[0]);
403 *my = mid_pred(A[1], B[1], C[1]);
404 }
405 }
406 }
407
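/**
 * Motion-compensate one partition of the current macroblock from the
 * previous (dir == 0) or next (dir == 1) reference picture. thirdpel
 * selects the 1/3-pel DSP functions instead of the half-pel ones and
 * avg averages into the destination for bidirectional prediction.
 * Out-of-frame references go through emulated_edge_mc().
 */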
408 static inline void svq3_mc_dir_part(SVQ3Context *s,
409 int x, int y, int width, int height,
410 int mx, int my, int dxy,
411 int thirdpel, int dir, int avg)
412 {
413 H264Context *h = &s->h;
414 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
415 uint8_t *src, *dest;
416 int i, emu = 0;
417 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
418 int linesize = s->cur_pic->f->linesize[0];
419 int uvlinesize = s->cur_pic->f->linesize[1];
420
421 mx += x;
422 my += y;
423
424 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
425 my < 0 || my >= s->v_edge_pos - height - 1) {
426 emu = 1;
427 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
428 my = av_clip(my, -16, s->v_edge_pos - height + 15);
429 }
430
431 /* form component predictions */
432 dest = s->cur_pic->f->data[0] + x + y * linesize;
433 src = pic->f->data[0] + mx + my * linesize;
434
435 if (emu) {
436 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
437 linesize, linesize,
438 width + 1, height + 1,
439 mx, my, s->h_edge_pos, s->v_edge_pos);
440 src = s->edge_emu_buffer;
441 }
442 if (thirdpel)
443 (avg ? s->tdsp.avg_tpel_pixels_tab
444 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
445 width, height);
446 else
447 (avg ? s->hdsp.avg_pixels_tab
448 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
449 height);
450
451 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
452 mx = mx + (mx < (int) x) >> 1;
453 my = my + (my < (int) y) >> 1;
454 width = width >> 1;
455 height = height >> 1;
456 blocksize++;
457
458 for (i = 1; i < 3; i++) {
459 dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
460 src = pic->f->data[i] + mx + my * uvlinesize;
461
462 if (emu) {
463 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
464 uvlinesize, uvlinesize,
465 width + 1, height + 1,
466 mx, my, (s->h_edge_pos >> 1),
467 s->v_edge_pos >> 1);
468 src = s->edge_emu_buffer;
469 }
470 if (thirdpel)
471 (avg ? s->tdsp.avg_tpel_pixels_tab
472 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
473 uvlinesize,
474 width, height);
475 else
476 (avg ? s->hdsp.avg_pixels_tab
477 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
478 uvlinesize,
479 height);
480 }
481 }
482 }
483
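/**
 * Decode the motion vectors for all partitions of the current macroblock in
 * one prediction direction: predict each MV, read the optional differential,
 * convert it to full-, half- or third-pel precision according to mode, run
 * motion compensation and write the vectors back to the caches and to the
 * picture's motion_val array.
 */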
484 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
485 int dir, int avg)
486 {
487 int i, j, k, mx, my, dx, dy, x, y;
488 H264Context *h = &s->h;
489 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
490 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
491 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
492 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
493 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
494
495 for (i = 0; i < 16; i += part_height)
496 for (j = 0; j < 16; j += part_width) {
497 const int b_xy = (4 * s->mb_x + (j >> 2)) +
498 (4 * s->mb_y + (i >> 2)) * s->b_stride;
499 int dxy;
500 x = 16 * s->mb_x + j;
501 y = 16 * s->mb_y + i;
502 k = (j >> 2 & 1) + (i >> 1 & 2) +
503 (j >> 1 & 4) + (i & 8);
504
505 if (mode != PREDICT_MODE) {
506 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
507 } else {
508 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
509 my = s->next_pic->motion_val[0][b_xy][1] << 1;
510
511 if (dir == 0) {
512 mx = mx * s->frame_num_offset /
513 s->prev_frame_num_offset + 1 >> 1;
514 my = my * s->frame_num_offset /
515 s->prev_frame_num_offset + 1 >> 1;
516 } else {
517 mx = mx * (s->frame_num_offset - s->prev_frame_num_offset) /
518 s->prev_frame_num_offset + 1 >> 1;
519 my = my * (s->frame_num_offset - s->prev_frame_num_offset) /
520 s->prev_frame_num_offset + 1 >> 1;
521 }
522 }
523
524 /* clip motion vector prediction to frame border */
525 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
526 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
527
528 /* get (optional) motion vector differential */
529 if (mode == PREDICT_MODE) {
530 dx = dy = 0;
531 } else {
532 dy = svq3_get_se_golomb(&s->gb_slice);
533 dx = svq3_get_se_golomb(&s->gb_slice);
534
535 if (dx == INVALID_VLC || dy == INVALID_VLC) {
536 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
537 return -1;
538 }
539 }
540
541 /* compute motion vector */
542 if (mode == THIRDPEL_MODE) {
543 int fx, fy;
544 mx = (mx + 1 >> 1) + dx;
545 my = (my + 1 >> 1) + dy;
546 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
547 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
548 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
549
550 svq3_mc_dir_part(s, x, y, part_width, part_height,
551 fx, fy, dxy, 1, dir, avg);
552 mx += mx;
553 my += my;
554 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
555 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
556 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
557 dxy = (mx & 1) + 2 * (my & 1);
558
559 svq3_mc_dir_part(s, x, y, part_width, part_height,
560 mx >> 1, my >> 1, dxy, 0, dir, avg);
561 mx *= 3;
562 my *= 3;
563 } else {
564 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
565 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
566
567 svq3_mc_dir_part(s, x, y, part_width, part_height,
568 mx, my, 0, 0, dir, avg);
569 mx *= 6;
570 my *= 6;
571 }
572
573 /* update mv_cache */
574 if (mode != PREDICT_MODE) {
575 int32_t mv = pack16to32(mx, my);
576
577 if (part_height == 8 && i < 8) {
578 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
579
580 if (part_width == 8 && j < 8)
581 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
582 }
583 if (part_width == 8 && j < 8)
584 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
585 if (part_width == 4 || part_height == 4)
586 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
587 }
588
589 /* write back motion vectors */
590 fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
591 part_width >> 2, part_height >> 2, s->b_stride,
592 pack16to32(mx, my), 4);
593 }
594
595 return 0;
596 }
597
598 static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
599 int mb_type, const int *block_offset,
600 int linesize, uint8_t *dest_y)
601 {
602 int i;
603 if (!IS_INTRA4x4(mb_type)) {
604 for (i = 0; i < 16; i++)
605 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
606 uint8_t *const ptr = dest_y + block_offset[i];
607 svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
608 s->qscale, IS_INTRA(mb_type) ? 1 : 0);
609 }
610 }
611 }
612
613 static av_always_inline int dctcoef_get(int16_t *mb, int index)
614 {
615 return AV_RN16A(mb + index);
616 }
617
618 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
619 const H264Context *h,
620 int mb_type,
621 const int *block_offset,
622 int linesize,
623 uint8_t *dest_y)
624 {
625 int i;
626 int qscale = s->qscale;
627
628 if (IS_INTRA4x4(mb_type)) {
629 for (i = 0; i < 16; i++) {
630 uint8_t *const ptr = dest_y + block_offset[i];
631 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
632
633 uint8_t *topright;
634 int nnz, tr;
635 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
636 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
637 assert(s->mb_y || linesize <= block_offset[i]);
638 if (!topright_avail) {
639 tr = ptr[3 - linesize] * 0x01010101u;
640 topright = (uint8_t *)&tr;
641 } else
642 topright = ptr + 4 - linesize;
643 } else
644 topright = NULL;
645
646 s->hpc.pred4x4[dir](ptr, topright, linesize);
647 nnz = s->non_zero_count_cache[scan8[i]];
648 if (nnz) {
649 svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
650 }
651 }
652 } else {
653 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
654 svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
655 }
656 }
657
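/**
 * Reconstruct the current macroblock: intra prediction for luma and chroma
 * where applicable, followed by the residual IDCTs for luma and, if the cbp
 * signals coded chroma, the chroma DC/AC blocks.
 */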
658 static void hl_decode_mb(SVQ3Context *s, const H264Context *h)
659 {
660 const int mb_x = s->mb_x;
661 const int mb_y = s->mb_y;
662 const int mb_xy = s->mb_xy;
663 const int mb_type = s->cur_pic->mb_type[mb_xy];
664 uint8_t *dest_y, *dest_cb, *dest_cr;
665 int linesize, uvlinesize;
666 int i, j;
667 const int *block_offset = &s->block_offset[0];
668 const int block_h = 16 >> h->chroma_y_shift;
669
670 linesize = s->cur_pic->f->linesize[0];
671 uvlinesize = s->cur_pic->f->linesize[1];
672
673 dest_y = s->cur_pic->f->data[0] + (mb_x + mb_y * linesize) * 16;
674 dest_cb = s->cur_pic->f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
675 dest_cr = s->cur_pic->f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;
676
677 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
678 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);
679
680 if (IS_INTRA(mb_type)) {
681 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
682 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
683
684 hl_decode_mb_predict_luma(s, h, mb_type, block_offset, linesize, dest_y);
685 }
686
687 hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
688
689 if (s->cbp & 0x30) {
690 uint8_t *dest[2] = { dest_cb, dest_cr };
691 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
692 s->dequant4_coeff[4][0]);
693 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
694 s->dequant4_coeff[4][0]);
695 for (j = 1; j < 3; j++) {
696 for (i = j * 16; i < j * 16 + 4; i++)
697 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
698 uint8_t *const ptr = dest[j - 1] + block_offset[i];
699 svq3_add_idct_c(ptr, s->mb + i * 16,
700 uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
701 }
702 }
703 }
704 }
705
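/**
 * Decode one macroblock. mb_type 0 is skip/direct, 1..7 are inter modes with
 * progressively smaller partitions, 8 and 33 are intra 4x4 (33 forcing
 * DC_128 prediction) and the remaining values are intra 16x16 variants.
 * Returns 0 on success, -1 on any bitstream error.
 */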
706 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
707 {
708 H264Context *h = &s->h;
709 int i, j, k, m, dir, mode;
710 int cbp = 0;
711 uint32_t vlc;
712 int8_t *top, *left;
713 const int mb_xy = s->mb_xy;
714 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
715
716 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
717 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
718 s->topright_samples_available = 0xFFFF;
719
720 if (mb_type == 0) { /* SKIP */
721 if (s->pict_type == AV_PICTURE_TYPE_P ||
722 s->next_pic->mb_type[mb_xy] == -1) {
723 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
724 0, 0, 0, 0, 0, 0);
725
726 if (s->pict_type == AV_PICTURE_TYPE_B)
727 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
728 0, 0, 0, 0, 1, 1);
729
730 mb_type = MB_TYPE_SKIP;
731 } else {
732 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
733 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
734 return -1;
735 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
736 return -1;
737
738 mb_type = MB_TYPE_16x16;
739 }
740 } else if (mb_type < 8) { /* INTER */
741 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&s->gb_slice))
742 mode = THIRDPEL_MODE;
743 else if (s->halfpel_flag &&
744 s->thirdpel_flag == !get_bits1(&s->gb_slice))
745 mode = HALFPEL_MODE;
746 else
747 mode = FULLPEL_MODE;
748
749 /* fill caches */
750 /* note ref_cache should contain here:
751 * ????????
752 * ???11111
753 * N??11111
754 * N??11111
755 * N??11111
756 */
757
758 for (m = 0; m < 2; m++) {
759 if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
760 for (i = 0; i < 4; i++)
761 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
762 s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
763 } else {
764 for (i = 0; i < 4; i++)
765 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
766 }
767 if (s->mb_y > 0) {
768 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
769 s->cur_pic->motion_val[m][b_xy - s->b_stride],
770 4 * 2 * sizeof(int16_t));
771 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
772 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
773
774 if (s->mb_x < s->mb_width - 1) {
775 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
776 s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
777 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
778 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
779 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
780 } else
781 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
782 if (s->mb_x > 0) {
783 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
784 s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
785 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
786 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
787 } else
788 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
789 } else
790 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
791 PART_NOT_AVAILABLE, 8);
792
793 if (s->pict_type != AV_PICTURE_TYPE_B)
794 break;
795 }
796
797 /* decode motion vector(s) and form prediction(s) */
798 if (s->pict_type == AV_PICTURE_TYPE_P) {
799 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
800 return -1;
801 } else { /* AV_PICTURE_TYPE_B */
802 if (mb_type != 2) {
803 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
804 return -1;
805 } else {
806 for (i = 0; i < 4; i++)
807 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
808 0, 4 * 2 * sizeof(int16_t));
809 }
810 if (mb_type != 1) {
811 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
812 return -1;
813 } else {
814 for (i = 0; i < 4; i++)
815 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
816 0, 4 * 2 * sizeof(int16_t));
817 }
818 }
819
820 mb_type = MB_TYPE_16x16;
821 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
822 int8_t *i4x4 = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
823 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
824
825 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
826
827 if (mb_type == 8) {
828 if (s->mb_x > 0) {
829 for (i = 0; i < 4; i++)
830 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
831 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
832 s->left_samples_available = 0x5F5F;
833 }
834 if (s->mb_y > 0) {
835 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
836 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
837 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
838 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
839
840 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
841 s->top_samples_available = 0x33FF;
842 }
843
844 /* decode prediction codes for luma blocks */
845 for (i = 0; i < 16; i += 2) {
846 vlc = svq3_get_ue_golomb(&s->gb_slice);
847
848 if (vlc >= 25) {
849 av_log(h->avctx, AV_LOG_ERROR,
850 "luma prediction:%"PRIu32"\n", vlc);
851 return -1;
852 }
853
854 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
855 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
856
857 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
858 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
859
860 if (left[1] == -1 || left[2] == -1) {
861 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
862 return -1;
863 }
864 }
865 } else { /* mb_type == 33, DC_128_PRED block type */
866 for (i = 0; i < 4; i++)
867 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
868 }
869
870 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
871 i4x4[4] = i4x4_cache[7 + 8 * 3];
872 i4x4[5] = i4x4_cache[7 + 8 * 2];
873 i4x4[6] = i4x4_cache[7 + 8 * 1];
874
875 if (mb_type == 8) {
876 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
877 h->avctx, s->top_samples_available,
878 s->left_samples_available);
879
880 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
881 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
882 } else {
883 for (i = 0; i < 4; i++)
884 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
885
886 s->top_samples_available = 0x33FF;
887 s->left_samples_available = 0x5F5F;
888 }
889
890 mb_type = MB_TYPE_INTRA4x4;
891 } else { /* INTRA16x16 */
892 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
893 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
894
895 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
896 s->left_samples_available, dir, 0)) < 0) {
897 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
898 return s->intra16x16_pred_mode;
899 }
900
901 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
902 mb_type = MB_TYPE_INTRA16x16;
903 }
904
905 if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
906 for (i = 0; i < 4; i++)
907 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
908 0, 4 * 2 * sizeof(int16_t));
909 if (s->pict_type == AV_PICTURE_TYPE_B) {
910 for (i = 0; i < 4; i++)
911 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
912 0, 4 * 2 * sizeof(int16_t));
913 }
914 }
915 if (!IS_INTRA4x4(mb_type)) {
916 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
917 }
918 if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
919 memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
920 }
921
922 if (!IS_INTRA16x16(mb_type) &&
923 (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
924 if ((vlc = svq3_get_ue_golomb(&s->gb_slice)) >= 48) {
925 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
926 return -1;
927 }
928
929 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
930 : ff_h264_golomb_to_inter_cbp[vlc];
931 }
932 if (IS_INTRA16x16(mb_type) ||
933 (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
934 s->qscale += svq3_get_se_golomb(&s->gb_slice);
935
936 if (s->qscale > 31u) {
937 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
938 return -1;
939 }
940 }
941 if (IS_INTRA16x16(mb_type)) {
942 AV_ZERO128(s->mb_luma_dc[0] + 0);
943 AV_ZERO128(s->mb_luma_dc[0] + 8);
944 if (svq3_decode_block(&s->gb_slice, s->mb_luma_dc[0], 0, 1)) {
945 av_log(h->avctx, AV_LOG_ERROR,
946 "error while decoding intra luma dc\n");
947 return -1;
948 }
949 }
950
951 if (cbp) {
952 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
953 const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
954
955 for (i = 0; i < 4; i++)
956 if ((cbp & (1 << i))) {
957 for (j = 0; j < 4; j++) {
958 k = index ? (1 * (j & 1) + 2 * (i & 1) +
959 2 * (j & 2) + 4 * (i & 2))
960 : (4 * i + j);
961 s->non_zero_count_cache[scan8[k]] = 1;
962
963 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], index, type)) {
964 av_log(h->avctx, AV_LOG_ERROR,
965 "error while decoding block\n");
966 return -1;
967 }
968 }
969 }
970
971 if ((cbp & 0x30)) {
972 for (i = 1; i < 3; ++i)
973 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * 16 * i], 0, 3)) {
974 av_log(h->avctx, AV_LOG_ERROR,
975 "error while decoding chroma dc block\n");
976 return -1;
977 }
978
979 if ((cbp & 0x20)) {
980 for (i = 1; i < 3; i++) {
981 for (j = 0; j < 4; j++) {
982 k = 16 * i + j;
983 s->non_zero_count_cache[scan8[k]] = 1;
984
985 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], 1, 1)) {
986 av_log(h->avctx, AV_LOG_ERROR,
987 "error while decoding chroma ac block\n");
988 return -1;
989 }
990 }
991 }
992 }
993 }
994 }
995
996 s->cbp = cbp;
997 s->cur_pic->mb_type[mb_xy] = mb_type;
998
999 if (IS_INTRA(mb_type))
1000 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
1001 s->left_samples_available, DC_PRED8x8, 1);
1002
1003 return 0;
1004 }
1005
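/**
 * Parse one slice header from s->gb: the header byte, the embedded slice
 * length, slice id, slice number, qscale and adaptive-quant flag. The slice
 * payload is copied into s->slice_buf (de-scrambled with the watermark key
 * when one is present), s->gb_slice is pointed at it, and the intra
 * predictors of the neighbouring macroblock rows are invalidated.
 */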
1006 static int svq3_decode_slice_header(AVCodecContext *avctx)
1007 {
1008 SVQ3Context *s = avctx->priv_data;
1009 H264Context *h = &s->h;
1010 const int mb_xy = s->mb_xy;
1011 int i, header;
1012 unsigned slice_id;
1013
1014 header = get_bits(&s->gb, 8);
1015
1016 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1017 /* TODO: what? */
1018 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1019 return -1;
1020 } else {
1021 int slice_bits, slice_bytes, slice_length;
1022 int length = header >> 5 & 3;
1023
1024 slice_length = show_bits(&s->gb, 8 * length);
1025 slice_bits = slice_length * 8;
1026 slice_bytes = slice_length + length - 1;
1027
1028 if (slice_bytes > get_bits_left(&s->gb)) {
1029 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1030 return -1;
1031 }
1032
1033 skip_bits(&s->gb, 8);
1034
1035 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1036 if (!s->slice_buf)
1037 return AVERROR(ENOMEM);
1038
1039 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1040
1041 init_get_bits(&s->gb_slice, s->slice_buf, slice_bits);
1042
1043 if (s->watermark_key) {
1044 uint32_t header = AV_RL32(&s->gb_slice.buffer[1]);
1045 AV_WL32(&s->gb_slice.buffer[1], header ^ s->watermark_key);
1046 }
1047 if (length > 0) {
1048 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1049 }
1050 skip_bits_long(&s->gb, slice_bytes * 8);
1051 }
1052
1053 if ((slice_id = svq3_get_ue_golomb(&s->gb_slice)) >= 3) {
1054 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
1055 return -1;
1056 }
1057
1058 s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
1059
1060 if ((header & 0x9F) == 2) {
1061 i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1062 get_bits(&s->gb_slice, i);
1063 } else {
1064 skip_bits1(&s->gb_slice);
1065 }
1066
1067 s->slice_num = get_bits(&s->gb_slice, 8);
1068 s->qscale = get_bits(&s->gb_slice, 5);
1069 s->adaptive_quant = get_bits1(&s->gb_slice);
1070
1071 /* unknown fields */
1072 skip_bits1(&s->gb_slice);
1073
1074 if (s->unknown_flag)
1075 skip_bits1(&s->gb_slice);
1076
1077 skip_bits1(&s->gb_slice);
1078 skip_bits(&s->gb_slice, 2);
1079
1080 while (get_bits1(&s->gb_slice))
1081 skip_bits(&s->gb_slice, 8);
1082
1083 /* reset intra predictors and invalidate motion vector references */
1084 if (s->mb_x > 0) {
1085 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1086 -1, 4 * sizeof(int8_t));
1087 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1088 -1, 8 * sizeof(int8_t) * s->mb_x);
1089 }
1090 if (s->mb_y > 0) {
1091 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1092 -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1093
1094 if (s->mb_x > 0)
1095 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1096 }
1097
1098 return 0;
1099 }
1100
1101 static void init_dequant4_coeff_table(SVQ3Context *s)
1102 {
1103 int q, x;
1104 const int max_qp = 51;
1105
1106 for (q = 0; q < max_qp + 1; q++) {
1107 int shift = ff_h264_quant_div6[q] + 2;
1108 int idx = ff_h264_quant_rem6[q];
1109 for (x = 0; x < 16; x++)
1110 s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1111 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1112 }
1113 }
1114
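/**
 * Decoder init: set up the H.264 helper contexts and DSP functions, locate
 * and parse the "SEQH" block in the extradata (frame size code, half/third
 * pel flags, low-delay flag and optional zlib-compressed watermark) and
 * allocate the per-macroblock tables.
 */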
1115 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1116 {
1117 SVQ3Context *s = avctx->priv_data;
1118 H264Context *h = &s->h;
1119 int m, x, y;
1120 unsigned char *extradata;
1121 unsigned char *extradata_end;
1122 unsigned int size;
1123 int marker_found = 0;
1124
1125 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1126 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1127 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1128 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1129 av_freep(&s->cur_pic);
1130 av_freep(&s->last_pic);
1131 av_freep(&s->next_pic);
1132 return AVERROR(ENOMEM);
1133 }
1134
1135 s->cur_pic->f = av_frame_alloc();
1136 s->last_pic->f = av_frame_alloc();
1137 s->next_pic->f = av_frame_alloc();
1138 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1139 return AVERROR(ENOMEM);
1140
1141 if (ff_h264_decode_init(avctx) < 0)
1142 return -1;
1143
1144 // we will overwrite it later during decoding
1145 av_frame_free(&h->cur_pic.f);
1146
1147 ff_h264dsp_init(&s->h264dsp, 8, 1);
1148 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1149 ff_videodsp_init(&s->vdsp, 8);
1150
1151 h->sps.bit_depth_luma = 8;
1152 h->chroma_format_idc = 1;
1153
1154 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1155 ff_tpeldsp_init(&s->tdsp);
1156
1157 h->flags = avctx->flags;
1158 h->picture_structure = PICT_FRAME;
1159 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1160 avctx->color_range = AVCOL_RANGE_JPEG;
1161
1162 h->chroma_x_shift = h->chroma_y_shift = 1;
1163
1164 s->halfpel_flag = 1;
1165 s->thirdpel_flag = 1;
1166 s->unknown_flag = 0;
1167
1168 /* prowl for the "SEQH" marker in the extradata */
1169 extradata = (unsigned char *)avctx->extradata;
1170 extradata_end = avctx->extradata + avctx->extradata_size;
1171 if (extradata) {
1172 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1173 if (!memcmp(extradata, "SEQH", 4)) {
1174 marker_found = 1;
1175 break;
1176 }
1177 extradata++;
1178 }
1179 }
1180
1181 /* if a match was found, parse the extra data */
1182 if (marker_found) {
1183 GetBitContext gb;
1184 int frame_size_code;
1185
1186 size = AV_RB32(&extradata[4]);
1187 if (size > extradata_end - extradata - 8)
1188 return AVERROR_INVALIDDATA;
1189 init_get_bits(&gb, extradata + 8, size * 8);
1190
1191 /* 'frame size code' and optional 'width, height' */
1192 frame_size_code = get_bits(&gb, 3);
1193 switch (frame_size_code) {
1194 case 0:
1195 avctx->width = 160;
1196 avctx->height = 120;
1197 break;
1198 case 1:
1199 avctx->width = 128;
1200 avctx->height = 96;
1201 break;
1202 case 2:
1203 avctx->width = 176;
1204 avctx->height = 144;
1205 break;
1206 case 3:
1207 avctx->width = 352;
1208 avctx->height = 288;
1209 break;
1210 case 4:
1211 avctx->width = 704;
1212 avctx->height = 576;
1213 break;
1214 case 5:
1215 avctx->width = 240;
1216 avctx->height = 180;
1217 break;
1218 case 6:
1219 avctx->width = 320;
1220 avctx->height = 240;
1221 break;
1222 case 7:
1223 avctx->width = get_bits(&gb, 12);
1224 avctx->height = get_bits(&gb, 12);
1225 break;
1226 }
1227
1228 s->halfpel_flag = get_bits1(&gb);
1229 s->thirdpel_flag = get_bits1(&gb);
1230
1231 /* unknown fields */
1232 skip_bits1(&gb);
1233 skip_bits1(&gb);
1234 skip_bits1(&gb);
1235 skip_bits1(&gb);
1236
1237 h->low_delay = get_bits1(&gb);
1238
1239 /* unknown field */
1240 skip_bits1(&gb);
1241
1242 while (get_bits1(&gb))
1243 skip_bits(&gb, 8);
1244
1245 s->unknown_flag = get_bits1(&gb);
1246 avctx->has_b_frames = !h->low_delay;
1247 if (s->unknown_flag) {
1248 #if CONFIG_ZLIB
1249 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1250 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1251 int u1 = svq3_get_ue_golomb(&gb);
1252 int u2 = get_bits(&gb, 8);
1253 int u3 = get_bits(&gb, 2);
1254 int u4 = svq3_get_ue_golomb(&gb);
1255 unsigned long buf_len = watermark_width *
1256 watermark_height * 4;
1257 int offset = get_bits_count(&gb) + 7 >> 3;
1258 uint8_t *buf;
1259
1260 if (watermark_height > 0 &&
1261 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1262 return -1;
1263
1264 buf = av_malloc(buf_len);
if (!buf)
    return AVERROR(ENOMEM);
1265 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1266 watermark_width, watermark_height);
1267 av_log(avctx, AV_LOG_DEBUG,
1268 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1269 u1, u2, u3, u4, offset);
1270 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1271 size - offset) != Z_OK) {
1272 av_log(avctx, AV_LOG_ERROR,
1273 "could not uncompress watermark logo\n");
1274 av_free(buf);
1275 return -1;
1276 }
1277 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1278 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1279 av_log(avctx, AV_LOG_DEBUG,
1280 "watermark key %#"PRIx32"\n", s->watermark_key);
1281 av_free(buf);
1282 #else
1283 av_log(avctx, AV_LOG_ERROR,
1284 "this SVQ3 file contains a watermark which needs zlib support compiled in\n");
1285 return -1;
1286 #endif
1287 }
1288 }
1289
1290 s->mb_width = (avctx->width + 15) / 16;
1291 s->mb_height = (avctx->height + 15) / 16;
1292 s->mb_stride = s->mb_width + 1;
1293 s->mb_num = s->mb_width * s->mb_height;
1294 s->b_stride = 4 * s->mb_width;
1295 s->h_edge_pos = s->mb_width * 16;
1296 s->v_edge_pos = s->mb_height * 16;
1297
1298 s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1299 if (!s->intra4x4_pred_mode)
1300 return AVERROR(ENOMEM);
1301
1302 s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1303 sizeof(*s->mb2br_xy));
1304 if (!s->mb2br_xy)
1305 return AVERROR(ENOMEM);
1306
1307 for (y = 0; y < s->mb_height; y++)
1308 for (x = 0; x < s->mb_width; x++) {
1309 const int mb_xy = x + y * s->mb_stride;
1310
1311 s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1312 }
1313
1314 init_dequant4_coeff_table(s);
1315
1316 return 0;
1317 }
1318
1319 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1320 {
1321 int i;
1322 for (i = 0; i < 2; i++) {
1323 av_buffer_unref(&pic->motion_val_buf[i]);
1324 av_buffer_unref(&pic->ref_index_buf[i]);
1325 }
1326 av_buffer_unref(&pic->mb_type_buf);
1327
1328 av_frame_unref(pic->f);
1329 }
1330
1331 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1332 {
1333 SVQ3Context *s = avctx->priv_data;
1334 const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
1335 const int mb_array_size = s->mb_stride * s->mb_height;
1336 const int b4_stride = s->mb_width * 4 + 1;
1337 const int b4_array_size = b4_stride * s->mb_height * 4;
1338 int ret;
1339
1340 if (!pic->motion_val_buf[0]) {
1341 int i;
1342
1343 pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1344 if (!pic->mb_type_buf)
1345 return AVERROR(ENOMEM);
1346 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1347
1348 for (i = 0; i < 2; i++) {
1349 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1350 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1351 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1352 ret = AVERROR(ENOMEM);
1353 goto fail;
1354 }
1355
1356 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1357 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1358 }
1359 }
1360 pic->reference = !(s->pict_type == AV_PICTURE_TYPE_B);
1361
1362 ret = ff_get_buffer(avctx, pic->f,
1363 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1364 if (ret < 0)
1365 goto fail;
1366
1367 if (!s->edge_emu_buffer) {
1368 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1369 if (!s->edge_emu_buffer)
1370 return AVERROR(ENOMEM);
1371 }
1372
1373 return 0;
1374 fail:
1375 free_picture(avctx, pic);
1376 return ret;
1377 }
1378
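/**
 * Decode one frame. An empty packet flushes the delayed frame held in
 * next_pic. Otherwise the slice header is parsed, reference pictures are
 * rotated and, unless the frame is skipped, every macroblock of the picture
 * is decoded and the finished frame (or the delayed reference for non-B,
 * non-low-delay streams) is returned.
 */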
1379 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1380 int *got_frame, AVPacket *avpkt)
1381 {
1382 const uint8_t *buf = avpkt->data;
1383 SVQ3Context *s = avctx->priv_data;
1384 H264Context *h = &s->h;
1385 int buf_size = avpkt->size;
1386 int ret, m, i;
1387
1388 /* special case for last picture */
1389 if (buf_size == 0) {
1390 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1391 ret = av_frame_ref(data, s->next_pic->f);
1392 if (ret < 0)
1393 return ret;
1394 s->last_frame_output = 1;
1395 *got_frame = 1;
1396 }
1397 return 0;
1398 }
1399
1400 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1401 if (ret < 0)
1402 return ret;
1403
1404 s->mb_x = s->mb_y = s->mb_xy = 0;
1405
1406 if (svq3_decode_slice_header(avctx))
1407 return -1;
1408
1409 if (s->pict_type != AV_PICTURE_TYPE_B)
1410 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1411
1412 av_frame_unref(s->cur_pic->f);
1413
1414 /* for skipping the frame */
1415 s->cur_pic->f->pict_type = s->pict_type;
1416 s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1417
1418 ret = get_buffer(avctx, s->cur_pic);
1419 if (ret < 0)
1420 return ret;
1421
1422 for (i = 0; i < 16; i++) {
1423 s->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1424 s->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1425 }
1426 for (i = 0; i < 16; i++) {
1427 s->block_offset[16 + i] =
1428 s->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1429 s->block_offset[48 + 16 + i] =
1430 s->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1431 }
1432
1433 if (s->pict_type != AV_PICTURE_TYPE_I) {
1434 if (!s->last_pic->f->data[0]) {
1435 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1436 ret = get_buffer(avctx, s->last_pic);
1437 if (ret < 0)
1438 return ret;
1439 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1440 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1441 s->last_pic->f->linesize[1]);
1442 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1443 s->last_pic->f->linesize[2]);
1444 }
1445
1446 if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1447 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1448 ret = get_buffer(avctx, s->next_pic);
1449 if (ret < 0)
1450 return ret;
1451 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1452 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1453 s->next_pic->f->linesize[1]);
1454 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1455 s->next_pic->f->linesize[2]);
1456 }
1457 }
1458
1459 if (avctx->debug & FF_DEBUG_PICT_INFO)
1460 av_log(h->avctx, AV_LOG_DEBUG,
1461 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1462 av_get_picture_type_char(s->pict_type),
1463 s->halfpel_flag, s->thirdpel_flag,
1464 s->adaptive_quant, s->qscale, s->slice_num);
1465
1466 if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1467 avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1468 avctx->skip_frame >= AVDISCARD_ALL)
1469 return 0;
1470
1471 if (s->next_p_frame_damaged) {
1472 if (s->pict_type == AV_PICTURE_TYPE_B)
1473 return 0;
1474 else
1475 s->next_p_frame_damaged = 0;
1476 }
1477
1478 if (s->pict_type == AV_PICTURE_TYPE_B) {
1479 s->frame_num_offset = s->slice_num - s->prev_frame_num;
1480
1481 if (s->frame_num_offset < 0)
1482 s->frame_num_offset += 256;
1483 if (s->frame_num_offset == 0 ||
1484 s->frame_num_offset >= s->prev_frame_num_offset) {
1485 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1486 return -1;
1487 }
1488 } else {
1489 s->prev_frame_num = s->frame_num;
1490 s->frame_num = s->slice_num;
1491 s->prev_frame_num_offset = s->frame_num - s->prev_frame_num;
1492
1493 if (s->prev_frame_num_offset < 0)
1494 s->prev_frame_num_offset += 256;
1495 }
1496
1497 for (m = 0; m < 2; m++) {
1498 int i;
1499 for (i = 0; i < 4; i++) {
1500 int j;
1501 for (j = -1; j < 4; j++)
1502 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1503 if (i < 3)
1504 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1505 }
1506 }
1507
1508 for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1509 for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1510 unsigned mb_type;
1511 s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1512
1513 if ((get_bits_left(&s->gb_slice)) <= 7) {
1514 if (((get_bits_count(&s->gb_slice) & 7) == 0 ||
1515 show_bits(&s->gb_slice, get_bits_left(&s->gb_slice) & 7) == 0)) {
1516
1517 if (svq3_decode_slice_header(avctx))
1518 return -1;
1519 }
1520 /* TODO: support s->mb_skip_run */
1521 }
1522
1523 mb_type = svq3_get_ue_golomb(&s->gb_slice);
1524
1525 if (s->pict_type == AV_PICTURE_TYPE_I)
1526 mb_type += 8;
1527 else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1528 mb_type += 4;
1529 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1530 av_log(h->avctx, AV_LOG_ERROR,
1531 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1532 return -1;
1533 }
1534
1535 if (mb_type != 0)
1536 hl_decode_mb(s, h);
1537
1538 if (s->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1539 s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1540 (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1541 }
1542
1543 ff_draw_horiz_band(avctx, s->cur_pic->f,
1544 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1545 16 * s->mb_y, 16, h->picture_structure, 0,
1546 h->low_delay);
1547 }
1548
1549 if (s->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1550 ret = av_frame_ref(data, s->cur_pic->f);
1551 else if (s->last_pic->f->data[0])
1552 ret = av_frame_ref(data, s->last_pic->f);
1553 if (ret < 0)
1554 return ret;
1555
1556 /* Do not output the last pic after seeking. */
1557 if (s->last_pic->f->data[0] || h->low_delay)
1558 *got_frame = 1;
1559
1560 if (s->pict_type != AV_PICTURE_TYPE_B) {
1561 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1562 } else {
1563 av_frame_unref(s->cur_pic->f);
1564 }
1565
1566 return buf_size;
1567 }
1568
1569 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1570 {
1571 SVQ3Context *s = avctx->priv_data;
1572 H264Context *h = &s->h;
1573
1574 free_picture(avctx, s->cur_pic);
1575 free_picture(avctx, s->next_pic);
1576 free_picture(avctx, s->last_pic);
1577 av_frame_free(&s->cur_pic->f);
1578 av_frame_free(&s->next_pic->f);
1579 av_frame_free(&s->last_pic->f);
1580 av_freep(&s->cur_pic);
1581 av_freep(&s->next_pic);
1582 av_freep(&s->last_pic);
1583 av_freep(&s->slice_buf);
1584 av_freep(&s->intra4x4_pred_mode);
1585 av_freep(&s->edge_emu_buffer);
1586 av_freep(&s->mb2br_xy);
1587
1588 ff_h264_free_context(h);
1589
1590 return 0;
1591 }
1592
1593 AVCodec ff_svq3_decoder = {
1594 .name = "svq3",
1595 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1596 .type = AVMEDIA_TYPE_VIDEO,
1597 .id = AV_CODEC_ID_SVQ3,
1598 .priv_data_size = sizeof(SVQ3Context),
1599 .init = svq3_decode_init,
1600 .close = svq3_decode_end,
1601 .decode = svq3_decode_frame,
1602 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1603 AV_CODEC_CAP_DR1 |
1604 AV_CODEC_CAP_DELAY,
1605 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1606 AV_PIX_FMT_NONE},
1607 };