1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple QuickTime files. QuickTime files
24 * have stsd atoms to describe media trak properties. An stsd atom for a
25 * video trak contains one or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly; SVQ3 is one
28 * of them. To get the best use out of this decoder, the calling app must
29 * make the SVQ3 ImageDescription atom available via the AVCodecContext's
30 * extradata[_size] fields:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have passed these parameters correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
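/*
 * Illustrative sketch only (caller-side code, not part of this decoder):
 * assuming a hypothetical pointer image_description to the ImageDescription
 * payload (starting with the 'SVQ3' fourcc, without the 4-byte length) and
 * its size desc_size, a calling app could set up the context like this:
 *
 *     avctx->extradata = av_mallocz(desc_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, image_description, desc_size);
 *     avctx->extradata_size = desc_size;
 */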
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * SVQ3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 uint8_t *slice_buf;
83 int slice_size;
84 int halfpel_flag;
85 int thirdpel_flag;
86 int unknown_flag;
87 uint32_t watermark_key;
88 int adaptive_quant;
89 int next_p_frame_damaged;
90 int h_edge_pos;
91 int v_edge_pos;
92 int last_frame_output;
93 int slice_num;
94 int qscale;
95 int cbp;
96 int frame_num;
97 int frame_num_offset;
98 int prev_frame_num_offset;
99 int prev_frame_num;
100
101 enum AVPictureType pict_type;
102
103 int mb_x, mb_y;
104 int mb_xy;
105 int mb_width, mb_height;
106 int mb_stride, mb_num;
107 int b_stride;
108
109 uint32_t *mb2br_xy;
110
111 int chroma_pred_mode;
112 int intra16x16_pred_mode;
113
114 int8_t intra4x4_pred_mode_cache[5 * 8];
115 int8_t (*intra4x4_pred_mode);
116
117 unsigned int top_samples_available;
118 unsigned int topright_samples_available;
119 unsigned int left_samples_available;
120
121 uint8_t *edge_emu_buffer;
122
123 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
124 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
125 DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
126 DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
127 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
128 uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
129 } SVQ3Context;
130
131 #define FULLPEL_MODE 1
132 #define HALFPEL_MODE 2
133 #define THIRDPEL_MODE 3
134 #define PREDICT_MODE 4
135
136 /* dual scan (from some older h264 draft)
137 * o-->o-->o   o
138 *         |  /|
139 * o   o   o / o
140 * | / |   |/  |
141 * o   o   o   o
142 *   /
143 * o-->o-->o-->o
144 */
145 static const uint8_t svq3_scan[16] = {
146 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
147 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
148 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
149 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
150 };
151
152 static const uint8_t luma_dc_zigzag_scan[16] = {
153 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
154 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
155 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
156 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
157 };
158
159 static const uint8_t svq3_pred_0[25][2] = {
160 { 0, 0 },
161 { 1, 0 }, { 0, 1 },
162 { 0, 2 }, { 1, 1 }, { 2, 0 },
163 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
164 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
165 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
166 { 2, 4 }, { 3, 3 }, { 4, 2 },
167 { 4, 3 }, { 3, 4 },
168 { 4, 4 }
169 };
170
171 static const int8_t svq3_pred_1[6][6][5] = {
172 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
173 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
174 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
175 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
176 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
177 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
178 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
179 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
180 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
181 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
182 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
183 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
184 };
185
186 static const struct {
187 uint8_t run;
188 uint8_t level;
189 } svq3_dct_tables[2][16] = {
190 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
191 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
192 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
193 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
194 };
195
196 static const uint32_t svq3_dequant_coeff[32] = {
197 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
198 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
199 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
200 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
201 };
202
203 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
204 {
205 const int qmul = svq3_dequant_coeff[qp];
206 #define stride 16
207 int i;
208 int temp[16];
209 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
210
211 for (i = 0; i < 4; i++) {
212 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
213 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
214 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
215 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
216
217 temp[4 * i + 0] = z0 + z3;
218 temp[4 * i + 1] = z1 + z2;
219 temp[4 * i + 2] = z1 - z2;
220 temp[4 * i + 3] = z0 - z3;
221 }
222
223 for (i = 0; i < 4; i++) {
224 const int offset = x_offset[i];
225 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
226 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
227 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
228 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
229
230 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
231 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
232 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
233 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
234 }
235 }
236 #undef stride
237
238 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
239 int stride, int qp, int dc)
240 {
241 const int qmul = svq3_dequant_coeff[qp];
242 int i;
243
244 if (dc) {
245 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
246 : qmul * (block[0] >> 3) / 2);
247 block[0] = 0;
248 }
249
250 for (i = 0; i < 4; i++) {
251 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
252 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
253 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
254 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
255
256 block[0 + 4 * i] = z0 + z3;
257 block[1 + 4 * i] = z1 + z2;
258 block[2 + 4 * i] = z1 - z2;
259 block[3 + 4 * i] = z0 - z3;
260 }
261
262 for (i = 0; i < 4; i++) {
263 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
264 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
265 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
266 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
267 const int rr = (dc + 0x80000);
268
269 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
270 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
271 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
272 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
273 }
274
275 memset(block, 0, 16 * sizeof(int16_t));
276 }
277
278 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
279 int index, const int type)
280 {
281 static const uint8_t *const scan_patterns[4] = {
282 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
283 };
284
285 int run, level, limit;
286 unsigned vlc;
287 const int intra = 3 * type >> 2;
288 const uint8_t *const scan = scan_patterns[type];
289
290 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
291 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
292 int sign = (vlc & 1) ? 0 : -1;
293 vlc = vlc + 1 >> 1;
294
295 if (type == 3) {
296 if (vlc < 3) {
297 run = 0;
298 level = vlc;
299 } else if (vlc < 4) {
300 run = 1;
301 level = 1;
302 } else {
303 run = vlc & 0x3;
304 level = (vlc + 9 >> 2) - run;
305 }
306 } else {
307 if (vlc < 16) {
308 run = svq3_dct_tables[intra][vlc].run;
309 level = svq3_dct_tables[intra][vlc].level;
310 } else if (intra) {
311 run = vlc & 0x7;
312 level = (vlc >> 3) +
313 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
314 } else {
315 run = vlc & 0xF;
316 level = (vlc >> 4) +
317 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
318 }
319 }
320
321 if ((index += run) >= limit)
322 return -1;
323
324 block[scan[index]] = (level ^ sign) - sign;
325 }
326
327 if (type != 2) {
328 break;
329 }
330 }
331
332 return 0;
333 }
334
335 static av_always_inline int
336 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
337 int i, int list, int part_width)
338 {
339 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
340
341 if (topright_ref != PART_NOT_AVAILABLE) {
342 *C = s->mv_cache[list][i - 8 + part_width];
343 return topright_ref;
344 } else {
345 *C = s->mv_cache[list][i - 8 - 1];
346 return s->ref_cache[list][i - 8 - 1];
347 }
348 }
349
350 /**
351 * Get the predicted MV.
352 * @param n the block index
353 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
354 * @param mx the x component of the predicted motion vector
355 * @param my the y component of the predicted motion vector
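 *
 * Worked example (illustrative values, not taken from any stream): if the
 * left, top and diagonal neighbours all use the requested reference and
 * hold A = (2, 0), B = (6, 4) and C = (4, 8), the prediction is the
 * component-wise median, i.e. *mx = mid_pred(2, 6, 4) = 4 and
 * *my = mid_pred(0, 4, 8) = 4.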
356 */
357 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
358 int part_width, int list,
359 int ref, int *const mx, int *const my)
360 {
361 const int index8 = scan8[n];
362 const int top_ref = s->ref_cache[list][index8 - 8];
363 const int left_ref = s->ref_cache[list][index8 - 1];
364 const int16_t *const A = s->mv_cache[list][index8 - 1];
365 const int16_t *const B = s->mv_cache[list][index8 - 8];
366 const int16_t *C;
367 int diagonal_ref, match_count;
368
369 /* mv_cache
370 * B . . A T T T T
371 * U . . L . . , .
372 * U . . L . . . .
373 * U . . L . . , .
374 * . . . L . . . .
375 */
376
377 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
378 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
379 if (match_count > 1) { //most common
380 *mx = mid_pred(A[0], B[0], C[0]);
381 *my = mid_pred(A[1], B[1], C[1]);
382 } else if (match_count == 1) {
383 if (left_ref == ref) {
384 *mx = A[0];
385 *my = A[1];
386 } else if (top_ref == ref) {
387 *mx = B[0];
388 *my = B[1];
389 } else {
390 *mx = C[0];
391 *my = C[1];
392 }
393 } else {
394 if (top_ref == PART_NOT_AVAILABLE &&
395 diagonal_ref == PART_NOT_AVAILABLE &&
396 left_ref != PART_NOT_AVAILABLE) {
397 *mx = A[0];
398 *my = A[1];
399 } else {
400 *mx = mid_pred(A[0], B[0], C[0]);
401 *my = mid_pred(A[1], B[1], C[1]);
402 }
403 }
404 }
405
406 static inline void svq3_mc_dir_part(SVQ3Context *s,
407 int x, int y, int width, int height,
408 int mx, int my, int dxy,
409 int thirdpel, int dir, int avg)
410 {
411 H264Context *h = &s->h;
412 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
413 uint8_t *src, *dest;
414 int i, emu = 0;
415 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
416 int linesize = s->cur_pic->f->linesize[0];
417 int uvlinesize = s->cur_pic->f->linesize[1];
418
419 mx += x;
420 my += y;
421
422 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
423 my < 0 || my >= s->v_edge_pos - height - 1) {
424 emu = 1;
425 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
426 my = av_clip(my, -16, s->v_edge_pos - height + 15);
427 }
428
429 /* form component predictions */
430 dest = s->cur_pic->f->data[0] + x + y * linesize;
431 src = pic->f->data[0] + mx + my * linesize;
432
433 if (emu) {
434 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
435 linesize, linesize,
436 width + 1, height + 1,
437 mx, my, s->h_edge_pos, s->v_edge_pos);
438 src = s->edge_emu_buffer;
439 }
440 if (thirdpel)
441 (avg ? s->tdsp.avg_tpel_pixels_tab
442 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
443 width, height);
444 else
445 (avg ? s->hdsp.avg_pixels_tab
446 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
447 height);
448
449 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
450 mx = mx + (mx < (int) x) >> 1;
451 my = my + (my < (int) y) >> 1;
452 width = width >> 1;
453 height = height >> 1;
454 blocksize++;
455
456 for (i = 1; i < 3; i++) {
457 dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
458 src = pic->f->data[i] + mx + my * uvlinesize;
459
460 if (emu) {
461 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
462 uvlinesize, uvlinesize,
463 width + 1, height + 1,
464 mx, my, (s->h_edge_pos >> 1),
465 s->v_edge_pos >> 1);
466 src = s->edge_emu_buffer;
467 }
468 if (thirdpel)
469 (avg ? s->tdsp.avg_tpel_pixels_tab
470 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
471 uvlinesize,
472 width, height);
473 else
474 (avg ? s->hdsp.avg_pixels_tab
475 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
476 uvlinesize,
477 height);
478 }
479 }
480 }
481
482 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
483 int dir, int avg)
484 {
485 int i, j, k, mx, my, dx, dy, x, y;
486 H264Context *h = &s->h;
487 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
488 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
489 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
490 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
491 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
492
493 for (i = 0; i < 16; i += part_height)
494 for (j = 0; j < 16; j += part_width) {
495 const int b_xy = (4 * s->mb_x + (j >> 2)) +
496 (4 * s->mb_y + (i >> 2)) * s->b_stride;
497 int dxy;
498 x = 16 * s->mb_x + j;
499 y = 16 * s->mb_y + i;
500 k = (j >> 2 & 1) + (i >> 1 & 2) +
501 (j >> 1 & 4) + (i & 8);
502
503 if (mode != PREDICT_MODE) {
504 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
505 } else {
506 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
507 my = s->next_pic->motion_val[0][b_xy][1] << 1;
508
509 if (dir == 0) {
510 mx = mx * s->frame_num_offset /
511 s->prev_frame_num_offset + 1 >> 1;
512 my = my * s->frame_num_offset /
513 s->prev_frame_num_offset + 1 >> 1;
514 } else {
515 mx = mx * (s->frame_num_offset - s->prev_frame_num_offset) /
516 s->prev_frame_num_offset + 1 >> 1;
517 my = my * (s->frame_num_offset - s->prev_frame_num_offset) /
518 s->prev_frame_num_offset + 1 >> 1;
519 }
520 }
521
522 /* clip motion vector prediction to frame border */
523 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
524 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
525
526 /* get (optional) motion vector differential */
527 if (mode == PREDICT_MODE) {
528 dx = dy = 0;
529 } else {
530 dy = svq3_get_se_golomb(&h->gb);
531 dx = svq3_get_se_golomb(&h->gb);
532
533 if (dx == INVALID_VLC || dy == INVALID_VLC) {
534 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
535 return -1;
536 }
537 }
538
539 /* compute motion vector */
540 if (mode == THIRDPEL_MODE) {
541 int fx, fy;
542 mx = (mx + 1 >> 1) + dx;
543 my = (my + 1 >> 1) + dy;
544 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
545 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
546 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
547
548 svq3_mc_dir_part(s, x, y, part_width, part_height,
549 fx, fy, dxy, 1, dir, avg);
550 mx += mx;
551 my += my;
552 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
553 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
554 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
555 dxy = (mx & 1) + 2 * (my & 1);
556
557 svq3_mc_dir_part(s, x, y, part_width, part_height,
558 mx >> 1, my >> 1, dxy, 0, dir, avg);
559 mx *= 3;
560 my *= 3;
561 } else {
562 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
563 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
564
565 svq3_mc_dir_part(s, x, y, part_width, part_height,
566 mx, my, 0, 0, dir, avg);
567 mx *= 6;
568 my *= 6;
569 }
570
571 /* update mv_cache */
572 if (mode != PREDICT_MODE) {
573 int32_t mv = pack16to32(mx, my);
574
575 if (part_height == 8 && i < 8) {
576 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
577
578 if (part_width == 8 && j < 8)
579 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
580 }
581 if (part_width == 8 && j < 8)
582 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
583 if (part_width == 4 || part_height == 4)
584 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
585 }
586
587 /* write back motion vectors */
588 fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
589 part_width >> 2, part_height >> 2, s->b_stride,
590 pack16to32(mx, my), 4);
591 }
592
593 return 0;
594 }
595
596 static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
597 int mb_type, const int *block_offset,
598 int linesize, uint8_t *dest_y)
599 {
600 int i;
601 if (!IS_INTRA4x4(mb_type)) {
602 for (i = 0; i < 16; i++)
603 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
604 uint8_t *const ptr = dest_y + block_offset[i];
605 svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
606 s->qscale, IS_INTRA(mb_type) ? 1 : 0);
607 }
608 }
609 }
610
611 static av_always_inline int dctcoef_get(int16_t *mb, int index)
612 {
613 return AV_RN16A(mb + index);
614 }
615
616 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
617 const H264Context *h,
618 int mb_type,
619 const int *block_offset,
620 int linesize,
621 uint8_t *dest_y)
622 {
623 int i;
624 int qscale = s->qscale;
625
626 if (IS_INTRA4x4(mb_type)) {
627 for (i = 0; i < 16; i++) {
628 uint8_t *const ptr = dest_y + block_offset[i];
629 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
630
631 uint8_t *topright;
632 int nnz, tr;
633 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
634 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
635 assert(s->mb_y || linesize <= block_offset[i]);
636 if (!topright_avail) {
637 tr = ptr[3 - linesize] * 0x01010101u;
638 topright = (uint8_t *)&tr;
639 } else
640 topright = ptr + 4 - linesize;
641 } else
642 topright = NULL;
643
644 s->hpc.pred4x4[dir](ptr, topright, linesize);
645 nnz = s->non_zero_count_cache[scan8[i]];
646 if (nnz) {
647 svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
648 }
649 }
650 } else {
651 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
652 svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
653 }
654 }
655
656 static void hl_decode_mb(SVQ3Context *s, const H264Context *h)
657 {
658 const int mb_x = s->mb_x;
659 const int mb_y = s->mb_y;
660 const int mb_xy = s->mb_xy;
661 const int mb_type = s->cur_pic->mb_type[mb_xy];
662 uint8_t *dest_y, *dest_cb, *dest_cr;
663 int linesize, uvlinesize;
664 int i, j;
665 const int *block_offset = &h->block_offset[0];
666 const int block_h = 16 >> h->chroma_y_shift;
667
668 linesize = s->cur_pic->f->linesize[0];
669 uvlinesize = s->cur_pic->f->linesize[1];
670
671 dest_y = s->cur_pic->f->data[0] + (mb_x + mb_y * linesize) * 16;
672 dest_cb = s->cur_pic->f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
673 dest_cr = s->cur_pic->f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;
674
675 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
676 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);
677
678 if (IS_INTRA(mb_type)) {
679 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
680 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
681
682 hl_decode_mb_predict_luma(s, h, mb_type, block_offset, linesize, dest_y);
683 }
684
685 hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
686
687 if (s->cbp & 0x30) {
688 uint8_t *dest[2] = { dest_cb, dest_cr };
689 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
690 s->dequant4_coeff[4][0]);
691 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
692 s->dequant4_coeff[4][0]);
693 for (j = 1; j < 3; j++) {
694 for (i = j * 16; i < j * 16 + 4; i++)
695 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
696 uint8_t *const ptr = dest[j - 1] + block_offset[i];
697 svq3_add_idct_c(ptr, s->mb + i * 16,
698 uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
699 }
700 }
701 }
702 }
703
704 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
705 {
706 H264Context *h = &s->h;
707 int i, j, k, m, dir, mode;
708 int cbp = 0;
709 uint32_t vlc;
710 int8_t *top, *left;
711 const int mb_xy = s->mb_xy;
712 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
713
714 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
715 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
716 s->topright_samples_available = 0xFFFF;
717
718 if (mb_type == 0) { /* SKIP */
719 if (s->pict_type == AV_PICTURE_TYPE_P ||
720 s->next_pic->mb_type[mb_xy] == -1) {
721 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
722 0, 0, 0, 0, 0, 0);
723
724 if (s->pict_type == AV_PICTURE_TYPE_B)
725 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
726 0, 0, 0, 0, 1, 1);
727
728 mb_type = MB_TYPE_SKIP;
729 } else {
730 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
731 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
732 return -1;
733 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
734 return -1;
735
736 mb_type = MB_TYPE_16x16;
737 }
738 } else if (mb_type < 8) { /* INTER */
739 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
740 mode = THIRDPEL_MODE;
741 else if (s->halfpel_flag &&
742 s->thirdpel_flag == !get_bits1(&h->gb))
743 mode = HALFPEL_MODE;
744 else
745 mode = FULLPEL_MODE;
746
747 /* fill caches */
748 /* note ref_cache should contain here:
749 * ????????
750 * ???11111
751 * N??11111
752 * N??11111
753 * N??11111
754 */
755
756 for (m = 0; m < 2; m++) {
757 if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
758 for (i = 0; i < 4; i++)
759 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
760 s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
761 } else {
762 for (i = 0; i < 4; i++)
763 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
764 }
765 if (s->mb_y > 0) {
766 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
767 s->cur_pic->motion_val[m][b_xy - s->b_stride],
768 4 * 2 * sizeof(int16_t));
769 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
770 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
771
772 if (s->mb_x < s->mb_width - 1) {
773 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
774 s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
775 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
776 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
777 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
778 } else
779 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
780 if (s->mb_x > 0) {
781 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
782 s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
783 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
784 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
785 } else
786 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
787 } else
788 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
789 PART_NOT_AVAILABLE, 8);
790
791 if (s->pict_type != AV_PICTURE_TYPE_B)
792 break;
793 }
794
795 /* decode motion vector(s) and form prediction(s) */
796 if (s->pict_type == AV_PICTURE_TYPE_P) {
797 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
798 return -1;
799 } else { /* AV_PICTURE_TYPE_B */
800 if (mb_type != 2) {
801 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
802 return -1;
803 } else {
804 for (i = 0; i < 4; i++)
805 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
806 0, 4 * 2 * sizeof(int16_t));
807 }
808 if (mb_type != 1) {
809 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
810 return -1;
811 } else {
812 for (i = 0; i < 4; i++)
813 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
814 0, 4 * 2 * sizeof(int16_t));
815 }
816 }
817
818 mb_type = MB_TYPE_16x16;
819 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
820 int8_t *i4x4 = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
821 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
822
823 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
824
825 if (mb_type == 8) {
826 if (s->mb_x > 0) {
827 for (i = 0; i < 4; i++)
828 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
829 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
830 s->left_samples_available = 0x5F5F;
831 }
832 if (s->mb_y > 0) {
833 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
834 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
835 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
836 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
837
838 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
839 s->top_samples_available = 0x33FF;
840 }
841
842 /* decode prediction codes for luma blocks */
843 for (i = 0; i < 16; i += 2) {
844 vlc = svq3_get_ue_golomb(&h->gb);
845
846 if (vlc >= 25) {
847 av_log(h->avctx, AV_LOG_ERROR,
848 "luma prediction:%"PRIu32"\n", vlc);
849 return -1;
850 }
851
852 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
853 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
854
855 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
856 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
857
858 if (left[1] == -1 || left[2] == -1) {
859 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
860 return -1;
861 }
862 }
863 } else { /* mb_type == 33, DC_128_PRED block type */
864 for (i = 0; i < 4; i++)
865 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
866 }
867
868 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
869 i4x4[4] = i4x4_cache[7 + 8 * 3];
870 i4x4[5] = i4x4_cache[7 + 8 * 2];
871 i4x4[6] = i4x4_cache[7 + 8 * 1];
872
873 if (mb_type == 8) {
874 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
875 h->avctx, s->top_samples_available,
876 s->left_samples_available);
877
878 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
879 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
880 } else {
881 for (i = 0; i < 4; i++)
882 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
883
884 s->top_samples_available = 0x33FF;
885 s->left_samples_available = 0x5F5F;
886 }
887
888 mb_type = MB_TYPE_INTRA4x4;
889 } else { /* INTRA16x16 */
890 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
891 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
892
893 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
894 s->left_samples_available, dir, 0)) < 0) {
895 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
896 return s->intra16x16_pred_mode;
897 }
898
899 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
900 mb_type = MB_TYPE_INTRA16x16;
901 }
902
903 if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
904 for (i = 0; i < 4; i++)
905 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
906 0, 4 * 2 * sizeof(int16_t));
907 if (s->pict_type == AV_PICTURE_TYPE_B) {
908 for (i = 0; i < 4; i++)
909 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
910 0, 4 * 2 * sizeof(int16_t));
911 }
912 }
913 if (!IS_INTRA4x4(mb_type)) {
914 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
915 }
916 if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
917 memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
918 }
919
920 if (!IS_INTRA16x16(mb_type) &&
921 (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
922 if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
923 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
924 return -1;
925 }
926
927 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
928 : ff_h264_golomb_to_inter_cbp[vlc];
929 }
930 if (IS_INTRA16x16(mb_type) ||
931 (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
932 s->qscale += svq3_get_se_golomb(&h->gb);
933
934 if (s->qscale > 31u) {
935 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
936 return -1;
937 }
938 }
939 if (IS_INTRA16x16(mb_type)) {
940 AV_ZERO128(s->mb_luma_dc[0] + 0);
941 AV_ZERO128(s->mb_luma_dc[0] + 8);
942 if (svq3_decode_block(&h->gb, s->mb_luma_dc[0], 0, 1)) {
943 av_log(h->avctx, AV_LOG_ERROR,
944 "error while decoding intra luma dc\n");
945 return -1;
946 }
947 }
948
949 if (cbp) {
950 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
951 const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
952
953 for (i = 0; i < 4; i++)
954 if ((cbp & (1 << i))) {
955 for (j = 0; j < 4; j++) {
956 k = index ? (1 * (j & 1) + 2 * (i & 1) +
957 2 * (j & 2) + 4 * (i & 2))
958 : (4 * i + j);
959 s->non_zero_count_cache[scan8[k]] = 1;
960
961 if (svq3_decode_block(&h->gb, &s->mb[16 * k], index, type)) {
962 av_log(h->avctx, AV_LOG_ERROR,
963 "error while decoding block\n");
964 return -1;
965 }
966 }
967 }
968
969 if ((cbp & 0x30)) {
970 for (i = 1; i < 3; ++i)
971 if (svq3_decode_block(&h->gb, &s->mb[16 * 16 * i], 0, 3)) {
972 av_log(h->avctx, AV_LOG_ERROR,
973 "error while decoding chroma dc block\n");
974 return -1;
975 }
976
977 if ((cbp & 0x20)) {
978 for (i = 1; i < 3; i++) {
979 for (j = 0; j < 4; j++) {
980 k = 16 * i + j;
981 s->non_zero_count_cache[scan8[k]] = 1;
982
983 if (svq3_decode_block(&h->gb, &s->mb[16 * k], 1, 1)) {
984 av_log(h->avctx, AV_LOG_ERROR,
985 "error while decoding chroma ac block\n");
986 return -1;
987 }
988 }
989 }
990 }
991 }
992 }
993
994 s->cbp = cbp;
995 s->cur_pic->mb_type[mb_xy] = mb_type;
996
997 if (IS_INTRA(mb_type))
998 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
999 s->left_samples_available, DC_PRED8x8, 1);
1000
1001 return 0;
1002 }
1003
1004 static int svq3_decode_slice_header(AVCodecContext *avctx)
1005 {
1006 SVQ3Context *s = avctx->priv_data;
1007 H264Context *h = &s->h;
1008 const int mb_xy = s->mb_xy;
1009 int i, header;
1010 unsigned slice_id;
1011
1012 header = get_bits(&s->gb, 8);
1013
1014 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1015 /* TODO: what? */
1016 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1017 return -1;
1018 } else {
1019 int slice_bits, slice_bytes, slice_length;
1020 int length = header >> 5 & 3;
1021
1022 slice_length = show_bits(&s->gb, 8 * length);
1023 slice_bits = slice_length * 8;
1024 slice_bytes = slice_length + length - 1;
1025
1026 if (slice_bytes > get_bits_left(&s->gb)) {
1027 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1028 return -1;
1029 }
1030
1031 skip_bits(&s->gb, 8);
1032
1033 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1034 if (!s->slice_buf)
1035 return AVERROR(ENOMEM);
1036
1037 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1038
1039 init_get_bits(&h->gb, s->slice_buf, slice_bits);
1040
1041 if (s->watermark_key) {
1042 uint32_t header = AV_RL32(&h->gb.buffer[1]);
1043 AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
1044 }
1045 if (length > 0) {
1046 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1047 }
1048 skip_bits_long(&s->gb, slice_bytes * 8);
1049 }
1050
1051 if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
1052 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u\n", slice_id);
1053 return -1;
1054 }
1055
1056 s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
1057
1058 if ((header & 0x9F) == 2) {
1059 i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1060 get_bits(&h->gb, i);
1061 } else {
1062 skip_bits1(&h->gb);
1063 }
1064
1065 s->slice_num = get_bits(&h->gb, 8);
1066 s->qscale = get_bits(&h->gb, 5);
1067 s->adaptive_quant = get_bits1(&h->gb);
1068
1069 /* unknown fields */
1070 skip_bits1(&h->gb);
1071
1072 if (s->unknown_flag)
1073 skip_bits1(&h->gb);
1074
1075 skip_bits1(&h->gb);
1076 skip_bits(&h->gb, 2);
1077
1078 while (get_bits1(&h->gb))
1079 skip_bits(&h->gb, 8);
1080
1081 /* reset intra predictors and invalidate motion vector references */
1082 if (s->mb_x > 0) {
1083 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1084 -1, 4 * sizeof(int8_t));
1085 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1086 -1, 8 * sizeof(int8_t) * s->mb_x);
1087 }
1088 if (s->mb_y > 0) {
1089 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1090 -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1091
1092 if (s->mb_x > 0)
1093 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1094 }
1095
1096 return 0;
1097 }
1098
1099 static void init_dequant4_coeff_table(SVQ3Context *s)
1100 {
1101 int q, x;
1102 const int max_qp = 51;
1103
1104 for (q = 0; q < max_qp + 1; q++) {
1105 int shift = ff_h264_quant_div6[q] + 2;
1106 int idx = ff_h264_quant_rem6[q];
1107 for (x = 0; x < 16; x++)
1108 s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1109 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1110 }
1111 }
1112
1113 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1114 {
1115 SVQ3Context *s = avctx->priv_data;
1116 H264Context *h = &s->h;
1117 int m, x, y;
1118 unsigned char *extradata;
1119 unsigned char *extradata_end;
1120 unsigned int size;
1121 int marker_found = 0;
1122
1123 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1124 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1125 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1126 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1127 av_freep(&s->cur_pic);
1128 av_freep(&s->last_pic);
1129 av_freep(&s->next_pic);
1130 return AVERROR(ENOMEM);
1131 }
1132
1133 s->cur_pic->f = av_frame_alloc();
1134 s->last_pic->f = av_frame_alloc();
1135 s->next_pic->f = av_frame_alloc();
1136 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1137 return AVERROR(ENOMEM);
1138
1139 if (ff_h264_decode_init(avctx) < 0)
1140 return -1;
1141
1142 // we will overwrite it later during decoding
1143 av_frame_free(&h->cur_pic.f);
1144
1145 ff_h264dsp_init(&s->h264dsp, 8, 1);
1146 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1147 ff_videodsp_init(&s->vdsp, 8);
1148
1149 h->sps.bit_depth_luma = 8;
1150 h->chroma_format_idc = 1;
1151
1152 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1153 ff_tpeldsp_init(&s->tdsp);
1154
1155 h->flags = avctx->flags;
1156 h->picture_structure = PICT_FRAME;
1157 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1158 avctx->color_range = AVCOL_RANGE_JPEG;
1159
1160 h->chroma_x_shift = h->chroma_y_shift = 1;
1161
1162 s->halfpel_flag = 1;
1163 s->thirdpel_flag = 1;
1164 s->unknown_flag = 0;
1165
1166 /* prowl for the "SEQH" marker in the extradata */
1167 extradata = (unsigned char *)avctx->extradata;
1168 extradata_end = avctx->extradata + avctx->extradata_size;
1169 if (extradata) {
1170 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1171 if (!memcmp(extradata, "SEQH", 4)) {
1172 marker_found = 1;
1173 break;
1174 }
1175 extradata++;
1176 }
1177 }
1178
1179 /* if a match was found, parse the extra data */
1180 if (marker_found) {
1181 GetBitContext gb;
1182 int frame_size_code;
1183
1184 size = AV_RB32(&extradata[4]);
1185 if (size > extradata_end - extradata - 8)
1186 return AVERROR_INVALIDDATA;
1187 init_get_bits(&gb, extradata + 8, size * 8);
1188
1189 /* 'frame size code' and optional 'width, height' */
1190 frame_size_code = get_bits(&gb, 3);
1191 switch (frame_size_code) {
1192 case 0:
1193 avctx->width = 160;
1194 avctx->height = 120;
1195 break;
1196 case 1:
1197 avctx->width = 128;
1198 avctx->height = 96;
1199 break;
1200 case 2:
1201 avctx->width = 176;
1202 avctx->height = 144;
1203 break;
1204 case 3:
1205 avctx->width = 352;
1206 avctx->height = 288;
1207 break;
1208 case 4:
1209 avctx->width = 704;
1210 avctx->height = 576;
1211 break;
1212 case 5:
1213 avctx->width = 240;
1214 avctx->height = 180;
1215 break;
1216 case 6:
1217 avctx->width = 320;
1218 avctx->height = 240;
1219 break;
1220 case 7:
1221 avctx->width = get_bits(&gb, 12);
1222 avctx->height = get_bits(&gb, 12);
1223 break;
1224 }
1225
1226 s->halfpel_flag = get_bits1(&gb);
1227 s->thirdpel_flag = get_bits1(&gb);
1228
1229 /* unknown fields */
1230 skip_bits1(&gb);
1231 skip_bits1(&gb);
1232 skip_bits1(&gb);
1233 skip_bits1(&gb);
1234
1235 h->low_delay = get_bits1(&gb);
1236
1237 /* unknown field */
1238 skip_bits1(&gb);
1239
1240 while (get_bits1(&gb))
1241 skip_bits(&gb, 8);
1242
1243 s->unknown_flag = get_bits1(&gb);
1244 avctx->has_b_frames = !h->low_delay;
1245 if (s->unknown_flag) {
1246 #if CONFIG_ZLIB
1247 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1248 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1249 int u1 = svq3_get_ue_golomb(&gb);
1250 int u2 = get_bits(&gb, 8);
1251 int u3 = get_bits(&gb, 2);
1252 int u4 = svq3_get_ue_golomb(&gb);
1253 unsigned long buf_len = watermark_width *
1254 watermark_height * 4;
1255 int offset = get_bits_count(&gb) + 7 >> 3;
1256 uint8_t *buf;
1257
1258 if (watermark_height > 0 &&
1259 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1260 return -1;
1261
1262 buf = av_malloc(buf_len);
1263 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1264 watermark_width, watermark_height);
1265 av_log(avctx, AV_LOG_DEBUG,
1266 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1267 u1, u2, u3, u4, offset);
1268 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1269 size - offset) != Z_OK) {
1270 av_log(avctx, AV_LOG_ERROR,
1271 "could not uncompress watermark logo\n");
1272 av_free(buf);
1273 return -1;
1274 }
1275 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1276 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1277 av_log(avctx, AV_LOG_DEBUG,
1278 "watermark key %#"PRIx32"\n", s->watermark_key);
1279 av_free(buf);
1280 #else
1281 av_log(avctx, AV_LOG_ERROR,
1282 "this svq3 file contains watermark which need zlib support compiled in\n");
1283 return -1;
1284 #endif
1285 }
1286 }
1287
1288 s->mb_width = (avctx->width + 15) / 16;
1289 s->mb_height = (avctx->height + 15) / 16;
1290 s->mb_stride = s->mb_width + 1;
1291 s->mb_num = s->mb_width * s->mb_height;
1292 s->b_stride = 4 * s->mb_width;
1293 s->h_edge_pos = s->mb_width * 16;
1294 s->v_edge_pos = s->mb_height * 16;
1295
1296 s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1297 if (!s->intra4x4_pred_mode)
1298 return AVERROR(ENOMEM);
1299
1300 s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1301 sizeof(*s->mb2br_xy));
1302 if (!s->mb2br_xy)
1303 return AVERROR(ENOMEM);
1304
1305 for (y = 0; y < s->mb_height; y++)
1306 for (x = 0; x < s->mb_width; x++) {
1307 const int mb_xy = x + y * s->mb_stride;
1308
1309 s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1310 }
1311
1312 init_dequant4_coeff_table(s);
1313
1314 return 0;
1315 }
1316
1317 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1318 {
1319 int i;
1320 for (i = 0; i < 2; i++) {
1321 av_buffer_unref(&pic->motion_val_buf[i]);
1322 av_buffer_unref(&pic->ref_index_buf[i]);
1323 }
1324 av_buffer_unref(&pic->mb_type_buf);
1325
1326 av_frame_unref(pic->f);
1327 }
1328
1329 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1330 {
1331 SVQ3Context *s = avctx->priv_data;
1332 const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
1333 const int mb_array_size = s->mb_stride * s->mb_height;
1334 const int b4_stride = s->mb_width * 4 + 1;
1335 const int b4_array_size = b4_stride * s->mb_height * 4;
1336 int ret;
1337
1338 if (!pic->motion_val_buf[0]) {
1339 int i;
1340
1341 pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1342 if (!pic->mb_type_buf)
1343 return AVERROR(ENOMEM);
1344 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1345
1346 for (i = 0; i < 2; i++) {
1347 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1348 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1349 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1350 ret = AVERROR(ENOMEM);
1351 goto fail;
1352 }
1353
1354 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1355 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1356 }
1357 }
1358 pic->reference = !(s->pict_type == AV_PICTURE_TYPE_B);
1359
1360 ret = ff_get_buffer(avctx, pic->f,
1361 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1362 if (ret < 0)
1363 goto fail;
1364
1365 if (!s->edge_emu_buffer) {
1366 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1367 if (!s->edge_emu_buffer)
1368 return AVERROR(ENOMEM);
1369 }
1370
1371 return 0;
1372 fail:
1373 free_picture(avctx, pic);
1374 return ret;
1375 }
1376
1377 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1378 int *got_frame, AVPacket *avpkt)
1379 {
1380 const uint8_t *buf = avpkt->data;
1381 SVQ3Context *s = avctx->priv_data;
1382 H264Context *h = &s->h;
1383 int buf_size = avpkt->size;
1384 int ret, m, i;
1385
1386 /* special case for last picture */
1387 if (buf_size == 0) {
1388 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1389 ret = av_frame_ref(data, s->next_pic->f);
1390 if (ret < 0)
1391 return ret;
1392 s->last_frame_output = 1;
1393 *got_frame = 1;
1394 }
1395 return 0;
1396 }
1397
1398 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1399 if (ret < 0)
1400 return ret;
1401
1402 s->mb_x = s->mb_y = s->mb_xy = 0;
1403
1404 if (svq3_decode_slice_header(avctx))
1405 return -1;
1406
1407 if (s->pict_type != AV_PICTURE_TYPE_B)
1408 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1409
1410 av_frame_unref(s->cur_pic->f);
1411
1412 /* for skipping the frame */
1413 s->cur_pic->f->pict_type = s->pict_type;
1414 s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1415
1416 ret = get_buffer(avctx, s->cur_pic);
1417 if (ret < 0)
1418 return ret;
1419
1420 for (i = 0; i < 16; i++) {
1421 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1422 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1423 }
1424 for (i = 0; i < 16; i++) {
1425 h->block_offset[16 + i] =
1426 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1427 h->block_offset[48 + 16 + i] =
1428 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1429 }
1430
1431 if (s->pict_type != AV_PICTURE_TYPE_I) {
1432 if (!s->last_pic->f->data[0]) {
1433 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1434 ret = get_buffer(avctx, s->last_pic);
1435 if (ret < 0)
1436 return ret;
1437 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1438 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1439 s->last_pic->f->linesize[1]);
1440 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1441 s->last_pic->f->linesize[2]);
1442 }
1443
1444 if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1445 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1446 ret = get_buffer(avctx, s->next_pic);
1447 if (ret < 0)
1448 return ret;
1449 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1450 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1451 s->next_pic->f->linesize[1]);
1452 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1453 s->next_pic->f->linesize[2]);
1454 }
1455 }
1456
1457 if (avctx->debug & FF_DEBUG_PICT_INFO)
1458 av_log(h->avctx, AV_LOG_DEBUG,
1459 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1460 av_get_picture_type_char(s->pict_type),
1461 s->halfpel_flag, s->thirdpel_flag,
1462 s->adaptive_quant, s->qscale, s->slice_num);
1463
1464 if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1465 avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1466 avctx->skip_frame >= AVDISCARD_ALL)
1467 return 0;
1468
1469 if (s->next_p_frame_damaged) {
1470 if (s->pict_type == AV_PICTURE_TYPE_B)
1471 return 0;
1472 else
1473 s->next_p_frame_damaged = 0;
1474 }
1475
1476 if (s->pict_type == AV_PICTURE_TYPE_B) {
1477 s->frame_num_offset = s->slice_num - s->prev_frame_num;
1478
1479 if (s->frame_num_offset < 0)
1480 s->frame_num_offset += 256;
1481 if (s->frame_num_offset == 0 ||
1482 s->frame_num_offset >= s->prev_frame_num_offset) {
1483 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1484 return -1;
1485 }
1486 } else {
1487 s->prev_frame_num = s->frame_num;
1488 s->frame_num = s->slice_num;
1489 s->prev_frame_num_offset = s->frame_num - s->prev_frame_num;
1490
1491 if (s->prev_frame_num_offset < 0)
1492 s->prev_frame_num_offset += 256;
1493 }
1494
1495 for (m = 0; m < 2; m++) {
1496 int i;
1497 for (i = 0; i < 4; i++) {
1498 int j;
1499 for (j = -1; j < 4; j++)
1500 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1501 if (i < 3)
1502 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1503 }
1504 }
1505
1506 for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1507 for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1508 unsigned mb_type;
1509 s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1510
1511 if ((get_bits_left(&h->gb)) <= 7) {
1512 if (((get_bits_count(&h->gb) & 7) == 0 ||
1513 show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
1514
1515 if (svq3_decode_slice_header(avctx))
1516 return -1;
1517 }
1518 /* TODO: support s->mb_skip_run */
1519 }
1520
1521 mb_type = svq3_get_ue_golomb(&h->gb);
1522
1523 if (s->pict_type == AV_PICTURE_TYPE_I)
1524 mb_type += 8;
1525 else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1526 mb_type += 4;
1527 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1528 av_log(h->avctx, AV_LOG_ERROR,
1529 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1530 return -1;
1531 }
1532
1533 if (mb_type != 0)
1534 hl_decode_mb(s, h);
1535
1536 if (s->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1537 s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1538 (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1539 }
1540
1541 ff_draw_horiz_band(avctx, s->cur_pic->f,
1542 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1543 16 * s->mb_y, 16, h->picture_structure, 0,
1544 h->low_delay);
1545 }
1546
1547 if (s->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1548 ret = av_frame_ref(data, s->cur_pic->f);
1549 else if (s->last_pic->f->data[0])
1550 ret = av_frame_ref(data, s->last_pic->f);
1551 if (ret < 0)
1552 return ret;
1553
1554 /* Do not output the last pic after seeking. */
1555 if (s->last_pic->f->data[0] || h->low_delay)
1556 *got_frame = 1;
1557
1558 if (s->pict_type != AV_PICTURE_TYPE_B) {
1559 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1560 } else {
1561 av_frame_unref(s->cur_pic->f);
1562 }
1563
1564 return buf_size;
1565 }
1566
1567 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1568 {
1569 SVQ3Context *s = avctx->priv_data;
1570 H264Context *h = &s->h;
1571
1572 free_picture(avctx, s->cur_pic);
1573 free_picture(avctx, s->next_pic);
1574 free_picture(avctx, s->last_pic);
1575 av_frame_free(&s->cur_pic->f);
1576 av_frame_free(&s->next_pic->f);
1577 av_frame_free(&s->last_pic->f);
1578 av_freep(&s->cur_pic);
1579 av_freep(&s->next_pic);
1580 av_freep(&s->last_pic);
1581 av_freep(&s->slice_buf);
1582 av_freep(&s->intra4x4_pred_mode);
1583 av_freep(&s->edge_emu_buffer);
1584 av_freep(&s->mb2br_xy);
1585
1586 ff_h264_free_context(h);
1587
1588 return 0;
1589 }
1590
1591 AVCodec ff_svq3_decoder = {
1592 .name = "svq3",
1593 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1594 .type = AVMEDIA_TYPE_VIDEO,
1595 .id = AV_CODEC_ID_SVQ3,
1596 .priv_data_size = sizeof(SVQ3Context),
1597 .init = svq3_decode_init,
1598 .close = svq3_decode_end,
1599 .decode = svq3_decode_frame,
1600 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1601 AV_CODEC_CAP_DR1 |
1602 AV_CODEC_CAP_DELAY,
1603 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1604 AV_PIX_FMT_NONE},
1605 };