svq3: eliminate H264Context.cur_pic usage
[libav.git] / libavcodec / svq3.c
1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains 1 or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
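/*
 * A minimal caller-side sketch of the extradata setup described above
 * (editorial illustration, not part of the decoder). image_desc and
 * image_desc_size are hypothetical names for the ImageDescription payload,
 * starting at the 'SVQ3' fourcc, that the demuxing code extracted from the
 * stsd atom:
 *
 *     AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_SVQ3);
 *     AVCodecContext *ctx = avcodec_alloc_context3(codec);
 *
 *     ctx->extradata = av_mallocz(image_desc_size +
 *                                 AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!ctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(ctx->extradata, image_desc, image_desc_size);
 *     ctx->extradata_size = image_desc_size;
 *
 *     if (avcodec_open2(ctx, codec, NULL) < 0)
 *         return -1;
 */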
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * svq3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 uint8_t *slice_buf;
83 int slice_size;
84 int halfpel_flag;
85 int thirdpel_flag;
86 int unknown_flag;
87 uint32_t watermark_key;
88 int adaptive_quant;
89 int next_p_frame_damaged;
90 int h_edge_pos;
91 int v_edge_pos;
92 int last_frame_output;
93 int slice_num;
94 int qscale;
95 int cbp;
96
97 enum AVPictureType pict_type;
98
99 int mb_x, mb_y;
100 int mb_xy;
101 int mb_width, mb_height;
102 int mb_stride, mb_num;
103 int b_stride;
104
105 uint32_t *mb2br_xy;
106
107 int chroma_pred_mode;
108 int intra16x16_pred_mode;
109
110 int8_t intra4x4_pred_mode_cache[5 * 8];
111 int8_t (*intra4x4_pred_mode);
112
113 unsigned int top_samples_available;
114 unsigned int topright_samples_available;
115 unsigned int left_samples_available;
116
117 uint8_t *edge_emu_buffer;
118
119 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
120 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
121 DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
122 DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
123 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
124 uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
125 } SVQ3Context;
126
127 #define FULLPEL_MODE 1
128 #define HALFPEL_MODE 2
129 #define THIRDPEL_MODE 3
130 #define PREDICT_MODE 4
131
132 /* dual scan (from some older h264 draft)
133  *  o-->o-->o   o
134  *           |  /|
135  *  o   o   o / o
136  *  | / |   |/  |
137  *  o   o   o   o
138  *    /
139  *  o-->o-->o-->o
140  */
141 static const uint8_t svq3_scan[16] = {
142 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
143 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
144 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
145 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
146 };
147
148 static const uint8_t luma_dc_zigzag_scan[16] = {
149 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
150 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
151 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
152 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
153 };
154
155 static const uint8_t svq3_pred_0[25][2] = {
156 { 0, 0 },
157 { 1, 0 }, { 0, 1 },
158 { 0, 2 }, { 1, 1 }, { 2, 0 },
159 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
160 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
161 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
162 { 2, 4 }, { 3, 3 }, { 4, 2 },
163 { 4, 3 }, { 3, 4 },
164 { 4, 4 }
165 };
166
167 static const int8_t svq3_pred_1[6][6][5] = {
168 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
169 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
170 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
171 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
172 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
173 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
174 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
175 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
176 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
177 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
178 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
179 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
180 };
181
182 static const struct {
183 uint8_t run;
184 uint8_t level;
185 } svq3_dct_tables[2][16] = {
186 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
187 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
188 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
189 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
190 };
191
192 static const uint32_t svq3_dequant_coeff[32] = {
193 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
194 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
195 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
196 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
197 };
198
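/* The 4x4 transforms below are SVQ3's integer approximation of the DCT,
 * built from the constants 13, 17 and 7; dequantization multiplies by qmul
 * from svq3_dequant_coeff[] and rounds with +0x80000 before the final >> 20. */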
199 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
200 {
201 const int qmul = svq3_dequant_coeff[qp];
202 #define stride 16
203 int i;
204 int temp[16];
205 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
206
207 for (i = 0; i < 4; i++) {
208 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
209 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
210 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
211 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
212
213 temp[4 * i + 0] = z0 + z3;
214 temp[4 * i + 1] = z1 + z2;
215 temp[4 * i + 2] = z1 - z2;
216 temp[4 * i + 3] = z0 - z3;
217 }
218
219 for (i = 0; i < 4; i++) {
220 const int offset = x_offset[i];
221 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
222 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
223 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
224 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
225
226 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
227 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
228 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
229 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
230 }
231 }
232 #undef stride
233
234 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
235 int stride, int qp, int dc)
236 {
237 const int qmul = svq3_dequant_coeff[qp];
238 int i;
239
240 if (dc) {
241 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
242 : qmul * (block[0] >> 3) / 2);
243 block[0] = 0;
244 }
245
246 for (i = 0; i < 4; i++) {
247 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
248 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
249 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
250 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
251
252 block[0 + 4 * i] = z0 + z3;
253 block[1 + 4 * i] = z1 + z2;
254 block[2 + 4 * i] = z1 - z2;
255 block[3 + 4 * i] = z0 - z3;
256 }
257
258 for (i = 0; i < 4; i++) {
259 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
260 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
261 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
262 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
263 const int rr = (dc + 0x80000);
264
265 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
266 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
267 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
268 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
269 }
270
271 memset(block, 0, 16 * sizeof(int16_t));
272 }
273
274 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
275 int index, const int type)
276 {
277 static const uint8_t *const scan_patterns[4] = {
278 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
279 };
280
281 int run, level, limit;
282 unsigned vlc;
283 const int intra = 3 * type >> 2;
284 const uint8_t *const scan = scan_patterns[type];
285
286 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
287 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
288 int sign = (vlc & 1) ? 0 : -1;
289 vlc = vlc + 1 >> 1;
290
291 if (type == 3) {
292 if (vlc < 3) {
293 run = 0;
294 level = vlc;
295 } else if (vlc < 4) {
296 run = 1;
297 level = 1;
298 } else {
299 run = vlc & 0x3;
300 level = (vlc + 9 >> 2) - run;
301 }
302 } else {
303 if (vlc < 16) {
304 run = svq3_dct_tables[intra][vlc].run;
305 level = svq3_dct_tables[intra][vlc].level;
306 } else if (intra) {
307 run = vlc & 0x7;
308 level = (vlc >> 3) +
309 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
310 } else {
311 run = vlc & 0xF;
312 level = (vlc >> 4) +
313 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
314 }
315 }
316
317 if ((index += run) >= limit)
318 return -1;
319
320 block[scan[index]] = (level ^ sign) - sign;
321 }
322
323 if (type != 2) {
324 break;
325 }
326 }
327
328 return 0;
329 }
330
331 static av_always_inline int
332 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
333 int i, int list, int part_width)
334 {
335 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
336
337 if (topright_ref != PART_NOT_AVAILABLE) {
338 *C = s->mv_cache[list][i - 8 + part_width];
339 return topright_ref;
340 } else {
341 *C = s->mv_cache[list][i - 8 - 1];
342 return s->ref_cache[list][i - 8 - 1];
343 }
344 }
345
346 /**
347 * Get the predicted MV.
348 * @param n the block index
349 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
350 * @param mx the x component of the predicted motion vector
351 * @param my the y component of the predicted motion vector
352 */
353 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
354 int part_width, int list,
355 int ref, int *const mx, int *const my)
356 {
357 const int index8 = scan8[n];
358 const int top_ref = s->ref_cache[list][index8 - 8];
359 const int left_ref = s->ref_cache[list][index8 - 1];
360 const int16_t *const A = s->mv_cache[list][index8 - 1];
361 const int16_t *const B = s->mv_cache[list][index8 - 8];
362 const int16_t *C;
363 int diagonal_ref, match_count;
364
365 /* mv_cache
366 * B . . A T T T T
367 * U . . L . . , .
368 * U . . L . . . .
369 * U . . L . . , .
370 * . . . L . . . .
371 */
372
373 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
374 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
375 if (match_count > 1) { //most common
376 *mx = mid_pred(A[0], B[0], C[0]);
377 *my = mid_pred(A[1], B[1], C[1]);
378 } else if (match_count == 1) {
379 if (left_ref == ref) {
380 *mx = A[0];
381 *my = A[1];
382 } else if (top_ref == ref) {
383 *mx = B[0];
384 *my = B[1];
385 } else {
386 *mx = C[0];
387 *my = C[1];
388 }
389 } else {
390 if (top_ref == PART_NOT_AVAILABLE &&
391 diagonal_ref == PART_NOT_AVAILABLE &&
392 left_ref != PART_NOT_AVAILABLE) {
393 *mx = A[0];
394 *my = A[1];
395 } else {
396 *mx = mid_pred(A[0], B[0], C[0]);
397 *my = mid_pred(A[1], B[1], C[1]);
398 }
399 }
400 }
401
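/* Motion-compensate one partition of width x height pixels: dir selects the
 * reference picture (0: last_pic, otherwise next_pic), thirdpel switches
 * between the thirdpel and halfpel DSP routines, and avg averages into the
 * destination for bidirectional prediction. Chroma is handled in the same
 * pass unless the GRAY flag is set. */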
402 static inline void svq3_mc_dir_part(SVQ3Context *s,
403 int x, int y, int width, int height,
404 int mx, int my, int dxy,
405 int thirdpel, int dir, int avg)
406 {
407 H264Context *h = &s->h;
408 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
409 uint8_t *src, *dest;
410 int i, emu = 0;
411 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
412 int linesize = s->cur_pic->f->linesize[0];
413 int uvlinesize = s->cur_pic->f->linesize[1];
414
415 mx += x;
416 my += y;
417
418 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
419 my < 0 || my >= s->v_edge_pos - height - 1) {
420 emu = 1;
421 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
422 my = av_clip(my, -16, s->v_edge_pos - height + 15);
423 }
424
425 /* form component predictions */
426 dest = s->cur_pic->f->data[0] + x + y * linesize;
427 src = pic->f->data[0] + mx + my * linesize;
428
429 if (emu) {
430 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
431 linesize, linesize,
432 width + 1, height + 1,
433 mx, my, s->h_edge_pos, s->v_edge_pos);
434 src = s->edge_emu_buffer;
435 }
436 if (thirdpel)
437 (avg ? s->tdsp.avg_tpel_pixels_tab
438 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
439 width, height);
440 else
441 (avg ? s->hdsp.avg_pixels_tab
442 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
443 height);
444
445 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
446 mx = mx + (mx < (int) x) >> 1;
447 my = my + (my < (int) y) >> 1;
448 width = width >> 1;
449 height = height >> 1;
450 blocksize++;
451
452 for (i = 1; i < 3; i++) {
453 dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
454 src = pic->f->data[i] + mx + my * uvlinesize;
455
456 if (emu) {
457 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
458 uvlinesize, uvlinesize,
459 width + 1, height + 1,
460 mx, my, (s->h_edge_pos >> 1),
461 s->v_edge_pos >> 1);
462 src = s->edge_emu_buffer;
463 }
464 if (thirdpel)
465 (avg ? s->tdsp.avg_tpel_pixels_tab
466 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
467 uvlinesize,
468 width, height);
469 else
470 (avg ? s->hdsp.avg_pixels_tab
471 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
472 uvlinesize,
473 height);
474 }
475 }
476 }
477
478 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
479 int dir, int avg)
480 {
481 int i, j, k, mx, my, dx, dy, x, y;
482 H264Context *h = &s->h;
483 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
484 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
485 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
486 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
487 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
488
489 for (i = 0; i < 16; i += part_height)
490 for (j = 0; j < 16; j += part_width) {
491 const int b_xy = (4 * s->mb_x + (j >> 2)) +
492 (4 * s->mb_y + (i >> 2)) * s->b_stride;
493 int dxy;
494 x = 16 * s->mb_x + j;
495 y = 16 * s->mb_y + i;
496 k = (j >> 2 & 1) + (i >> 1 & 2) +
497 (j >> 1 & 4) + (i & 8);
498
499 if (mode != PREDICT_MODE) {
500 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
501 } else {
502 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
503 my = s->next_pic->motion_val[0][b_xy][1] << 1;
504
505 if (dir == 0) {
506 mx = mx * h->frame_num_offset /
507 h->prev_frame_num_offset + 1 >> 1;
508 my = my * h->frame_num_offset /
509 h->prev_frame_num_offset + 1 >> 1;
510 } else {
511 mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
512 h->prev_frame_num_offset + 1 >> 1;
513 my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
514 h->prev_frame_num_offset + 1 >> 1;
515 }
516 }
517
518 /* clip motion vector prediction to frame border */
519 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
520 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
521
522 /* get (optional) motion vector differential */
523 if (mode == PREDICT_MODE) {
524 dx = dy = 0;
525 } else {
526 dy = svq3_get_se_golomb(&h->gb);
527 dx = svq3_get_se_golomb(&h->gb);
528
529 if (dx == INVALID_VLC || dy == INVALID_VLC) {
530 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
531 return -1;
532 }
533 }
534
535 /* compute motion vector */
536 if (mode == THIRDPEL_MODE) {
537 int fx, fy;
538 mx = (mx + 1 >> 1) + dx;
539 my = (my + 1 >> 1) + dy;
540 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
541 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
542 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
543
544 svq3_mc_dir_part(s, x, y, part_width, part_height,
545 fx, fy, dxy, 1, dir, avg);
546 mx += mx;
547 my += my;
548 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
549 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
550 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
551 dxy = (mx & 1) + 2 * (my & 1);
552
553 svq3_mc_dir_part(s, x, y, part_width, part_height,
554 mx >> 1, my >> 1, dxy, 0, dir, avg);
555 mx *= 3;
556 my *= 3;
557 } else {
558 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
559 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
560
561 svq3_mc_dir_part(s, x, y, part_width, part_height,
562 mx, my, 0, 0, dir, avg);
563 mx *= 6;
564 my *= 6;
565 }
566
567 /* update mv_cache */
568 if (mode != PREDICT_MODE) {
569 int32_t mv = pack16to32(mx, my);
570
571 if (part_height == 8 && i < 8) {
572 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
573
574 if (part_width == 8 && j < 8)
575 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
576 }
577 if (part_width == 8 && j < 8)
578 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
579 if (part_width == 4 || part_height == 4)
580 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
581 }
582
583 /* write back motion vectors */
584 fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
585 part_width >> 2, part_height >> 2, s->b_stride,
586 pack16to32(mx, my), 4);
587 }
588
589 return 0;
590 }
591
592 static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
593 int mb_type, const int *block_offset,
594 int linesize, uint8_t *dest_y)
595 {
596 int i;
597 if (!IS_INTRA4x4(mb_type)) {
598 for (i = 0; i < 16; i++)
599 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
600 uint8_t *const ptr = dest_y + block_offset[i];
601 svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
602 s->qscale, IS_INTRA(mb_type) ? 1 : 0);
603 }
604 }
605 }
606
607 static av_always_inline int dctcoef_get(int16_t *mb, int index)
608 {
609 return AV_RN16A(mb + index);
610 }
611
612 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
613 const H264Context *h,
614 int mb_type,
615 const int *block_offset,
616 int linesize,
617 uint8_t *dest_y)
618 {
619 int i;
620 int qscale = s->qscale;
621
622 if (IS_INTRA4x4(mb_type)) {
623 for (i = 0; i < 16; i++) {
624 uint8_t *const ptr = dest_y + block_offset[i];
625 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
626
627 uint8_t *topright;
628 int nnz, tr;
629 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
630 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
631 assert(s->mb_y || linesize <= block_offset[i]);
632 if (!topright_avail) {
633 tr = ptr[3 - linesize] * 0x01010101u;
634 topright = (uint8_t *)&tr;
635 } else
636 topright = ptr + 4 - linesize;
637 } else
638 topright = NULL;
639
640 s->hpc.pred4x4[dir](ptr, topright, linesize);
641 nnz = s->non_zero_count_cache[scan8[i]];
642 if (nnz) {
643 svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
644 }
645 }
646 } else {
647 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
648 svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
649 }
650 }
651
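/* Reconstruct the current macroblock into cur_pic: chroma and luma intra
 * prediction for intra MBs, the luma IDCT pass, and chroma DC/AC
 * reconstruction when the coded block pattern has chroma bits set. */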
652 static void hl_decode_mb(SVQ3Context *s, const H264Context *h)
653 {
654 const int mb_x = s->mb_x;
655 const int mb_y = s->mb_y;
656 const int mb_xy = s->mb_xy;
657 const int mb_type = s->cur_pic->mb_type[mb_xy];
658 uint8_t *dest_y, *dest_cb, *dest_cr;
659 int linesize, uvlinesize;
660 int i, j;
661 const int *block_offset = &h->block_offset[0];
662 const int block_h = 16 >> h->chroma_y_shift;
663
664 linesize = s->cur_pic->f->linesize[0];
665 uvlinesize = s->cur_pic->f->linesize[1];
666
667 dest_y = s->cur_pic->f->data[0] + (mb_x + mb_y * linesize) * 16;
668 dest_cb = s->cur_pic->f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
669 dest_cr = s->cur_pic->f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;
670
671 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
672 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);
673
674 if (IS_INTRA(mb_type)) {
675 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
676 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
677
678 hl_decode_mb_predict_luma(s, h, mb_type, block_offset, linesize, dest_y);
679 }
680
681 hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
682
683 if (s->cbp & 0x30) {
684 uint8_t *dest[2] = { dest_cb, dest_cr };
685 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
686 s->dequant4_coeff[4][0]);
687 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
688 s->dequant4_coeff[4][0]);
689 for (j = 1; j < 3; j++) {
690 for (i = j * 16; i < j * 16 + 4; i++)
691 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
692 uint8_t *const ptr = dest[j - 1] + block_offset[i];
693 svq3_add_idct_c(ptr, s->mb + i * 16,
694 uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
695 }
696 }
697 }
698 }
699
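/* Parse and reconstruct one macroblock. mb_type 0 is SKIP, values below 8
 * are inter modes, 8 and 33 are INTRA4x4 variants and the rest are
 * INTRA16x16; the neighbour caches are filled, motion vectors and residual
 * coefficients are decoded, and the equivalent H.264-style mb_type is
 * stored in cur_pic. */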
700 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
701 {
702 H264Context *h = &s->h;
703 int i, j, k, m, dir, mode;
704 int cbp = 0;
705 uint32_t vlc;
706 int8_t *top, *left;
707 const int mb_xy = s->mb_xy;
708 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
709
710 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
711 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
712 s->topright_samples_available = 0xFFFF;
713
714 if (mb_type == 0) { /* SKIP */
715 if (s->pict_type == AV_PICTURE_TYPE_P ||
716 s->next_pic->mb_type[mb_xy] == -1) {
717 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
718 0, 0, 0, 0, 0, 0);
719
720 if (s->pict_type == AV_PICTURE_TYPE_B)
721 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
722 0, 0, 0, 0, 1, 1);
723
724 mb_type = MB_TYPE_SKIP;
725 } else {
726 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
727 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
728 return -1;
729 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
730 return -1;
731
732 mb_type = MB_TYPE_16x16;
733 }
734 } else if (mb_type < 8) { /* INTER */
735 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
736 mode = THIRDPEL_MODE;
737 else if (s->halfpel_flag &&
738 s->thirdpel_flag == !get_bits1(&h->gb))
739 mode = HALFPEL_MODE;
740 else
741 mode = FULLPEL_MODE;
742
743 /* fill caches */
744 /* note ref_cache should contain here:
745 * ????????
746 * ???11111
747 * N??11111
748 * N??11111
749 * N??11111
750 */
751
752 for (m = 0; m < 2; m++) {
753 if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
754 for (i = 0; i < 4; i++)
755 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
756 s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
757 } else {
758 for (i = 0; i < 4; i++)
759 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
760 }
761 if (s->mb_y > 0) {
762 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
763 s->cur_pic->motion_val[m][b_xy - s->b_stride],
764 4 * 2 * sizeof(int16_t));
765 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
766 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
767
768 if (s->mb_x < s->mb_width - 1) {
769 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
770 s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
771 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
772 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
773 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
774 } else
775 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
776 if (s->mb_x > 0) {
777 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
778 s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
779 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
780 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
781 } else
782 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
783 } else
784 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
785 PART_NOT_AVAILABLE, 8);
786
787 if (s->pict_type != AV_PICTURE_TYPE_B)
788 break;
789 }
790
791 /* decode motion vector(s) and form prediction(s) */
792 if (s->pict_type == AV_PICTURE_TYPE_P) {
793 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
794 return -1;
795 } else { /* AV_PICTURE_TYPE_B */
796 if (mb_type != 2) {
797 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
798 return -1;
799 } else {
800 for (i = 0; i < 4; i++)
801 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
802 0, 4 * 2 * sizeof(int16_t));
803 }
804 if (mb_type != 1) {
805 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
806 return -1;
807 } else {
808 for (i = 0; i < 4; i++)
809 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
810 0, 4 * 2 * sizeof(int16_t));
811 }
812 }
813
814 mb_type = MB_TYPE_16x16;
815 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
816 int8_t *i4x4 = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
817 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
818
819 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
820
821 if (mb_type == 8) {
822 if (s->mb_x > 0) {
823 for (i = 0; i < 4; i++)
824 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
825 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
826 s->left_samples_available = 0x5F5F;
827 }
828 if (s->mb_y > 0) {
829 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
830 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
831 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
832 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
833
834 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
835 s->top_samples_available = 0x33FF;
836 }
837
838 /* decode prediction codes for luma blocks */
839 for (i = 0; i < 16; i += 2) {
840 vlc = svq3_get_ue_golomb(&h->gb);
841
842 if (vlc >= 25) {
843 av_log(h->avctx, AV_LOG_ERROR,
844 "luma prediction:%"PRIu32"\n", vlc);
845 return -1;
846 }
847
848 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
849 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
850
851 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
852 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
853
854 if (left[1] == -1 || left[2] == -1) {
855 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
856 return -1;
857 }
858 }
859 } else { /* mb_type == 33, DC_128_PRED block type */
860 for (i = 0; i < 4; i++)
861 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
862 }
863
864 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
865 i4x4[4] = i4x4_cache[7 + 8 * 3];
866 i4x4[5] = i4x4_cache[7 + 8 * 2];
867 i4x4[6] = i4x4_cache[7 + 8 * 1];
868
869 if (mb_type == 8) {
870 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
871 h->avctx, s->top_samples_available,
872 s->left_samples_available);
873
874 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
875 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
876 } else {
877 for (i = 0; i < 4; i++)
878 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
879
880 s->top_samples_available = 0x33FF;
881 s->left_samples_available = 0x5F5F;
882 }
883
884 mb_type = MB_TYPE_INTRA4x4;
885 } else { /* INTRA16x16 */
886 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
887 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
888
889 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
890 s->left_samples_available, dir, 0)) < 0) {
891 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
892 return s->intra16x16_pred_mode;
893 }
894
895 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
896 mb_type = MB_TYPE_INTRA16x16;
897 }
898
899 if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
900 for (i = 0; i < 4; i++)
901 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
902 0, 4 * 2 * sizeof(int16_t));
903 if (s->pict_type == AV_PICTURE_TYPE_B) {
904 for (i = 0; i < 4; i++)
905 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
906 0, 4 * 2 * sizeof(int16_t));
907 }
908 }
909 if (!IS_INTRA4x4(mb_type)) {
910 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
911 }
912 if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
913 memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
914 }
915
916 if (!IS_INTRA16x16(mb_type) &&
917 (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
918 if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
919 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
920 return -1;
921 }
922
923 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
924 : ff_h264_golomb_to_inter_cbp[vlc];
925 }
926 if (IS_INTRA16x16(mb_type) ||
927 (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
928 s->qscale += svq3_get_se_golomb(&h->gb);
929
930 if (s->qscale > 31u) {
931 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
932 return -1;
933 }
934 }
935 if (IS_INTRA16x16(mb_type)) {
936 AV_ZERO128(s->mb_luma_dc[0] + 0);
937 AV_ZERO128(s->mb_luma_dc[0] + 8);
938 if (svq3_decode_block(&h->gb, s->mb_luma_dc[0], 0, 1)) {
939 av_log(h->avctx, AV_LOG_ERROR,
940 "error while decoding intra luma dc\n");
941 return -1;
942 }
943 }
944
945 if (cbp) {
946 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
947 const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
948
949 for (i = 0; i < 4; i++)
950 if ((cbp & (1 << i))) {
951 for (j = 0; j < 4; j++) {
952 k = index ? (1 * (j & 1) + 2 * (i & 1) +
953 2 * (j & 2) + 4 * (i & 2))
954 : (4 * i + j);
955 s->non_zero_count_cache[scan8[k]] = 1;
956
957 if (svq3_decode_block(&h->gb, &s->mb[16 * k], index, type)) {
958 av_log(h->avctx, AV_LOG_ERROR,
959 "error while decoding block\n");
960 return -1;
961 }
962 }
963 }
964
965 if ((cbp & 0x30)) {
966 for (i = 1; i < 3; ++i)
967 if (svq3_decode_block(&h->gb, &s->mb[16 * 16 * i], 0, 3)) {
968 av_log(h->avctx, AV_LOG_ERROR,
969 "error while decoding chroma dc block\n");
970 return -1;
971 }
972
973 if ((cbp & 0x20)) {
974 for (i = 1; i < 3; i++) {
975 for (j = 0; j < 4; j++) {
976 k = 16 * i + j;
977 s->non_zero_count_cache[scan8[k]] = 1;
978
979 if (svq3_decode_block(&h->gb, &s->mb[16 * k], 1, 1)) {
980 av_log(h->avctx, AV_LOG_ERROR,
981 "error while decoding chroma ac block\n");
982 return -1;
983 }
984 }
985 }
986 }
987 }
988 }
989
990 s->cbp = cbp;
991 s->cur_pic->mb_type[mb_xy] = mb_type;
992
993 if (IS_INTRA(mb_type))
994 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
995 s->left_samples_available, DC_PRED8x8, 1);
996
997 return 0;
998 }
999
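/* Parse one slice header: validate the header byte, copy the slice payload
 * into slice_buf (undoing the watermark scrambling if a watermark key is
 * set) and read the slice type, slice number, qscale and adaptive-quant
 * flag from it. */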
1000 static int svq3_decode_slice_header(AVCodecContext *avctx)
1001 {
1002 SVQ3Context *s = avctx->priv_data;
1003 H264Context *h = &s->h;
1004 const int mb_xy = s->mb_xy;
1005 int i, header;
1006 unsigned slice_id;
1007
1008 header = get_bits(&s->gb, 8);
1009
1010 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1011 /* TODO: what? */
1012 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1013 return -1;
1014 } else {
1015 int slice_bits, slice_bytes, slice_length;
1016 int length = header >> 5 & 3;
1017
1018 slice_length = show_bits(&s->gb, 8 * length);
1019 slice_bits = slice_length * 8;
1020 slice_bytes = slice_length + length - 1;
1021
1022 if (slice_bytes > get_bits_left(&s->gb)) {
1023 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1024 return -1;
1025 }
1026
1027 skip_bits(&s->gb, 8);
1028
1029 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1030 if (!s->slice_buf)
1031 return AVERROR(ENOMEM);
1032
1033 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1034
1035 init_get_bits(&h->gb, s->slice_buf, slice_bits);
1036
1037 if (s->watermark_key) {
1038 uint32_t header = AV_RL32(&h->gb.buffer[1]);
1039 AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
1040 }
1041 if (length > 0) {
1042 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1043 }
1044 skip_bits_long(&s->gb, slice_bytes * 8);
1045 }
1046
1047 if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
1048 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
1049 return -1;
1050 }
1051
1052 s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
1053
1054 if ((header & 0x9F) == 2) {
1055 i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1056 get_bits(&h->gb, i);
1057 } else {
1058 skip_bits1(&h->gb);
1059 }
1060
1061 s->slice_num = get_bits(&h->gb, 8);
1062 s->qscale = get_bits(&h->gb, 5);
1063 s->adaptive_quant = get_bits1(&h->gb);
1064
1065 /* unknown fields */
1066 skip_bits1(&h->gb);
1067
1068 if (s->unknown_flag)
1069 skip_bits1(&h->gb);
1070
1071 skip_bits1(&h->gb);
1072 skip_bits(&h->gb, 2);
1073
1074 while (get_bits1(&h->gb))
1075 skip_bits(&h->gb, 8);
1076
1077 /* reset intra predictors and invalidate motion vector references */
1078 if (s->mb_x > 0) {
1079 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1080 -1, 4 * sizeof(int8_t));
1081 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1082 -1, 8 * sizeof(int8_t) * s->mb_x);
1083 }
1084 if (s->mb_y > 0) {
1085 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1086 -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1087
1088 if (s->mb_x > 0)
1089 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1090 }
1091
1092 return 0;
1093 }
1094
1095 static void init_dequant4_coeff_table(SVQ3Context *s)
1096 {
1097 int q, x;
1098 const int max_qp = 51;
1099
1100 for (q = 0; q < max_qp + 1; q++) {
1101 int shift = ff_h264_quant_div6[q] + 2;
1102 int idx = ff_h264_quant_rem6[q];
1103 for (x = 0; x < 16; x++)
1104 s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1105 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1106 }
1107 }
1108
1109 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1110 {
1111 SVQ3Context *s = avctx->priv_data;
1112 H264Context *h = &s->h;
1113 int m, x, y;
1114 unsigned char *extradata;
1115 unsigned char *extradata_end;
1116 unsigned int size;
1117 int marker_found = 0;
1118
1119 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1120 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1121 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1122 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1123 av_freep(&s->cur_pic);
1124 av_freep(&s->last_pic);
1125 av_freep(&s->next_pic);
1126 return AVERROR(ENOMEM);
1127 }
1128
1129 s->cur_pic->f = av_frame_alloc();
1130 s->last_pic->f = av_frame_alloc();
1131 s->next_pic->f = av_frame_alloc();
1132 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1133 return AVERROR(ENOMEM);
1134
1135 if (ff_h264_decode_init(avctx) < 0)
1136 return -1;
1137
1138 // we will overwrite it later during decoding
1139 av_frame_free(&h->cur_pic.f);
1140
1141 ff_h264dsp_init(&s->h264dsp, 8, 1);
1142 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1143 ff_videodsp_init(&s->vdsp, 8);
1144
1145 h->sps.bit_depth_luma = 8;
1146 h->chroma_format_idc = 1;
1147
1148 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1149 ff_tpeldsp_init(&s->tdsp);
1150
1151 h->flags = avctx->flags;
1152 h->picture_structure = PICT_FRAME;
1153 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1154 avctx->color_range = AVCOL_RANGE_JPEG;
1155
1156 h->chroma_x_shift = h->chroma_y_shift = 1;
1157
1158 s->halfpel_flag = 1;
1159 s->thirdpel_flag = 1;
1160 s->unknown_flag = 0;
1161
1162 /* prowl for the "SEQH" marker in the extradata */
1163 extradata = (unsigned char *)avctx->extradata;
1164 extradata_end = avctx->extradata + avctx->extradata_size;
1165 if (extradata) {
1166 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1167 if (!memcmp(extradata, "SEQH", 4)) {
1168 marker_found = 1;
1169 break;
1170 }
1171 extradata++;
1172 }
1173 }
1174
1175 /* if a match was found, parse the extra data */
1176 if (marker_found) {
1177 GetBitContext gb;
1178 int frame_size_code;
1179
1180 size = AV_RB32(&extradata[4]);
1181 if (size > extradata_end - extradata - 8)
1182 return AVERROR_INVALIDDATA;
1183 init_get_bits(&gb, extradata + 8, size * 8);
1184
1185 /* 'frame size code' and optional 'width, height' */
1186 frame_size_code = get_bits(&gb, 3);
1187 switch (frame_size_code) {
1188 case 0:
1189 avctx->width = 160;
1190 avctx->height = 120;
1191 break;
1192 case 1:
1193 avctx->width = 128;
1194 avctx->height = 96;
1195 break;
1196 case 2:
1197 avctx->width = 176;
1198 avctx->height = 144;
1199 break;
1200 case 3:
1201 avctx->width = 352;
1202 avctx->height = 288;
1203 break;
1204 case 4:
1205 avctx->width = 704;
1206 avctx->height = 576;
1207 break;
1208 case 5:
1209 avctx->width = 240;
1210 avctx->height = 180;
1211 break;
1212 case 6:
1213 avctx->width = 320;
1214 avctx->height = 240;
1215 break;
1216 case 7:
1217 avctx->width = get_bits(&gb, 12);
1218 avctx->height = get_bits(&gb, 12);
1219 break;
1220 }
1221
1222 s->halfpel_flag = get_bits1(&gb);
1223 s->thirdpel_flag = get_bits1(&gb);
1224
1225 /* unknown fields */
1226 skip_bits1(&gb);
1227 skip_bits1(&gb);
1228 skip_bits1(&gb);
1229 skip_bits1(&gb);
1230
1231 h->low_delay = get_bits1(&gb);
1232
1233 /* unknown field */
1234 skip_bits1(&gb);
1235
1236 while (get_bits1(&gb))
1237 skip_bits(&gb, 8);
1238
1239 s->unknown_flag = get_bits1(&gb);
1240 avctx->has_b_frames = !h->low_delay;
1241 if (s->unknown_flag) {
1242 #if CONFIG_ZLIB
1243 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1244 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1245 int u1 = svq3_get_ue_golomb(&gb);
1246 int u2 = get_bits(&gb, 8);
1247 int u3 = get_bits(&gb, 2);
1248 int u4 = svq3_get_ue_golomb(&gb);
1249 unsigned long buf_len = watermark_width *
1250 watermark_height * 4;
1251 int offset = get_bits_count(&gb) + 7 >> 3;
1252 uint8_t *buf;
1253
1254 if (watermark_height > 0 &&
1255 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1256 return -1;
1257
1258 buf = av_malloc(buf_len);
if (!buf)
    return AVERROR(ENOMEM);
1259 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1260 watermark_width, watermark_height);
1261 av_log(avctx, AV_LOG_DEBUG,
1262 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1263 u1, u2, u3, u4, offset);
1264 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1265 size - offset) != Z_OK) {
1266 av_log(avctx, AV_LOG_ERROR,
1267 "could not uncompress watermark logo\n");
1268 av_free(buf);
1269 return -1;
1270 }
1271 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1272 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1273 av_log(avctx, AV_LOG_DEBUG,
1274 "watermark key %#"PRIx32"\n", s->watermark_key);
1275 av_free(buf);
1276 #else
1277 av_log(avctx, AV_LOG_ERROR,
1278 "this svq3 file contains watermark which need zlib support compiled in\n");
1279 return -1;
1280 #endif
1281 }
1282 }
1283
1284 s->mb_width = (avctx->width + 15) / 16;
1285 s->mb_height = (avctx->height + 15) / 16;
1286 s->mb_stride = s->mb_width + 1;
1287 s->mb_num = s->mb_width * s->mb_height;
1288 s->b_stride = 4 * s->mb_width;
1289 s->h_edge_pos = s->mb_width * 16;
1290 s->v_edge_pos = s->mb_height * 16;
1291
1292 s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1293 if (!s->intra4x4_pred_mode)
1294 return AVERROR(ENOMEM);
1295
1296 s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1297 sizeof(*s->mb2br_xy));
1298 if (!s->mb2br_xy)
1299 return AVERROR(ENOMEM);
1300
1301 for (y = 0; y < s->mb_height; y++)
1302 for (x = 0; x < s->mb_width; x++) {
1303 const int mb_xy = x + y * s->mb_stride;
1304
1305 s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1306 }
1307
1308 init_dequant4_coeff_table(s);
1309
1310 return 0;
1311 }
1312
1313 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1314 {
1315 int i;
1316 for (i = 0; i < 2; i++) {
1317 av_buffer_unref(&pic->motion_val_buf[i]);
1318 av_buffer_unref(&pic->ref_index_buf[i]);
1319 }
1320 av_buffer_unref(&pic->mb_type_buf);
1321
1322 av_frame_unref(pic->f);
1323 }
1324
1325 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1326 {
1327 SVQ3Context *s = avctx->priv_data;
1328 const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
1329 const int mb_array_size = s->mb_stride * s->mb_height;
1330 const int b4_stride = s->mb_width * 4 + 1;
1331 const int b4_array_size = b4_stride * s->mb_height * 4;
1332 int ret;
1333
1334 if (!pic->motion_val_buf[0]) {
1335 int i;
1336
1337 pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1338 if (!pic->mb_type_buf)
1339 return AVERROR(ENOMEM);
1340 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1341
1342 for (i = 0; i < 2; i++) {
1343 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1344 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1345 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1346 ret = AVERROR(ENOMEM);
1347 goto fail;
1348 }
1349
1350 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1351 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1352 }
1353 }
1354 pic->reference = !(s->pict_type == AV_PICTURE_TYPE_B);
1355
1356 ret = ff_get_buffer(avctx, pic->f,
1357 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1358 if (ret < 0)
1359 goto fail;
1360
1361 if (!s->edge_emu_buffer) {
1362 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1363 if (!s->edge_emu_buffer)
1364 return AVERROR(ENOMEM);
1365 }
1366
1367 return 0;
1368 fail:
1369 free_picture(avctx, pic);
1370 return ret;
1371 }
1372
1373 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1374 int *got_frame, AVPacket *avpkt)
1375 {
1376 const uint8_t *buf = avpkt->data;
1377 SVQ3Context *s = avctx->priv_data;
1378 H264Context *h = &s->h;
1379 int buf_size = avpkt->size;
1380 int ret, m, i;
1381
1382 /* special case for last picture */
1383 if (buf_size == 0) {
1384 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1385 ret = av_frame_ref(data, s->next_pic->f);
1386 if (ret < 0)
1387 return ret;
1388 s->last_frame_output = 1;
1389 *got_frame = 1;
1390 }
1391 return 0;
1392 }
1393
1394 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1395 if (ret < 0)
1396 return ret;
1397
1398 s->mb_x = s->mb_y = s->mb_xy = 0;
1399
1400 if (svq3_decode_slice_header(avctx))
1401 return -1;
1402
1403 if (s->pict_type != AV_PICTURE_TYPE_B)
1404 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1405
1406 av_frame_unref(s->cur_pic->f);
1407
1408 /* for skipping the frame */
1409 s->cur_pic->f->pict_type = s->pict_type;
1410 s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1411
1412 ret = get_buffer(avctx, s->cur_pic);
1413 if (ret < 0)
1414 return ret;
1415
1416 for (i = 0; i < 16; i++) {
1417 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1418 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1419 }
1420 for (i = 0; i < 16; i++) {
1421 h->block_offset[16 + i] =
1422 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1423 h->block_offset[48 + 16 + i] =
1424 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1425 }
1426
1427 if (s->pict_type != AV_PICTURE_TYPE_I) {
1428 if (!s->last_pic->f->data[0]) {
1429 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1430 ret = get_buffer(avctx, s->last_pic);
1431 if (ret < 0)
1432 return ret;
1433 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1434 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1435 s->last_pic->f->linesize[1]);
1436 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1437 s->last_pic->f->linesize[2]);
1438 }
1439
1440 if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1441 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1442 ret = get_buffer(avctx, s->next_pic);
1443 if (ret < 0)
1444 return ret;
1445 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1446 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1447 s->next_pic->f->linesize[1]);
1448 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1449 s->next_pic->f->linesize[2]);
1450 }
1451 }
1452
1453 if (avctx->debug & FF_DEBUG_PICT_INFO)
1454 av_log(h->avctx, AV_LOG_DEBUG,
1455 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1456 av_get_picture_type_char(s->pict_type),
1457 s->halfpel_flag, s->thirdpel_flag,
1458 s->adaptive_quant, s->qscale, s->slice_num);
1459
1460 if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1461 avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1462 avctx->skip_frame >= AVDISCARD_ALL)
1463 return 0;
1464
1465 if (s->next_p_frame_damaged) {
1466 if (s->pict_type == AV_PICTURE_TYPE_B)
1467 return 0;
1468 else
1469 s->next_p_frame_damaged = 0;
1470 }
1471
1472 if (s->pict_type == AV_PICTURE_TYPE_B) {
1473 h->frame_num_offset = s->slice_num - h->prev_frame_num;
1474
1475 if (h->frame_num_offset < 0)
1476 h->frame_num_offset += 256;
1477 if (h->frame_num_offset == 0 ||
1478 h->frame_num_offset >= h->prev_frame_num_offset) {
1479 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1480 return -1;
1481 }
1482 } else {
1483 h->prev_frame_num = h->frame_num;
1484 h->frame_num = s->slice_num;
1485 h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
1486
1487 if (h->prev_frame_num_offset < 0)
1488 h->prev_frame_num_offset += 256;
1489 }
1490
1491 for (m = 0; m < 2; m++) {
1492 int i;
1493 for (i = 0; i < 4; i++) {
1494 int j;
1495 for (j = -1; j < 4; j++)
1496 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1497 if (i < 3)
1498 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1499 }
1500 }
1501
1502 for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1503 for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1504 unsigned mb_type;
1505 s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1506
1507 if ((get_bits_left(&h->gb)) <= 7) {
1508 if (((get_bits_count(&h->gb) & 7) == 0 ||
1509 show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
1510
1511 if (svq3_decode_slice_header(avctx))
1512 return -1;
1513 }
1514 /* TODO: support s->mb_skip_run */
1515 }
1516
1517 mb_type = svq3_get_ue_golomb(&h->gb);
1518
1519 if (s->pict_type == AV_PICTURE_TYPE_I)
1520 mb_type += 8;
1521 else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1522 mb_type += 4;
1523 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1524 av_log(h->avctx, AV_LOG_ERROR,
1525 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1526 return -1;
1527 }
1528
1529 if (mb_type != 0)
1530 hl_decode_mb(s, h);
1531
1532 if (s->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1533 s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1534 (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1535 }
1536
1537 ff_draw_horiz_band(avctx, s->cur_pic->f,
1538 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1539 16 * s->mb_y, 16, h->picture_structure, 0,
1540 h->low_delay);
1541 }
1542
1543 if (s->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1544 ret = av_frame_ref(data, s->cur_pic->f);
1545 else if (s->last_pic->f->data[0])
1546 ret = av_frame_ref(data, s->last_pic->f);
1547 if (ret < 0)
1548 return ret;
1549
1550 /* Do not output the last pic after seeking. */
1551 if (s->last_pic->f->data[0] || h->low_delay)
1552 *got_frame = 1;
1553
1554 if (s->pict_type != AV_PICTURE_TYPE_B) {
1555 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1556 } else {
1557 av_frame_unref(s->cur_pic->f);
1558 }
1559
1560 return buf_size;
1561 }
1562
1563 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1564 {
1565 SVQ3Context *s = avctx->priv_data;
1566 H264Context *h = &s->h;
1567
1568 free_picture(avctx, s->cur_pic);
1569 free_picture(avctx, s->next_pic);
1570 free_picture(avctx, s->last_pic);
1571 av_frame_free(&s->cur_pic->f);
1572 av_frame_free(&s->next_pic->f);
1573 av_frame_free(&s->last_pic->f);
1574 av_freep(&s->cur_pic);
1575 av_freep(&s->next_pic);
1576 av_freep(&s->last_pic);
1577 av_freep(&s->slice_buf);
1578 av_freep(&s->intra4x4_pred_mode);
1579 av_freep(&s->edge_emu_buffer);
1580 av_freep(&s->mb2br_xy);
1581
1582 ff_h264_free_context(h);
1583
1584 return 0;
1585 }
1586
1587 AVCodec ff_svq3_decoder = {
1588 .name = "svq3",
1589 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1590 .type = AVMEDIA_TYPE_VIDEO,
1591 .id = AV_CODEC_ID_SVQ3,
1592 .priv_data_size = sizeof(SVQ3Context),
1593 .init = svq3_decode_init,
1594 .close = svq3_decode_end,
1595 .decode = svq3_decode_frame,
1596 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1597 AV_CODEC_CAP_DR1 |
1598 AV_CODEC_CAP_DELAY,
1599 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1600 AV_PIX_FMT_NONE},
1601 };
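/*
 * A hedged usage sketch (editorial, not part of the decoder): the decoder
 * advertises AV_CODEC_CAP_DELAY and may buffer one reference frame, so a
 * caller using the decode API of this libavcodec version should flush with
 * an empty packet to obtain the final picture (see the buf_size == 0 path
 * in svq3_decode_frame()). avctx is an opened SVQ3 decoder context;
 * read_next_packet() and process_frame() are hypothetical helpers standing
 * in for the demuxer and the consumer.
 *
 *     AVPacket pkt;
 *     AVFrame *frame = av_frame_alloc();
 *     int got_frame;
 *
 *     while (read_next_packet(&pkt) >= 0) {
 *         if (avcodec_decode_video2(avctx, frame, &got_frame, &pkt) < 0)
 *             break;
 *         if (got_frame)
 *             process_frame(frame);
 *     }
 *
 *     // drain the delayed picture with an empty packet
 *     av_init_packet(&pkt);
 *     pkt.data = NULL;
 *     pkt.size = 0;
 *     avcodec_decode_video2(avctx, frame, &got_frame, &pkt);
 *     if (got_frame)
 *         process_frame(frame);
 */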