libavcodec/svq3.c
1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple QuickTime files. QuickTime files
24 * have stsd atoms to describe media trak properties. An stsd atom for a
25 * video trak contains one or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
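/*
 * A minimal sketch of the setup a calling application might perform, assuming
 * it holds the stripped ImageDescription payload in the hypothetical variables
 * "desc" / "desc_size" (the MOV demuxer in Libav normally does this for you):
 *
 *     avctx->extradata = av_mallocz(desc_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, desc, desc_size); // first bytes: 'S','V','Q','3'
 *     avctx->extradata_size = desc_size;
 */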
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * svq3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 uint8_t *slice_buf;
83 int slice_size;
84 int halfpel_flag;
85 int thirdpel_flag;
86 int unknown_flag;
87 uint32_t watermark_key;
88 int adaptive_quant;
89 int next_p_frame_damaged;
90 int h_edge_pos;
91 int v_edge_pos;
92 int last_frame_output;
93
94 enum AVPictureType pict_type;
95
96 int mb_x, mb_y;
97 int mb_xy;
98 int mb_width, mb_height;
99 int mb_stride, mb_num;
100 int b_stride;
101
102 uint32_t *mb2br_xy;
103
104 int chroma_pred_mode;
105 int intra16x16_pred_mode;
106
107 int8_t intra4x4_pred_mode_cache[5 * 8];
108 int8_t (*intra4x4_pred_mode);
109
110 unsigned int top_samples_available;
111 unsigned int topright_samples_available;
112 unsigned int left_samples_available;
113
114 uint8_t *edge_emu_buffer;
115
116 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
117 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
118 uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
119 } SVQ3Context;
120
121 #define FULLPEL_MODE 1
122 #define HALFPEL_MODE 2
123 #define THIRDPEL_MODE 3
124 #define PREDICT_MODE 4
125
126 /* dual scan (from some older h264 draft)
127  *  o-->o-->o   o
128  *          |  /|
129  *  o   o   o / o
130  *  | / |   |/  |
131  *  o   o   o   o
132  *    /
133  *  o-->o-->o-->o
134  */
135 static const uint8_t svq3_scan[16] = {
136 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
137 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
138 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
139 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
140 };
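/*
 * Derived from the table above: the scan position (0..15) at which each
 * coefficient of the 4x4 block is visited, laid out row by row:
 *
 *      0  1  2  5
 *      8 10  3  6
 *      9 11  4  7
 *     12 13 14 15
 */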
141
142 static const uint8_t luma_dc_zigzag_scan[16] = {
143 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
144 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
145 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
146 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
147 };
148
149 static const uint8_t svq3_pred_0[25][2] = {
150 { 0, 0 },
151 { 1, 0 }, { 0, 1 },
152 { 0, 2 }, { 1, 1 }, { 2, 0 },
153 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
154 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
155 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
156 { 2, 4 }, { 3, 3 }, { 4, 2 },
157 { 4, 3 }, { 3, 4 },
158 { 4, 4 }
159 };
160
161 static const int8_t svq3_pred_1[6][6][5] = {
162 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
163 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
164 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
165 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
166 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
167 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
168 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
169 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
170 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
171 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
172 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
173 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
174 };
175
176 static const struct {
177 uint8_t run;
178 uint8_t level;
179 } svq3_dct_tables[2][16] = {
180 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
181 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
182 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
183 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
184 };
185
186 static const uint32_t svq3_dequant_coeff[32] = {
187 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
188 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
189 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
190 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
191 };
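/* Observation about the values above (not signalled in the bitstream): each
 * entry is roughly 2^(1/6) times the previous one, so the scale approximately
 * doubles every 6 qp levels, mirroring H.264's qp/6 dequantization scaling. */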
192
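/*
 * Note on the butterflies used by both IDCT helpers below: the weight pairs
 * (13, 13) and (17, 7) have equal energy (13*13 + 13*13 = 17*17 + 7*7 = 338),
 * so all four basis rows of the 4-point transform share the same norm and the
 * transform stays orthogonal, SVQ3's analogue of H.264's small integer DCT.
 */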
193 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
194 {
195 const int qmul = svq3_dequant_coeff[qp];
196 #define stride 16
197 int i;
198 int temp[16];
199 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
200
201 for (i = 0; i < 4; i++) {
202 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
203 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
204 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
205 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
206
207 temp[4 * i + 0] = z0 + z3;
208 temp[4 * i + 1] = z1 + z2;
209 temp[4 * i + 2] = z1 - z2;
210 temp[4 * i + 3] = z0 - z3;
211 }
212
213 for (i = 0; i < 4; i++) {
214 const int offset = x_offset[i];
215 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
216 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
217 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
218 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
219
220 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
221 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
222 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
223 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
224 }
225 }
226 #undef stride
227
228 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
229 int stride, int qp, int dc)
230 {
231 const int qmul = svq3_dequant_coeff[qp];
232 int i;
233
234 if (dc) {
235 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
236 : qmul * (block[0] >> 3) / 2);
237 block[0] = 0;
238 }
239
240 for (i = 0; i < 4; i++) {
241 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
242 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
243 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
244 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
245
246 block[0 + 4 * i] = z0 + z3;
247 block[1 + 4 * i] = z1 + z2;
248 block[2 + 4 * i] = z1 - z2;
249 block[3 + 4 * i] = z0 - z3;
250 }
251
252 for (i = 0; i < 4; i++) {
253 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
254 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
255 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
256 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
257 const int rr = (dc + 0x80000);
258
259 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
260 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
261 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
262 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
263 }
264
265 memset(block, 0, 16 * sizeof(int16_t));
266 }
267
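/*
 * Coefficients arrive as one interleaved run/level stream: the LSB of each
 * ue(v) code is the sign (odd = positive, even = negative) and the remaining
 * bits index svq3_dct_tables[] or, for large codes, an escape formula.
 * 'type' picks the scan pattern and run limit: 1 = plain zigzag,
 * 2 = the dual scan above (two passes over the intra 4x4 block),
 * 3 = 2x2 chroma DC.
 */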
268 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
269 int index, const int type)
270 {
271 static const uint8_t *const scan_patterns[4] = {
272 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
273 };
274
275 int run, level, limit;
276 unsigned vlc;
277 const int intra = 3 * type >> 2;
278 const uint8_t *const scan = scan_patterns[type];
279
280 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
281 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
282 int sign = (vlc & 1) ? 0 : -1;
283 vlc = vlc + 1 >> 1;
284
285 if (type == 3) {
286 if (vlc < 3) {
287 run = 0;
288 level = vlc;
289 } else if (vlc < 4) {
290 run = 1;
291 level = 1;
292 } else {
293 run = vlc & 0x3;
294 level = (vlc + 9 >> 2) - run;
295 }
296 } else {
297 if (vlc < 16) {
298 run = svq3_dct_tables[intra][vlc].run;
299 level = svq3_dct_tables[intra][vlc].level;
300 } else if (intra) {
301 run = vlc & 0x7;
302 level = (vlc >> 3) +
303 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
304 } else {
305 run = vlc & 0xF;
306 level = (vlc >> 4) +
307 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
308 }
309 }
310
311 if ((index += run) >= limit)
312 return -1;
313
314 block[scan[index]] = (level ^ sign) - sign;
315 }
316
317 if (type != 2) {
318 break;
319 }
320 }
321
322 return 0;
323 }
324
325 static av_always_inline int
326 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
327 int i, int list, int part_width)
328 {
329 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
330
331 if (topright_ref != PART_NOT_AVAILABLE) {
332 *C = s->mv_cache[list][i - 8 + part_width];
333 return topright_ref;
334 } else {
335 *C = s->mv_cache[list][i - 8 - 1];
336 return s->ref_cache[list][i - 8 - 1];
337 }
338 }
339
340 /**
341 * Get the predicted MV.
342 * @param n the block index
343 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
344 * @param mx the x component of the predicted motion vector
345 * @param my the y component of the predicted motion vector
346 */
347 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
348 int part_width, int list,
349 int ref, int *const mx, int *const my)
350 {
351 const int index8 = scan8[n];
352 const int top_ref = s->ref_cache[list][index8 - 8];
353 const int left_ref = s->ref_cache[list][index8 - 1];
354 const int16_t *const A = s->mv_cache[list][index8 - 1];
355 const int16_t *const B = s->mv_cache[list][index8 - 8];
356 const int16_t *C;
357 int diagonal_ref, match_count;
358
359 /* mv_cache
360 * B . . A T T T T
361 * U . . L . . , .
362 * U . . L . . . .
363 * U . . L . . , .
364 * . . . L . . . .
365 */
366
367 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
368 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
369 if (match_count > 1) { //most common
370 *mx = mid_pred(A[0], B[0], C[0]);
371 *my = mid_pred(A[1], B[1], C[1]);
372 } else if (match_count == 1) {
373 if (left_ref == ref) {
374 *mx = A[0];
375 *my = A[1];
376 } else if (top_ref == ref) {
377 *mx = B[0];
378 *my = B[1];
379 } else {
380 *mx = C[0];
381 *my = C[1];
382 }
383 } else {
384 if (top_ref == PART_NOT_AVAILABLE &&
385 diagonal_ref == PART_NOT_AVAILABLE &&
386 left_ref != PART_NOT_AVAILABLE) {
387 *mx = A[0];
388 *my = A[1];
389 } else {
390 *mx = mid_pred(A[0], B[0], C[0]);
391 *my = mid_pred(A[1], B[1], C[1]);
392 }
393 }
394 }
395
396 static inline void svq3_mc_dir_part(SVQ3Context *s,
397 int x, int y, int width, int height,
398 int mx, int my, int dxy,
399 int thirdpel, int dir, int avg)
400 {
401 H264Context *h = &s->h;
402 H264SliceContext *sl = &h->slice_ctx[0];
403 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
404 uint8_t *src, *dest;
405 int i, emu = 0;
406 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
407
408 mx += x;
409 my += y;
410
411 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
412 my < 0 || my >= s->v_edge_pos - height - 1) {
413 emu = 1;
414 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
415 my = av_clip(my, -16, s->v_edge_pos - height + 15);
416 }
417
418 /* form component predictions */
419 dest = h->cur_pic.f->data[0] + x + y * sl->linesize;
420 src = pic->f->data[0] + mx + my * sl->linesize;
421
422 if (emu) {
423 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
424 sl->linesize, sl->linesize,
425 width + 1, height + 1,
426 mx, my, s->h_edge_pos, s->v_edge_pos);
427 src = s->edge_emu_buffer;
428 }
429 if (thirdpel)
430 (avg ? s->tdsp.avg_tpel_pixels_tab
431 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, sl->linesize,
432 width, height);
433 else
434 (avg ? s->hdsp.avg_pixels_tab
435 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, sl->linesize,
436 height);
437
438 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
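/* chroma: halve the luma-plane position; since '+' binds tighter than '>>'
 * this is (mx + (mx < x)) >> 1, biasing the rounding toward the block's
 * own x/y position */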
439 mx = mx + (mx < (int) x) >> 1;
440 my = my + (my < (int) y) >> 1;
441 width = width >> 1;
442 height = height >> 1;
443 blocksize++;
444
445 for (i = 1; i < 3; i++) {
446 dest = h->cur_pic.f->data[i] + (x >> 1) + (y >> 1) * sl->uvlinesize;
447 src = pic->f->data[i] + mx + my * sl->uvlinesize;
448
449 if (emu) {
450 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
451 sl->uvlinesize, sl->uvlinesize,
452 width + 1, height + 1,
453 mx, my, (s->h_edge_pos >> 1),
454 s->v_edge_pos >> 1);
455 src = s->edge_emu_buffer;
456 }
457 if (thirdpel)
458 (avg ? s->tdsp.avg_tpel_pixels_tab
459 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
460 sl->uvlinesize,
461 width, height);
462 else
463 (avg ? s->hdsp.avg_pixels_tab
464 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
465 sl->uvlinesize,
466 height);
467 }
468 }
469 }
470
471 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
472 int dir, int avg)
473 {
474 int i, j, k, mx, my, dx, dy, x, y;
475 H264Context *h = &s->h;
476 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
477 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
478 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
479 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
480 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
481
482 for (i = 0; i < 16; i += part_height)
483 for (j = 0; j < 16; j += part_width) {
484 const int b_xy = (4 * s->mb_x + (j >> 2)) +
485 (4 * s->mb_y + (i >> 2)) * s->b_stride;
486 int dxy;
487 x = 16 * s->mb_x + j;
488 y = 16 * s->mb_y + i;
489 k = (j >> 2 & 1) + (i >> 1 & 2) +
490 (j >> 1 & 4) + (i & 8);
491
492 if (mode != PREDICT_MODE) {
493 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
494 } else {
495 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
496 my = s->next_pic->motion_val[0][b_xy][1] << 1;
497
498 if (dir == 0) {
499 mx = mx * h->frame_num_offset /
500 h->prev_frame_num_offset + 1 >> 1;
501 my = my * h->frame_num_offset /
502 h->prev_frame_num_offset + 1 >> 1;
503 } else {
504 mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
505 h->prev_frame_num_offset + 1 >> 1;
506 my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
507 h->prev_frame_num_offset + 1 >> 1;
508 }
509 }
510
511 /* clip motion vector prediction to frame border */
512 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
513 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
514
515 /* get (optional) motion vector differential */
516 if (mode == PREDICT_MODE) {
517 dx = dy = 0;
518 } else {
519 dy = svq3_get_se_golomb(&h->gb);
520 dx = svq3_get_se_golomb(&h->gb);
521
522 if (dx == INVALID_VLC || dy == INVALID_VLC) {
523 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
524 return -1;
525 }
526 }
527
528 /* compute motion vector */
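/* (mx, my) is predicted in 1/6-pel units; each branch below converts it to
 * the working precision of the selected mode (1/3, 1/2 or full pel), adds
 * the coded differential, then scales back to 1/6 pel before it is stored
 * in mv_cache / motion_val. The 0x3000 / 0x6000 bias keeps the unsigned
 * divisions behaving like floor() for negative predictions. */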
529 if (mode == THIRDPEL_MODE) {
530 int fx, fy;
531 mx = (mx + 1 >> 1) + dx;
532 my = (my + 1 >> 1) + dy;
533 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
534 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
535 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
536
537 svq3_mc_dir_part(s, x, y, part_width, part_height,
538 fx, fy, dxy, 1, dir, avg);
539 mx += mx;
540 my += my;
541 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
542 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
543 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
544 dxy = (mx & 1) + 2 * (my & 1);
545
546 svq3_mc_dir_part(s, x, y, part_width, part_height,
547 mx >> 1, my >> 1, dxy, 0, dir, avg);
548 mx *= 3;
549 my *= 3;
550 } else {
551 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
552 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
553
554 svq3_mc_dir_part(s, x, y, part_width, part_height,
555 mx, my, 0, 0, dir, avg);
556 mx *= 6;
557 my *= 6;
558 }
559
560 /* update mv_cache */
561 if (mode != PREDICT_MODE) {
562 int32_t mv = pack16to32(mx, my);
563
564 if (part_height == 8 && i < 8) {
565 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
566
567 if (part_width == 8 && j < 8)
568 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
569 }
570 if (part_width == 8 && j < 8)
571 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
572 if (part_width == 4 || part_height == 4)
573 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
574 }
575
576 /* write back motion vectors */
577 fill_rectangle(h->cur_pic.motion_val[dir][b_xy],
578 part_width >> 2, part_height >> 2, s->b_stride,
579 pack16to32(mx, my), 4);
580 }
581
582 return 0;
583 }
584
585 static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
586 int mb_type, const int *block_offset,
587 int linesize, uint8_t *dest_y)
588 {
589 int i;
590 if (!IS_INTRA4x4(mb_type)) {
591 for (i = 0; i < 16; i++)
592 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
593 uint8_t *const ptr = dest_y + block_offset[i];
594 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize,
595 sl->qscale, IS_INTRA(mb_type) ? 1 : 0);
596 }
597 }
598 }
599
600 static av_always_inline int dctcoef_get(int16_t *mb, int index)
601 {
602 return AV_RN16A(mb + index);
603 }
604
605 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
606 const H264Context *h,
607 H264SliceContext *sl,
608 int mb_type,
609 const int *block_offset,
610 int linesize,
611 uint8_t *dest_y)
612 {
613 int i;
614 int qscale = sl->qscale;
615
616 if (IS_INTRA4x4(mb_type)) {
617 for (i = 0; i < 16; i++) {
618 uint8_t *const ptr = dest_y + block_offset[i];
619 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
620
621 uint8_t *topright;
622 int nnz, tr;
623 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
624 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
625 assert(s->mb_y || linesize <= block_offset[i]);
626 if (!topright_avail) {
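/* splat the rightmost available top pixel into all four top-right samples
 * (byte * 0x01010101) so the prediction function can read 4 bytes as usual */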
627 tr = ptr[3 - linesize] * 0x01010101u;
628 topright = (uint8_t *)&tr;
629 } else
630 topright = ptr + 4 - linesize;
631 } else
632 topright = NULL;
633
634 s->hpc.pred4x4[dir](ptr, topright, linesize);
635 nnz = sl->non_zero_count_cache[scan8[i]];
636 if (nnz) {
637 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize, qscale, 0);
638 }
639 }
640 } else {
641 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
642 svq3_luma_dc_dequant_idct_c(sl->mb, sl->mb_luma_dc[0], qscale);
643 }
644 }
645
646 static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext *sl)
647 {
648 const int mb_x = s->mb_x;
649 const int mb_y = s->mb_y;
650 const int mb_xy = s->mb_xy;
651 const int mb_type = h->cur_pic.mb_type[mb_xy];
652 uint8_t *dest_y, *dest_cb, *dest_cr;
653 int linesize, uvlinesize;
654 int i, j;
655 const int *block_offset = &h->block_offset[0];
656 const int block_h = 16 >> h->chroma_y_shift;
657
658 dest_y = h->cur_pic.f->data[0] + (mb_x + mb_y * sl->linesize) * 16;
659 dest_cb = h->cur_pic.f->data[1] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
660 dest_cr = h->cur_pic.f->data[2] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
661
662 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * sl->linesize + 64, sl->linesize, 4);
663 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * sl->uvlinesize + 64, dest_cr - dest_cb, 2);
664
665 linesize = sl->mb_linesize = sl->linesize;
666 uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;
667
668 if (IS_INTRA(mb_type)) {
669 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
670 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
671
672 hl_decode_mb_predict_luma(s, h, sl, mb_type, block_offset, linesize, dest_y);
673 }
674
675 hl_decode_mb_idct_luma(h, sl, mb_type, block_offset, linesize, dest_y);
676
677 if (sl->cbp & 0x30) {
678 uint8_t *dest[2] = { dest_cb, dest_cr };
679 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 1,
680 s->dequant4_coeff[sl->chroma_qp[0]][0]);
681 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 2,
682 s->dequant4_coeff[sl->chroma_qp[1]][0]);
683 for (j = 1; j < 3; j++) {
684 for (i = j * 16; i < j * 16 + 4; i++)
685 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
686 uint8_t *const ptr = dest[j - 1] + block_offset[i];
687 svq3_add_idct_c(ptr, sl->mb + i * 16,
688 uvlinesize, ff_h264_chroma_qp[0][sl->qscale + 12] - 12, 2);
689 }
690 }
691 }
692 }
693
694 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
695 {
696 H264Context *h = &s->h;
697 H264SliceContext *sl = &h->slice_ctx[0];
698 int i, j, k, m, dir, mode;
699 int cbp = 0;
700 uint32_t vlc;
701 int8_t *top, *left;
702 const int mb_xy = s->mb_xy;
703 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
704
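/* Bitstream mb_type values handled below: 0 = skip/direct, 1..7 = inter with
 * partition sizes 16x16, 8x16, 16x8, 8x8, 4x8, 8x4, 4x4 (mb_type - 1 is the
 * 'size' passed to svq3_mc_dir()), 8 = intra 4x4, 9..32 = intra 16x16
 * variants, 33 = intra 4x4 with DC_128 prediction. */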
705 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
706 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
707 s->topright_samples_available = 0xFFFF;
708
709 if (mb_type == 0) { /* SKIP */
710 if (s->pict_type == AV_PICTURE_TYPE_P ||
711 s->next_pic->mb_type[mb_xy] == -1) {
712 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
713 0, 0, 0, 0, 0, 0);
714
715 if (s->pict_type == AV_PICTURE_TYPE_B)
716 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
717 0, 0, 0, 0, 1, 1);
718
719 mb_type = MB_TYPE_SKIP;
720 } else {
721 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
722 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
723 return -1;
724 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
725 return -1;
726
727 mb_type = MB_TYPE_16x16;
728 }
729 } else if (mb_type < 8) { /* INTER */
730 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
731 mode = THIRDPEL_MODE;
732 else if (s->halfpel_flag &&
733 s->thirdpel_flag == !get_bits1(&h->gb))
734 mode = HALFPEL_MODE;
735 else
736 mode = FULLPEL_MODE;
737
738 /* fill caches */
739 /* note ref_cache should contain here:
740 * ????????
741 * ???11111
742 * N??11111
743 * N??11111
744 * N??11111
745 */
746
747 for (m = 0; m < 2; m++) {
748 if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
749 for (i = 0; i < 4; i++)
750 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
751 h->cur_pic.motion_val[m][b_xy - 1 + i * s->b_stride]);
752 } else {
753 for (i = 0; i < 4; i++)
754 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
755 }
756 if (s->mb_y > 0) {
757 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
758 h->cur_pic.motion_val[m][b_xy - s->b_stride],
759 4 * 2 * sizeof(int16_t));
760 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
761 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
762
763 if (s->mb_x < s->mb_width - 1) {
764 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
765 h->cur_pic.motion_val[m][b_xy - s->b_stride + 4]);
766 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
767 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
768 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
769 } else
770 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
771 if (s->mb_x > 0) {
772 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
773 h->cur_pic.motion_val[m][b_xy - s->b_stride - 1]);
774 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
775 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
776 } else
777 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
778 } else
779 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
780 PART_NOT_AVAILABLE, 8);
781
782 if (s->pict_type != AV_PICTURE_TYPE_B)
783 break;
784 }
785
786 /* decode motion vector(s) and form prediction(s) */
787 if (s->pict_type == AV_PICTURE_TYPE_P) {
788 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
789 return -1;
790 } else { /* AV_PICTURE_TYPE_B */
791 if (mb_type != 2) {
792 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
793 return -1;
794 } else {
795 for (i = 0; i < 4; i++)
796 memset(h->cur_pic.motion_val[0][b_xy + i * s->b_stride],
797 0, 4 * 2 * sizeof(int16_t));
798 }
799 if (mb_type != 1) {
800 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
801 return -1;
802 } else {
803 for (i = 0; i < 4; i++)
804 memset(h->cur_pic.motion_val[1][b_xy + i * s->b_stride],
805 0, 4 * 2 * sizeof(int16_t));
806 }
807 }
808
809 mb_type = MB_TYPE_16x16;
810 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
811 int8_t *i4x4 = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
812 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
813
814 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
815
816 if (mb_type == 8) {
817 if (s->mb_x > 0) {
818 for (i = 0; i < 4; i++)
819 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
820 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
821 s->left_samples_available = 0x5F5F;
822 }
823 if (s->mb_y > 0) {
824 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
825 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
826 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
827 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
828
829 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
830 s->top_samples_available = 0x33FF;
831 }
832
833 /* decode prediction codes for luma blocks */
834 for (i = 0; i < 16; i += 2) {
835 vlc = svq3_get_ue_golomb(&h->gb);
836
837 if (vlc >= 25) {
838 av_log(h->avctx, AV_LOG_ERROR,
839 "luma prediction:%"PRIu32"\n", vlc);
840 return -1;
841 }
842
843 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
844 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
845
846 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
847 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
848
849 if (left[1] == -1 || left[2] == -1) {
850 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
851 return -1;
852 }
853 }
854 } else { /* mb_type == 33, DC_128_PRED block type */
855 for (i = 0; i < 4; i++)
856 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
857 }
858
859 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
860 i4x4[4] = i4x4_cache[7 + 8 * 3];
861 i4x4[5] = i4x4_cache[7 + 8 * 2];
862 i4x4[6] = i4x4_cache[7 + 8 * 1];
863
864 if (mb_type == 8) {
865 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
866 h->avctx, s->top_samples_available,
867 s->left_samples_available);
868
869 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
870 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
871 } else {
872 for (i = 0; i < 4; i++)
873 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
874
875 s->top_samples_available = 0x33FF;
876 s->left_samples_available = 0x5F5F;
877 }
878
879 mb_type = MB_TYPE_INTRA4x4;
880 } else { /* INTRA16x16 */
881 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
882 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
883
884 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
885 s->left_samples_available, dir, 0)) < 0) {
886 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
887 return s->intra16x16_pred_mode;
888 }
889
890 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
891 mb_type = MB_TYPE_INTRA16x16;
892 }
893
894 if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
895 for (i = 0; i < 4; i++)
896 memset(h->cur_pic.motion_val[0][b_xy + i * s->b_stride],
897 0, 4 * 2 * sizeof(int16_t));
898 if (s->pict_type == AV_PICTURE_TYPE_B) {
899 for (i = 0; i < 4; i++)
900 memset(h->cur_pic.motion_val[1][b_xy + i * s->b_stride],
901 0, 4 * 2 * sizeof(int16_t));
902 }
903 }
904 if (!IS_INTRA4x4(mb_type)) {
905 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
906 }
907 if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
908 memset(sl->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
909 }
910
911 if (!IS_INTRA16x16(mb_type) &&
912 (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
913 if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
914 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
915 return -1;
916 }
917
918 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
919 : ff_h264_golomb_to_inter_cbp[vlc];
920 }
921 if (IS_INTRA16x16(mb_type) ||
922 (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
923 sl->qscale += svq3_get_se_golomb(&h->gb);
924
925 if (sl->qscale > 31u) {
926 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", sl->qscale);
927 return -1;
928 }
929 }
930 if (IS_INTRA16x16(mb_type)) {
931 AV_ZERO128(sl->mb_luma_dc[0] + 0);
932 AV_ZERO128(sl->mb_luma_dc[0] + 8);
933 if (svq3_decode_block(&h->gb, sl->mb_luma_dc[0], 0, 1)) {
934 av_log(h->avctx, AV_LOG_ERROR,
935 "error while decoding intra luma dc\n");
936 return -1;
937 }
938 }
939
940 if (cbp) {
941 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
942 const int type = ((sl->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
943
944 for (i = 0; i < 4; i++)
945 if ((cbp & (1 << i))) {
946 for (j = 0; j < 4; j++) {
947 k = index ? (1 * (j & 1) + 2 * (i & 1) +
948 2 * (j & 2) + 4 * (i & 2))
949 : (4 * i + j);
950 sl->non_zero_count_cache[scan8[k]] = 1;
951
952 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], index, type)) {
953 av_log(h->avctx, AV_LOG_ERROR,
954 "error while decoding block\n");
955 return -1;
956 }
957 }
958 }
959
960 if ((cbp & 0x30)) {
961 for (i = 1; i < 3; ++i)
962 if (svq3_decode_block(&h->gb, &sl->mb[16 * 16 * i], 0, 3)) {
963 av_log(h->avctx, AV_LOG_ERROR,
964 "error while decoding chroma dc block\n");
965 return -1;
966 }
967
968 if ((cbp & 0x20)) {
969 for (i = 1; i < 3; i++) {
970 for (j = 0; j < 4; j++) {
971 k = 16 * i + j;
972 sl->non_zero_count_cache[scan8[k]] = 1;
973
974 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], 1, 1)) {
975 av_log(h->avctx, AV_LOG_ERROR,
976 "error while decoding chroma ac block\n");
977 return -1;
978 }
979 }
980 }
981 }
982 }
983 }
984
985 sl->cbp = cbp;
986 h->cur_pic.mb_type[mb_xy] = mb_type;
987
988 if (IS_INTRA(mb_type))
989 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
990 s->left_samples_available, DC_PRED8x8, 1);
991
992 return 0;
993 }
994
995 static int svq3_decode_slice_header(AVCodecContext *avctx)
996 {
997 SVQ3Context *s = avctx->priv_data;
998 H264Context *h = &s->h;
999 H264SliceContext *sl = &h->slice_ctx[0];
1000 const int mb_xy = s->mb_xy;
1001 int i, header;
1002 unsigned slice_id;
1003
1004 header = get_bits(&s->gb, 8);
1005
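/* The first byte packs the start-code type in bits 0-4 and 7 (only values 1
 * and 2 are accepted) and, in bits 5-6, the byte count of the slice-length
 * field that follows; a zero byte count is rejected as well. */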
1006 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1007 /* TODO: what? */
1008 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1009 return -1;
1010 } else {
1011 int slice_bits, slice_bytes, slice_length;
1012 int length = header >> 5 & 3;
1013
1014 slice_length = show_bits(&s->gb, 8 * length);
1015 slice_bits = slice_length * 8;
1016 slice_bytes = slice_length + length - 1;
1017
1018 if (slice_bytes > get_bits_left(&s->gb)) {
1019 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1020 return -1;
1021 }
1022
1023 skip_bits(&s->gb, 8);
1024
1025 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1026 if (!s->slice_buf)
1027 return AVERROR(ENOMEM);
1028
1029 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1030
1031 init_get_bits(&h->gb, s->slice_buf, slice_bits);
1032
1033 if (s->watermark_key) {
1034 uint32_t header = AV_RL32(&h->gb.buffer[1]);
1035 AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
1036 }
1037 if (length > 0) {
1038 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1039 }
1040 skip_bits_long(&s->gb, slice_bytes * 8);
1041 }
1042
1043 if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
1044 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
1045 return -1;
1046 }
1047
1048 s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
1049
1050 if ((header & 0x9F) == 2) {
1051 i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1052 sl->mb_skip_run = get_bits(&h->gb, i) -
1053 (s->mb_y * s->mb_width + s->mb_x);
1054 } else {
1055 skip_bits1(&h->gb);
1056 sl->mb_skip_run = 0;
1057 }
1058
1059 sl->slice_num = get_bits(&h->gb, 8);
1060 sl->qscale = get_bits(&h->gb, 5);
1061 s->adaptive_quant = get_bits1(&h->gb);
1062
1063 /* unknown fields */
1064 skip_bits1(&h->gb);
1065
1066 if (s->unknown_flag)
1067 skip_bits1(&h->gb);
1068
1069 skip_bits1(&h->gb);
1070 skip_bits(&h->gb, 2);
1071
1072 while (get_bits1(&h->gb))
1073 skip_bits(&h->gb, 8);
1074
1075 /* reset intra predictors and invalidate motion vector references */
1076 if (s->mb_x > 0) {
1077 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1078 -1, 4 * sizeof(int8_t));
1079 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1080 -1, 8 * sizeof(int8_t) * s->mb_x);
1081 }
1082 if (s->mb_y > 0) {
1083 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1084 -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1085
1086 if (s->mb_x > 0)
1087 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1088 }
1089
1090 return 0;
1091 }
1092
1093 static void init_dequant4_coeff_table(SVQ3Context *s)
1094 {
1095 int q, x;
1096 const int max_qp = 51;
1097
1098 for (q = 0; q < max_qp + 1; q++) {
1099 int shift = ff_h264_quant_div6[q] + 2;
1100 int idx = ff_h264_quant_rem6[q];
1101 for (x = 0; x < 16; x++)
1102 s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1103 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1104 }
1105 }
1106
1107 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1108 {
1109 SVQ3Context *s = avctx->priv_data;
1110 H264Context *h = &s->h;
1111 H264SliceContext *sl;
1112 int m, x, y;
1113 unsigned char *extradata;
1114 unsigned char *extradata_end;
1115 unsigned int size;
1116 int marker_found = 0;
1117
1118 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1119 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1120 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1121 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1122 av_freep(&s->cur_pic);
1123 av_freep(&s->last_pic);
1124 av_freep(&s->next_pic);
1125 return AVERROR(ENOMEM);
1126 }
1127
1128 s->cur_pic->f = av_frame_alloc();
1129 s->last_pic->f = av_frame_alloc();
1130 s->next_pic->f = av_frame_alloc();
1131 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1132 return AVERROR(ENOMEM);
1133
1134 if (ff_h264_decode_init(avctx) < 0)
1135 return -1;
1136
1137 // we will overwrite it later during decoding
1138 av_frame_free(&h->cur_pic.f);
1139
1140 ff_h264dsp_init(&s->h264dsp, 8, 1);
1141 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1142 ff_videodsp_init(&s->vdsp, 8);
1143
1144 h->sps.bit_depth_luma = 8;
1145 h->chroma_format_idc = 1;
1146
1147 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1148 ff_tpeldsp_init(&s->tdsp);
1149
1150 sl = h->slice_ctx;
1151
1152 h->flags = avctx->flags;
1153 sl->is_complex = 1;
1154 h->picture_structure = PICT_FRAME;
1155 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1156 avctx->color_range = AVCOL_RANGE_JPEG;
1157
1158 h->slice_ctx[0].chroma_qp[0] = h->slice_ctx[0].chroma_qp[1] = 4;
1159 h->chroma_x_shift = h->chroma_y_shift = 1;
1160
1161 s->halfpel_flag = 1;
1162 s->thirdpel_flag = 1;
1163 s->unknown_flag = 0;
1164
1165 /* prowl for the "SEQH" marker in the extradata */
1166 extradata = (unsigned char *)avctx->extradata;
1167 extradata_end = avctx->extradata + avctx->extradata_size;
1168 if (extradata) {
1169 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1170 if (!memcmp(extradata, "SEQH", 4)) {
1171 marker_found = 1;
1172 break;
1173 }
1174 extradata++;
1175 }
1176 }
1177
1178 /* if a match was found, parse the extra data */
1179 if (marker_found) {
1180 GetBitContext gb;
1181 int frame_size_code;
1182
1183 size = AV_RB32(&extradata[4]);
1184 if (size > extradata_end - extradata - 8)
1185 return AVERROR_INVALIDDATA;
1186 init_get_bits(&gb, extradata + 8, size * 8);
1187
1188 /* 'frame size code' and optional 'width, height' */
1189 frame_size_code = get_bits(&gb, 3);
1190 switch (frame_size_code) {
1191 case 0:
1192 avctx->width = 160;
1193 avctx->height = 120;
1194 break;
1195 case 1:
1196 avctx->width = 128;
1197 avctx->height = 96;
1198 break;
1199 case 2:
1200 avctx->width = 176;
1201 avctx->height = 144;
1202 break;
1203 case 3:
1204 avctx->width = 352;
1205 avctx->height = 288;
1206 break;
1207 case 4:
1208 avctx->width = 704;
1209 avctx->height = 576;
1210 break;
1211 case 5:
1212 avctx->width = 240;
1213 avctx->height = 180;
1214 break;
1215 case 6:
1216 avctx->width = 320;
1217 avctx->height = 240;
1218 break;
1219 case 7:
1220 avctx->width = get_bits(&gb, 12);
1221 avctx->height = get_bits(&gb, 12);
1222 break;
1223 }
1224
1225 s->halfpel_flag = get_bits1(&gb);
1226 s->thirdpel_flag = get_bits1(&gb);
1227
1228 /* unknown fields */
1229 skip_bits1(&gb);
1230 skip_bits1(&gb);
1231 skip_bits1(&gb);
1232 skip_bits1(&gb);
1233
1234 h->low_delay = get_bits1(&gb);
1235
1236 /* unknown field */
1237 skip_bits1(&gb);
1238
1239 while (get_bits1(&gb))
1240 skip_bits(&gb, 8);
1241
1242 s->unknown_flag = get_bits1(&gb);
1243 avctx->has_b_frames = !h->low_delay;
1244 if (s->unknown_flag) {
1245 #if CONFIG_ZLIB
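/* The rest of the extradata holds a zlib-compressed watermark bitmap; the
 * checksum of the decompressed logo becomes a 16-bit key, replicated to
 * 32 bits, that svq3_decode_slice_header() XORs over bytes 1..4 of each
 * slice. */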
1246 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1247 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1248 int u1 = svq3_get_ue_golomb(&gb);
1249 int u2 = get_bits(&gb, 8);
1250 int u3 = get_bits(&gb, 2);
1251 int u4 = svq3_get_ue_golomb(&gb);
1252 unsigned long buf_len = watermark_width *
1253 watermark_height * 4;
1254 int offset = get_bits_count(&gb) + 7 >> 3;
1255 uint8_t *buf;
1256
1257 if (watermark_height > 0 &&
1258 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1259 return -1;
1260
1261 buf = av_malloc(buf_len);
/* guard against allocation failure before uncompress() writes into buf */
if (!buf)
return AVERROR(ENOMEM);
1262 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1263 watermark_width, watermark_height);
1264 av_log(avctx, AV_LOG_DEBUG,
1265 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1266 u1, u2, u3, u4, offset);
1267 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1268 size - offset) != Z_OK) {
1269 av_log(avctx, AV_LOG_ERROR,
1270 "could not uncompress watermark logo\n");
1271 av_free(buf);
1272 return -1;
1273 }
1274 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1275 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1276 av_log(avctx, AV_LOG_DEBUG,
1277 "watermark key %#"PRIx32"\n", s->watermark_key);
1278 av_free(buf);
1279 #else
1280 av_log(avctx, AV_LOG_ERROR,
1281 "this svq3 file contains watermark which need zlib support compiled in\n");
1282 return -1;
1283 #endif
1284 }
1285 }
1286
1287 s->mb_width = (avctx->width + 15) / 16;
1288 s->mb_height = (avctx->height + 15) / 16;
1289 s->mb_stride = s->mb_width + 1;
1290 s->mb_num = s->mb_width * s->mb_height;
1291 s->b_stride = 4 * s->mb_width;
1292 s->h_edge_pos = s->mb_width * 16;
1293 s->v_edge_pos = s->mb_height * 16;
1294
1295 s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1296 if (!s->intra4x4_pred_mode)
1297 return AVERROR(ENOMEM);
1298
1299 s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1300 sizeof(*s->mb2br_xy));
1301 if (!s->mb2br_xy)
1302 return AVERROR(ENOMEM);
1303
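/* intra4x4_pred_mode keeps only two macroblock rows of context (8 entries
 * per MB), so mb2br_xy wraps every 2 * mb_stride macroblocks via the modulo
 * below. */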
1304 for (y = 0; y < s->mb_height; y++)
1305 for (x = 0; x < s->mb_width; x++) {
1306 const int mb_xy = x + y * s->mb_stride;
1307
1308 s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1309 }
1310
1311 init_dequant4_coeff_table(s);
1312
1313 return 0;
1314 }
1315
1316 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1317 {
1318 int i;
1319 for (i = 0; i < 2; i++) {
1320 av_buffer_unref(&pic->motion_val_buf[i]);
1321 av_buffer_unref(&pic->ref_index_buf[i]);
1322 }
1323 av_buffer_unref(&pic->mb_type_buf);
1324
1325 av_frame_unref(pic->f);
1326 }
1327
1328 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1329 {
1330 SVQ3Context *s = avctx->priv_data;
1331 H264Context *h = &s->h;
1332 H264SliceContext *sl = &h->slice_ctx[0];
1333 const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
1334 const int mb_array_size = s->mb_stride * s->mb_height;
1335 const int b4_stride = s->mb_width * 4 + 1;
1336 const int b4_array_size = b4_stride * s->mb_height * 4;
1337 int ret;
1338
1339 if (!pic->motion_val_buf[0]) {
1340 int i;
1341
1342 pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1343 if (!pic->mb_type_buf)
1344 return AVERROR(ENOMEM);
1345 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1346
1347 for (i = 0; i < 2; i++) {
1348 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1349 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1350 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1351 ret = AVERROR(ENOMEM);
1352 goto fail;
1353 }
1354
1355 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1356 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1357 }
1358 }
1359 pic->reference = !(s->pict_type == AV_PICTURE_TYPE_B);
1360
1361 ret = ff_get_buffer(avctx, pic->f,
1362 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1363 if (ret < 0)
1364 goto fail;
1365
1366 if (!s->edge_emu_buffer) {
1367 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1368 if (!s->edge_emu_buffer)
1369 return AVERROR(ENOMEM);
1370 }
1371
1372 sl->linesize = pic->f->linesize[0];
1373 sl->uvlinesize = pic->f->linesize[1];
1374
1375 return 0;
1376 fail:
1377 free_picture(avctx, pic);
1378 return ret;
1379 }
1380
1381 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1382 int *got_frame, AVPacket *avpkt)
1383 {
1384 const uint8_t *buf = avpkt->data;
1385 SVQ3Context *s = avctx->priv_data;
1386 H264Context *h = &s->h;
1387 H264SliceContext *sl = &h->slice_ctx[0];
1388 int buf_size = avpkt->size;
1389 int ret, m, i;
1390
1391 /* special case for last picture */
1392 if (buf_size == 0) {
1393 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1394 ret = av_frame_ref(data, s->next_pic->f);
1395 if (ret < 0)
1396 return ret;
1397 s->last_frame_output = 1;
1398 *got_frame = 1;
1399 }
1400 return 0;
1401 }
1402
1403 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1404 if (ret < 0)
1405 return ret;
1406
1407 s->mb_x = s->mb_y = s->mb_xy = 0;
1408
1409 if (svq3_decode_slice_header(avctx))
1410 return -1;
1411
1412 if (s->pict_type != AV_PICTURE_TYPE_B)
1413 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1414
1415 av_frame_unref(s->cur_pic->f);
1416
1417 /* for skipping the frame */
1418 s->cur_pic->f->pict_type = s->pict_type;
1419 s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1420
1421 ret = get_buffer(avctx, s->cur_pic);
1422 if (ret < 0)
1423 return ret;
1424
1425 h->cur_pic_ptr = s->cur_pic;
1426 h->cur_pic = *s->cur_pic;
1427
1428 for (i = 0; i < 16; i++) {
1429 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1430 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1431 }
1432 for (i = 0; i < 16; i++) {
1433 h->block_offset[16 + i] =
1434 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1435 h->block_offset[48 + 16 + i] =
1436 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1437 }
1438
1439 if (s->pict_type != AV_PICTURE_TYPE_I) {
1440 if (!s->last_pic->f->data[0]) {
1441 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1442 ret = get_buffer(avctx, s->last_pic);
1443 if (ret < 0)
1444 return ret;
1445 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1446 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1447 s->last_pic->f->linesize[1]);
1448 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1449 s->last_pic->f->linesize[2]);
1450 }
1451
1452 if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1453 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1454 ret = get_buffer(avctx, s->next_pic);
1455 if (ret < 0)
1456 return ret;
1457 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1458 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1459 s->next_pic->f->linesize[1]);
1460 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1461 s->next_pic->f->linesize[2]);
1462 }
1463 }
1464
1465 if (avctx->debug & FF_DEBUG_PICT_INFO)
1466 av_log(h->avctx, AV_LOG_DEBUG,
1467 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1468 av_get_picture_type_char(s->pict_type),
1469 s->halfpel_flag, s->thirdpel_flag,
1470 s->adaptive_quant, h->slice_ctx[0].qscale, sl->slice_num);
1471
1472 if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1473 avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1474 avctx->skip_frame >= AVDISCARD_ALL)
1475 return 0;
1476
1477 if (s->next_p_frame_damaged) {
1478 if (s->pict_type == AV_PICTURE_TYPE_B)
1479 return 0;
1480 else
1481 s->next_p_frame_damaged = 0;
1482 }
1483
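/* sl->slice_num doubles as the frame counter here; the offsets computed
 * below drive the temporal scaling of co-located motion vectors in
 * svq3_mc_dir()'s PREDICT_MODE (skipped / direct blocks). */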
1484 if (s->pict_type == AV_PICTURE_TYPE_B) {
1485 h->frame_num_offset = sl->slice_num - h->prev_frame_num;
1486
1487 if (h->frame_num_offset < 0)
1488 h->frame_num_offset += 256;
1489 if (h->frame_num_offset == 0 ||
1490 h->frame_num_offset >= h->prev_frame_num_offset) {
1491 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1492 return -1;
1493 }
1494 } else {
1495 h->prev_frame_num = h->frame_num;
1496 h->frame_num = sl->slice_num;
1497 h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
1498
1499 if (h->prev_frame_num_offset < 0)
1500 h->prev_frame_num_offset += 256;
1501 }
1502
1503 for (m = 0; m < 2; m++) {
1504 int i;
1505 for (i = 0; i < 4; i++) {
1506 int j;
1507 for (j = -1; j < 4; j++)
1508 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1509 if (i < 3)
1510 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1511 }
1512 }
1513
1514 for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1515 for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1516 unsigned mb_type;
1517 s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1518
1519 if ((get_bits_left(&h->gb)) <= 7) {
1520 if (((get_bits_count(&h->gb) & 7) == 0 ||
1521 show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
1522
1523 if (svq3_decode_slice_header(avctx))
1524 return -1;
1525 }
1526 /* TODO: support s->mb_skip_run */
1527 }
1528
1529 mb_type = svq3_get_ue_golomb(&h->gb);
1530
1531 if (s->pict_type == AV_PICTURE_TYPE_I)
1532 mb_type += 8;
1533 else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1534 mb_type += 4;
1535 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1536 av_log(h->avctx, AV_LOG_ERROR,
1537 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1538 return -1;
1539 }
1540
1541 if (mb_type != 0)
1542 hl_decode_mb(s, h, &h->slice_ctx[0]);
1543
1544 if (s->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1545 h->cur_pic.mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1546 (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1547 }
1548
1549 ff_draw_horiz_band(avctx, s->cur_pic->f,
1550 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1551 16 * s->mb_y, 16, h->picture_structure, 0,
1552 h->low_delay);
1553 }
1554
1555 if (s->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1556 ret = av_frame_ref(data, s->cur_pic->f);
1557 else if (s->last_pic->f->data[0])
1558 ret = av_frame_ref(data, s->last_pic->f);
1559 if (ret < 0)
1560 return ret;
1561
1562 /* Do not output the last pic after seeking. */
1563 if (s->last_pic->f->data[0] || h->low_delay)
1564 *got_frame = 1;
1565
1566 if (s->pict_type != AV_PICTURE_TYPE_B) {
1567 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1568 } else {
1569 av_frame_unref(s->cur_pic->f);
1570 }
1571
1572 return buf_size;
1573 }
1574
1575 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1576 {
1577 SVQ3Context *s = avctx->priv_data;
1578 H264Context *h = &s->h;
1579
1580 free_picture(avctx, s->cur_pic);
1581 free_picture(avctx, s->next_pic);
1582 free_picture(avctx, s->last_pic);
1583 av_frame_free(&s->cur_pic->f);
1584 av_frame_free(&s->next_pic->f);
1585 av_frame_free(&s->last_pic->f);
1586 av_freep(&s->cur_pic);
1587 av_freep(&s->next_pic);
1588 av_freep(&s->last_pic);
1589 av_freep(&s->slice_buf);
1590 av_freep(&s->intra4x4_pred_mode);
1591 av_freep(&s->edge_emu_buffer);
1592 av_freep(&s->mb2br_xy);
1593
1594 memset(&h->cur_pic, 0, sizeof(h->cur_pic));
1595
1596 ff_h264_free_context(h);
1597
1598 return 0;
1599 }
1600
1601 AVCodec ff_svq3_decoder = {
1602 .name = "svq3",
1603 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1604 .type = AVMEDIA_TYPE_VIDEO,
1605 .id = AV_CODEC_ID_SVQ3,
1606 .priv_data_size = sizeof(SVQ3Context),
1607 .init = svq3_decode_init,
1608 .close = svq3_decode_end,
1609 .decode = svq3_decode_frame,
1610 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1611 AV_CODEC_CAP_DR1 |
1612 AV_CODEC_CAP_DELAY,
1613 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1614 AV_PIX_FMT_NONE},
1615 };