svq3: drop the build dependency on the h264 decoder
[libav.git] / libavcodec / svq3.c
1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains 1 or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know these parameters have been passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
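/*
 * As a rough illustration only (not code from this decoder), a calling app
 * that has already extracted the ImageDescription payload into hypothetical
 * variables "imagedesc"/"imagedesc_size" (starting at the 'SVQ3' fourcc,
 * i.e. with the leading 4-byte atom length already stripped) might pass it
 * along roughly like this before opening the codec:
 *
 *     AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_SVQ3);
 *     AVCodecContext *avctx = avcodec_alloc_context3(codec);
 *     if (!avctx)
 *         return AVERROR(ENOMEM);
 *     avctx->extradata = av_mallocz(imagedesc_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, imagedesc, imagedesc_size);
 *     avctx->extradata_size = imagedesc_size;
 *     if (avcodec_open2(avctx, codec, NULL) < 0)
 *         return -1;
 *
 * When demuxing with libavformat's mov demuxer, this is normally done for you.
 */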
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264data.h"
51 #include "golomb.h"
52 #include "hpeldsp.h"
53 #include "mathops.h"
54 #include "rectangle.h"
55 #include "tpeldsp.h"
56
57 #if CONFIG_ZLIB
58 #include <zlib.h>
59 #endif
60
61 #include "svq1.h"
62
63 /**
64 * @file
65 * svq3 decoder.
66 */
67
68 typedef struct SVQ3Context {
69 AVCodecContext *avctx;
70
71 H264DSPContext h264dsp;
72 H264PredContext hpc;
73 HpelDSPContext hdsp;
74 TpelDSPContext tdsp;
75 VideoDSPContext vdsp;
76
77 H264Picture *cur_pic;
78 H264Picture *next_pic;
79 H264Picture *last_pic;
80 GetBitContext gb;
81 GetBitContext gb_slice;
82 uint8_t *slice_buf;
83 int slice_size;
84 int halfpel_flag;
85 int thirdpel_flag;
86 int unknown_flag;
87 uint32_t watermark_key;
88 int adaptive_quant;
89 int next_p_frame_damaged;
90 int h_edge_pos;
91 int v_edge_pos;
92 int last_frame_output;
93 int slice_num;
94 int qscale;
95 int cbp;
96 int frame_num;
97 int frame_num_offset;
98 int prev_frame_num_offset;
99 int prev_frame_num;
100
101 enum AVPictureType pict_type;
102 int low_delay;
103
104 int mb_x, mb_y;
105 int mb_xy;
106 int mb_width, mb_height;
107 int mb_stride, mb_num;
108 int b_stride;
109
110 uint32_t *mb2br_xy;
111
112 int chroma_pred_mode;
113 int intra16x16_pred_mode;
114
115 int8_t intra4x4_pred_mode_cache[5 * 8];
116 int8_t (*intra4x4_pred_mode);
117
118 unsigned int top_samples_available;
119 unsigned int topright_samples_available;
120 unsigned int left_samples_available;
121
122 uint8_t *edge_emu_buffer;
123
124 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
125 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
126 DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
127 DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
128 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
129 uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
130 int block_offset[2 * (16 * 3)];
131 } SVQ3Context;
132
133 #define FULLPEL_MODE 1
134 #define HALFPEL_MODE 2
135 #define THIRDPEL_MODE 3
136 #define PREDICT_MODE 4
137
138 /* dual scan (from some older h264 draft)
139 * o-->o-->o   o
140 *         |  /|
141 * o   o   o / o
142 * |  /|   |/  |
143 * o   o   o   o
144 *   /
145 * o-->o-->o-->o
146 */
147 static const uint8_t svq3_scan[16] = {
148 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
149 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
150 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
151 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
152 };
153
154 static const uint8_t luma_dc_zigzag_scan[16] = {
155 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
156 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
157 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
158 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
159 };
160
161 static const uint8_t svq3_pred_0[25][2] = {
162 { 0, 0 },
163 { 1, 0 }, { 0, 1 },
164 { 0, 2 }, { 1, 1 }, { 2, 0 },
165 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
166 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
167 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
168 { 2, 4 }, { 3, 3 }, { 4, 2 },
169 { 4, 3 }, { 3, 4 },
170 { 4, 4 }
171 };
172
173 static const int8_t svq3_pred_1[6][6][5] = {
174 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
175 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
176 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
177 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
178 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
179 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
180 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
181 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
182 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
183 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
184 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
185 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
186 };
187
188 static const struct {
189 uint8_t run;
190 uint8_t level;
191 } svq3_dct_tables[2][16] = {
192 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
193 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
194 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
195 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
196 };
197
198 static const uint32_t svq3_dequant_coeff[32] = {
199 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
200 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
201 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
202 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
203 };
204
205 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
206 {
207 const int qmul = svq3_dequant_coeff[qp];
208 #define stride 16
209 int i;
210 int temp[16];
211 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
212
213 for (i = 0; i < 4; i++) {
214 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
215 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
216 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
217 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
218
219 temp[4 * i + 0] = z0 + z3;
220 temp[4 * i + 1] = z1 + z2;
221 temp[4 * i + 2] = z1 - z2;
222 temp[4 * i + 3] = z0 - z3;
223 }
224
225 for (i = 0; i < 4; i++) {
226 const int offset = x_offset[i];
227 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
228 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
229 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
230 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
231
232 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
233 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
234 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
235 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
236 }
237 }
238 #undef stride
239
240 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
241 int stride, int qp, int dc)
242 {
243 const int qmul = svq3_dequant_coeff[qp];
244 int i;
245
246 if (dc) {
247 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
248 : qmul * (block[0] >> 3) / 2);
249 block[0] = 0;
250 }
251
252 for (i = 0; i < 4; i++) {
253 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
254 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
255 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
256 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
257
258 block[0 + 4 * i] = z0 + z3;
259 block[1 + 4 * i] = z1 + z2;
260 block[2 + 4 * i] = z1 - z2;
261 block[3 + 4 * i] = z0 - z3;
262 }
263
264 for (i = 0; i < 4; i++) {
265 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
266 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
267 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
268 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
269 const int rr = (dc + 0x80000);
270
271 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
272 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
273 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
274 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
275 }
276
277 memset(block, 0, 16 * sizeof(int16_t));
278 }
279
280 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
281 int index, const int type)
282 {
283 static const uint8_t *const scan_patterns[4] = {
284 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
285 };
286
287 int run, level, limit;
288 unsigned vlc;
289 const int intra = 3 * type >> 2;
290 const uint8_t *const scan = scan_patterns[type];
291
292 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
293 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
294 int sign = (vlc & 1) ? 0 : -1;
295 vlc = vlc + 1 >> 1;
296
297 if (type == 3) {
298 if (vlc < 3) {
299 run = 0;
300 level = vlc;
301 } else if (vlc < 4) {
302 run = 1;
303 level = 1;
304 } else {
305 run = vlc & 0x3;
306 level = (vlc + 9 >> 2) - run;
307 }
308 } else {
309 if (vlc < 16) {
310 run = svq3_dct_tables[intra][vlc].run;
311 level = svq3_dct_tables[intra][vlc].level;
312 } else if (intra) {
313 run = vlc & 0x7;
314 level = (vlc >> 3) +
315 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
316 } else {
317 run = vlc & 0xF;
318 level = (vlc >> 4) +
319 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
320 }
321 }
322
323 if ((index += run) >= limit)
324 return -1;
325
326 block[scan[index]] = (level ^ sign) - sign;
327 }
328
329 if (type != 2) {
330 break;
331 }
332 }
333
334 return 0;
335 }
336
337 static av_always_inline int
338 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
339 int i, int list, int part_width)
340 {
341 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
342
343 if (topright_ref != PART_NOT_AVAILABLE) {
344 *C = s->mv_cache[list][i - 8 + part_width];
345 return topright_ref;
346 } else {
347 *C = s->mv_cache[list][i - 8 - 1];
348 return s->ref_cache[list][i - 8 - 1];
349 }
350 }
351
352 /**
353 * Get the predicted MV.
354 * @param n the block index
355 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
356 * @param mx the x component of the predicted motion vector
357 * @param my the y component of the predicted motion vector
358 */
359 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
360 int part_width, int list,
361 int ref, int *const mx, int *const my)
362 {
363 const int index8 = scan8[n];
364 const int top_ref = s->ref_cache[list][index8 - 8];
365 const int left_ref = s->ref_cache[list][index8 - 1];
366 const int16_t *const A = s->mv_cache[list][index8 - 1];
367 const int16_t *const B = s->mv_cache[list][index8 - 8];
368 const int16_t *C;
369 int diagonal_ref, match_count;
370
371 /* mv_cache
372 * B . . A T T T T
373 * U . . L . . , .
374 * U . . L . . . .
375 * U . . L . . , .
376 * . . . L . . . .
377 */
378
379 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
380 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
381 if (match_count > 1) { //most common
382 *mx = mid_pred(A[0], B[0], C[0]);
383 *my = mid_pred(A[1], B[1], C[1]);
384 } else if (match_count == 1) {
385 if (left_ref == ref) {
386 *mx = A[0];
387 *my = A[1];
388 } else if (top_ref == ref) {
389 *mx = B[0];
390 *my = B[1];
391 } else {
392 *mx = C[0];
393 *my = C[1];
394 }
395 } else {
396 if (top_ref == PART_NOT_AVAILABLE &&
397 diagonal_ref == PART_NOT_AVAILABLE &&
398 left_ref != PART_NOT_AVAILABLE) {
399 *mx = A[0];
400 *my = A[1];
401 } else {
402 *mx = mid_pred(A[0], B[0], C[0]);
403 *my = mid_pred(A[1], B[1], C[1]);
404 }
405 }
406 }
407
408 static inline void svq3_mc_dir_part(SVQ3Context *s,
409 int x, int y, int width, int height,
410 int mx, int my, int dxy,
411 int thirdpel, int dir, int avg)
412 {
413 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
414 uint8_t *src, *dest;
415 int i, emu = 0;
416 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
417 int linesize = s->cur_pic->f->linesize[0];
418 int uvlinesize = s->cur_pic->f->linesize[1];
419
420 mx += x;
421 my += y;
422
423 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
424 my < 0 || my >= s->v_edge_pos - height - 1) {
425 emu = 1;
426 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
427 my = av_clip(my, -16, s->v_edge_pos - height + 15);
428 }
429
430 /* form component predictions */
431 dest = s->cur_pic->f->data[0] + x + y * linesize;
432 src = pic->f->data[0] + mx + my * linesize;
433
434 if (emu) {
435 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
436 linesize, linesize,
437 width + 1, height + 1,
438 mx, my, s->h_edge_pos, s->v_edge_pos);
439 src = s->edge_emu_buffer;
440 }
441 if (thirdpel)
442 (avg ? s->tdsp.avg_tpel_pixels_tab
443 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
444 width, height);
445 else
446 (avg ? s->hdsp.avg_pixels_tab
447 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
448 height);
449
450 if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
451 mx = mx + (mx < (int) x) >> 1;
452 my = my + (my < (int) y) >> 1;
453 width = width >> 1;
454 height = height >> 1;
455 blocksize++;
456
457 for (i = 1; i < 3; i++) {
458 dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
459 src = pic->f->data[i] + mx + my * uvlinesize;
460
461 if (emu) {
462 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
463 uvlinesize, uvlinesize,
464 width + 1, height + 1,
465 mx, my, (s->h_edge_pos >> 1),
466 s->v_edge_pos >> 1);
467 src = s->edge_emu_buffer;
468 }
469 if (thirdpel)
470 (avg ? s->tdsp.avg_tpel_pixels_tab
471 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
472 uvlinesize,
473 width, height);
474 else
475 (avg ? s->hdsp.avg_pixels_tab
476 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
477 uvlinesize,
478 height);
479 }
480 }
481 }
482
483 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
484 int dir, int avg)
485 {
486 int i, j, k, mx, my, dx, dy, x, y;
487 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
488 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
489 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
490 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
491 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
492
493 for (i = 0; i < 16; i += part_height)
494 for (j = 0; j < 16; j += part_width) {
495 const int b_xy = (4 * s->mb_x + (j >> 2)) +
496 (4 * s->mb_y + (i >> 2)) * s->b_stride;
497 int dxy;
498 x = 16 * s->mb_x + j;
499 y = 16 * s->mb_y + i;
500 k = (j >> 2 & 1) + (i >> 1 & 2) +
501 (j >> 1 & 4) + (i & 8);
502
503 if (mode != PREDICT_MODE) {
504 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
505 } else {
506 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
507 my = s->next_pic->motion_val[0][b_xy][1] << 1;
508
509 if (dir == 0) {
510 mx = mx * s->frame_num_offset /
511 s->prev_frame_num_offset + 1 >> 1;
512 my = my * s->frame_num_offset /
513 s->prev_frame_num_offset + 1 >> 1;
514 } else {
515 mx = mx * (s->frame_num_offset - s->prev_frame_num_offset) /
516 s->prev_frame_num_offset + 1 >> 1;
517 my = my * (s->frame_num_offset - s->prev_frame_num_offset) /
518 s->prev_frame_num_offset + 1 >> 1;
519 }
520 }
521
522 /* clip motion vector prediction to frame border */
523 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
524 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
525
526 /* get (optional) motion vector differential */
527 if (mode == PREDICT_MODE) {
528 dx = dy = 0;
529 } else {
530 dy = svq3_get_se_golomb(&s->gb_slice);
531 dx = svq3_get_se_golomb(&s->gb_slice);
532
533 if (dx == INVALID_VLC || dy == INVALID_VLC) {
534 av_log(s->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
535 return -1;
536 }
537 }
538
539 /* compute motion vector */
540 if (mode == THIRDPEL_MODE) {
541 int fx, fy;
542 mx = (mx + 1 >> 1) + dx;
543 my = (my + 1 >> 1) + dy;
544 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
545 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
546 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
547
548 svq3_mc_dir_part(s, x, y, part_width, part_height,
549 fx, fy, dxy, 1, dir, avg);
550 mx += mx;
551 my += my;
552 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
553 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
554 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
555 dxy = (mx & 1) + 2 * (my & 1);
556
557 svq3_mc_dir_part(s, x, y, part_width, part_height,
558 mx >> 1, my >> 1, dxy, 0, dir, avg);
559 mx *= 3;
560 my *= 3;
561 } else {
562 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
563 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
564
565 svq3_mc_dir_part(s, x, y, part_width, part_height,
566 mx, my, 0, 0, dir, avg);
567 mx *= 6;
568 my *= 6;
569 }
570
571 /* update mv_cache */
572 if (mode != PREDICT_MODE) {
573 int32_t mv = pack16to32(mx, my);
574
575 if (part_height == 8 && i < 8) {
576 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
577
578 if (part_width == 8 && j < 8)
579 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
580 }
581 if (part_width == 8 && j < 8)
582 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
583 if (part_width == 4 || part_height == 4)
584 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
585 }
586
587 /* write back motion vectors */
588 fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
589 part_width >> 2, part_height >> 2, s->b_stride,
590 pack16to32(mx, my), 4);
591 }
592
593 return 0;
594 }
595
596 static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
597 int mb_type, const int *block_offset,
598 int linesize, uint8_t *dest_y)
599 {
600 int i;
601 if (!IS_INTRA4x4(mb_type)) {
602 for (i = 0; i < 16; i++)
603 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
604 uint8_t *const ptr = dest_y + block_offset[i];
605 svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
606 s->qscale, IS_INTRA(mb_type) ? 1 : 0);
607 }
608 }
609 }
610
611 static av_always_inline int dctcoef_get(int16_t *mb, int index)
612 {
613 return AV_RN16A(mb + index);
614 }
615
616 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
617 int mb_type,
618 const int *block_offset,
619 int linesize,
620 uint8_t *dest_y)
621 {
622 int i;
623 int qscale = s->qscale;
624
625 if (IS_INTRA4x4(mb_type)) {
626 for (i = 0; i < 16; i++) {
627 uint8_t *const ptr = dest_y + block_offset[i];
628 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
629
630 uint8_t *topright;
631 int nnz, tr;
632 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
633 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
634 assert(s->mb_y || linesize <= block_offset[i]);
635 if (!topright_avail) {
636 tr = ptr[3 - linesize] * 0x01010101u;
637 topright = (uint8_t *)&tr;
638 } else
639 topright = ptr + 4 - linesize;
640 } else
641 topright = NULL;
642
643 s->hpc.pred4x4[dir](ptr, topright, linesize);
644 nnz = s->non_zero_count_cache[scan8[i]];
645 if (nnz) {
646 svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
647 }
648 }
649 } else {
650 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
651 svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
652 }
653 }
654
655 static void hl_decode_mb(SVQ3Context *s)
656 {
657 const int mb_x = s->mb_x;
658 const int mb_y = s->mb_y;
659 const int mb_xy = s->mb_xy;
660 const int mb_type = s->cur_pic->mb_type[mb_xy];
661 uint8_t *dest_y, *dest_cb, *dest_cr;
662 int linesize, uvlinesize;
663 int i, j;
664 const int *block_offset = &s->block_offset[0];
665 const int block_h = 16 >> 1;
666
667 linesize = s->cur_pic->f->linesize[0];
668 uvlinesize = s->cur_pic->f->linesize[1];
669
670 dest_y = s->cur_pic->f->data[0] + (mb_x + mb_y * linesize) * 16;
671 dest_cb = s->cur_pic->f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
672 dest_cr = s->cur_pic->f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;
673
674 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
675 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);
676
677 if (IS_INTRA(mb_type)) {
678 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
679 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
680
681 hl_decode_mb_predict_luma(s, mb_type, block_offset, linesize, dest_y);
682 }
683
684 hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
685
686 if (s->cbp & 0x30) {
687 uint8_t *dest[2] = { dest_cb, dest_cr };
688 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
689 s->dequant4_coeff[4][0]);
690 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
691 s->dequant4_coeff[4][0]);
692 for (j = 1; j < 3; j++) {
693 for (i = j * 16; i < j * 16 + 4; i++)
694 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
695 uint8_t *const ptr = dest[j - 1] + block_offset[i];
696 svq3_add_idct_c(ptr, s->mb + i * 16,
697 uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
698 }
699 }
700 }
701 }
702
703 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
704 {
705 int i, j, k, m, dir, mode;
706 int cbp = 0;
707 uint32_t vlc;
708 int8_t *top, *left;
709 const int mb_xy = s->mb_xy;
710 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
711
712 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
713 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
714 s->topright_samples_available = 0xFFFF;
715
716 if (mb_type == 0) { /* SKIP */
717 if (s->pict_type == AV_PICTURE_TYPE_P ||
718 s->next_pic->mb_type[mb_xy] == -1) {
719 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
720 0, 0, 0, 0, 0, 0);
721
722 if (s->pict_type == AV_PICTURE_TYPE_B)
723 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
724 0, 0, 0, 0, 1, 1);
725
726 mb_type = MB_TYPE_SKIP;
727 } else {
728 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
729 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
730 return -1;
731 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
732 return -1;
733
734 mb_type = MB_TYPE_16x16;
735 }
736 } else if (mb_type < 8) { /* INTER */
737 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&s->gb_slice))
738 mode = THIRDPEL_MODE;
739 else if (s->halfpel_flag &&
740 s->thirdpel_flag == !get_bits1(&s->gb_slice))
741 mode = HALFPEL_MODE;
742 else
743 mode = FULLPEL_MODE;
744
745 /* fill caches */
746 /* note ref_cache should contain here:
747 * ????????
748 * ???11111
749 * N??11111
750 * N??11111
751 * N??11111
752 */
753
754 for (m = 0; m < 2; m++) {
755 if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
756 for (i = 0; i < 4; i++)
757 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
758 s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
759 } else {
760 for (i = 0; i < 4; i++)
761 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
762 }
763 if (s->mb_y > 0) {
764 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
765 s->cur_pic->motion_val[m][b_xy - s->b_stride],
766 4 * 2 * sizeof(int16_t));
767 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
768 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
769
770 if (s->mb_x < s->mb_width - 1) {
771 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
772 s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
773 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
774 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
775 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
776 } else
777 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
778 if (s->mb_x > 0) {
779 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
780 s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
781 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
782 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
783 } else
784 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
785 } else
786 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
787 PART_NOT_AVAILABLE, 8);
788
789 if (s->pict_type != AV_PICTURE_TYPE_B)
790 break;
791 }
792
793 /* decode motion vector(s) and form prediction(s) */
794 if (s->pict_type == AV_PICTURE_TYPE_P) {
795 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
796 return -1;
797 } else { /* AV_PICTURE_TYPE_B */
798 if (mb_type != 2) {
799 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
800 return -1;
801 } else {
802 for (i = 0; i < 4; i++)
803 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
804 0, 4 * 2 * sizeof(int16_t));
805 }
806 if (mb_type != 1) {
807 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
808 return -1;
809 } else {
810 for (i = 0; i < 4; i++)
811 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
812 0, 4 * 2 * sizeof(int16_t));
813 }
814 }
815
816 mb_type = MB_TYPE_16x16;
817 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
818 int8_t *i4x4 = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
819 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
820
821 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
822
823 if (mb_type == 8) {
824 if (s->mb_x > 0) {
825 for (i = 0; i < 4; i++)
826 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
827 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
828 s->left_samples_available = 0x5F5F;
829 }
830 if (s->mb_y > 0) {
831 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
832 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
833 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
834 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
835
836 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
837 s->top_samples_available = 0x33FF;
838 }
839
840 /* decode prediction codes for luma blocks */
841 for (i = 0; i < 16; i += 2) {
842 vlc = svq3_get_ue_golomb(&s->gb_slice);
843
844 if (vlc >= 25) {
845 av_log(s->avctx, AV_LOG_ERROR,
846 "luma prediction:%"PRIu32"\n", vlc);
847 return -1;
848 }
849
850 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
851 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
852
853 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
854 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
855
856 if (left[1] == -1 || left[2] == -1) {
857 av_log(s->avctx, AV_LOG_ERROR, "weird prediction\n");
858 return -1;
859 }
860 }
861 } else { /* mb_type == 33, DC_128_PRED block type */
862 for (i = 0; i < 4; i++)
863 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
864 }
865
866 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
867 i4x4[4] = i4x4_cache[7 + 8 * 3];
868 i4x4[5] = i4x4_cache[7 + 8 * 2];
869 i4x4[6] = i4x4_cache[7 + 8 * 1];
870
871 if (mb_type == 8) {
872 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
873 s->avctx, s->top_samples_available,
874 s->left_samples_available);
875
876 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
877 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
878 } else {
879 for (i = 0; i < 4; i++)
880 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
881
882 s->top_samples_available = 0x33FF;
883 s->left_samples_available = 0x5F5F;
884 }
885
886 mb_type = MB_TYPE_INTRA4x4;
887 } else { /* INTRA16x16 */
888 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
889 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
890
891 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
892 s->left_samples_available, dir, 0)) < 0) {
893 av_log(s->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
894 return s->intra16x16_pred_mode;
895 }
896
897 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
898 mb_type = MB_TYPE_INTRA16x16;
899 }
900
901 if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
902 for (i = 0; i < 4; i++)
903 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
904 0, 4 * 2 * sizeof(int16_t));
905 if (s->pict_type == AV_PICTURE_TYPE_B) {
906 for (i = 0; i < 4; i++)
907 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
908 0, 4 * 2 * sizeof(int16_t));
909 }
910 }
911 if (!IS_INTRA4x4(mb_type)) {
912 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
913 }
914 if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
915 memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
916 }
917
918 if (!IS_INTRA16x16(mb_type) &&
919 (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
920 if ((vlc = svq3_get_ue_golomb(&s->gb_slice)) >= 48) {
921 av_log(s->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
922 return -1;
923 }
924
925 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
926 : ff_h264_golomb_to_inter_cbp[vlc];
927 }
928 if (IS_INTRA16x16(mb_type) ||
929 (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
930 s->qscale += svq3_get_se_golomb(&s->gb_slice);
931
932 if (s->qscale > 31u) {
933 av_log(s->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
934 return -1;
935 }
936 }
937 if (IS_INTRA16x16(mb_type)) {
938 AV_ZERO128(s->mb_luma_dc[0] + 0);
939 AV_ZERO128(s->mb_luma_dc[0] + 8);
940 if (svq3_decode_block(&s->gb_slice, s->mb_luma_dc[0], 0, 1)) {
941 av_log(s->avctx, AV_LOG_ERROR,
942 "error while decoding intra luma dc\n");
943 return -1;
944 }
945 }
946
947 if (cbp) {
948 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
949 const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
950
951 for (i = 0; i < 4; i++)
952 if ((cbp & (1 << i))) {
953 for (j = 0; j < 4; j++) {
954 k = index ? (1 * (j & 1) + 2 * (i & 1) +
955 2 * (j & 2) + 4 * (i & 2))
956 : (4 * i + j);
957 s->non_zero_count_cache[scan8[k]] = 1;
958
959 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], index, type)) {
960 av_log(s->avctx, AV_LOG_ERROR,
961 "error while decoding block\n");
962 return -1;
963 }
964 }
965 }
966
967 if ((cbp & 0x30)) {
968 for (i = 1; i < 3; ++i)
969 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * 16 * i], 0, 3)) {
970 av_log(s->avctx, AV_LOG_ERROR,
971 "error while decoding chroma dc block\n");
972 return -1;
973 }
974
975 if ((cbp & 0x20)) {
976 for (i = 1; i < 3; i++) {
977 for (j = 0; j < 4; j++) {
978 k = 16 * i + j;
979 s->non_zero_count_cache[scan8[k]] = 1;
980
981 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], 1, 1)) {
982 av_log(s->avctx, AV_LOG_ERROR,
983 "error while decoding chroma ac block\n");
984 return -1;
985 }
986 }
987 }
988 }
989 }
990 }
991
992 s->cbp = cbp;
993 s->cur_pic->mb_type[mb_xy] = mb_type;
994
995 if (IS_INTRA(mb_type))
996 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
997 s->left_samples_available, DC_PRED8x8, 1);
998
999 return 0;
1000 }
1001
1002 static int svq3_decode_slice_header(AVCodecContext *avctx)
1003 {
1004 SVQ3Context *s = avctx->priv_data;
1005 const int mb_xy = s->mb_xy;
1006 int i, header;
1007 unsigned slice_id;
1008
1009 header = get_bits(&s->gb, 8);
1010
1011 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1012 /* TODO: what? */
1013 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1014 return -1;
1015 } else {
1016 int slice_bits, slice_bytes, slice_length;
1017 int length = header >> 5 & 3;
1018
1019 slice_length = show_bits(&s->gb, 8 * length);
1020 slice_bits = slice_length * 8;
1021 slice_bytes = slice_length + length - 1;
1022
1023 if (slice_bytes > get_bits_left(&s->gb)) {
1024 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1025 return -1;
1026 }
1027
1028 skip_bits(&s->gb, 8);
1029
1030 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1031 if (!s->slice_buf)
1032 return AVERROR(ENOMEM);
1033
1034 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1035
1036 init_get_bits(&s->gb_slice, s->slice_buf, slice_bits);
1037
1038 if (s->watermark_key) {
1039 uint32_t header = AV_RL32(&s->gb_slice.buffer[1]);
1040 AV_WL32(&s->gb_slice.buffer[1], header ^ s->watermark_key);
1041 }
1042 if (length > 0) {
1043 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1044 }
1045 skip_bits_long(&s->gb, slice_bytes * 8);
1046 }
1047
1048 if ((slice_id = svq3_get_ue_golomb(&s->gb_slice)) >= 3) {
1049 av_log(s->avctx, AV_LOG_ERROR, "illegal slice type %u\n", slice_id);
1050 return -1;
1051 }
1052
1053 s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
1054
1055 if ((header & 0x9F) == 2) {
1056 i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1057 get_bits(&s->gb_slice, i);
1058 } else {
1059 skip_bits1(&s->gb_slice);
1060 }
1061
1062 s->slice_num = get_bits(&s->gb_slice, 8);
1063 s->qscale = get_bits(&s->gb_slice, 5);
1064 s->adaptive_quant = get_bits1(&s->gb_slice);
1065
1066 /* unknown fields */
1067 skip_bits1(&s->gb_slice);
1068
1069 if (s->unknown_flag)
1070 skip_bits1(&s->gb_slice);
1071
1072 skip_bits1(&s->gb_slice);
1073 skip_bits(&s->gb_slice, 2);
1074
1075 while (get_bits1(&s->gb_slice))
1076 skip_bits(&s->gb_slice, 8);
1077
1078 /* reset intra predictors and invalidate motion vector references */
1079 if (s->mb_x > 0) {
1080 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1081 -1, 4 * sizeof(int8_t));
1082 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1083 -1, 8 * sizeof(int8_t) * s->mb_x);
1084 }
1085 if (s->mb_y > 0) {
1086 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1087 -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1088
1089 if (s->mb_x > 0)
1090 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1091 }
1092
1093 return 0;
1094 }
1095
1096 static void init_dequant4_coeff_table(SVQ3Context *s)
1097 {
1098 int q, x;
1099 const int max_qp = 51;
1100
1101 for (q = 0; q < max_qp + 1; q++) {
1102 int shift = ff_h264_quant_div6[q] + 2;
1103 int idx = ff_h264_quant_rem6[q];
1104 for (x = 0; x < 16; x++)
1105 s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1106 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1107 }
1108 }
1109
1110 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1111 {
1112 SVQ3Context *s = avctx->priv_data;
1113 int m, x, y;
1114 unsigned char *extradata;
1115 unsigned char *extradata_end;
1116 unsigned int size;
1117 int marker_found = 0;
1118
1119 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1120 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1121 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1122 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1123 av_freep(&s->cur_pic);
1124 av_freep(&s->last_pic);
1125 av_freep(&s->next_pic);
1126 return AVERROR(ENOMEM);
1127 }
1128
1129 s->cur_pic->f = av_frame_alloc();
1130 s->last_pic->f = av_frame_alloc();
1131 s->next_pic->f = av_frame_alloc();
1132 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1133 return AVERROR(ENOMEM);
1134
1135 ff_h264dsp_init(&s->h264dsp, 8, 1);
1136 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1137 ff_videodsp_init(&s->vdsp, 8);
1138
1139 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1140 ff_tpeldsp_init(&s->tdsp);
1141
1142 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1143 avctx->color_range = AVCOL_RANGE_JPEG;
1144
1145 s->avctx = avctx;
1146 s->halfpel_flag = 1;
1147 s->thirdpel_flag = 1;
1148 s->unknown_flag = 0;
1149
1150 /* prowl for the "SEQH" marker in the extradata */
1151 extradata = (unsigned char *)avctx->extradata;
1152 extradata_end = avctx->extradata + avctx->extradata_size;
1153 if (extradata) {
1154 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1155 if (!memcmp(extradata, "SEQH", 4)) {
1156 marker_found = 1;
1157 break;
1158 }
1159 extradata++;
1160 }
1161 }
1162
1163 /* if a match was found, parse the extra data */
1164 if (marker_found) {
1165 GetBitContext gb;
1166 int frame_size_code;
1167
1168 size = AV_RB32(&extradata[4]);
1169 if (size > extradata_end - extradata - 8)
1170 return AVERROR_INVALIDDATA;
1171 init_get_bits(&gb, extradata + 8, size * 8);
1172
1173 /* 'frame size code' and optional 'width, height' */
1174 frame_size_code = get_bits(&gb, 3);
1175 switch (frame_size_code) {
1176 case 0:
1177 avctx->width = 160;
1178 avctx->height = 120;
1179 break;
1180 case 1:
1181 avctx->width = 128;
1182 avctx->height = 96;
1183 break;
1184 case 2:
1185 avctx->width = 176;
1186 avctx->height = 144;
1187 break;
1188 case 3:
1189 avctx->width = 352;
1190 avctx->height = 288;
1191 break;
1192 case 4:
1193 avctx->width = 704;
1194 avctx->height = 576;
1195 break;
1196 case 5:
1197 avctx->width = 240;
1198 avctx->height = 180;
1199 break;
1200 case 6:
1201 avctx->width = 320;
1202 avctx->height = 240;
1203 break;
1204 case 7:
1205 avctx->width = get_bits(&gb, 12);
1206 avctx->height = get_bits(&gb, 12);
1207 break;
1208 }
1209
1210 s->halfpel_flag = get_bits1(&gb);
1211 s->thirdpel_flag = get_bits1(&gb);
1212
1213 /* unknown fields */
1214 skip_bits1(&gb);
1215 skip_bits1(&gb);
1216 skip_bits1(&gb);
1217 skip_bits1(&gb);
1218
1219 s->low_delay = get_bits1(&gb);
1220
1221 /* unknown field */
1222 skip_bits1(&gb);
1223
1224 while (get_bits1(&gb))
1225 skip_bits(&gb, 8);
1226
1227 s->unknown_flag = get_bits1(&gb);
1228 avctx->has_b_frames = !s->low_delay;
1229 if (s->unknown_flag) {
1230 #if CONFIG_ZLIB
1231 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1232 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1233 int u1 = svq3_get_ue_golomb(&gb);
1234 int u2 = get_bits(&gb, 8);
1235 int u3 = get_bits(&gb, 2);
1236 int u4 = svq3_get_ue_golomb(&gb);
1237 unsigned long buf_len = watermark_width *
1238 watermark_height * 4;
1239 int offset = get_bits_count(&gb) + 7 >> 3;
1240 uint8_t *buf;
1241
1242 if (watermark_height > 0 &&
1243 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1244 return -1;
1245
1246 buf = av_malloc(buf_len);
1247 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1248 watermark_width, watermark_height);
1249 av_log(avctx, AV_LOG_DEBUG,
1250 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1251 u1, u2, u3, u4, offset);
1252 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1253 size - offset) != Z_OK) {
1254 av_log(avctx, AV_LOG_ERROR,
1255 "could not uncompress watermark logo\n");
1256 av_free(buf);
1257 return -1;
1258 }
1259 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1260 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1261 av_log(avctx, AV_LOG_DEBUG,
1262 "watermark key %#"PRIx32"\n", s->watermark_key);
1263 av_free(buf);
1264 #else
1265 av_log(avctx, AV_LOG_ERROR,
1266 "this svq3 file contains watermark which need zlib support compiled in\n");
1267 return -1;
1268 #endif
1269 }
1270 }
1271
1272 s->mb_width = (avctx->width + 15) / 16;
1273 s->mb_height = (avctx->height + 15) / 16;
1274 s->mb_stride = s->mb_width + 1;
1275 s->mb_num = s->mb_width * s->mb_height;
1276 s->b_stride = 4 * s->mb_width;
1277 s->h_edge_pos = s->mb_width * 16;
1278 s->v_edge_pos = s->mb_height * 16;
1279
1280 s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1281 if (!s->intra4x4_pred_mode)
1282 return AVERROR(ENOMEM);
1283
1284 s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1285 sizeof(*s->mb2br_xy));
1286 if (!s->mb2br_xy)
1287 return AVERROR(ENOMEM);
1288
1289 for (y = 0; y < s->mb_height; y++)
1290 for (x = 0; x < s->mb_width; x++) {
1291 const int mb_xy = x + y * s->mb_stride;
1292
1293 s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1294 }
1295
1296 init_dequant4_coeff_table(s);
1297
1298 return 0;
1299 }
1300
1301 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1302 {
1303 int i;
1304 for (i = 0; i < 2; i++) {
1305 av_buffer_unref(&pic->motion_val_buf[i]);
1306 av_buffer_unref(&pic->ref_index_buf[i]);
1307 }
1308 av_buffer_unref(&pic->mb_type_buf);
1309
1310 av_frame_unref(pic->f);
1311 }
1312
1313 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1314 {
1315 SVQ3Context *s = avctx->priv_data;
1316 const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
1317 const int mb_array_size = s->mb_stride * s->mb_height;
1318 const int b4_stride = s->mb_width * 4 + 1;
1319 const int b4_array_size = b4_stride * s->mb_height * 4;
1320 int ret;
1321
1322 if (!pic->motion_val_buf[0]) {
1323 int i;
1324
1325 pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1326 if (!pic->mb_type_buf)
1327 return AVERROR(ENOMEM);
1328 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1329
1330 for (i = 0; i < 2; i++) {
1331 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1332 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1333 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1334 ret = AVERROR(ENOMEM);
1335 goto fail;
1336 }
1337
1338 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1339 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1340 }
1341 }
1342 pic->reference = !(s->pict_type == AV_PICTURE_TYPE_B);
1343
1344 ret = ff_get_buffer(avctx, pic->f,
1345 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1346 if (ret < 0)
1347 goto fail;
1348
1349 if (!s->edge_emu_buffer) {
1350 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1351 if (!s->edge_emu_buffer)
1352 return AVERROR(ENOMEM);
1353 }
1354
1355 return 0;
1356 fail:
1357 free_picture(avctx, pic);
1358 return ret;
1359 }
1360
1361 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1362 int *got_frame, AVPacket *avpkt)
1363 {
1364 const uint8_t *buf = avpkt->data;
1365 SVQ3Context *s = avctx->priv_data;
1366 int buf_size = avpkt->size;
1367 int ret, m, i;
1368
1369 /* special case for last picture */
1370 if (buf_size == 0) {
1371 if (s->next_pic->f->data[0] && !s->low_delay && !s->last_frame_output) {
1372 ret = av_frame_ref(data, s->next_pic->f);
1373 if (ret < 0)
1374 return ret;
1375 s->last_frame_output = 1;
1376 *got_frame = 1;
1377 }
1378 return 0;
1379 }
1380
1381 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1382 if (ret < 0)
1383 return ret;
1384
1385 s->mb_x = s->mb_y = s->mb_xy = 0;
1386
1387 if (svq3_decode_slice_header(avctx))
1388 return -1;
1389
1390 if (s->pict_type != AV_PICTURE_TYPE_B)
1391 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1392
1393 av_frame_unref(s->cur_pic->f);
1394
1395 /* for skipping the frame */
1396 s->cur_pic->f->pict_type = s->pict_type;
1397 s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1398
1399 ret = get_buffer(avctx, s->cur_pic);
1400 if (ret < 0)
1401 return ret;
1402
1403 for (i = 0; i < 16; i++) {
1404 s->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1405 s->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1406 }
1407 for (i = 0; i < 16; i++) {
1408 s->block_offset[16 + i] =
1409 s->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1410 s->block_offset[48 + 16 + i] =
1411 s->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1412 }
1413
1414 if (s->pict_type != AV_PICTURE_TYPE_I) {
1415 if (!s->last_pic->f->data[0]) {
1416 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1417 ret = get_buffer(avctx, s->last_pic);
1418 if (ret < 0)
1419 return ret;
1420 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1421 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1422 s->last_pic->f->linesize[1]);
1423 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1424 s->last_pic->f->linesize[2]);
1425 }
1426
1427 if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1428 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1429 ret = get_buffer(avctx, s->next_pic);
1430 if (ret < 0)
1431 return ret;
1432 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1433 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1434 s->next_pic->f->linesize[1]);
1435 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1436 s->next_pic->f->linesize[2]);
1437 }
1438 }
1439
1440 if (avctx->debug & FF_DEBUG_PICT_INFO)
1441 av_log(s->avctx, AV_LOG_DEBUG,
1442 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1443 av_get_picture_type_char(s->pict_type),
1444 s->halfpel_flag, s->thirdpel_flag,
1445 s->adaptive_quant, s->qscale, s->slice_num);
1446
1447 if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1448 avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1449 avctx->skip_frame >= AVDISCARD_ALL)
1450 return 0;
1451
1452 if (s->next_p_frame_damaged) {
1453 if (s->pict_type == AV_PICTURE_TYPE_B)
1454 return 0;
1455 else
1456 s->next_p_frame_damaged = 0;
1457 }
1458
1459 if (s->pict_type == AV_PICTURE_TYPE_B) {
1460 s->frame_num_offset = s->slice_num - s->prev_frame_num;
1461
1462 if (s->frame_num_offset < 0)
1463 s->frame_num_offset += 256;
1464 if (s->frame_num_offset == 0 ||
1465 s->frame_num_offset >= s->prev_frame_num_offset) {
1466 av_log(s->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1467 return -1;
1468 }
1469 } else {
1470 s->prev_frame_num = s->frame_num;
1471 s->frame_num = s->slice_num;
1472 s->prev_frame_num_offset = s->frame_num - s->prev_frame_num;
1473
1474 if (s->prev_frame_num_offset < 0)
1475 s->prev_frame_num_offset += 256;
1476 }
1477
1478 for (m = 0; m < 2; m++) {
1479 int i;
1480 for (i = 0; i < 4; i++) {
1481 int j;
1482 for (j = -1; j < 4; j++)
1483 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1484 if (i < 3)
1485 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1486 }
1487 }
1488
1489 for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1490 for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1491 unsigned mb_type;
1492 s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1493
1494 if ((get_bits_left(&s->gb_slice)) <= 7) {
1495 if (((get_bits_count(&s->gb_slice) & 7) == 0 ||
1496 show_bits(&s->gb_slice, get_bits_left(&s->gb_slice) & 7) == 0)) {
1497
1498 if (svq3_decode_slice_header(avctx))
1499 return -1;
1500 }
1501 /* TODO: support s->mb_skip_run */
1502 }
1503
1504 mb_type = svq3_get_ue_golomb(&s->gb_slice);
1505
1506 if (s->pict_type == AV_PICTURE_TYPE_I)
1507 mb_type += 8;
1508 else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1509 mb_type += 4;
1510 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1511 av_log(s->avctx, AV_LOG_ERROR,
1512 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1513 return -1;
1514 }
1515
1516 if (mb_type != 0)
1517 hl_decode_mb(s);
1518
1519 if (s->pict_type != AV_PICTURE_TYPE_B && !s->low_delay)
1520 s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1521 (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1522 }
1523
1524 ff_draw_horiz_band(avctx, s->cur_pic->f,
1525 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1526 16 * s->mb_y, 16, PICT_FRAME, 0,
1527 s->low_delay);
1528 }
1529
1530 if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay)
1531 ret = av_frame_ref(data, s->cur_pic->f);
1532 else if (s->last_pic->f->data[0])
1533 ret = av_frame_ref(data, s->last_pic->f);
1534 if (ret < 0)
1535 return ret;
1536
1537 /* Do not output the last pic after seeking. */
1538 if (s->last_pic->f->data[0] || s->low_delay)
1539 *got_frame = 1;
1540
1541 if (s->pict_type != AV_PICTURE_TYPE_B) {
1542 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1543 } else {
1544 av_frame_unref(s->cur_pic->f);
1545 }
1546
1547 return buf_size;
1548 }
1549
1550 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1551 {
1552 SVQ3Context *s = avctx->priv_data;
1553
1554 free_picture(avctx, s->cur_pic);
1555 free_picture(avctx, s->next_pic);
1556 free_picture(avctx, s->last_pic);
1557 av_frame_free(&s->cur_pic->f);
1558 av_frame_free(&s->next_pic->f);
1559 av_frame_free(&s->last_pic->f);
1560 av_freep(&s->cur_pic);
1561 av_freep(&s->next_pic);
1562 av_freep(&s->last_pic);
1563 av_freep(&s->slice_buf);
1564 av_freep(&s->intra4x4_pred_mode);
1565 av_freep(&s->edge_emu_buffer);
1566 av_freep(&s->mb2br_xy);
1567
1568 return 0;
1569 }
1570
1571 AVCodec ff_svq3_decoder = {
1572 .name = "svq3",
1573 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1574 .type = AVMEDIA_TYPE_VIDEO,
1575 .id = AV_CODEC_ID_SVQ3,
1576 .priv_data_size = sizeof(SVQ3Context),
1577 .init = svq3_decode_init,
1578 .close = svq3_decode_end,
1579 .decode = svq3_decode_frame,
1580 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1581 AV_CODEC_CAP_DR1 |
1582 AV_CODEC_CAP_DELAY,
1583 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1584 AV_PIX_FMT_NONE},
1585 };