1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple QuickTime files. QuickTime files
24 * contain stsd atoms that describe the properties of each media trak. The
25 * stsd atom for a video trak contains one or more ImageDescription atoms.
26 * These atoms begin with the 4-byte length of the atom followed by the
27 * codec fourcc. Some decoders, SVQ3 among them, need the information in
28 * this atom to operate correctly. To get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] fields:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
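/*
 * Illustrative sketch only (not decoder code): one way a calling app might
 * fill in the fields described above.  "atom" and "atom_size" are
 * hypothetical names for a buffer holding the complete ImageDescription
 * atom (including its leading 4-byte length field) and the value of that
 * length field.
 *
 *     avctx->extradata_size = atom_size - 4;
 *     avctx->extradata      = av_mallocz(avctx->extradata_size +
 *                                        AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, atom + 4, avctx->extradata_size);
 */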
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * SVQ3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 GetBitContext gb_slice;
83 uint8_t *slice_buf;
84 int slice_size;
85 int halfpel_flag;
86 int thirdpel_flag;
87 int unknown_flag;
88 uint32_t watermark_key;
89 int adaptive_quant;
90 int next_p_frame_damaged;
91 int h_edge_pos;
92 int v_edge_pos;
93 int last_frame_output;
94 int slice_num;
95 int qscale;
96 int cbp;
97 int frame_num;
98 int frame_num_offset;
99 int prev_frame_num_offset;
100 int prev_frame_num;
101
102 enum AVPictureType pict_type;
103
104 int mb_x, mb_y;
105 int mb_xy;
106 int mb_width, mb_height;
107 int mb_stride, mb_num;
108 int b_stride;
109
110 uint32_t *mb2br_xy;
111
112 int chroma_pred_mode;
113 int intra16x16_pred_mode;
114
115 int8_t intra4x4_pred_mode_cache[5 * 8];
116 int8_t (*intra4x4_pred_mode);
117
118 unsigned int top_samples_available;
119 unsigned int topright_samples_available;
120 unsigned int left_samples_available;
121
122 uint8_t *edge_emu_buffer;
123
124 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
125 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
126 DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
127 DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
128 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
129 uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
130 } SVQ3Context;
131
132 #define FULLPEL_MODE 1
133 #define HALFPEL_MODE 2
134 #define THIRDPEL_MODE 3
135 #define PREDICT_MODE 4
136
137 /* dual scan (from some older h264 draft)
138 *  o-->o-->o   o
139 *          |  /|
140 *  o   o   o / o
141 *  | / |   |/  |
142 *  o   o   o   o
143 *    /
144 *  o-->o-->o-->o
145 */
146 static const uint8_t svq3_scan[16] = {
147 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
148 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
149 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
150 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
151 };
152
153 static const uint8_t luma_dc_zigzag_scan[16] = {
154 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
155 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
156 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
157 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
158 };
159
160 static const uint8_t svq3_pred_0[25][2] = {
161 { 0, 0 },
162 { 1, 0 }, { 0, 1 },
163 { 0, 2 }, { 1, 1 }, { 2, 0 },
164 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
165 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
166 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
167 { 2, 4 }, { 3, 3 }, { 4, 2 },
168 { 4, 3 }, { 3, 4 },
169 { 4, 4 }
170 };
171
172 static const int8_t svq3_pred_1[6][6][5] = {
173 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
174 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
175 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
176 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
177 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
178 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
179 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
180 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
181 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
182 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
183 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
184 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
185 };
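/*
 * How the two tables above are used (see svq3_decode_mb(); illustration
 * only): a single Golomb codeword selects a pair of 4x4 luma prediction
 * modes.  svq3_pred_0[vlc] yields two rank indices, and
 * svq3_pred_1[top + 1][left + 1][rank] turns each rank into an actual mode
 * given the modes of the top and left neighbours; the "+ 1" maps the value
 * -1 ("not available") to row/column 0.
 */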
186
187 static const struct {
188 uint8_t run;
189 uint8_t level;
190 } svq3_dct_tables[2][16] = {
191 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
192 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
193 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
194 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
195 };
196
197 static const uint32_t svq3_dequant_coeff[32] = {
198 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
199 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
200 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
201 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
202 };
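/*
 * The table above holds one dequantizer per legal qscale value (0..31) and
 * roughly doubles every six steps (3881 at index 0 vs. 7761 at index 6),
 * mirroring the H.264 convention of one scale doubling per 6 qp.
 */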
203
204 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
205 {
206 const int qmul = svq3_dequant_coeff[qp];
207 #define stride 16
208 int i;
209 int temp[16];
210 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
211
212 for (i = 0; i < 4; i++) {
213 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
214 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
215 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
216 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
217
218 temp[4 * i + 0] = z0 + z3;
219 temp[4 * i + 1] = z1 + z2;
220 temp[4 * i + 2] = z1 - z2;
221 temp[4 * i + 3] = z0 - z3;
222 }
223
224 for (i = 0; i < 4; i++) {
225 const int offset = x_offset[i];
226 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
227 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
228 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
229 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
230
231 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
232 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
233 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
234 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
235 }
236 }
237 #undef stride
238
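/*
 * Note on the fixed-point arithmetic in svq3_luma_dc_dequant_idct_c() above
 * and svq3_add_idct_c() below: coefficients are scaled by qmul with 20
 * fractional bits and brought back to range with a rounded shift,
 * (v * qmul + 0x80000) >> 20, where 0x80000 == 1 << 19 rounds to nearest
 * (in svq3_add_idct_c() the propagated DC term is folded into that rounding
 * constant as rr).  Worked example, values chosen arbitrarily:
 *
 *     v = 3, qmul = svq3_dequant_coeff[0] = 3881
 *     (3 * 3881 + 0x80000) >> 20 = (11643 + 524288) >> 20 = 0
 *
 * while any product of at least 0x80000 would yield 1 or more.
 */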
239 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
240 int stride, int qp, int dc)
241 {
242 const int qmul = svq3_dequant_coeff[qp];
243 int i;
244
245 if (dc) {
246 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
247 : qmul * (block[0] >> 3) / 2);
248 block[0] = 0;
249 }
250
251 for (i = 0; i < 4; i++) {
252 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
253 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
254 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
255 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
256
257 block[0 + 4 * i] = z0 + z3;
258 block[1 + 4 * i] = z1 + z2;
259 block[2 + 4 * i] = z1 - z2;
260 block[3 + 4 * i] = z0 - z3;
261 }
262
263 for (i = 0; i < 4; i++) {
264 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
265 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
266 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
267 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
268 const int rr = (dc + 0x80000);
269
270 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
271 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
272 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
273 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
274 }
275
276 memset(block, 0, 16 * sizeof(int16_t));
277 }
278
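/*
 * Worked example of the run/level unpacking done in svq3_decode_block()
 * below (illustration only): a Golomb codeword of 6 is even, so the
 * coefficient is negative (sign == -1), and vlc becomes (6 + 1) >> 1 == 3.
 * With the first (inter) table, svq3_dct_tables[0][3] is { 2, 1 }, so two
 * positions of the selected scan are skipped and (level ^ sign) - sign == -1
 * is stored at the next one.
 */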
279 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
280 int index, const int type)
281 {
282 static const uint8_t *const scan_patterns[4] = {
283 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
284 };
285
286 int run, level, limit;
287 unsigned vlc;
288 const int intra = 3 * type >> 2;
289 const uint8_t *const scan = scan_patterns[type];
290
291 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
292 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
293 int sign = (vlc & 1) ? 0 : -1;
294 vlc = vlc + 1 >> 1;
295
296 if (type == 3) {
297 if (vlc < 3) {
298 run = 0;
299 level = vlc;
300 } else if (vlc < 4) {
301 run = 1;
302 level = 1;
303 } else {
304 run = vlc & 0x3;
305 level = (vlc + 9 >> 2) - run;
306 }
307 } else {
308 if (vlc < 16) {
309 run = svq3_dct_tables[intra][vlc].run;
310 level = svq3_dct_tables[intra][vlc].level;
311 } else if (intra) {
312 run = vlc & 0x7;
313 level = (vlc >> 3) +
314 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
315 } else {
316 run = vlc & 0xF;
317 level = (vlc >> 4) +
318 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
319 }
320 }
321
322 if ((index += run) >= limit)
323 return -1;
324
325 block[scan[index]] = (level ^ sign) - sign;
326 }
327
328 if (type != 2) {
329 break;
330 }
331 }
332
333 return 0;
334 }
335
336 static av_always_inline int
337 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
338 int i, int list, int part_width)
339 {
340 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
341
342 if (topright_ref != PART_NOT_AVAILABLE) {
343 *C = s->mv_cache[list][i - 8 + part_width];
344 return topright_ref;
345 } else {
346 *C = s->mv_cache[list][i - 8 - 1];
347 return s->ref_cache[list][i - 8 - 1];
348 }
349 }
350
351 /**
352 * Get the predicted MV.
353 * @param n the block index
354 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
355 * @param mx the x component of the predicted motion vector
356 * @param my the y component of the predicted motion vector
357 */
358 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
359 int part_width, int list,
360 int ref, int *const mx, int *const my)
361 {
362 const int index8 = scan8[n];
363 const int top_ref = s->ref_cache[list][index8 - 8];
364 const int left_ref = s->ref_cache[list][index8 - 1];
365 const int16_t *const A = s->mv_cache[list][index8 - 1];
366 const int16_t *const B = s->mv_cache[list][index8 - 8];
367 const int16_t *C;
368 int diagonal_ref, match_count;
369
370 /* mv_cache
371 * B . . A T T T T
372 * U . . L . . , .
373 * U . . L . . . .
374 * U . . L . . , .
375 * . . . L . . . .
376 */
377
378 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
379 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
380 if (match_count > 1) { //most common
381 *mx = mid_pred(A[0], B[0], C[0]);
382 *my = mid_pred(A[1], B[1], C[1]);
383 } else if (match_count == 1) {
384 if (left_ref == ref) {
385 *mx = A[0];
386 *my = A[1];
387 } else if (top_ref == ref) {
388 *mx = B[0];
389 *my = B[1];
390 } else {
391 *mx = C[0];
392 *my = C[1];
393 }
394 } else {
395 if (top_ref == PART_NOT_AVAILABLE &&
396 diagonal_ref == PART_NOT_AVAILABLE &&
397 left_ref != PART_NOT_AVAILABLE) {
398 *mx = A[0];
399 *my = A[1];
400 } else {
401 *mx = mid_pred(A[0], B[0], C[0]);
402 *my = mid_pred(A[1], B[1], C[1]);
403 }
404 }
405 }
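/*
 * Behaviour of the median prediction above (illustration only, arbitrary
 * numbers): with neighbouring vectors A = (4, 0), B = (12, 8) and
 * C = (8, -4) all using the same reference, mid_pred() picks the
 * per-component median, so the predictor is (8, 0).
 */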
406
407 static inline void svq3_mc_dir_part(SVQ3Context *s,
408 int x, int y, int width, int height,
409 int mx, int my, int dxy,
410 int thirdpel, int dir, int avg)
411 {
412 H264Context *h = &s->h;
413 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
414 uint8_t *src, *dest;
415 int i, emu = 0;
416 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
417 int linesize = s->cur_pic->f->linesize[0];
418 int uvlinesize = s->cur_pic->f->linesize[1];
419
420 mx += x;
421 my += y;
422
423 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
424 my < 0 || my >= s->v_edge_pos - height - 1) {
425 emu = 1;
426 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
427 my = av_clip(my, -16, s->v_edge_pos - height + 15);
428 }
429
430 /* form component predictions */
431 dest = s->cur_pic->f->data[0] + x + y * linesize;
432 src = pic->f->data[0] + mx + my * linesize;
433
434 if (emu) {
435 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
436 linesize, linesize,
437 width + 1, height + 1,
438 mx, my, s->h_edge_pos, s->v_edge_pos);
439 src = s->edge_emu_buffer;
440 }
441 if (thirdpel)
442 (avg ? s->tdsp.avg_tpel_pixels_tab
443 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
444 width, height);
445 else
446 (avg ? s->hdsp.avg_pixels_tab
447 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
448 height);
449
450 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
451 mx = mx + (mx < (int) x) >> 1;
452 my = my + (my < (int) y) >> 1;
453 width = width >> 1;
454 height = height >> 1;
455 blocksize++;
456
457 for (i = 1; i < 3; i++) {
458 dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
459 src = pic->f->data[i] + mx + my * uvlinesize;
460
461 if (emu) {
462 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
463 uvlinesize, uvlinesize,
464 width + 1, height + 1,
465 mx, my, (s->h_edge_pos >> 1),
466 s->v_edge_pos >> 1);
467 src = s->edge_emu_buffer;
468 }
469 if (thirdpel)
470 (avg ? s->tdsp.avg_tpel_pixels_tab
471 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
472 uvlinesize,
473 width, height);
474 else
475 (avg ? s->hdsp.avg_pixels_tab
476 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
477 uvlinesize,
478 height);
479 }
480 }
481 }
482
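/*
 * Sketch of the sub-pel bookkeeping in svq3_mc_dir() below (illustration
 * only, arbitrary numbers): predicted vectors are kept in sixth-pel units.
 * In THIRDPEL_MODE a predicted mx of 15 (2.5 pixels) becomes
 * (15 + 1) >> 1 == 8 third-pel units; adding a differential dx = 1 gives 9,
 * whose full-pel part is fx = (9 + 0x3000) / 3 - 0x1000 == 3 with fractional
 * part 9 - 3 * fx == 0, and the vector is written back as 2 * 9 == 18
 * sixth-pel units (3.0 pixels).
 */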
483 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
484 int dir, int avg)
485 {
486 int i, j, k, mx, my, dx, dy, x, y;
487 H264Context *h = &s->h;
488 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
489 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
490 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
491 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
492 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
493
494 for (i = 0; i < 16; i += part_height)
495 for (j = 0; j < 16; j += part_width) {
496 const int b_xy = (4 * s->mb_x + (j >> 2)) +
497 (4 * s->mb_y + (i >> 2)) * s->b_stride;
498 int dxy;
499 x = 16 * s->mb_x + j;
500 y = 16 * s->mb_y + i;
501 k = (j >> 2 & 1) + (i >> 1 & 2) +
502 (j >> 1 & 4) + (i & 8);
503
504 if (mode != PREDICT_MODE) {
505 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
506 } else {
507 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
508 my = s->next_pic->motion_val[0][b_xy][1] << 1;
509
510 if (dir == 0) {
511 mx = mx * s->frame_num_offset /
512 s->prev_frame_num_offset + 1 >> 1;
513 my = my * s->frame_num_offset /
514 s->prev_frame_num_offset + 1 >> 1;
515 } else {
516 mx = mx * (s->frame_num_offset - s->prev_frame_num_offset) /
517 s->prev_frame_num_offset + 1 >> 1;
518 my = my * (s->frame_num_offset - s->prev_frame_num_offset) /
519 s->prev_frame_num_offset + 1 >> 1;
520 }
521 }
522
523 /* clip motion vector prediction to frame border */
524 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
525 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
526
527 /* get (optional) motion vector differential */
528 if (mode == PREDICT_MODE) {
529 dx = dy = 0;
530 } else {
531 dy = svq3_get_se_golomb(&s->gb_slice);
532 dx = svq3_get_se_golomb(&s->gb_slice);
533
534 if (dx == INVALID_VLC || dy == INVALID_VLC) {
535 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
536 return -1;
537 }
538 }
539
540 /* compute motion vector */
541 if (mode == THIRDPEL_MODE) {
542 int fx, fy;
543 mx = (mx + 1 >> 1) + dx;
544 my = (my + 1 >> 1) + dy;
545 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
546 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
547 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
548
549 svq3_mc_dir_part(s, x, y, part_width, part_height,
550 fx, fy, dxy, 1, dir, avg);
551 mx += mx;
552 my += my;
553 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
554 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
555 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
556 dxy = (mx & 1) + 2 * (my & 1);
557
558 svq3_mc_dir_part(s, x, y, part_width, part_height,
559 mx >> 1, my >> 1, dxy, 0, dir, avg);
560 mx *= 3;
561 my *= 3;
562 } else {
563 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
564 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
565
566 svq3_mc_dir_part(s, x, y, part_width, part_height,
567 mx, my, 0, 0, dir, avg);
568 mx *= 6;
569 my *= 6;
570 }
571
572 /* update mv_cache */
573 if (mode != PREDICT_MODE) {
574 int32_t mv = pack16to32(mx, my);
575
576 if (part_height == 8 && i < 8) {
577 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
578
579 if (part_width == 8 && j < 8)
580 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
581 }
582 if (part_width == 8 && j < 8)
583 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
584 if (part_width == 4 || part_height == 4)
585 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
586 }
587
588 /* write back motion vectors */
589 fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
590 part_width >> 2, part_height >> 2, s->b_stride,
591 pack16to32(mx, my), 4);
592 }
593
594 return 0;
595 }
596
597 static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
598 int mb_type, const int *block_offset,
599 int linesize, uint8_t *dest_y)
600 {
601 int i;
602 if (!IS_INTRA4x4(mb_type)) {
603 for (i = 0; i < 16; i++)
604 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
605 uint8_t *const ptr = dest_y + block_offset[i];
606 svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
607 s->qscale, IS_INTRA(mb_type) ? 1 : 0);
608 }
609 }
610 }
611
612 static av_always_inline int dctcoef_get(int16_t *mb, int index)
613 {
614 return AV_RN16A(mb + index);
615 }
616
617 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
618 const H264Context *h,
619 int mb_type,
620 const int *block_offset,
621 int linesize,
622 uint8_t *dest_y)
623 {
624 int i;
625 int qscale = s->qscale;
626
627 if (IS_INTRA4x4(mb_type)) {
628 for (i = 0; i < 16; i++) {
629 uint8_t *const ptr = dest_y + block_offset[i];
630 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
631
632 uint8_t *topright;
633 int nnz, tr;
634 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
635 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
636 assert(s->mb_y || linesize <= block_offset[i]);
637 if (!topright_avail) {
638 tr = ptr[3 - linesize] * 0x01010101u;
639 topright = (uint8_t *)&tr;
640 } else
641 topright = ptr + 4 - linesize;
642 } else
643 topright = NULL;
644
645 s->hpc.pred4x4[dir](ptr, topright, linesize);
646 nnz = s->non_zero_count_cache[scan8[i]];
647 if (nnz) {
648 svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
649 }
650 }
651 } else {
652 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
653 svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
654 }
655 }
656
657 static void hl_decode_mb(SVQ3Context *s, const H264Context *h)
658 {
659 const int mb_x = s->mb_x;
660 const int mb_y = s->mb_y;
661 const int mb_xy = s->mb_xy;
662 const int mb_type = s->cur_pic->mb_type[mb_xy];
663 uint8_t *dest_y, *dest_cb, *dest_cr;
664 int linesize, uvlinesize;
665 int i, j;
666 const int *block_offset = &h->block_offset[0];
667 const int block_h = 16 >> h->chroma_y_shift;
668
669 linesize = s->cur_pic->f->linesize[0];
670 uvlinesize = s->cur_pic->f->linesize[1];
671
672 dest_y = s->cur_pic->f->data[0] + (mb_x + mb_y * linesize) * 16;
673 dest_cb = s->cur_pic->f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
674 dest_cr = s->cur_pic->f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;
675
676 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
677 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);
678
679 if (IS_INTRA(mb_type)) {
680 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
681 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
682
683 hl_decode_mb_predict_luma(s, h, mb_type, block_offset, linesize, dest_y);
684 }
685
686 hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
687
688 if (s->cbp & 0x30) {
689 uint8_t *dest[2] = { dest_cb, dest_cr };
690 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
691 s->dequant4_coeff[4][0]);
692 s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
693 s->dequant4_coeff[4][0]);
694 for (j = 1; j < 3; j++) {
695 for (i = j * 16; i < j * 16 + 4; i++)
696 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
697 uint8_t *const ptr = dest[j - 1] + block_offset[i];
698 svq3_add_idct_c(ptr, s->mb + i * 16,
699 uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
700 }
701 }
702 }
703 }
704
705 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
706 {
707 H264Context *h = &s->h;
708 int i, j, k, m, dir, mode;
709 int cbp = 0;
710 uint32_t vlc;
711 int8_t *top, *left;
712 const int mb_xy = s->mb_xy;
713 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
714
715 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
716 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
717 s->topright_samples_available = 0xFFFF;
718
719 if (mb_type == 0) { /* SKIP */
720 if (s->pict_type == AV_PICTURE_TYPE_P ||
721 s->next_pic->mb_type[mb_xy] == -1) {
722 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
723 0, 0, 0, 0, 0, 0);
724
725 if (s->pict_type == AV_PICTURE_TYPE_B)
726 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
727 0, 0, 0, 0, 1, 1);
728
729 mb_type = MB_TYPE_SKIP;
730 } else {
731 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
732 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
733 return -1;
734 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
735 return -1;
736
737 mb_type = MB_TYPE_16x16;
738 }
739 } else if (mb_type < 8) { /* INTER */
740 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&s->gb_slice))
741 mode = THIRDPEL_MODE;
742 else if (s->halfpel_flag &&
743 s->thirdpel_flag == !get_bits1(&s->gb_slice))
744 mode = HALFPEL_MODE;
745 else
746 mode = FULLPEL_MODE;
747
748 /* fill caches */
749 /* note ref_cache should contain here:
750 * ????????
751 * ???11111
752 * N??11111
753 * N??11111
754 * N??11111
755 */
756
757 for (m = 0; m < 2; m++) {
758 if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
759 for (i = 0; i < 4; i++)
760 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
761 s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
762 } else {
763 for (i = 0; i < 4; i++)
764 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
765 }
766 if (s->mb_y > 0) {
767 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
768 s->cur_pic->motion_val[m][b_xy - s->b_stride],
769 4 * 2 * sizeof(int16_t));
770 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
771 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
772
773 if (s->mb_x < s->mb_width - 1) {
774 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
775 s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
776 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
777 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
778 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
779 } else
780 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
781 if (s->mb_x > 0) {
782 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
783 s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
784 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
785 (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
786 } else
787 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
788 } else
789 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
790 PART_NOT_AVAILABLE, 8);
791
792 if (s->pict_type != AV_PICTURE_TYPE_B)
793 break;
794 }
795
796 /* decode motion vector(s) and form prediction(s) */
797 if (s->pict_type == AV_PICTURE_TYPE_P) {
798 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
799 return -1;
800 } else { /* AV_PICTURE_TYPE_B */
801 if (mb_type != 2) {
802 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
803 return -1;
804 } else {
805 for (i = 0; i < 4; i++)
806 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
807 0, 4 * 2 * sizeof(int16_t));
808 }
809 if (mb_type != 1) {
810 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
811 return -1;
812 } else {
813 for (i = 0; i < 4; i++)
814 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
815 0, 4 * 2 * sizeof(int16_t));
816 }
817 }
818
819 mb_type = MB_TYPE_16x16;
820 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
821 int8_t *i4x4 = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
822 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
823
824 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
825
826 if (mb_type == 8) {
827 if (s->mb_x > 0) {
828 for (i = 0; i < 4; i++)
829 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
830 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
831 s->left_samples_available = 0x5F5F;
832 }
833 if (s->mb_y > 0) {
834 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
835 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
836 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
837 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
838
839 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
840 s->top_samples_available = 0x33FF;
841 }
842
843 /* decode prediction codes for luma blocks */
844 for (i = 0; i < 16; i += 2) {
845 vlc = svq3_get_ue_golomb(&s->gb_slice);
846
847 if (vlc >= 25) {
848 av_log(h->avctx, AV_LOG_ERROR,
849 "luma prediction:%"PRIu32"\n", vlc);
850 return -1;
851 }
852
853 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
854 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
855
856 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
857 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
858
859 if (left[1] == -1 || left[2] == -1) {
860 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
861 return -1;
862 }
863 }
864 } else { /* mb_type == 33, DC_128_PRED block type */
865 for (i = 0; i < 4; i++)
866 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
867 }
868
869 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
870 i4x4[4] = i4x4_cache[7 + 8 * 3];
871 i4x4[5] = i4x4_cache[7 + 8 * 2];
872 i4x4[6] = i4x4_cache[7 + 8 * 1];
873
874 if (mb_type == 8) {
875 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
876 h->avctx, s->top_samples_available,
877 s->left_samples_available);
878
879 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
880 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
881 } else {
882 for (i = 0; i < 4; i++)
883 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
884
885 s->top_samples_available = 0x33FF;
886 s->left_samples_available = 0x5F5F;
887 }
888
889 mb_type = MB_TYPE_INTRA4x4;
890 } else { /* INTRA16x16 */
891 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
892 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
893
894 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
895 s->left_samples_available, dir, 0)) < 0) {
896 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
897 return s->intra16x16_pred_mode;
898 }
899
900 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
901 mb_type = MB_TYPE_INTRA16x16;
902 }
903
904 if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
905 for (i = 0; i < 4; i++)
906 memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
907 0, 4 * 2 * sizeof(int16_t));
908 if (s->pict_type == AV_PICTURE_TYPE_B) {
909 for (i = 0; i < 4; i++)
910 memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
911 0, 4 * 2 * sizeof(int16_t));
912 }
913 }
914 if (!IS_INTRA4x4(mb_type)) {
915 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
916 }
917 if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
918 memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
919 }
920
921 if (!IS_INTRA16x16(mb_type) &&
922 (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
923 if ((vlc = svq3_get_ue_golomb(&s->gb_slice)) >= 48) {
924 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
925 return -1;
926 }
927
928 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
929 : ff_h264_golomb_to_inter_cbp[vlc];
930 }
931 if (IS_INTRA16x16(mb_type) ||
932 (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
933 s->qscale += svq3_get_se_golomb(&s->gb_slice);
934
935 if (s->qscale > 31u) {
936 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
937 return -1;
938 }
939 }
940 if (IS_INTRA16x16(mb_type)) {
941 AV_ZERO128(s->mb_luma_dc[0] + 0);
942 AV_ZERO128(s->mb_luma_dc[0] + 8);
943 if (svq3_decode_block(&s->gb_slice, s->mb_luma_dc[0], 0, 1)) {
944 av_log(h->avctx, AV_LOG_ERROR,
945 "error while decoding intra luma dc\n");
946 return -1;
947 }
948 }
949
950 if (cbp) {
951 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
952 const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
953
954 for (i = 0; i < 4; i++)
955 if ((cbp & (1 << i))) {
956 for (j = 0; j < 4; j++) {
957 k = index ? (1 * (j & 1) + 2 * (i & 1) +
958 2 * (j & 2) + 4 * (i & 2))
959 : (4 * i + j);
960 s->non_zero_count_cache[scan8[k]] = 1;
961
962 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], index, type)) {
963 av_log(h->avctx, AV_LOG_ERROR,
964 "error while decoding block\n");
965 return -1;
966 }
967 }
968 }
969
970 if ((cbp & 0x30)) {
971 for (i = 1; i < 3; ++i)
972 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * 16 * i], 0, 3)) {
973 av_log(h->avctx, AV_LOG_ERROR,
974 "error while decoding chroma dc block\n");
975 return -1;
976 }
977
978 if ((cbp & 0x20)) {
979 for (i = 1; i < 3; i++) {
980 for (j = 0; j < 4; j++) {
981 k = 16 * i + j;
982 s->non_zero_count_cache[scan8[k]] = 1;
983
984 if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], 1, 1)) {
985 av_log(h->avctx, AV_LOG_ERROR,
986 "error while decoding chroma ac block\n");
987 return -1;
988 }
989 }
990 }
991 }
992 }
993 }
994
995 s->cbp = cbp;
996 s->cur_pic->mb_type[mb_xy] = mb_type;
997
998 if (IS_INTRA(mb_type))
999 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
1000 s->left_samples_available, DC_PRED8x8, 1);
1001
1002 return 0;
1003 }
1004
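/*
 * Sketch of the slice header byte parsed below (illustration only): for a
 * header byte of 0x21, (header & 0x9F) == 0x01 is one of the two accepted
 * slice types (the variant that does not carry an explicit macroblock
 * index), and (header >> 5 & 3) == 1 means the slice length that follows is
 * stored in a single byte.
 */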
1005 static int svq3_decode_slice_header(AVCodecContext *avctx)
1006 {
1007 SVQ3Context *s = avctx->priv_data;
1008 H264Context *h = &s->h;
1009 const int mb_xy = s->mb_xy;
1010 int i, header;
1011 unsigned slice_id;
1012
1013 header = get_bits(&s->gb, 8);
1014
1015 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1016 /* TODO: what? */
1017 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1018 return -1;
1019 } else {
1020 int slice_bits, slice_bytes, slice_length;
1021 int length = header >> 5 & 3;
1022
1023 slice_length = show_bits(&s->gb, 8 * length);
1024 slice_bits = slice_length * 8;
1025 slice_bytes = slice_length + length - 1;
1026
1027 if (slice_bytes > get_bits_left(&s->gb)) {
1028 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1029 return -1;
1030 }
1031
1032 skip_bits(&s->gb, 8);
1033
1034 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1035 if (!s->slice_buf)
1036 return AVERROR(ENOMEM);
1037
1038 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1039
1040 init_get_bits(&s->gb_slice, s->slice_buf, slice_bits);
1041
1042 if (s->watermark_key) {
1043 uint32_t header = AV_RL32(&s->gb_slice.buffer[1]);
1044 AV_WL32(&s->gb_slice.buffer[1], header ^ s->watermark_key);
1045 }
1046 if (length > 0) {
1047 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1048 }
1049 skip_bits_long(&s->gb, slice_bytes * 8);
1050 }
1051
1052 if ((slice_id = svq3_get_ue_golomb(&s->gb_slice)) >= 3) {
1053 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u\n", slice_id);
1054 return -1;
1055 }
1056
1057 s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
1058
1059 if ((header & 0x9F) == 2) {
1060 i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1061 get_bits(&s->gb_slice, i);
1062 } else {
1063 skip_bits1(&s->gb_slice);
1064 }
1065
1066 s->slice_num = get_bits(&s->gb_slice, 8);
1067 s->qscale = get_bits(&s->gb_slice, 5);
1068 s->adaptive_quant = get_bits1(&s->gb_slice);
1069
1070 /* unknown fields */
1071 skip_bits1(&s->gb_slice);
1072
1073 if (s->unknown_flag)
1074 skip_bits1(&s->gb_slice);
1075
1076 skip_bits1(&s->gb_slice);
1077 skip_bits(&s->gb_slice, 2);
1078
1079 while (get_bits1(&s->gb_slice))
1080 skip_bits(&s->gb_slice, 8);
1081
1082 /* reset intra predictors and invalidate motion vector references */
1083 if (s->mb_x > 0) {
1084 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1085 -1, 4 * sizeof(int8_t));
1086 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1087 -1, 8 * sizeof(int8_t) * s->mb_x);
1088 }
1089 if (s->mb_y > 0) {
1090 memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1091 -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1092
1093 if (s->mb_x > 0)
1094 s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1095 }
1096
1097 return 0;
1098 }
1099
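/*
 * Quick numeric check of the table built below (illustration only): for
 * qp == 12, ff_h264_quant_div6[12] == 2 and ff_h264_quant_rem6[12] == 0, so
 * shift == 4 and every entry is ff_h264_dequant4_coeff_init[0][...] * 16
 * shifted left by 4, i.e. 256 times the base value; each additional 6 in qp
 * doubles the scale.
 */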
1100 static void init_dequant4_coeff_table(SVQ3Context *s)
1101 {
1102 int q, x;
1103 const int max_qp = 51;
1104
1105 for (q = 0; q < max_qp + 1; q++) {
1106 int shift = ff_h264_quant_div6[q] + 2;
1107 int idx = ff_h264_quant_rem6[q];
1108 for (x = 0; x < 16; x++)
1109 s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1110 ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1111 }
1112 }
1113
1114 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1115 {
1116 SVQ3Context *s = avctx->priv_data;
1117 H264Context *h = &s->h;
1118 int m, x, y;
1119 unsigned char *extradata;
1120 unsigned char *extradata_end;
1121 unsigned int size;
1122 int marker_found = 0;
1123
1124 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1125 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1126 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1127 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1128 av_freep(&s->cur_pic);
1129 av_freep(&s->last_pic);
1130 av_freep(&s->next_pic);
1131 return AVERROR(ENOMEM);
1132 }
1133
1134 s->cur_pic->f = av_frame_alloc();
1135 s->last_pic->f = av_frame_alloc();
1136 s->next_pic->f = av_frame_alloc();
1137 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1138 return AVERROR(ENOMEM);
1139
1140 if (ff_h264_decode_init(avctx) < 0)
1141 return -1;
1142
1143 // we will overwrite it later during decoding
1144 av_frame_free(&h->cur_pic.f);
1145
1146 ff_h264dsp_init(&s->h264dsp, 8, 1);
1147 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1148 ff_videodsp_init(&s->vdsp, 8);
1149
1150 h->sps.bit_depth_luma = 8;
1151 h->chroma_format_idc = 1;
1152
1153 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1154 ff_tpeldsp_init(&s->tdsp);
1155
1156 h->flags = avctx->flags;
1157 h->picture_structure = PICT_FRAME;
1158 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1159 avctx->color_range = AVCOL_RANGE_JPEG;
1160
1161 h->chroma_x_shift = h->chroma_y_shift = 1;
1162
1163 s->halfpel_flag = 1;
1164 s->thirdpel_flag = 1;
1165 s->unknown_flag = 0;
1166
1167 /* prowl for the "SEQH" marker in the extradata */
1168 extradata = (unsigned char *)avctx->extradata;
1169 extradata_end = avctx->extradata + avctx->extradata_size;
1170 if (extradata) {
1171 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1172 if (!memcmp(extradata, "SEQH", 4)) {
1173 marker_found = 1;
1174 break;
1175 }
1176 extradata++;
1177 }
1178 }
1179
1180 /* if a match was found, parse the extra data */
1181 if (marker_found) {
1182 GetBitContext gb;
1183 int frame_size_code;
1184
1185 size = AV_RB32(&extradata[4]);
1186 if (size > extradata_end - extradata - 8)
1187 return AVERROR_INVALIDDATA;
1188 init_get_bits(&gb, extradata + 8, size * 8);
1189
1190 /* 'frame size code' and optional 'width, height' */
1191 frame_size_code = get_bits(&gb, 3);
1192 switch (frame_size_code) {
1193 case 0:
1194 avctx->width = 160;
1195 avctx->height = 120;
1196 break;
1197 case 1:
1198 avctx->width = 128;
1199 avctx->height = 96;
1200 break;
1201 case 2:
1202 avctx->width = 176;
1203 avctx->height = 144;
1204 break;
1205 case 3:
1206 avctx->width = 352;
1207 avctx->height = 288;
1208 break;
1209 case 4:
1210 avctx->width = 704;
1211 avctx->height = 576;
1212 break;
1213 case 5:
1214 avctx->width = 240;
1215 avctx->height = 180;
1216 break;
1217 case 6:
1218 avctx->width = 320;
1219 avctx->height = 240;
1220 break;
1221 case 7:
1222 avctx->width = get_bits(&gb, 12);
1223 avctx->height = get_bits(&gb, 12);
1224 break;
1225 }
1226
1227 s->halfpel_flag = get_bits1(&gb);
1228 s->thirdpel_flag = get_bits1(&gb);
1229
1230 /* unknown fields */
1231 skip_bits1(&gb);
1232 skip_bits1(&gb);
1233 skip_bits1(&gb);
1234 skip_bits1(&gb);
1235
1236 h->low_delay = get_bits1(&gb);
1237
1238 /* unknown field */
1239 skip_bits1(&gb);
1240
1241 while (get_bits1(&gb))
1242 skip_bits(&gb, 8);
1243
1244 s->unknown_flag = get_bits1(&gb);
1245 avctx->has_b_frames = !h->low_delay;
1246 if (s->unknown_flag) {
1247 #if CONFIG_ZLIB
1248 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1249 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1250 int u1 = svq3_get_ue_golomb(&gb);
1251 int u2 = get_bits(&gb, 8);
1252 int u3 = get_bits(&gb, 2);
1253 int u4 = svq3_get_ue_golomb(&gb);
1254 unsigned long buf_len = watermark_width *
1255 watermark_height * 4;
1256 int offset = get_bits_count(&gb) + 7 >> 3;
1257 uint8_t *buf;
1258
1259 if (watermark_height > 0 &&
1260 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1261 return -1;
1262
1263 buf = av_malloc(buf_len);
1264 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1265 watermark_width, watermark_height);
1266 av_log(avctx, AV_LOG_DEBUG,
1267 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1268 u1, u2, u3, u4, offset);
1269 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1270 size - offset) != Z_OK) {
1271 av_log(avctx, AV_LOG_ERROR,
1272 "could not uncompress watermark logo\n");
1273 av_free(buf);
1274 return -1;
1275 }
1276 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1277 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1278 av_log(avctx, AV_LOG_DEBUG,
1279 "watermark key %#"PRIx32"\n", s->watermark_key);
1280 av_free(buf);
1281 #else
1282 av_log(avctx, AV_LOG_ERROR,
1283 "this svq3 file contains watermark which need zlib support compiled in\n");
1284 return -1;
1285 #endif
1286 }
1287 }
1288
1289 s->mb_width = (avctx->width + 15) / 16;
1290 s->mb_height = (avctx->height + 15) / 16;
1291 s->mb_stride = s->mb_width + 1;
1292 s->mb_num = s->mb_width * s->mb_height;
1293 s->b_stride = 4 * s->mb_width;
1294 s->h_edge_pos = s->mb_width * 16;
1295 s->v_edge_pos = s->mb_height * 16;
1296
1297 s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1298 if (!s->intra4x4_pred_mode)
1299 return AVERROR(ENOMEM);
1300
1301 s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1302 sizeof(*s->mb2br_xy));
1303 if (!s->mb2br_xy)
1304 return AVERROR(ENOMEM);
1305
1306 for (y = 0; y < s->mb_height; y++)
1307 for (x = 0; x < s->mb_width; x++) {
1308 const int mb_xy = x + y * s->mb_stride;
1309
1310 s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1311 }
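/* Example of the mapping just built (illustration only): with a
 * 352-pixel-wide stream, mb_width == 22 and mb_stride == 23, so the mapping
 * repeats every 2 * mb_stride == 46 macroblocks and e.g. mb_xy 0 and 46
 * share the same 8-byte slot of intra4x4_pred_mode; only two rows of
 * prediction context are kept at a time. */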
1312
1313 init_dequant4_coeff_table(s);
1314
1315 return 0;
1316 }
1317
1318 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1319 {
1320 int i;
1321 for (i = 0; i < 2; i++) {
1322 av_buffer_unref(&pic->motion_val_buf[i]);
1323 av_buffer_unref(&pic->ref_index_buf[i]);
1324 }
1325 av_buffer_unref(&pic->mb_type_buf);
1326
1327 av_frame_unref(pic->f);
1328 }
1329
1330 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1331 {
1332 SVQ3Context *s = avctx->priv_data;
1333 const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
1334 const int mb_array_size = s->mb_stride * s->mb_height;
1335 const int b4_stride = s->mb_width * 4 + 1;
1336 const int b4_array_size = b4_stride * s->mb_height * 4;
1337 int ret;
1338
1339 if (!pic->motion_val_buf[0]) {
1340 int i;
1341
1342 pic->mb_type_buf = av_buffer_allocz((big_mb_num + s->mb_stride) * sizeof(uint32_t));
1343 if (!pic->mb_type_buf)
1344 return AVERROR(ENOMEM);
1345 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * s->mb_stride + 1;
1346
1347 for (i = 0; i < 2; i++) {
1348 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1349 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1350 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1351 ret = AVERROR(ENOMEM);
1352 goto fail;
1353 }
1354
1355 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1356 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1357 }
1358 }
1359 pic->reference = !(s->pict_type == AV_PICTURE_TYPE_B);
1360
1361 ret = ff_get_buffer(avctx, pic->f,
1362 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1363 if (ret < 0)
1364 goto fail;
1365
1366 if (!s->edge_emu_buffer) {
1367 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1368 if (!s->edge_emu_buffer)
1369 return AVERROR(ENOMEM);
1370 }
1371
1372 return 0;
1373 fail:
1374 free_picture(avctx, pic);
1375 return ret;
1376 }
1377
1378 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1379 int *got_frame, AVPacket *avpkt)
1380 {
1381 const uint8_t *buf = avpkt->data;
1382 SVQ3Context *s = avctx->priv_data;
1383 H264Context *h = &s->h;
1384 int buf_size = avpkt->size;
1385 int ret, m, i;
1386
1387 /* special case for last picture */
1388 if (buf_size == 0) {
1389 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1390 ret = av_frame_ref(data, s->next_pic->f);
1391 if (ret < 0)
1392 return ret;
1393 s->last_frame_output = 1;
1394 *got_frame = 1;
1395 }
1396 return 0;
1397 }
1398
1399 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1400 if (ret < 0)
1401 return ret;
1402
1403 s->mb_x = s->mb_y = s->mb_xy = 0;
1404
1405 if (svq3_decode_slice_header(avctx))
1406 return -1;
1407
1408 if (s->pict_type != AV_PICTURE_TYPE_B)
1409 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1410
1411 av_frame_unref(s->cur_pic->f);
1412
1413 /* for skipping the frame */
1414 s->cur_pic->f->pict_type = s->pict_type;
1415 s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1416
1417 ret = get_buffer(avctx, s->cur_pic);
1418 if (ret < 0)
1419 return ret;
1420
1421 for (i = 0; i < 16; i++) {
1422 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1423 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1424 }
1425 for (i = 0; i < 16; i++) {
1426 h->block_offset[16 + i] =
1427 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1428 h->block_offset[48 + 16 + i] =
1429 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1430 }
1431
1432 if (s->pict_type != AV_PICTURE_TYPE_I) {
1433 if (!s->last_pic->f->data[0]) {
1434 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1435 ret = get_buffer(avctx, s->last_pic);
1436 if (ret < 0)
1437 return ret;
1438 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1439 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1440 s->last_pic->f->linesize[1]);
1441 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1442 s->last_pic->f->linesize[2]);
1443 }
1444
1445 if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1446 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1447 ret = get_buffer(avctx, s->next_pic);
1448 if (ret < 0)
1449 return ret;
1450 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1451 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1452 s->next_pic->f->linesize[1]);
1453 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1454 s->next_pic->f->linesize[2]);
1455 }
1456 }
1457
1458 if (avctx->debug & FF_DEBUG_PICT_INFO)
1459 av_log(h->avctx, AV_LOG_DEBUG,
1460 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1461 av_get_picture_type_char(s->pict_type),
1462 s->halfpel_flag, s->thirdpel_flag,
1463 s->adaptive_quant, s->qscale, s->slice_num);
1464
1465 if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1466 avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1467 avctx->skip_frame >= AVDISCARD_ALL)
1468 return 0;
1469
1470 if (s->next_p_frame_damaged) {
1471 if (s->pict_type == AV_PICTURE_TYPE_B)
1472 return 0;
1473 else
1474 s->next_p_frame_damaged = 0;
1475 }
1476
1477 if (s->pict_type == AV_PICTURE_TYPE_B) {
1478 s->frame_num_offset = s->slice_num - s->prev_frame_num;
1479
1480 if (s->frame_num_offset < 0)
1481 s->frame_num_offset += 256;
1482 if (s->frame_num_offset == 0 ||
1483 s->frame_num_offset >= s->prev_frame_num_offset) {
1484 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1485 return -1;
1486 }
1487 } else {
1488 s->prev_frame_num = s->frame_num;
1489 s->frame_num = s->slice_num;
1490 s->prev_frame_num_offset = s->frame_num - s->prev_frame_num;
1491
1492 if (s->prev_frame_num_offset < 0)
1493 s->prev_frame_num_offset += 256;
1494 }
1495
1496 for (m = 0; m < 2; m++) {
1497 int i;
1498 for (i = 0; i < 4; i++) {
1499 int j;
1500 for (j = -1; j < 4; j++)
1501 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1502 if (i < 3)
1503 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1504 }
1505 }
1506
1507 for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1508 for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1509 unsigned mb_type;
1510 s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1511
1512 if ((get_bits_left(&s->gb_slice)) <= 7) {
1513 if (((get_bits_count(&s->gb_slice) & 7) == 0 ||
1514 show_bits(&s->gb_slice, get_bits_left(&s->gb_slice) & 7) == 0)) {
1515
1516 if (svq3_decode_slice_header(avctx))
1517 return -1;
1518 }
1519 /* TODO: support s->mb_skip_run */
1520 }
1521
1522 mb_type = svq3_get_ue_golomb(&s->gb_slice);
1523
1524 if (s->pict_type == AV_PICTURE_TYPE_I)
1525 mb_type += 8;
1526 else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1527 mb_type += 4;
1528 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1529 av_log(h->avctx, AV_LOG_ERROR,
1530 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1531 return -1;
1532 }
1533
1534 if (mb_type != 0)
1535 hl_decode_mb(s, h);
1536
1537 if (s->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1538 s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1539 (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1540 }
1541
1542 ff_draw_horiz_band(avctx, s->cur_pic->f,
1543 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1544 16 * s->mb_y, 16, h->picture_structure, 0,
1545 h->low_delay);
1546 }
1547
1548 if (s->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1549 ret = av_frame_ref(data, s->cur_pic->f);
1550 else if (s->last_pic->f->data[0])
1551 ret = av_frame_ref(data, s->last_pic->f);
1552 if (ret < 0)
1553 return ret;
1554
1555 /* Do not output the last pic after seeking. */
1556 if (s->last_pic->f->data[0] || h->low_delay)
1557 *got_frame = 1;
1558
1559 if (s->pict_type != AV_PICTURE_TYPE_B) {
1560 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1561 } else {
1562 av_frame_unref(s->cur_pic->f);
1563 }
1564
1565 return buf_size;
1566 }
1567
1568 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1569 {
1570 SVQ3Context *s = avctx->priv_data;
1571 H264Context *h = &s->h;
1572
1573 free_picture(avctx, s->cur_pic);
1574 free_picture(avctx, s->next_pic);
1575 free_picture(avctx, s->last_pic);
1576 av_frame_free(&s->cur_pic->f);
1577 av_frame_free(&s->next_pic->f);
1578 av_frame_free(&s->last_pic->f);
1579 av_freep(&s->cur_pic);
1580 av_freep(&s->next_pic);
1581 av_freep(&s->last_pic);
1582 av_freep(&s->slice_buf);
1583 av_freep(&s->intra4x4_pred_mode);
1584 av_freep(&s->edge_emu_buffer);
1585 av_freep(&s->mb2br_xy);
1586
1587 ff_h264_free_context(h);
1588
1589 return 0;
1590 }
1591
1592 AVCodec ff_svq3_decoder = {
1593 .name = "svq3",
1594 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1595 .type = AVMEDIA_TYPE_VIDEO,
1596 .id = AV_CODEC_ID_SVQ3,
1597 .priv_data_size = sizeof(SVQ3Context),
1598 .init = svq3_decode_init,
1599 .close = svq3_decode_end,
1600 .decode = svq3_decode_frame,
1601 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1602 AV_CODEC_CAP_DR1 |
1603 AV_CODEC_CAP_DELAY,
1604 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1605 AV_PIX_FMT_NONE},
1606 };