svq3: move {ref,mv}_cache to the SVQ3Context
1 /*
2 * Copyright (c) 2003 The Libav Project
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple QuickTime files. QuickTime files
24 * have stsd atoms to describe media trak properties. An stsd atom for a
25 * video trak contains one or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. To get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] fields:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 * http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
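/*
 * A minimal sketch of how a calling app might satisfy the above, assuming it
 * has already located the ImageDescription payload inside the stsd atom (the
 * names desc and desc_size below are hypothetical, not part of any API):
 *
 *     // desc points at the bytes 'S','V','Q','3', i.e. just past the 4-byte
 *     // atom length; desc_size is the atom size field minus 4.
 *     avctx->extradata = av_mallocz(desc_size + AV_INPUT_BUFFER_PADDING_SIZE);
 *     if (!avctx->extradata)
 *         return AVERROR(ENOMEM);
 *     memcpy(avctx->extradata, desc, desc_size);
 *     avctx->extradata_size = desc_size;
 */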
42
43 #include <inttypes.h>
44
45 #include "libavutil/attributes.h"
46 #include "internal.h"
47 #include "avcodec.h"
48 #include "mpegutils.h"
49 #include "h264.h"
50 #include "h264_mvpred.h"
51 #include "h264data.h"
52 #include "golomb.h"
53 #include "hpeldsp.h"
54 #include "mathops.h"
55 #include "rectangle.h"
56 #include "tpeldsp.h"
57
58 #if CONFIG_ZLIB
59 #include <zlib.h>
60 #endif
61
62 #include "svq1.h"
63
64 /**
65 * @file
66 * svq3 decoder.
67 */
68
69 typedef struct SVQ3Context {
70 H264Context h;
71
72 H264DSPContext h264dsp;
73 H264PredContext hpc;
74 HpelDSPContext hdsp;
75 TpelDSPContext tdsp;
76 VideoDSPContext vdsp;
77
78 H264Picture *cur_pic;
79 H264Picture *next_pic;
80 H264Picture *last_pic;
81 GetBitContext gb;
82 uint8_t *slice_buf;
83 int slice_size;
84 int halfpel_flag;
85 int thirdpel_flag;
86 int unknown_flag;
87 uint32_t watermark_key;
88 int adaptive_quant;
89 int next_p_frame_damaged;
90 int h_edge_pos;
91 int v_edge_pos;
92 int last_frame_output;
93
94 int mb_x, mb_y;
95 int mb_xy;
96
97 int chroma_pred_mode;
98 int intra16x16_pred_mode;
99
100 int8_t intra4x4_pred_mode_cache[5 * 8];
101 int8_t (*intra4x4_pred_mode);
102
103 unsigned int top_samples_available;
104 unsigned int topright_samples_available;
105 unsigned int left_samples_available;
106
107 uint8_t *edge_emu_buffer;
108
109 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
110 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
111 } SVQ3Context;
112
113 #define FULLPEL_MODE 1
114 #define HALFPEL_MODE 2
115 #define THIRDPEL_MODE 3
116 #define PREDICT_MODE 4
117
118 /* dual scan (from some older h264 draft)
119  *  o-->o-->o   o
120  *          |  /|
121  *  o   o   o / o
122  *  | / |   |/  |
123  *  o   o   o   o
124  *    /
125  *  o-->o-->o-->o
126  */
127 static const uint8_t svq3_scan[16] = {
128 0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
129 2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
130 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
131 0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
132 };
133
134 static const uint8_t luma_dc_zigzag_scan[16] = {
135 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
136 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
137 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
138 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
139 };
140
141 static const uint8_t svq3_pred_0[25][2] = {
142 { 0, 0 },
143 { 1, 0 }, { 0, 1 },
144 { 0, 2 }, { 1, 1 }, { 2, 0 },
145 { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
146 { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
147 { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
148 { 2, 4 }, { 3, 3 }, { 4, 2 },
149 { 4, 3 }, { 3, 4 },
150 { 4, 4 }
151 };
152
153 static const int8_t svq3_pred_1[6][6][5] = {
154 { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
155 { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
156 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
157 { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
158 { { 2, 0, -1, -1, -1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
159 { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
160 { { 2, 0, -1, -1, -1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
161 { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
162 { { 0, 2, -1, -1, -1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
163 { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
164 { { 0, 2, -1, -1, -1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
165 { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
166 };
167
168 static const struct {
169 uint8_t run;
170 uint8_t level;
171 } svq3_dct_tables[2][16] = {
172 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
173 { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
174 { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
175 { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
176 };
177
178 static const uint32_t svq3_dequant_coeff[32] = {
179 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
180 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
181 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
182 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
183 };
184
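/* Dequantize the 4x4 luma DC coefficients with the SVQ3 transform and
 * scatter the results to the DC positions of the 16 4x4 blocks in output. */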
185 static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
186 {
187 const int qmul = svq3_dequant_coeff[qp];
188 #define stride 16
189 int i;
190 int temp[16];
191 static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
192
193 for (i = 0; i < 4; i++) {
194 const int z0 = 13 * (input[4 * i + 0] + input[4 * i + 2]);
195 const int z1 = 13 * (input[4 * i + 0] - input[4 * i + 2]);
196 const int z2 = 7 * input[4 * i + 1] - 17 * input[4 * i + 3];
197 const int z3 = 17 * input[4 * i + 1] + 7 * input[4 * i + 3];
198
199 temp[4 * i + 0] = z0 + z3;
200 temp[4 * i + 1] = z1 + z2;
201 temp[4 * i + 2] = z1 - z2;
202 temp[4 * i + 3] = z0 - z3;
203 }
204
205 for (i = 0; i < 4; i++) {
206 const int offset = x_offset[i];
207 const int z0 = 13 * (temp[4 * 0 + i] + temp[4 * 2 + i]);
208 const int z1 = 13 * (temp[4 * 0 + i] - temp[4 * 2 + i]);
209 const int z2 = 7 * temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
210 const int z3 = 17 * temp[4 * 1 + i] + 7 * temp[4 * 3 + i];
211
212 output[stride * 0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
213 output[stride * 2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
214 output[stride * 8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
215 output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
216 }
217 }
218 #undef stride
219
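/* Dequantize and inverse-transform a 4x4 block and add the result to dst.
 * A non-zero dc means block[0] holds a DC value reconstructed elsewhere
 * (dc == 1 for intra luma, dc == 2 for chroma). */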
220 static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
221 int stride, int qp, int dc)
222 {
223 const int qmul = svq3_dequant_coeff[qp];
224 int i;
225
226 if (dc) {
227 dc = 13 * 13 * (dc == 1 ? 1538 * block[0]
228 : qmul * (block[0] >> 3) / 2);
229 block[0] = 0;
230 }
231
232 for (i = 0; i < 4; i++) {
233 const int z0 = 13 * (block[0 + 4 * i] + block[2 + 4 * i]);
234 const int z1 = 13 * (block[0 + 4 * i] - block[2 + 4 * i]);
235 const int z2 = 7 * block[1 + 4 * i] - 17 * block[3 + 4 * i];
236 const int z3 = 17 * block[1 + 4 * i] + 7 * block[3 + 4 * i];
237
238 block[0 + 4 * i] = z0 + z3;
239 block[1 + 4 * i] = z1 + z2;
240 block[2 + 4 * i] = z1 - z2;
241 block[3 + 4 * i] = z0 - z3;
242 }
243
244 for (i = 0; i < 4; i++) {
245 const int z0 = 13 * (block[i + 4 * 0] + block[i + 4 * 2]);
246 const int z1 = 13 * (block[i + 4 * 0] - block[i + 4 * 2]);
247 const int z2 = 7 * block[i + 4 * 1] - 17 * block[i + 4 * 3];
248 const int z3 = 17 * block[i + 4 * 1] + 7 * block[i + 4 * 3];
249 const int rr = (dc + 0x80000);
250
251 dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
252 dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
253 dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
254 dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
255 }
256
257 memset(block, 0, 16 * sizeof(int16_t));
258 }
259
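/* Decode run/level Golomb codes for one block into the scan order selected
 * by type (luma DC, zigzag, SVQ3 scan or chroma DC); returns -1 if a run
 * takes the coefficient index past its limit. */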
260 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
261 int index, const int type)
262 {
263 static const uint8_t *const scan_patterns[4] = {
264 luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
265 };
266
267 int run, level, limit;
268 unsigned vlc;
269 const int intra = 3 * type >> 2;
270 const uint8_t *const scan = scan_patterns[type];
271
272 for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
273 for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
274 int sign = (vlc & 1) ? 0 : -1;
275 vlc = vlc + 1 >> 1;
276
277 if (type == 3) {
278 if (vlc < 3) {
279 run = 0;
280 level = vlc;
281 } else if (vlc < 4) {
282 run = 1;
283 level = 1;
284 } else {
285 run = vlc & 0x3;
286 level = (vlc + 9 >> 2) - run;
287 }
288 } else {
289 if (vlc < 16) {
290 run = svq3_dct_tables[intra][vlc].run;
291 level = svq3_dct_tables[intra][vlc].level;
292 } else if (intra) {
293 run = vlc & 0x7;
294 level = (vlc >> 3) +
295 ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
296 } else {
297 run = vlc & 0xF;
298 level = (vlc >> 4) +
299 ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
300 }
301 }
302
303 if ((index += run) >= limit)
304 return -1;
305
306 block[scan[index]] = (level ^ sign) - sign;
307 }
308
309 if (type != 2) {
310 break;
311 }
312 }
313
314 return 0;
315 }
316
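/* Return the reference of the top-right neighbour, or of the top-left one
 * when the top-right block is unavailable, and point *C at its motion vector. */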
317 static av_always_inline int
318 svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
319 int i, int list, int part_width)
320 {
321 const int topright_ref = s->ref_cache[list][i - 8 + part_width];
322
323 if (topright_ref != PART_NOT_AVAILABLE) {
324 *C = s->mv_cache[list][i - 8 + part_width];
325 return topright_ref;
326 } else {
327 *C = s->mv_cache[list][i - 8 - 1];
328 return s->ref_cache[list][i - 8 - 1];
329 }
330 }
331
332 /**
333 * Get the predicted MV.
334 * @param n the block index
335 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
336 * @param mx the x component of the predicted motion vector
337 * @param my the y component of the predicted motion vector
338 */
339 static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
340 int part_width, int list,
341 int ref, int *const mx, int *const my)
342 {
343 const int index8 = scan8[n];
344 const int top_ref = s->ref_cache[list][index8 - 8];
345 const int left_ref = s->ref_cache[list][index8 - 1];
346 const int16_t *const A = s->mv_cache[list][index8 - 1];
347 const int16_t *const B = s->mv_cache[list][index8 - 8];
348 const int16_t *C;
349 int diagonal_ref, match_count;
350
351 /* mv_cache
352 * B . . A T T T T
353 * U . . L . . , .
354 * U . . L . . . .
355 * U . . L . . , .
356 * . . . L . . . .
357 */
358
359 diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
360 match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
361 if (match_count > 1) { //most common
362 *mx = mid_pred(A[0], B[0], C[0]);
363 *my = mid_pred(A[1], B[1], C[1]);
364 } else if (match_count == 1) {
365 if (left_ref == ref) {
366 *mx = A[0];
367 *my = A[1];
368 } else if (top_ref == ref) {
369 *mx = B[0];
370 *my = B[1];
371 } else {
372 *mx = C[0];
373 *my = C[1];
374 }
375 } else {
376 if (top_ref == PART_NOT_AVAILABLE &&
377 diagonal_ref == PART_NOT_AVAILABLE &&
378 left_ref != PART_NOT_AVAILABLE) {
379 *mx = A[0];
380 *my = A[1];
381 } else {
382 *mx = mid_pred(A[0], B[0], C[0]);
383 *my = mid_pred(A[1], B[1], C[1]);
384 }
385 }
386 }
387
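/* Motion-compensate one partition from the previous (dir == 0) or next
 * (dir == 1) reference picture, using halfpel or thirdpel interpolation and
 * edge emulation when the vector reaches outside the frame; chroma is
 * skipped in grey-only decoding. */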
388 static inline void svq3_mc_dir_part(SVQ3Context *s,
389 int x, int y, int width, int height,
390 int mx, int my, int dxy,
391 int thirdpel, int dir, int avg)
392 {
393 H264Context *h = &s->h;
394 H264SliceContext *sl = &h->slice_ctx[0];
395 const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
396 uint8_t *src, *dest;
397 int i, emu = 0;
398 int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
399
400 mx += x;
401 my += y;
402
403 if (mx < 0 || mx >= s->h_edge_pos - width - 1 ||
404 my < 0 || my >= s->v_edge_pos - height - 1) {
405 emu = 1;
406 mx = av_clip(mx, -16, s->h_edge_pos - width + 15);
407 my = av_clip(my, -16, s->v_edge_pos - height + 15);
408 }
409
410 /* form component predictions */
411 dest = h->cur_pic.f->data[0] + x + y * sl->linesize;
412 src = pic->f->data[0] + mx + my * sl->linesize;
413
414 if (emu) {
415 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
416 sl->linesize, sl->linesize,
417 width + 1, height + 1,
418 mx, my, s->h_edge_pos, s->v_edge_pos);
419 src = s->edge_emu_buffer;
420 }
421 if (thirdpel)
422 (avg ? s->tdsp.avg_tpel_pixels_tab
423 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, sl->linesize,
424 width, height);
425 else
426 (avg ? s->hdsp.avg_pixels_tab
427 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, sl->linesize,
428 height);
429
430 if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
431 mx = mx + (mx < (int) x) >> 1;
432 my = my + (my < (int) y) >> 1;
433 width = width >> 1;
434 height = height >> 1;
435 blocksize++;
436
437 for (i = 1; i < 3; i++) {
438 dest = h->cur_pic.f->data[i] + (x >> 1) + (y >> 1) * sl->uvlinesize;
439 src = pic->f->data[i] + mx + my * sl->uvlinesize;
440
441 if (emu) {
442 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
443 sl->uvlinesize, sl->uvlinesize,
444 width + 1, height + 1,
445 mx, my, (s->h_edge_pos >> 1),
446 s->v_edge_pos >> 1);
447 src = s->edge_emu_buffer;
448 }
449 if (thirdpel)
450 (avg ? s->tdsp.avg_tpel_pixels_tab
451 : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
452 sl->uvlinesize,
453 width, height);
454 else
455 (avg ? s->hdsp.avg_pixels_tab
456 : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
457 sl->uvlinesize,
458 height);
459 }
460 }
461 }
462
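/* Predict or decode the motion vectors for every partition of the current
 * macroblock, run motion compensation for each of them, and write the
 * vectors back to mv_cache and cur_pic.motion_val. In PREDICT_MODE the
 * vectors are derived from the co-located block of the next picture. */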
463 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
464 int dir, int avg)
465 {
466 int i, j, k, mx, my, dx, dy, x, y;
467 H264Context *h = &s->h;
468 const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
469 const int part_height = 16 >> ((unsigned)(size + 1) / 3);
470 const int extra_width = (mode == PREDICT_MODE) ? -16 * 6 : 0;
471 const int h_edge_pos = 6 * (s->h_edge_pos - part_width) - extra_width;
472 const int v_edge_pos = 6 * (s->v_edge_pos - part_height) - extra_width;
473
474 for (i = 0; i < 16; i += part_height)
475 for (j = 0; j < 16; j += part_width) {
476 const int b_xy = (4 * s->mb_x + (j >> 2)) +
477 (4 * s->mb_y + (i >> 2)) * h->b_stride;
478 int dxy;
479 x = 16 * s->mb_x + j;
480 y = 16 * s->mb_y + i;
481 k = (j >> 2 & 1) + (i >> 1 & 2) +
482 (j >> 1 & 4) + (i & 8);
483
484 if (mode != PREDICT_MODE) {
485 svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
486 } else {
487 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
488 my = s->next_pic->motion_val[0][b_xy][1] << 1;
489
490 if (dir == 0) {
491 mx = mx * h->frame_num_offset /
492 h->prev_frame_num_offset + 1 >> 1;
493 my = my * h->frame_num_offset /
494 h->prev_frame_num_offset + 1 >> 1;
495 } else {
496 mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
497 h->prev_frame_num_offset + 1 >> 1;
498 my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
499 h->prev_frame_num_offset + 1 >> 1;
500 }
501 }
502
503 /* clip motion vector prediction to frame border */
504 mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
505 my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
506
507 /* get (optional) motion vector differential */
508 if (mode == PREDICT_MODE) {
509 dx = dy = 0;
510 } else {
511 dy = svq3_get_se_golomb(&h->gb);
512 dx = svq3_get_se_golomb(&h->gb);
513
514 if (dx == INVALID_VLC || dy == INVALID_VLC) {
515 av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
516 return -1;
517 }
518 }
519
520 /* compute motion vector */
521 if (mode == THIRDPEL_MODE) {
522 int fx, fy;
523 mx = (mx + 1 >> 1) + dx;
524 my = (my + 1 >> 1) + dy;
525 fx = (unsigned)(mx + 0x3000) / 3 - 0x1000;
526 fy = (unsigned)(my + 0x3000) / 3 - 0x1000;
527 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
528
529 svq3_mc_dir_part(s, x, y, part_width, part_height,
530 fx, fy, dxy, 1, dir, avg);
531 mx += mx;
532 my += my;
533 } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
534 mx = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
535 my = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
536 dxy = (mx & 1) + 2 * (my & 1);
537
538 svq3_mc_dir_part(s, x, y, part_width, part_height,
539 mx >> 1, my >> 1, dxy, 0, dir, avg);
540 mx *= 3;
541 my *= 3;
542 } else {
543 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
544 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
545
546 svq3_mc_dir_part(s, x, y, part_width, part_height,
547 mx, my, 0, 0, dir, avg);
548 mx *= 6;
549 my *= 6;
550 }
551
552 /* update mv_cache */
553 if (mode != PREDICT_MODE) {
554 int32_t mv = pack16to32(mx, my);
555
556 if (part_height == 8 && i < 8) {
557 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
558
559 if (part_width == 8 && j < 8)
560 AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
561 }
562 if (part_width == 8 && j < 8)
563 AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
564 if (part_width == 4 || part_height == 4)
565 AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
566 }
567
568 /* write back motion vectors */
569 fill_rectangle(h->cur_pic.motion_val[dir][b_xy],
570 part_width >> 2, part_height >> 2, h->b_stride,
571 pack16to32(mx, my), 4);
572 }
573
574 return 0;
575 }
576
577 static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
578 int mb_type, const int *block_offset,
579 int linesize, uint8_t *dest_y)
580 {
581 int i;
582 if (!IS_INTRA4x4(mb_type)) {
583 for (i = 0; i < 16; i++)
584 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
585 uint8_t *const ptr = dest_y + block_offset[i];
586 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize,
587 sl->qscale, IS_INTRA(mb_type) ? 1 : 0);
588 }
589 }
590 }
591
592 static av_always_inline int dctcoef_get(int16_t *mb, int index)
593 {
594 return AV_RN16A(mb + index);
595 }
596
597 static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
598 const H264Context *h,
599 H264SliceContext *sl,
600 int mb_type,
601 const int *block_offset,
602 int linesize,
603 uint8_t *dest_y)
604 {
605 int i;
606 int qscale = sl->qscale;
607
608 if (IS_INTRA4x4(mb_type)) {
609 for (i = 0; i < 16; i++) {
610 uint8_t *const ptr = dest_y + block_offset[i];
611 const int dir = s->intra4x4_pred_mode_cache[scan8[i]];
612
613 uint8_t *topright;
614 int nnz, tr;
615 if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
616 const int topright_avail = (s->topright_samples_available << i) & 0x8000;
617 assert(s->mb_y || linesize <= block_offset[i]);
618 if (!topright_avail) {
619 tr = ptr[3 - linesize] * 0x01010101u;
620 topright = (uint8_t *)&tr;
621 } else
622 topright = ptr + 4 - linesize;
623 } else
624 topright = NULL;
625
626 s->hpc.pred4x4[dir](ptr, topright, linesize);
627 nnz = sl->non_zero_count_cache[scan8[i]];
628 if (nnz) {
629 svq3_add_idct_c(ptr, sl->mb + i * 16, linesize, qscale, 0);
630 }
631 }
632 } else {
633 s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
634 svq3_luma_dc_dequant_idct_c(sl->mb, sl->mb_luma_dc[0], qscale);
635 }
636 }
637
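/* Reconstruct the current macroblock: run chroma and luma intra prediction
 * where needed, then add the dequantized residuals. */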
638 static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext *sl)
639 {
640 const int mb_x = s->mb_x;
641 const int mb_y = s->mb_y;
642 const int mb_xy = s->mb_xy;
643 const int mb_type = h->cur_pic.mb_type[mb_xy];
644 uint8_t *dest_y, *dest_cb, *dest_cr;
645 int linesize, uvlinesize;
646 int i, j;
647 const int *block_offset = &h->block_offset[0];
648 const int block_h = 16 >> h->chroma_y_shift;
649
650 dest_y = h->cur_pic.f->data[0] + (mb_x + mb_y * sl->linesize) * 16;
651 dest_cb = h->cur_pic.f->data[1] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
652 dest_cr = h->cur_pic.f->data[2] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
653
654 s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * sl->linesize + 64, sl->linesize, 4);
655 s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * sl->uvlinesize + 64, dest_cr - dest_cb, 2);
656
657 h->list_counts[mb_xy] = sl->list_count;
658
659 linesize = sl->mb_linesize = sl->linesize;
660 uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;
661
662 if (IS_INTRA(mb_type)) {
663 s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
664 s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
665
666 hl_decode_mb_predict_luma(s, h, sl, mb_type, block_offset, linesize, dest_y);
667 }
668
669 hl_decode_mb_idct_luma(h, sl, mb_type, block_offset, linesize, dest_y);
670
671 if (sl->cbp & 0x30) {
672 uint8_t *dest[2] = { dest_cb, dest_cr };
673 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 1,
674 h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][sl->chroma_qp[0]][0]);
675 s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 2,
676 h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][sl->chroma_qp[1]][0]);
677 for (j = 1; j < 3; j++) {
678 for (i = j * 16; i < j * 16 + 4; i++)
679 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
680 uint8_t *const ptr = dest[j - 1] + block_offset[i];
681 svq3_add_idct_c(ptr, sl->mb + i * 16,
682 uvlinesize, ff_h264_chroma_qp[0][sl->qscale + 12] - 12, 2);
683 }
684 }
685 }
686 }
687
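/* Decode one macroblock of the given type: fill the prediction caches,
 * parse motion vectors or intra prediction modes, the coded block pattern,
 * an optional qscale delta and the residual coefficients. */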
688 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
689 {
690 H264Context *h = &s->h;
691 H264SliceContext *sl = &h->slice_ctx[0];
692 int i, j, k, m, dir, mode;
693 int cbp = 0;
694 uint32_t vlc;
695 int8_t *top, *left;
696 const int mb_xy = s->mb_xy;
697 const int b_xy = 4 * s->mb_x + 4 * s->mb_y * h->b_stride;
698
699 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
700 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
701 s->topright_samples_available = 0xFFFF;
702
703 if (mb_type == 0) { /* SKIP */
704 if (h->pict_type == AV_PICTURE_TYPE_P ||
705 s->next_pic->mb_type[mb_xy] == -1) {
706 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
707 0, 0, 0, 0, 0, 0);
708
709 if (h->pict_type == AV_PICTURE_TYPE_B)
710 svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
711 0, 0, 0, 0, 1, 1);
712
713 mb_type = MB_TYPE_SKIP;
714 } else {
715 mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
716 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
717 return -1;
718 if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
719 return -1;
720
721 mb_type = MB_TYPE_16x16;
722 }
723 } else if (mb_type < 8) { /* INTER */
724 if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
725 mode = THIRDPEL_MODE;
726 else if (s->halfpel_flag &&
727 s->thirdpel_flag == !get_bits1(&h->gb))
728 mode = HALFPEL_MODE;
729 else
730 mode = FULLPEL_MODE;
731
732 /* fill caches */
733 /* note ref_cache should contain here:
734 * ????????
735 * ???11111
736 * N??11111
737 * N??11111
738 * N??11111
739 */
740
741 for (m = 0; m < 2; m++) {
742 if (s->mb_x > 0 && s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
743 for (i = 0; i < 4; i++)
744 AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
745 h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
746 } else {
747 for (i = 0; i < 4; i++)
748 AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
749 }
750 if (s->mb_y > 0) {
751 memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
752 h->cur_pic.motion_val[m][b_xy - h->b_stride],
753 4 * 2 * sizeof(int16_t));
754 memset(&s->ref_cache[m][scan8[0] - 1 * 8],
755 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
756
757 if (s->mb_x < h->mb_width - 1) {
758 AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
759 h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
760 s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
761 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
762 s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
763 } else
764 s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
765 if (s->mb_x > 0) {
766 AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
767 h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
768 s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
769 (s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
770 } else
771 s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
772 } else
773 memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
774 PART_NOT_AVAILABLE, 8);
775
776 if (h->pict_type != AV_PICTURE_TYPE_B)
777 break;
778 }
779
780 /* decode motion vector(s) and form prediction(s) */
781 if (h->pict_type == AV_PICTURE_TYPE_P) {
782 if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
783 return -1;
784 } else { /* AV_PICTURE_TYPE_B */
785 if (mb_type != 2) {
786 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
787 return -1;
788 } else {
789 for (i = 0; i < 4; i++)
790 memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
791 0, 4 * 2 * sizeof(int16_t));
792 }
793 if (mb_type != 1) {
794 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
795 return -1;
796 } else {
797 for (i = 0; i < 4; i++)
798 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
799 0, 4 * 2 * sizeof(int16_t));
800 }
801 }
802
803 mb_type = MB_TYPE_16x16;
804 } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
805 int8_t *i4x4 = s->intra4x4_pred_mode + h->mb2br_xy[s->mb_xy];
806 int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
807
808 memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
809
810 if (mb_type == 8) {
811 if (s->mb_x > 0) {
812 for (i = 0; i < 4; i++)
813 s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
814 if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
815 s->left_samples_available = 0x5F5F;
816 }
817 if (s->mb_y > 0) {
818 s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
819 s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
820 s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
821 s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
822
823 if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
824 s->top_samples_available = 0x33FF;
825 }
826
827 /* decode prediction codes for luma blocks */
828 for (i = 0; i < 16; i += 2) {
829 vlc = svq3_get_ue_golomb(&h->gb);
830
831 if (vlc >= 25) {
832 av_log(h->avctx, AV_LOG_ERROR,
833 "luma prediction:%"PRIu32"\n", vlc);
834 return -1;
835 }
836
837 left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
838 top = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
839
840 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
841 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
842
843 if (left[1] == -1 || left[2] == -1) {
844 av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
845 return -1;
846 }
847 }
848 } else { /* mb_type == 33, DC_128_PRED block type */
849 for (i = 0; i < 4; i++)
850 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
851 }
852
853 AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
854 i4x4[4] = i4x4_cache[7 + 8 * 3];
855 i4x4[5] = i4x4_cache[7 + 8 * 2];
856 i4x4[6] = i4x4_cache[7 + 8 * 1];
857
858 if (mb_type == 8) {
859 ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
860 h->avctx, s->top_samples_available,
861 s->left_samples_available);
862
863 s->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
864 s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
865 } else {
866 for (i = 0; i < 4; i++)
867 memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
868
869 s->top_samples_available = 0x33FF;
870 s->left_samples_available = 0x5F5F;
871 }
872
873 mb_type = MB_TYPE_INTRA4x4;
874 } else { /* INTRA16x16 */
875 dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
876 dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
877
878 if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
879 s->left_samples_available, dir, 0)) < 0) {
880 av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
881 return s->intra16x16_pred_mode;
882 }
883
884 cbp = ff_h264_i_mb_type_info[mb_type - 8].cbp;
885 mb_type = MB_TYPE_INTRA16x16;
886 }
887
888 if (!IS_INTER(mb_type) && h->pict_type != AV_PICTURE_TYPE_I) {
889 for (i = 0; i < 4; i++)
890 memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
891 0, 4 * 2 * sizeof(int16_t));
892 if (h->pict_type == AV_PICTURE_TYPE_B) {
893 for (i = 0; i < 4; i++)
894 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
895 0, 4 * 2 * sizeof(int16_t));
896 }
897 }
898 if (!IS_INTRA4x4(mb_type)) {
899 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
900 }
901 if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
902 memset(sl->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
903 }
904
905 if (!IS_INTRA16x16(mb_type) &&
906 (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B)) {
907 if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
908 av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
909 return -1;
910 }
911
912 cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
913 : ff_h264_golomb_to_inter_cbp[vlc];
914 }
915 if (IS_INTRA16x16(mb_type) ||
916 (h->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
917 sl->qscale += svq3_get_se_golomb(&h->gb);
918
919 if (sl->qscale > 31u) {
920 av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", sl->qscale);
921 return -1;
922 }
923 }
924 if (IS_INTRA16x16(mb_type)) {
925 AV_ZERO128(sl->mb_luma_dc[0] + 0);
926 AV_ZERO128(sl->mb_luma_dc[0] + 8);
927 if (svq3_decode_block(&h->gb, sl->mb_luma_dc[0], 0, 1)) {
928 av_log(h->avctx, AV_LOG_ERROR,
929 "error while decoding intra luma dc\n");
930 return -1;
931 }
932 }
933
934 if (cbp) {
935 const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
936 const int type = ((sl->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
937
938 for (i = 0; i < 4; i++)
939 if ((cbp & (1 << i))) {
940 for (j = 0; j < 4; j++) {
941 k = index ? (1 * (j & 1) + 2 * (i & 1) +
942 2 * (j & 2) + 4 * (i & 2))
943 : (4 * i + j);
944 sl->non_zero_count_cache[scan8[k]] = 1;
945
946 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], index, type)) {
947 av_log(h->avctx, AV_LOG_ERROR,
948 "error while decoding block\n");
949 return -1;
950 }
951 }
952 }
953
954 if ((cbp & 0x30)) {
955 for (i = 1; i < 3; ++i)
956 if (svq3_decode_block(&h->gb, &sl->mb[16 * 16 * i], 0, 3)) {
957 av_log(h->avctx, AV_LOG_ERROR,
958 "error while decoding chroma dc block\n");
959 return -1;
960 }
961
962 if ((cbp & 0x20)) {
963 for (i = 1; i < 3; i++) {
964 for (j = 0; j < 4; j++) {
965 k = 16 * i + j;
966 sl->non_zero_count_cache[scan8[k]] = 1;
967
968 if (svq3_decode_block(&h->gb, &sl->mb[16 * k], 1, 1)) {
969 av_log(h->avctx, AV_LOG_ERROR,
970 "error while decoding chroma ac block\n");
971 return -1;
972 }
973 }
974 }
975 }
976 }
977 }
978
979 sl->cbp = cbp;
980 h->cur_pic.mb_type[mb_xy] = mb_type;
981
982 if (IS_INTRA(mb_type))
983 s->chroma_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, s->top_samples_available,
984 s->left_samples_available, DC_PRED8x8, 1);
985
986 return 0;
987 }
988
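/* Parse a slice header: copy the slice payload into slice_buf (undoing the
 * watermark scrambling if present), point h->gb at it, read the slice type,
 * skip run and quantizer, and reset the intra predictors along the slice
 * border. */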
989 static int svq3_decode_slice_header(AVCodecContext *avctx)
990 {
991 SVQ3Context *s = avctx->priv_data;
992 H264Context *h = &s->h;
993 H264SliceContext *sl = &h->slice_ctx[0];
994 const int mb_xy = s->mb_xy;
995 int i, header;
996 unsigned slice_id;
997
998 header = get_bits(&s->gb, 8);
999
1000 if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1001 /* TODO: what? */
1002 av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1003 return -1;
1004 } else {
1005 int slice_bits, slice_bytes, slice_length;
1006 int length = header >> 5 & 3;
1007
1008 slice_length = show_bits(&s->gb, 8 * length);
1009 slice_bits = slice_length * 8;
1010 slice_bytes = slice_length + length - 1;
1011
1012 if (slice_bytes > get_bits_left(&s->gb)) {
1013 av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1014 return -1;
1015 }
1016
1017 skip_bits(&s->gb, 8);
1018
1019 av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
1020 if (!s->slice_buf)
1021 return AVERROR(ENOMEM);
1022
1023 memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1024
1025 init_get_bits(&h->gb, s->slice_buf, slice_bits);
1026
1027 if (s->watermark_key) {
1028 uint32_t header = AV_RL32(&h->gb.buffer[1]);
1029 AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
1030 }
1031 if (length > 0) {
1032 memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1033 }
1034 skip_bits_long(&s->gb, slice_bytes * 8);
1035 }
1036
1037 if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
1038 av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
1039 return -1;
1040 }
1041
1042 sl->slice_type = ff_h264_golomb_to_pict_type[slice_id];
1043
1044 if ((header & 0x9F) == 2) {
1045 i = (h->mb_num < 64) ? 6 : (1 + av_log2(h->mb_num - 1));
1046 sl->mb_skip_run = get_bits(&h->gb, i) -
1047 (s->mb_y * h->mb_width + s->mb_x);
1048 } else {
1049 skip_bits1(&h->gb);
1050 sl->mb_skip_run = 0;
1051 }
1052
1053 sl->slice_num = get_bits(&h->gb, 8);
1054 sl->qscale = get_bits(&h->gb, 5);
1055 s->adaptive_quant = get_bits1(&h->gb);
1056
1057 /* unknown fields */
1058 skip_bits1(&h->gb);
1059
1060 if (s->unknown_flag)
1061 skip_bits1(&h->gb);
1062
1063 skip_bits1(&h->gb);
1064 skip_bits(&h->gb, 2);
1065
1066 while (get_bits1(&h->gb))
1067 skip_bits(&h->gb, 8);
1068
1069 /* reset intra predictors and invalidate motion vector references */
1070 if (s->mb_x > 0) {
1071 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
1072 -1, 4 * sizeof(int8_t));
1073 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - s->mb_x],
1074 -1, 8 * sizeof(int8_t) * s->mb_x);
1075 }
1076 if (s->mb_y > 0) {
1077 memset(s->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
1078 -1, 8 * sizeof(int8_t) * (h->mb_width - s->mb_x));
1079
1080 if (s->mb_x > 0)
1081 s->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
1082 }
1083
1084 return 0;
1085 }
1086
1087 static av_cold int svq3_decode_init(AVCodecContext *avctx)
1088 {
1089 SVQ3Context *s = avctx->priv_data;
1090 H264Context *h = &s->h;
1091 H264SliceContext *sl;
1092 int m;
1093 unsigned char *extradata;
1094 unsigned char *extradata_end;
1095 unsigned int size;
1096 int marker_found = 0;
1097
1098 s->cur_pic = av_mallocz(sizeof(*s->cur_pic));
1099 s->last_pic = av_mallocz(sizeof(*s->last_pic));
1100 s->next_pic = av_mallocz(sizeof(*s->next_pic));
1101 if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1102 av_freep(&s->cur_pic);
1103 av_freep(&s->last_pic);
1104 av_freep(&s->next_pic);
1105 return AVERROR(ENOMEM);
1106 }
1107
1108 s->cur_pic->f = av_frame_alloc();
1109 s->last_pic->f = av_frame_alloc();
1110 s->next_pic->f = av_frame_alloc();
1111 if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1112 return AVERROR(ENOMEM);
1113
1114 if (ff_h264_decode_init(avctx) < 0)
1115 return -1;
1116
1117 // we will overwrite it later during decoding
1118 av_frame_free(&h->cur_pic.f);
1119
1120 ff_h264dsp_init(&s->h264dsp, 8, 1);
1121 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1122 ff_videodsp_init(&s->vdsp, 8);
1123
1124 memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1125 memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
1126
1127 h->sps.bit_depth_luma = 8;
1128 h->chroma_format_idc = 1;
1129
1130 ff_hpeldsp_init(&s->hdsp, avctx->flags);
1131 ff_tpeldsp_init(&s->tdsp);
1132
1133 sl = h->slice_ctx;
1134
1135 h->flags = avctx->flags;
1136 sl->is_complex = 1;
1137 h->picture_structure = PICT_FRAME;
1138 avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
1139 avctx->color_range = AVCOL_RANGE_JPEG;
1140
1141 h->slice_ctx[0].chroma_qp[0] = h->slice_ctx[0].chroma_qp[1] = 4;
1142 h->chroma_x_shift = h->chroma_y_shift = 1;
1143
1144 s->halfpel_flag = 1;
1145 s->thirdpel_flag = 1;
1146 s->unknown_flag = 0;
1147
1148 /* prowl for the "SEQH" marker in the extradata */
1149 extradata = (unsigned char *)avctx->extradata;
1150 extradata_end = avctx->extradata + avctx->extradata_size;
1151 if (extradata) {
1152 for (m = 0; m + 8 < avctx->extradata_size; m++) {
1153 if (!memcmp(extradata, "SEQH", 4)) {
1154 marker_found = 1;
1155 break;
1156 }
1157 extradata++;
1158 }
1159 }
1160
1161 /* if a match was found, parse the extra data */
1162 if (marker_found) {
1163 GetBitContext gb;
1164 int frame_size_code;
1165
1166 size = AV_RB32(&extradata[4]);
1167 if (size > extradata_end - extradata - 8)
1168 return AVERROR_INVALIDDATA;
1169 init_get_bits(&gb, extradata + 8, size * 8);
1170
1171 /* 'frame size code' and optional 'width, height' */
1172 frame_size_code = get_bits(&gb, 3);
1173 switch (frame_size_code) {
1174 case 0:
1175 avctx->width = 160;
1176 avctx->height = 120;
1177 break;
1178 case 1:
1179 avctx->width = 128;
1180 avctx->height = 96;
1181 break;
1182 case 2:
1183 avctx->width = 176;
1184 avctx->height = 144;
1185 break;
1186 case 3:
1187 avctx->width = 352;
1188 avctx->height = 288;
1189 break;
1190 case 4:
1191 avctx->width = 704;
1192 avctx->height = 576;
1193 break;
1194 case 5:
1195 avctx->width = 240;
1196 avctx->height = 180;
1197 break;
1198 case 6:
1199 avctx->width = 320;
1200 avctx->height = 240;
1201 break;
1202 case 7:
1203 avctx->width = get_bits(&gb, 12);
1204 avctx->height = get_bits(&gb, 12);
1205 break;
1206 }
1207
1208 s->halfpel_flag = get_bits1(&gb);
1209 s->thirdpel_flag = get_bits1(&gb);
1210
1211 /* unknown fields */
1212 skip_bits1(&gb);
1213 skip_bits1(&gb);
1214 skip_bits1(&gb);
1215 skip_bits1(&gb);
1216
1217 h->low_delay = get_bits1(&gb);
1218
1219 /* unknown field */
1220 skip_bits1(&gb);
1221
1222 while (get_bits1(&gb))
1223 skip_bits(&gb, 8);
1224
1225 s->unknown_flag = get_bits1(&gb);
1226 avctx->has_b_frames = !h->low_delay;
1227 if (s->unknown_flag) {
1228 #if CONFIG_ZLIB
1229 unsigned watermark_width = svq3_get_ue_golomb(&gb);
1230 unsigned watermark_height = svq3_get_ue_golomb(&gb);
1231 int u1 = svq3_get_ue_golomb(&gb);
1232 int u2 = get_bits(&gb, 8);
1233 int u3 = get_bits(&gb, 2);
1234 int u4 = svq3_get_ue_golomb(&gb);
1235 unsigned long buf_len = watermark_width *
1236 watermark_height * 4;
1237 int offset = get_bits_count(&gb) + 7 >> 3;
1238 uint8_t *buf;
1239
1240 if (watermark_height > 0 &&
1241 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1242 return -1;
1243
1244 buf = av_malloc(buf_len);
1245 av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1246 watermark_width, watermark_height);
1247 av_log(avctx, AV_LOG_DEBUG,
1248 "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1249 u1, u2, u3, u4, offset);
1250 if (uncompress(buf, &buf_len, extradata + 8 + offset,
1251 size - offset) != Z_OK) {
1252 av_log(avctx, AV_LOG_ERROR,
1253 "could not uncompress watermark logo\n");
1254 av_free(buf);
1255 return -1;
1256 }
1257 s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1258 s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1259 av_log(avctx, AV_LOG_DEBUG,
1260 "watermark key %#"PRIx32"\n", s->watermark_key);
1261 av_free(buf);
1262 #else
1263 av_log(avctx, AV_LOG_ERROR,
1264 "this svq3 file contains watermark which need zlib support compiled in\n");
1265 return -1;
1266 #endif
1267 }
1268 }
1269
1270 h->width = avctx->width;
1271 h->height = avctx->height;
1272 h->mb_width = (h->width + 15) / 16;
1273 h->mb_height = (h->height + 15) / 16;
1274 h->mb_stride = h->mb_width + 1;
1275 h->mb_num = h->mb_width * h->mb_height;
1276 h->b_stride = 4 * h->mb_width;
1277 s->h_edge_pos = h->mb_width * 16;
1278 s->v_edge_pos = h->mb_height * 16;
1279
1280 s->intra4x4_pred_mode = av_mallocz(h->mb_stride * 2 * 8);
1281 if (!s->intra4x4_pred_mode)
1282 return AVERROR(ENOMEM);
1283
1284 if (ff_h264_alloc_tables(h) < 0) {
1285 av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
1286 return AVERROR(ENOMEM);
1287 }
1288
1289 return 0;
1290 }
1291
1292 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1293 {
1294 int i;
1295 for (i = 0; i < 2; i++) {
1296 av_buffer_unref(&pic->motion_val_buf[i]);
1297 av_buffer_unref(&pic->ref_index_buf[i]);
1298 }
1299 av_buffer_unref(&pic->mb_type_buf);
1300
1301 av_frame_unref(pic->f);
1302 }
1303
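/* Allocate the per-picture mb_type, motion_val and ref_index buffers on
 * first use and acquire a frame buffer for the picture. */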
1304 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1305 {
1306 SVQ3Context *s = avctx->priv_data;
1307 H264Context *h = &s->h;
1308 H264SliceContext *sl = &h->slice_ctx[0];
1309 const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1;
1310 const int mb_array_size = h->mb_stride * h->mb_height;
1311 const int b4_stride = h->mb_width * 4 + 1;
1312 const int b4_array_size = b4_stride * h->mb_height * 4;
1313 int ret;
1314
1315 if (!pic->motion_val_buf[0]) {
1316 int i;
1317
1318 pic->mb_type_buf = av_buffer_allocz((big_mb_num + h->mb_stride) * sizeof(uint32_t));
1319 if (!pic->mb_type_buf)
1320 return AVERROR(ENOMEM);
1321 pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
1322
1323 for (i = 0; i < 2; i++) {
1324 pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1325 pic->ref_index_buf[i] = av_buffer_allocz(4 * mb_array_size);
1326 if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1327 ret = AVERROR(ENOMEM);
1328 goto fail;
1329 }
1330
1331 pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1332 pic->ref_index[i] = pic->ref_index_buf[i]->data;
1333 }
1334 }
1335 pic->reference = !(h->pict_type == AV_PICTURE_TYPE_B);
1336
1337 ret = ff_get_buffer(avctx, pic->f,
1338 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1339 if (ret < 0)
1340 goto fail;
1341
1342 if (!s->edge_emu_buffer) {
1343 s->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1344 if (!s->edge_emu_buffer)
1345 return AVERROR(ENOMEM);
1346 }
1347
1348 sl->linesize = pic->f->linesize[0];
1349 sl->uvlinesize = pic->f->linesize[1];
1350
1351 return 0;
1352 fail:
1353 free_picture(avctx, pic);
1354 return ret;
1355 }
1356
1357 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1358 int *got_frame, AVPacket *avpkt)
1359 {
1360 const uint8_t *buf = avpkt->data;
1361 SVQ3Context *s = avctx->priv_data;
1362 H264Context *h = &s->h;
1363 H264SliceContext *sl = &h->slice_ctx[0];
1364 int buf_size = avpkt->size;
1365 int ret, m, i;
1366
1367 /* special case for last picture */
1368 if (buf_size == 0) {
1369 if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1370 ret = av_frame_ref(data, s->next_pic->f);
1371 if (ret < 0)
1372 return ret;
1373 s->last_frame_output = 1;
1374 *got_frame = 1;
1375 }
1376 return 0;
1377 }
1378
1379 ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1380 if (ret < 0)
1381 return ret;
1382
1383 s->mb_x = s->mb_y = s->mb_xy = 0;
1384
1385 if (svq3_decode_slice_header(avctx))
1386 return -1;
1387
1388 h->pict_type = sl->slice_type;
1389
1390 if (h->pict_type != AV_PICTURE_TYPE_B)
1391 FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1392
1393 av_frame_unref(s->cur_pic->f);
1394
1395 /* for skipping the frame */
1396 s->cur_pic->f->pict_type = h->pict_type;
1397 s->cur_pic->f->key_frame = (h->pict_type == AV_PICTURE_TYPE_I);
1398
1399 ret = get_buffer(avctx, s->cur_pic);
1400 if (ret < 0)
1401 return ret;
1402
1403 h->cur_pic_ptr = s->cur_pic;
1404 h->cur_pic = *s->cur_pic;
1405
1406 for (i = 0; i < 16; i++) {
1407 h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1408 h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1409 }
1410 for (i = 0; i < 16; i++) {
1411 h->block_offset[16 + i] =
1412 h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1413 h->block_offset[48 + 16 + i] =
1414 h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1415 }
1416
1417 if (h->pict_type != AV_PICTURE_TYPE_I) {
1418 if (!s->last_pic->f->data[0]) {
1419 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1420 ret = get_buffer(avctx, s->last_pic);
1421 if (ret < 0)
1422 return ret;
1423 memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1424 memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1425 s->last_pic->f->linesize[1]);
1426 memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1427 s->last_pic->f->linesize[2]);
1428 }
1429
1430 if (h->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1431 av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1432 ret = get_buffer(avctx, s->next_pic);
1433 if (ret < 0)
1434 return ret;
1435 memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1436 memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1437 s->next_pic->f->linesize[1]);
1438 memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1439 s->next_pic->f->linesize[2]);
1440 }
1441 }
1442
1443 if (avctx->debug & FF_DEBUG_PICT_INFO)
1444 av_log(h->avctx, AV_LOG_DEBUG,
1445 "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1446 av_get_picture_type_char(h->pict_type),
1447 s->halfpel_flag, s->thirdpel_flag,
1448 s->adaptive_quant, h->slice_ctx[0].qscale, sl->slice_num);
1449
1450 if (avctx->skip_frame >= AVDISCARD_NONREF && h->pict_type == AV_PICTURE_TYPE_B ||
1451 avctx->skip_frame >= AVDISCARD_NONKEY && h->pict_type != AV_PICTURE_TYPE_I ||
1452 avctx->skip_frame >= AVDISCARD_ALL)
1453 return 0;
1454
1455 if (s->next_p_frame_damaged) {
1456 if (h->pict_type == AV_PICTURE_TYPE_B)
1457 return 0;
1458 else
1459 s->next_p_frame_damaged = 0;
1460 }
1461
1462 if (h->pict_type == AV_PICTURE_TYPE_B) {
1463 h->frame_num_offset = sl->slice_num - h->prev_frame_num;
1464
1465 if (h->frame_num_offset < 0)
1466 h->frame_num_offset += 256;
1467 if (h->frame_num_offset == 0 ||
1468 h->frame_num_offset >= h->prev_frame_num_offset) {
1469 av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1470 return -1;
1471 }
1472 } else {
1473 h->prev_frame_num = h->frame_num;
1474 h->frame_num = sl->slice_num;
1475 h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
1476
1477 if (h->prev_frame_num_offset < 0)
1478 h->prev_frame_num_offset += 256;
1479 }
1480
1481 for (m = 0; m < 2; m++) {
1482 int i;
1483 for (i = 0; i < 4; i++) {
1484 int j;
1485 for (j = -1; j < 4; j++)
1486 s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1487 if (i < 3)
1488 s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1489 }
1490 }
1491
1492 for (s->mb_y = 0; s->mb_y < h->mb_height; s->mb_y++) {
1493 for (s->mb_x = 0; s->mb_x < h->mb_width; s->mb_x++) {
1494 unsigned mb_type;
1495 s->mb_xy = s->mb_x + s->mb_y * h->mb_stride;
1496
1497 if ((get_bits_left(&h->gb)) <= 7) {
1498 if (((get_bits_count(&h->gb) & 7) == 0 ||
1499 show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
1500
1501 if (svq3_decode_slice_header(avctx))
1502 return -1;
1503 }
1504 /* TODO: support s->mb_skip_run */
1505 }
1506
1507 mb_type = svq3_get_ue_golomb(&h->gb);
1508
1509 if (h->pict_type == AV_PICTURE_TYPE_I)
1510 mb_type += 8;
1511 else if (h->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1512 mb_type += 4;
1513 if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1514 av_log(h->avctx, AV_LOG_ERROR,
1515 "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1516 return -1;
1517 }
1518
1519 if (mb_type != 0)
1520 hl_decode_mb(s, h, &h->slice_ctx[0]);
1521
1522 if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1523 h->cur_pic.mb_type[s->mb_x + s->mb_y * h->mb_stride] =
1524 (h->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1525 }
1526
1527 ff_draw_horiz_band(avctx, s->cur_pic->f,
1528 s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1529 16 * s->mb_y, 16, h->picture_structure, 0,
1530 h->low_delay);
1531 }
1532
1533 if (h->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1534 ret = av_frame_ref(data, s->cur_pic->f);
1535 else if (s->last_pic->f->data[0])
1536 ret = av_frame_ref(data, s->last_pic->f);
1537 if (ret < 0)
1538 return ret;
1539
1540 /* Do not output the last pic after seeking. */
1541 if (s->last_pic->f->data[0] || h->low_delay)
1542 *got_frame = 1;
1543
1544 if (h->pict_type != AV_PICTURE_TYPE_B) {
1545 FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1546 } else {
1547 av_frame_unref(s->cur_pic->f);
1548 }
1549
1550 return buf_size;
1551 }
1552
1553 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1554 {
1555 SVQ3Context *s = avctx->priv_data;
1556 H264Context *h = &s->h;
1557
1558 free_picture(avctx, s->cur_pic);
1559 free_picture(avctx, s->next_pic);
1560 free_picture(avctx, s->last_pic);
1561 av_frame_free(&s->cur_pic->f);
1562 av_frame_free(&s->next_pic->f);
1563 av_frame_free(&s->last_pic->f);
1564 av_freep(&s->cur_pic);
1565 av_freep(&s->next_pic);
1566 av_freep(&s->last_pic);
1567 av_freep(&s->slice_buf);
1568 av_freep(&s->intra4x4_pred_mode);
1569 av_freep(&s->edge_emu_buffer);
1570
1571 memset(&h->cur_pic, 0, sizeof(h->cur_pic));
1572
1573 ff_h264_free_context(h);
1574
1575 return 0;
1576 }
1577
1578 AVCodec ff_svq3_decoder = {
1579 .name = "svq3",
1580 .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1581 .type = AVMEDIA_TYPE_VIDEO,
1582 .id = AV_CODEC_ID_SVQ3,
1583 .priv_data_size = sizeof(SVQ3Context),
1584 .init = svq3_decode_init,
1585 .close = svq3_decode_end,
1586 .decode = svq3_decode_frame,
1587 .capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1588 AV_CODEC_CAP_DR1 |
1589 AV_CODEC_CAP_DELAY,
1590 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1591 AV_PIX_FMT_NONE},
1592 };