aic: add frame threading support
[libav.git] / libavcodec / aic.c
1 /*
2 * Apple Intermediate Codec decoder
3 *
4 * Copyright (c) 2013 Konstantin Shishkov
5 *
6 * This file is part of Libav.
7 *
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <inttypes.h>
24
25 #include "avcodec.h"
26 #include "bytestream.h"
27 #include "internal.h"
28 #include "get_bits.h"
29 #include "golomb.h"
30 #include "idctdsp.h"
31 #include "thread.h"
32 #include "unary.h"
33
34 #define AIC_HDR_SIZE 24
35 #define AIC_BAND_COEFFS (64 + 32 + 192 + 96)
36
/* Coefficient bands; each slice codes them one after another in this order. */
37 enum AICBands {
38 COEFF_LUMA = 0, /* base luma band */
39 COEFF_CHROMA, /* base chroma band */
40 COEFF_LUMA_EXT, /* extended luma band */
41 COEFF_CHROMA_EXT, /* extended chroma band */
42 NUM_BANDS /* band count, used to size the per-band tables */
43 };
44
/* Coefficients stored per macroblock for each band (indexed by AICBands);
 * the four counts add up to AIC_BAND_COEFFS. */
45 static const int aic_num_band_coeffs[NUM_BANDS] = { 64, 32, 192, 96 };
46
/* Running sum of the counts above. Multiplied by the slice width it gives
 * the start of each band inside the per-slice coefficient buffer. */
47 static const int aic_band_off[NUM_BANDS] = { 0, 64, 96, 288 };
48
/* Per-coefficient weights for an 8x8 block. During dequantisation each
 * coefficient is multiplied by the frame quantiser and by its weight here,
 * then shifted right by 4. */
49 static const uint8_t aic_quant_matrix[64] = {
50 8, 16, 19, 22, 22, 26, 26, 27,
51 16, 16, 22, 22, 26, 27, 27, 29,
52 19, 22, 26, 26, 27, 29, 29, 35,
53 22, 24, 27, 27, 29, 32, 34, 38,
54 26, 27, 29, 29, 32, 35, 38, 46,
55 27, 29, 34, 34, 35, 40, 46, 56,
56 29, 34, 34, 37, 40, 48, 56, 69,
57 34, 37, 38, 40, 48, 58, 69, 83,
58 };
59
/* Scan order for the base luma band: entry k is the slot, within one
 * macroblock's 64 stored coefficients, that receives the k-th decoded value.
 * It is reached through aic_scan[COEFF_LUMA], i.e. only when the decoder is
 * not forcing the chroma tables (interlaced frames). */
60 static const uint8_t aic_y_scan[64] = {
61 0, 4, 1, 2, 5, 8, 12, 9,
62 6, 3, 7, 10, 13, 14, 11, 15,
63 47, 43, 46, 45, 42, 39, 35, 38,
64 41, 44, 40, 37, 34, 33, 36, 32,
65 16, 20, 17, 18, 21, 24, 28, 25,
66 22, 19, 23, 26, 29, 30, 27, 31,
67 63, 59, 62, 61, 58, 55, 51, 54,
68 57, 60, 56, 53, 50, 49, 52, 48,
69 };
70
/* Scan order for the extended luma band (192 coefficients per macroblock):
 * entry k is the storage slot of the k-th decoded value. It is reached
 * through aic_scan[COEFF_LUMA_EXT], i.e. only when the chroma tables are not
 * being forced (interlaced frames). */
71 static const uint8_t aic_y_ext_scan[192] = {
72 64, 72, 65, 66, 73, 80, 88, 81,
73 74, 67, 75, 82, 89, 90, 83, 91,
74 0, 4, 1, 2, 5, 8, 12, 9,
75 6, 3, 7, 10, 13, 14, 11, 15,
76 16, 20, 17, 18, 21, 24, 28, 25,
77 22, 19, 23, 26, 29, 30, 27, 31,
78 155, 147, 154, 153, 146, 139, 131, 138,
79 145, 152, 144, 137, 130, 129, 136, 128,
80 47, 43, 46, 45, 42, 39, 35, 38,
81 41, 44, 40, 37, 34, 33, 36, 32,
82 63, 59, 62, 61, 58, 55, 51, 54,
83 57, 60, 56, 53, 50, 49, 52, 48,
84 96, 104, 97, 98, 105, 112, 120, 113,
85 106, 99, 107, 114, 121, 122, 115, 123,
86 68, 76, 69, 70, 77, 84, 92, 85,
87 78, 71, 79, 86, 93, 94, 87, 95,
88 100, 108, 101, 102, 109, 116, 124, 117,
89 110, 103, 111, 118, 125, 126, 119, 127,
90 187, 179, 186, 185, 178, 171, 163, 170,
91 177, 184, 176, 169, 162, 161, 168, 160,
92 159, 151, 158, 157, 150, 143, 135, 142,
93 149, 156, 148, 141, 134, 133, 140, 132,
94 191, 183, 190, 189, 182, 175, 167, 174,
95 181, 188, 180, 173, 166, 165, 172, 164,
96 };
97
/* Scan order for the base chroma band: entry k is the storage slot of the
 * k-th decoded value. The chroma band holds 32 coefficients per macroblock,
 * so only the first 32 entries are read for it. All 64 entries are read when
 * this table stands in for the luma one (force_chroma set, i.e.
 * non-interlaced frames). */
98 static const uint8_t aic_c_scan[64] = {
99 0, 4, 1, 2, 5, 8, 12, 9,
100 6, 3, 7, 10, 13, 14, 11, 15,
101 31, 27, 30, 29, 26, 23, 19, 22,
102 25, 28, 24, 21, 18, 17, 20, 16,
103 32, 36, 33, 34, 37, 40, 44, 41,
104 38, 35, 39, 42, 45, 46, 43, 47,
105 63, 59, 62, 61, 58, 55, 51, 54,
106 57, 60, 56, 53, 50, 49, 52, 48,
107 };
108
/* Scan order for the extended chroma band: entry k is the storage slot of
 * the k-th decoded value. The extended chroma band holds 96 coefficients per
 * macroblock, so only the first 96 entries are read for it. All 192 entries
 * are read when this table stands in for the extended luma one (force_chroma
 * set, i.e. non-interlaced frames). */
109 static const uint8_t aic_c_ext_scan[192] = {
110 16, 24, 17, 18, 25, 32, 40, 33,
111 26, 19, 27, 34, 41, 42, 35, 43,
112 0, 4, 1, 2, 5, 8, 12, 9,
113 6, 3, 7, 10, 13, 14, 11, 15,
114 20, 28, 21, 22, 29, 36, 44, 37,
115 30, 23, 31, 38, 45, 46, 39, 47,
116 95, 87, 94, 93, 86, 79, 71, 78,
117 85, 92, 84, 77, 70, 69, 76, 68,
118 63, 59, 62, 61, 58, 55, 51, 54,
119 57, 60, 56, 53, 50, 49, 52, 48,
120 91, 83, 90, 89, 82, 75, 67, 74,
121 81, 88, 80, 73, 66, 65, 72, 64,
122 112, 120, 113, 114, 121, 128, 136, 129,
123 122, 115, 123, 130, 137, 138, 131, 139,
124 96, 100, 97, 98, 101, 104, 108, 105,
125 102, 99, 103, 106, 109, 110, 107, 111,
126 116, 124, 117, 118, 125, 132, 140, 133,
127 126, 119, 127, 134, 141, 142, 135, 143,
128 191, 183, 190, 189, 182, 175, 167, 174,
129 181, 188, 180, 173, 166, 165, 172, 164,
130 159, 155, 158, 157, 154, 151, 147, 150,
131 153, 156, 152, 149, 146, 145, 148, 144,
132 187, 179, 186, 185, 178, 171, 163, 170,
133 177, 184, 176, 169, 162, 161, 168, 160,
134 };
135
/* Scan table per band, in AICBands order. The coefficient decoder indexes
 * this with (band | force_chroma), so setting force_chroma redirects the two
 * luma bands (even indices) to the chroma tables that follow them. */
136 static const uint8_t *aic_scan[NUM_BANDS] = {
137 aic_y_scan, aic_c_scan, aic_y_ext_scan, aic_c_ext_scan
138 };
139
/* Private decoder state, stored in AVCodecContext.priv_data. */
140 typedef struct AICContext {
141 AVCodecContext *avctx; /* owning codec context, set at init */
142 AVFrame *frame; /* frame being decoded; set for every packet */
143 IDCTDSPContext idsp; /* IDCT and pixel-store routines */
144 ScanTable scantable; /* identity scan run through the IDCT permutation */
145
146 int num_x_slices; /* slices per macroblock row */
147 int slice_width; /* nominal slice width, in macroblocks */
148 int mb_width, mb_height; /* picture size in 16x16 macroblocks, rounded up */
149 int quant; /* quantiser, taken from header byte 15 */
150 int interlaced; /* non-zero when the high nibble of header byte 16 is 3 */
151
152 int16_t *slice_data; /* coefficient storage for one slice, all bands */
153 int16_t *data_ptr[NUM_BANDS]; /* start of each band inside slice_data */
154
/* scratch 8x8 block that is recombined, dequantised and transformed */
155 DECLARE_ALIGNED(16, int16_t, block)[64];
156 } AICContext;
157
158 static int aic_decode_header(AICContext *ctx, const uint8_t *src, int size)
159 {
160 uint32_t frame_size;
161 int width, height;
162
163 if (src[0] != 1) {
164 av_log(ctx->avctx, AV_LOG_ERROR, "Invalid version %d\n", src[0]);
165 return AVERROR_INVALIDDATA;
166 }
167 if (src[1] != AIC_HDR_SIZE - 2) {
168 av_log(ctx->avctx, AV_LOG_ERROR, "Invalid header size %d\n", src[1]);
169 return AVERROR_INVALIDDATA;
170 }
171 frame_size = AV_RB32(src + 2);
172 width = AV_RB16(src + 6);
173 height = AV_RB16(src + 8);
174 if (frame_size > size) {
175 av_log(ctx->avctx, AV_LOG_ERROR, "Frame size should be %"PRIu32" got %d\n",
176 frame_size, size);
177 return AVERROR_INVALIDDATA;
178 }
179 if (width != ctx->avctx->width || height != ctx->avctx->height) {
180 av_log(ctx->avctx, AV_LOG_ERROR,
181 "Picture dimension changed: old: %d x %d, new: %d x %d\n",
182 ctx->avctx->width, ctx->avctx->height, width, height);
183 return AVERROR_INVALIDDATA;
184 }
185 ctx->quant = src[15];
186 ctx->interlaced = ((src[16] >> 4) == 3);
187
188 return 0;
189 }
190
191 #define GET_CODE(val, type, add_bits) \
192 do { \
193 if (type) \
194 val = get_ue_golomb(gb); \
195 else \
196 val = get_unary(gb, 1, 31); \
197 if (add_bits) \
198 val = (val << add_bits) + get_bits(gb, add_bits); \
199 } while (0)
200
201 static int aic_decode_coeffs(GetBitContext *gb, int16_t *dst,
202 int band, int slice_width, int force_chroma)
203 {
204 int has_skips, coeff_type, coeff_bits, skip_type, skip_bits;
205 const int num_coeffs = aic_num_band_coeffs[band];
206 const uint8_t *scan = aic_scan[band | force_chroma];
207 int mb, idx, val;
208
209 has_skips = get_bits1(gb);
210 coeff_type = get_bits1(gb);
211 coeff_bits = get_bits(gb, 3);
212
213 if (has_skips) {
214 skip_type = get_bits1(gb);
215 skip_bits = get_bits(gb, 3);
216
217 for (mb = 0; mb < slice_width; mb++) {
218 idx = -1;
219 do {
220 GET_CODE(val, skip_type, skip_bits);
221 if (val < 0)
222 return AVERROR_INVALIDDATA;
223 idx += val + 1;
224 if (idx >= num_coeffs)
225 break;
226 GET_CODE(val, coeff_type, coeff_bits);
227 val++;
228 if (val >= 0x10000 || val < 0)
229 return AVERROR_INVALIDDATA;
230 dst[scan[idx]] = val;
231 } while (idx < num_coeffs - 1);
232 dst += num_coeffs;
233 }
234 } else {
235 for (mb = 0; mb < slice_width; mb++) {
236 for (idx = 0; idx < num_coeffs; idx++) {
237 GET_CODE(val, coeff_type, coeff_bits);
238 if (val >= 0x10000 || val < 0)
239 return AVERROR_INVALIDDATA;
240 dst[scan[idx]] = val;
241 }
242 dst += num_coeffs;
243 }
244 }
245 return 0;
246 }
247
/**
 * Assemble one 8x8 block from the base and extended coefficient streams.
 *
 * For each of the first four rows, the left four coefficients are taken
 * from *base and the right four from *ext. The last four rows are taken
 * entirely from *ext. Coefficient number n (row-major) is written to
 * dst[scan[n]].
 *
 * @param dst  destination block, 64 entries
 * @param scan maps a row-major coefficient index to its position in dst
 * @param base base-band read pointer, advanced by 16 in total
 * @param ext  extended-band read pointer, advanced by 48 in total
 */
static void recombine_block(int16_t *dst, const uint8_t *scan,
                            int16_t **base, int16_t **ext)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = scan + row * 8;

        if (row < 4) {
            /* mixed row: half base, half extension */
            for (col = 0; col < 4; col++)
                dst[line[col]] = (*base)[col];
            for (col = 0; col < 4; col++)
                dst[line[col + 4]] = (*ext)[col];
            *base += 4;
            *ext  += 4;
        } else {
            /* extension-only row */
            for (col = 0; col < 8; col++)
                dst[line[col]] = (*ext)[col];
            *ext += 8;
        }
    }
}
267
/**
 * Assemble one 8x8 block from the base and extended coefficient streams,
 * using the layout selected by the block number.
 *
 * Blocks 0 and 1 take the left four coefficients of every row from *base
 * and the right four from *ext. Any other block is filled entirely from
 * *ext. Coefficient number n (row-major) is written to dst[scan[n]].
 *
 * @param dst      destination block, 64 entries
 * @param scan     maps a row-major coefficient index to its position in dst
 * @param base     base-band read pointer; advanced by 32 for blocks 0 and 1,
 *                 left untouched otherwise
 * @param ext      extended-band read pointer; advanced by 32 for blocks 0
 *                 and 1, by 64 otherwise
 * @param block_no index of the block inside its macroblock
 */
static void recombine_block_il(int16_t *dst, const uint8_t *scan,
                               int16_t **base, int16_t **ext,
                               int block_no)
{
    int row, col, n;

    if (block_no >= 2) {
        /* extension-only block */
        for (n = 0; n < 64; n++)
            dst[scan[n]] = (*ext)[n];
        *ext += 64;
        return;
    }

    for (row = 0; row < 8; row++) {
        const uint8_t *line = scan + row * 8;

        for (col = 0; col < 4; col++)
            dst[line[col]] = (*base)[col];
        for (col = 0; col < 4; col++)
            dst[line[col + 4]] = (*ext)[col];
        *base += 4;
        *ext  += 4;
    }
}
289
290 static void unquant_block(int16_t *block, int q)
291 {
292 int i;
293
294 for (i = 0; i < 64; i++) {
295 int val = (uint16_t)block[i];
296 int sign = val & 1;
297
298 block[i] = (((val >> 1) ^ -sign) * q * aic_quant_matrix[i] >> 4)
299 + sign;
300 }
301 }
302
303 static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y,
304 const uint8_t *src, int src_size)
305 {
306 GetBitContext gb;
307 int ret, i, mb, blk;
308 int slice_width = FFMIN(ctx->slice_width, ctx->mb_width - mb_x);
309 uint8_t *Y, *C[2];
310 uint8_t *dst;
311 int16_t *base_y = ctx->data_ptr[COEFF_LUMA];
312 int16_t *base_c = ctx->data_ptr[COEFF_CHROMA];
313 int16_t *ext_y = ctx->data_ptr[COEFF_LUMA_EXT];
314 int16_t *ext_c = ctx->data_ptr[COEFF_CHROMA_EXT];
315 const int ystride = ctx->frame->linesize[0];
316
317 Y = ctx->frame->data[0] + mb_x * 16 + mb_y * 16 * ystride;
318 for (i = 0; i < 2; i++)
319 C[i] = ctx->frame->data[i + 1] + mb_x * 8
320 + mb_y * 8 * ctx->frame->linesize[i + 1];
321 init_get_bits(&gb, src, src_size * 8);
322
323 memset(ctx->slice_data, 0,
324 sizeof(*ctx->slice_data) * slice_width * AIC_BAND_COEFFS);
325 for (i = 0; i < NUM_BANDS; i++)
326 if ((ret = aic_decode_coeffs(&gb, ctx->data_ptr[i],
327 i, slice_width,
328 !ctx->interlaced)) < 0)
329 return ret;
330
331 for (mb = 0; mb < slice_width; mb++) {
332 for (blk = 0; blk < 4; blk++) {
333 if (!ctx->interlaced)
334 recombine_block(ctx->block, ctx->scantable.permutated,
335 &base_y, &ext_y);
336 else
337 recombine_block_il(ctx->block, ctx->scantable.permutated,
338 &base_y, &ext_y, blk);
339 unquant_block(ctx->block, ctx->quant);
340 ctx->idsp.idct(ctx->block);
341
342 if (!ctx->interlaced) {
343 dst = Y + (blk >> 1) * 8 * ystride + (blk & 1) * 8;
344 ctx->idsp.put_signed_pixels_clamped(ctx->block, dst, ystride);
345 } else {
346 dst = Y + (blk & 1) * 8 + (blk >> 1) * ystride;
347 ctx->idsp.put_signed_pixels_clamped(ctx->block, dst,
348 ystride * 2);
349 }
350 }
351 Y += 16;
352
353 for (blk = 0; blk < 2; blk++) {
354 recombine_block(ctx->block, ctx->scantable.permutated,
355 &base_c, &ext_c);
356 unquant_block(ctx->block, ctx->quant);
357 ctx->idsp.idct(ctx->block);
358 ctx->idsp.put_signed_pixels_clamped(ctx->block, C[blk],
359 ctx->frame->linesize[blk + 1]);
360 C[blk] += 8;
361 }
362 }
363
364 return 0;
365 }
366
367 static int aic_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
368 AVPacket *avpkt)
369 {
370 AICContext *ctx = avctx->priv_data;
371 const uint8_t *buf = avpkt->data;
372 int buf_size = avpkt->size;
373 GetByteContext gb;
374 uint32_t off;
375 int x, y, ret;
376 int slice_size;
377 ThreadFrame frame = { .f = data };
378
379 ctx->frame = data;
380 ctx->frame->pict_type = AV_PICTURE_TYPE_I;
381 ctx->frame->key_frame = 1;
382
383 off = FFALIGN(AIC_HDR_SIZE + ctx->num_x_slices * ctx->mb_height * 2, 4);
384
385 if (buf_size < off) {
386 av_log(avctx, AV_LOG_ERROR, "Too small frame\n");
387 return AVERROR_INVALIDDATA;
388 }
389
390 ret = aic_decode_header(ctx, buf, buf_size);
391 if (ret < 0) {
392 av_log(avctx, AV_LOG_ERROR, "Invalid header\n");
393 return ret;
394 }
395
396 if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
397 return ret;
398
399 bytestream2_init(&gb, buf + AIC_HDR_SIZE,
400 ctx->num_x_slices * ctx->mb_height * 2);
401
402 for (y = 0; y < ctx->mb_height; y++) {
403 for (x = 0; x < ctx->mb_width; x += ctx->slice_width) {
404 slice_size = bytestream2_get_le16(&gb) * 4;
405 if (slice_size + off > buf_size || !slice_size) {
406 av_log(avctx, AV_LOG_ERROR,
407 "Incorrect slice size %d at %d.%d\n", slice_size, x, y);
408 return AVERROR_INVALIDDATA;
409 }
410
411 ret = aic_decode_slice(ctx, x, y, buf + off, slice_size);
412 if (ret < 0) {
413 av_log(avctx, AV_LOG_ERROR,
414 "Error decoding slice at %d.%d\n", x, y);
415 return ret;
416 }
417
418 off += slice_size;
419 }
420 }
421
422 *got_frame = 1;
423
424 return avpkt->size;
425 }
426
427 static av_cold int aic_decode_init(AVCodecContext *avctx)
428 {
429 AICContext *ctx = avctx->priv_data;
430 int i;
431 uint8_t scan[64];
432
433 ctx->avctx = avctx;
434
435 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
436
437 ff_idctdsp_init(&ctx->idsp, avctx);
438
439 for (i = 0; i < 64; i++)
440 scan[i] = i;
441 ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, scan);
442
443 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
444 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
445
446 ctx->num_x_slices = (ctx->mb_width + 15) >> 4;
447 ctx->slice_width = 16;
448 for (i = 1; i < 32; i++) {
449 if (!(ctx->mb_width % i) && (ctx->mb_width / i <= 32)) {
450 ctx->slice_width = ctx->mb_width / i;
451 ctx->num_x_slices = i;
452 break;
453 }
454 }
455
456 ctx->slice_data = av_malloc(ctx->slice_width * AIC_BAND_COEFFS
457 * sizeof(*ctx->slice_data));
458 if (!ctx->slice_data) {
459 av_log(avctx, AV_LOG_ERROR, "Error allocating slice buffer\n");
460
461 return AVERROR(ENOMEM);
462 }
463
464 for (i = 0; i < NUM_BANDS; i++)
465 ctx->data_ptr[i] = ctx->slice_data + ctx->slice_width
466 * aic_band_off[i];
467
468 return 0;
469 }
470
471 static av_cold int aic_decode_close(AVCodecContext *avctx)
472 {
473 AICContext *ctx = avctx->priv_data;
474
475 av_freep(&ctx->slice_data);
476
477 return 0;
478 }
479
/* Codec registration entry for the Apple Intermediate Codec decoder. */
480 AVCodec ff_aic_decoder = {
481 .name = "aic",
482 .long_name = NULL_IF_CONFIG_SMALL("Apple Intermediate Codec"),
483 .type = AVMEDIA_TYPE_VIDEO,
484 .id = AV_CODEC_ID_AIC,
485 .priv_data_size = sizeof(AICContext),
486 .init = aic_decode_init,
487 .close = aic_decode_close,
488 .decode = aic_decode_frame,
489 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
/* thread copies rerun the init function, which allocates a slice buffer */
490 .init_thread_copy = ONLY_IF_THREADS_ENABLED(aic_decode_init),
491 };