aic: add frame threading support
[libav.git] / libavcodec / aic.c
CommitLineData
1232a164
KS
/*
 * Apple Intermediate Codec decoder
 *
 * Copyright (c) 2013 Konstantin Shishkov
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
cc8163e1
DB
23#include <inttypes.h>
24
1232a164
KS
25#include "avcodec.h"
26#include "bytestream.h"
1232a164
KS
27#include "internal.h"
28#include "get_bits.h"
29#include "golomb.h"
e3fcb143 30#include "idctdsp.h"
6e2ad28c 31#include "thread.h"
1232a164
KS
32#include "unary.h"
33
/* Size in bytes of the fixed frame header that precedes the slice table. */
#define AIC_HDR_SIZE    24
/* Coefficients stored per macroblock, summed over all bands. */
#define AIC_BAND_COEFFS (64 + 32 + 192 + 96)

/* Coefficient bands, listed in the order they are decoded within a slice. */
enum AICBands {
    COEFF_LUMA = 0,
    COEFF_CHROMA,
    COEFF_LUMA_EXT,
    COEFF_CHROMA_EXT,
    NUM_BANDS
};

/* Number of coefficients each band holds for a single macroblock. */
static const int aic_num_band_coeffs[NUM_BANDS] = {
    [COEFF_LUMA]       = 64,
    [COEFF_CHROMA]     = 32,
    [COEFF_LUMA_EXT]   = 192,
    [COEFF_CHROMA_EXT] = 96,
};

/*
 * Per-macroblock offset of each band inside the slice buffer, i.e. the
 * running sum of aic_num_band_coeffs[] over the preceding bands.
 */
static const int aic_band_off[NUM_BANDS] = {
    [COEFF_LUMA]       = 0,
    [COEFF_CHROMA]     = 64,
    [COEFF_LUMA_EXT]   = 96,
    [COEFF_CHROMA_EXT] = 288,
};
48
/*
 * Per-position quantisation weights for a 64-coefficient block.
 * unquant_block() multiplies each coefficient by the frame quantiser and
 * by the weight at the same index.
 */
static const uint8_t aic_quant_matrix[64] = {
     8, 16, 19, 22, 22, 26, 26, 27,
    16, 16, 22, 22, 26, 27, 27, 29,
    19, 22, 26, 26, 27, 29, 29, 35,
    22, 24, 27, 27, 29, 32, 34, 38,
    26, 27, 29, 29, 32, 35, 38, 46,
    27, 29, 34, 34, 35, 40, 46, 56,
    29, 34, 34, 37, 40, 48, 56, 69,
    34, 37, 38, 40, 48, 58, 69, 83,
};
59
/*
 * Scan order for the base luma band: entry n is the position in the band
 * buffer that receives the n-th decoded coefficient of a macroblock.
 * One 16-entry group per line.
 */
static const uint8_t aic_y_scan[64] = {
     0,  4,  1,  2,  5,  8, 12,  9,  6,  3,  7, 10, 13, 14, 11, 15,
    47, 43, 46, 45, 42, 39, 35, 38, 41, 44, 40, 37, 34, 33, 36, 32,
    16, 20, 17, 18, 21, 24, 28, 25, 22, 19, 23, 26, 29, 30, 27, 31,
    63, 59, 62, 61, 58, 55, 51, 54, 57, 60, 56, 53, 50, 49, 52, 48,
};
70
/*
 * Scan order for the extended luma band (192 coefficients per macroblock):
 * entry n is the position in the band buffer that receives the n-th decoded
 * coefficient. Entries are laid out in 16-entry groups, two rows per group.
 */
static const uint8_t aic_y_ext_scan[192] = {
    /* group 0 */
     64,  72,  65,  66,  73,  80,  88,  81,
     74,  67,  75,  82,  89,  90,  83,  91,
    /* group 1 */
      0,   4,   1,   2,   5,   8,  12,   9,
      6,   3,   7,  10,  13,  14,  11,  15,
    /* group 2 */
     16,  20,  17,  18,  21,  24,  28,  25,
     22,  19,  23,  26,  29,  30,  27,  31,
    /* group 3 */
    155, 147, 154, 153, 146, 139, 131, 138,
    145, 152, 144, 137, 130, 129, 136, 128,
    /* group 4 */
     47,  43,  46,  45,  42,  39,  35,  38,
     41,  44,  40,  37,  34,  33,  36,  32,
    /* group 5 */
     63,  59,  62,  61,  58,  55,  51,  54,
     57,  60,  56,  53,  50,  49,  52,  48,
    /* group 6 */
     96, 104,  97,  98, 105, 112, 120, 113,
    106,  99, 107, 114, 121, 122, 115, 123,
    /* group 7 */
     68,  76,  69,  70,  77,  84,  92,  85,
     78,  71,  79,  86,  93,  94,  87,  95,
    /* group 8 */
    100, 108, 101, 102, 109, 116, 124, 117,
    110, 103, 111, 118, 125, 126, 119, 127,
    /* group 9 */
    187, 179, 186, 185, 178, 171, 163, 170,
    177, 184, 176, 169, 162, 161, 168, 160,
    /* group 10 */
    159, 151, 158, 157, 150, 143, 135, 142,
    149, 156, 148, 141, 134, 133, 140, 132,
    /* group 11 */
    191, 183, 190, 189, 182, 175, 167, 174,
    181, 188, 180, 173, 166, 165, 172, 164,
};
97
/*
 * Scan order for the base chroma band: entry n is the position in the band
 * buffer that receives the n-th decoded coefficient of a macroblock.
 * aic_decode_coeffs() also selects this table for the base luma band when
 * its force_chroma argument is set. One 16-entry group per line.
 */
static const uint8_t aic_c_scan[64] = {
     0,  4,  1,  2,  5,  8, 12,  9,  6,  3,  7, 10, 13, 14, 11, 15,
    31, 27, 30, 29, 26, 23, 19, 22, 25, 28, 24, 21, 18, 17, 20, 16,
    32, 36, 33, 34, 37, 40, 44, 41, 38, 35, 39, 42, 45, 46, 43, 47,
    63, 59, 62, 61, 58, 55, 51, 54, 57, 60, 56, 53, 50, 49, 52, 48,
};
108
/*
 * Scan order for the extended chroma band: entry n is the position in the
 * band buffer that receives the n-th decoded coefficient. The table has 192
 * entries because aic_decode_coeffs() also selects it for the extended luma
 * band when its force_chroma argument is set; the extended chroma band
 * itself only uses the first 96. Entries are laid out in 16-entry groups,
 * two rows per group.
 */
static const uint8_t aic_c_ext_scan[192] = {
    /* group 0 */
     16,  24,  17,  18,  25,  32,  40,  33,
     26,  19,  27,  34,  41,  42,  35,  43,
    /* group 1 */
      0,   4,   1,   2,   5,   8,  12,   9,
      6,   3,   7,  10,  13,  14,  11,  15,
    /* group 2 */
     20,  28,  21,  22,  29,  36,  44,  37,
     30,  23,  31,  38,  45,  46,  39,  47,
    /* group 3 */
     95,  87,  94,  93,  86,  79,  71,  78,
     85,  92,  84,  77,  70,  69,  76,  68,
    /* group 4 */
     63,  59,  62,  61,  58,  55,  51,  54,
     57,  60,  56,  53,  50,  49,  52,  48,
    /* group 5 */
     91,  83,  90,  89,  82,  75,  67,  74,
     81,  88,  80,  73,  66,  65,  72,  64,
    /* group 6 */
    112, 120, 113, 114, 121, 128, 136, 129,
    122, 115, 123, 130, 137, 138, 131, 139,
    /* group 7 */
     96, 100,  97,  98, 101, 104, 108, 105,
    102,  99, 103, 106, 109, 110, 107, 111,
    /* group 8 */
    116, 124, 117, 118, 125, 132, 140, 133,
    126, 119, 127, 134, 141, 142, 135, 143,
    /* group 9 */
    191, 183, 190, 189, 182, 175, 167, 174,
    181, 188, 180, 173, 166, 165, 172, 164,
    /* group 10 */
    159, 155, 158, 157, 154, 151, 147, 150,
    153, 156, 152, 149, 146, 145, 148, 144,
    /* group 11 */
    187, 179, 186, 185, 178, 171, 163, 170,
    177, 184, 176, 169, 162, 161, 168, 160,
};
135
136static const uint8_t *aic_scan[NUM_BANDS] = {
137 aic_y_scan, aic_c_scan, aic_y_ext_scan, aic_c_ext_scan
138};
139
140typedef struct AICContext {
141 AVCodecContext *avctx;
142 AVFrame *frame;
e3fcb143 143 IDCTDSPContext idsp;
1232a164
KS
144 ScanTable scantable;
145
146 int num_x_slices;
147 int slice_width;
148 int mb_width, mb_height;
149 int quant;
150 int interlaced;
151
152 int16_t *slice_data;
153 int16_t *data_ptr[NUM_BANDS];
154
155 DECLARE_ALIGNED(16, int16_t, block)[64];
156} AICContext;
157
158static int aic_decode_header(AICContext *ctx, const uint8_t *src, int size)
159{
160 uint32_t frame_size;
161 int width, height;
162
163 if (src[0] != 1) {
164 av_log(ctx->avctx, AV_LOG_ERROR, "Invalid version %d\n", src[0]);
165 return AVERROR_INVALIDDATA;
166 }
167 if (src[1] != AIC_HDR_SIZE - 2) {
168 av_log(ctx->avctx, AV_LOG_ERROR, "Invalid header size %d\n", src[1]);
169 return AVERROR_INVALIDDATA;
170 }
171 frame_size = AV_RB32(src + 2);
172 width = AV_RB16(src + 6);
173 height = AV_RB16(src + 8);
174 if (frame_size > size) {
cc8163e1 175 av_log(ctx->avctx, AV_LOG_ERROR, "Frame size should be %"PRIu32" got %d\n",
1232a164
KS
176 frame_size, size);
177 return AVERROR_INVALIDDATA;
178 }
179 if (width != ctx->avctx->width || height != ctx->avctx->height) {
180 av_log(ctx->avctx, AV_LOG_ERROR,
181 "Picture dimension changed: old: %d x %d, new: %d x %d\n",
182 ctx->avctx->width, ctx->avctx->height, width, height);
183 return AVERROR_INVALIDDATA;
184 }
185 ctx->quant = src[15];
186 ctx->interlaced = ((src[16] >> 4) == 3);
187
188 return 0;
189}
190
/*
 * Read one variable-length code into val.
 *
 * The prefix is an Exp-Golomb code when type is non-zero and a unary code
 * (capped at 31) otherwise; when add_bits is non-zero that many extra bits
 * are appended below the prefix.
 *
 * The macro reads from a bitstream reader pointer named gb that must be in
 * scope at the point of use. val must be an lvalue; add_bits is evaluated
 * more than once, so it must not have side effects.
 */
#define GET_CODE(val, type, add_bits)                                     \
    do {                                                                  \
        if (type)                                                         \
            (val) = get_ue_golomb(gb);                                    \
        else                                                              \
            (val) = get_unary(gb, 1, 31);                                 \
        if (add_bits)                                                     \
            (val) = ((val) << (add_bits)) + get_bits(gb, (add_bits));     \
    } while (0)
200
201static int aic_decode_coeffs(GetBitContext *gb, int16_t *dst,
41006698 202 int band, int slice_width, int force_chroma)
1232a164
KS
203{
204 int has_skips, coeff_type, coeff_bits, skip_type, skip_bits;
205 const int num_coeffs = aic_num_band_coeffs[band];
41006698 206 const uint8_t *scan = aic_scan[band | force_chroma];
1232a164
KS
207 int mb, idx, val;
208
209 has_skips = get_bits1(gb);
210 coeff_type = get_bits1(gb);
211 coeff_bits = get_bits(gb, 3);
212
213 if (has_skips) {
214 skip_type = get_bits1(gb);
215 skip_bits = get_bits(gb, 3);
216
217 for (mb = 0; mb < slice_width; mb++) {
218 idx = -1;
219 do {
220 GET_CODE(val, skip_type, skip_bits);
0f678c02
MS
221 if (val < 0)
222 return AVERROR_INVALIDDATA;
1232a164
KS
223 idx += val + 1;
224 if (idx >= num_coeffs)
225 break;
226 GET_CODE(val, coeff_type, coeff_bits);
227 val++;
0f678c02 228 if (val >= 0x10000 || val < 0)
1232a164
KS
229 return AVERROR_INVALIDDATA;
230 dst[scan[idx]] = val;
231 } while (idx < num_coeffs - 1);
232 dst += num_coeffs;
233 }
234 } else {
235 for (mb = 0; mb < slice_width; mb++) {
236 for (idx = 0; idx < num_coeffs; idx++) {
237 GET_CODE(val, coeff_type, coeff_bits);
0f678c02 238 if (val >= 0x10000 || val < 0)
1232a164
KS
239 return AVERROR_INVALIDDATA;
240 dst[scan[idx]] = val;
241 }
242 dst += num_coeffs;
243 }
244 }
245 return 0;
246}
247
/**
 * Assemble one 64-coefficient block from the base and extension bands.
 *
 * Scan positions are taken in groups of eight. In the first four groups the
 * first four positions come from the base band and the last four from the
 * extension band; the remaining four groups come entirely from the
 * extension band. Each value is stored at dst[scan[position]].
 *
 * @param dst  output block (64 entries)
 * @param scan maps a scan position to an index in dst
 * @param base in/out: base band read pointer, advanced by 16
 * @param ext  in/out: extension band read pointer, advanced by 48
 */
static void recombine_block(int16_t *dst, const uint8_t *scan,
                            int16_t **base, int16_t **ext)
{
    int16_t *lo = *base;
    int16_t *hi = *ext;
    int pos;

    /* Bit 2 of the position picks the second half of a group of eight. */
    for (pos = 0; pos < 32; pos++)
        dst[scan[pos]] = (pos & 4) ? *hi++ : *lo++;

    for (; pos < 64; pos++)
        dst[scan[pos]] = *hi++;

    *base = lo;
    *ext  = hi;
}
267
/**
 * Assemble one 64-coefficient block for an interlaced macroblock.
 *
 * For block_no below 2, every group of eight scan positions takes its first
 * four values from the base band and its last four from the extension band.
 * For the other blocks all 64 values come from the extension band. Each
 * value is stored at dst[scan[position]].
 *
 * @param dst      output block (64 entries)
 * @param scan     maps a scan position to an index in dst
 * @param base     in/out: base band read pointer; advanced by 32 when
 *                 block_no < 2, otherwise left untouched
 * @param ext      in/out: extension band read pointer; advanced by 32 when
 *                 block_no < 2, by 64 otherwise
 * @param block_no index of the block within the macroblock
 */
static void recombine_block_il(int16_t *dst, const uint8_t *scan,
                               int16_t **base, int16_t **ext,
                               int block_no)
{
    int16_t *hi = *ext;
    int pos;

    if (block_no >= 2) {
        for (pos = 0; pos < 64; pos++)
            dst[scan[pos]] = *hi++;
    } else {
        int16_t *lo = *base;

        /* Bit 2 of the position picks the second half of a group of eight. */
        for (pos = 0; pos < 64; pos++)
            dst[scan[pos]] = (pos & 4) ? *hi++ : *lo++;
        *base = lo;
    }
    *ext = hi;
}
289
290static void unquant_block(int16_t *block, int q)
291{
292 int i;
293
294 for (i = 0; i < 64; i++) {
295 int val = (uint16_t)block[i];
296 int sign = val & 1;
297
298 block[i] = (((val >> 1) ^ -sign) * q * aic_quant_matrix[i] >> 4)
299 + sign;
300 }
301}
302
303static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y,
304 const uint8_t *src, int src_size)
305{
306 GetBitContext gb;
307 int ret, i, mb, blk;
308 int slice_width = FFMIN(ctx->slice_width, ctx->mb_width - mb_x);
309 uint8_t *Y, *C[2];
310 uint8_t *dst;
311 int16_t *base_y = ctx->data_ptr[COEFF_LUMA];
312 int16_t *base_c = ctx->data_ptr[COEFF_CHROMA];
313 int16_t *ext_y = ctx->data_ptr[COEFF_LUMA_EXT];
314 int16_t *ext_c = ctx->data_ptr[COEFF_CHROMA_EXT];
315 const int ystride = ctx->frame->linesize[0];
316
317 Y = ctx->frame->data[0] + mb_x * 16 + mb_y * 16 * ystride;
318 for (i = 0; i < 2; i++)
319 C[i] = ctx->frame->data[i + 1] + mb_x * 8
320 + mb_y * 8 * ctx->frame->linesize[i + 1];
321 init_get_bits(&gb, src, src_size * 8);
322
323 memset(ctx->slice_data, 0,
324 sizeof(*ctx->slice_data) * slice_width * AIC_BAND_COEFFS);
325 for (i = 0; i < NUM_BANDS; i++)
326 if ((ret = aic_decode_coeffs(&gb, ctx->data_ptr[i],
41006698
KS
327 i, slice_width,
328 !ctx->interlaced)) < 0)
1232a164
KS
329 return ret;
330
331 for (mb = 0; mb < slice_width; mb++) {
332 for (blk = 0; blk < 4; blk++) {
333 if (!ctx->interlaced)
334 recombine_block(ctx->block, ctx->scantable.permutated,
335 &base_y, &ext_y);
336 else
337 recombine_block_il(ctx->block, ctx->scantable.permutated,
338 &base_y, &ext_y, blk);
339 unquant_block(ctx->block, ctx->quant);
e3fcb143 340 ctx->idsp.idct(ctx->block);
1232a164
KS
341
342 if (!ctx->interlaced) {
41006698 343 dst = Y + (blk >> 1) * 8 * ystride + (blk & 1) * 8;
e3fcb143 344 ctx->idsp.put_signed_pixels_clamped(ctx->block, dst, ystride);
1232a164
KS
345 } else {
346 dst = Y + (blk & 1) * 8 + (blk >> 1) * ystride;
e3fcb143
DB
347 ctx->idsp.put_signed_pixels_clamped(ctx->block, dst,
348 ystride * 2);
1232a164
KS
349 }
350 }
351 Y += 16;
352
353 for (blk = 0; blk < 2; blk++) {
354 recombine_block(ctx->block, ctx->scantable.permutated,
355 &base_c, &ext_c);
356 unquant_block(ctx->block, ctx->quant);
e3fcb143
DB
357 ctx->idsp.idct(ctx->block);
358 ctx->idsp.put_signed_pixels_clamped(ctx->block, C[blk],
359 ctx->frame->linesize[blk + 1]);
1232a164
KS
360 C[blk] += 8;
361 }
362 }
363
364 return 0;
365}
366
367static int aic_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
368 AVPacket *avpkt)
369{
370 AICContext *ctx = avctx->priv_data;
371 const uint8_t *buf = avpkt->data;
372 int buf_size = avpkt->size;
373 GetByteContext gb;
374 uint32_t off;
375 int x, y, ret;
376 int slice_size;
6e2ad28c 377 ThreadFrame frame = { .f = data };
1232a164
KS
378
379 ctx->frame = data;
380 ctx->frame->pict_type = AV_PICTURE_TYPE_I;
381 ctx->frame->key_frame = 1;
382
383 off = FFALIGN(AIC_HDR_SIZE + ctx->num_x_slices * ctx->mb_height * 2, 4);
384
385 if (buf_size < off) {
386 av_log(avctx, AV_LOG_ERROR, "Too small frame\n");
387 return AVERROR_INVALIDDATA;
388 }
389
d8d124ee
VG
390 ret = aic_decode_header(ctx, buf, buf_size);
391 if (ret < 0) {
392 av_log(avctx, AV_LOG_ERROR, "Invalid header\n");
1232a164 393 return ret;
d8d124ee 394 }
1232a164 395
6e2ad28c 396 if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
1232a164
KS
397 return ret;
398
399 bytestream2_init(&gb, buf + AIC_HDR_SIZE,
400 ctx->num_x_slices * ctx->mb_height * 2);
401
402 for (y = 0; y < ctx->mb_height; y++) {
403 for (x = 0; x < ctx->mb_width; x += ctx->slice_width) {
404 slice_size = bytestream2_get_le16(&gb) * 4;
405 if (slice_size + off > buf_size || !slice_size) {
d8d124ee
VG
406 av_log(avctx, AV_LOG_ERROR,
407 "Incorrect slice size %d at %d.%d\n", slice_size, x, y);
1232a164
KS
408 return AVERROR_INVALIDDATA;
409 }
410
d8d124ee
VG
411 ret = aic_decode_slice(ctx, x, y, buf + off, slice_size);
412 if (ret < 0) {
413 av_log(avctx, AV_LOG_ERROR,
414 "Error decoding slice at %d.%d\n", x, y);
1232a164 415 return ret;
d8d124ee 416 }
1232a164
KS
417
418 off += slice_size;
419 }
420 }
421
422 *got_frame = 1;
423
424 return avpkt->size;
425}
426
427static av_cold int aic_decode_init(AVCodecContext *avctx)
428{
429 AICContext *ctx = avctx->priv_data;
430 int i;
431 uint8_t scan[64];
432
433 ctx->avctx = avctx;
434
435 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
436
e3fcb143 437 ff_idctdsp_init(&ctx->idsp, avctx);
1232a164
KS
438
439 for (i = 0; i < 64; i++)
440 scan[i] = i;
e3fcb143 441 ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, scan);
1232a164
KS
442
443 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
444 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
445
e878ec0d
VG
446 ctx->num_x_slices = (ctx->mb_width + 15) >> 4;
447 ctx->slice_width = 16;
1232a164 448 for (i = 1; i < 32; i++) {
303ec065 449 if (!(ctx->mb_width % i) && (ctx->mb_width / i <= 32)) {
1232a164
KS
450 ctx->slice_width = ctx->mb_width / i;
451 ctx->num_x_slices = i;
452 break;
453 }
454 }
455
456 ctx->slice_data = av_malloc(ctx->slice_width * AIC_BAND_COEFFS
457 * sizeof(*ctx->slice_data));
458 if (!ctx->slice_data) {
459 av_log(avctx, AV_LOG_ERROR, "Error allocating slice buffer\n");
460
461 return AVERROR(ENOMEM);
462 }
463
464 for (i = 0; i < NUM_BANDS; i++)
465 ctx->data_ptr[i] = ctx->slice_data + ctx->slice_width
466 * aic_band_off[i];
467
468 return 0;
469}
470
471static av_cold int aic_decode_close(AVCodecContext *avctx)
472{
473 AICContext *ctx = avctx->priv_data;
474
475 av_freep(&ctx->slice_data);
476
477 return 0;
478}
479
480AVCodec ff_aic_decoder = {
481 .name = "aic",
b2bed932 482 .long_name = NULL_IF_CONFIG_SMALL("Apple Intermediate Codec"),
1232a164
KS
483 .type = AVMEDIA_TYPE_VIDEO,
484 .id = AV_CODEC_ID_AIC,
485 .priv_data_size = sizeof(AICContext),
486 .init = aic_decode_init,
487 .close = aic_decode_close,
488 .decode = aic_decode_frame,
6e2ad28c
PM
489 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
490 .init_thread_copy = ONLY_IF_THREADS_ENABLED(aic_decode_init),
1232a164 491};