1 /*
2 * AAC decoder
3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 *
6 * AAC LATM decoder
7 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
8 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
9 *
10 * This file is part of Libav.
11 *
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27 /**
28 * @file
29 * AAC decoder
30 * @author Oded Shimon ( ods15 ods15 dyndns org )
31 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
32 */
33
34 /*
35 * supported tools
36 *
37 * Support? Name
38 * N (code in SoC repo) gain control
39 * Y block switching
40 * Y window shapes - standard
41 * N window shapes - Low Delay
42 * Y filterbank - standard
43 * N (code in SoC repo) filterbank - Scalable Sample Rate
44 * Y Temporal Noise Shaping
45 * Y Long Term Prediction
46 * Y intensity stereo
47 * Y channel coupling
48 * Y frequency domain prediction
49 * Y Perceptual Noise Substitution
50 * Y Mid/Side stereo
51 * N Scalable Inverse AAC Quantization
52 * N Frequency Selective Switch
53 * N upsampling filter
54 * Y quantization & coding - AAC
55 * N quantization & coding - TwinVQ
56 * N quantization & coding - BSAC
57 * N AAC Error Resilience tools
58 * N Error Resilience payload syntax
59 * N Error Protection tool
60 * N CELP
61 * N Silence Compression
62 * N HVXC
63 * N HVXC 4kbits/s VR
64 * N Structured Audio tools
65 * N Structured Audio Sample Bank Format
66 * N MIDI
67 * N Harmonic and Individual Lines plus Noise
68 * N Text-To-Speech Interface
69 * Y Spectral Band Replication
70 * Y (not in this code) Layer-1
71 * Y (not in this code) Layer-2
72 * Y (not in this code) Layer-3
73 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
74 * Y Parametric Stereo
75 * N Direct Stream Transfer
76 *
77 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
78 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
79 * Parametric Stereo.
80 */
81
82
83 #include "avcodec.h"
84 #include "internal.h"
85 #include "get_bits.h"
86 #include "dsputil.h"
87 #include "fft.h"
88 #include "fmtconvert.h"
89 #include "lpc.h"
90 #include "kbdwin.h"
91 #include "sinewin.h"
92
93 #include "aac.h"
94 #include "aactab.h"
95 #include "aacdectab.h"
96 #include "cbrt_tablegen.h"
97 #include "sbr.h"
98 #include "aacsbr.h"
99 #include "mpeg4audio.h"
100 #include "aacadtsdec.h"
101 #include "libavutil/intfloat.h"
102
103 #include <assert.h>
104 #include <errno.h>
105 #include <math.h>
106 #include <string.h>
107
108 #if ARCH_ARM
109 # include "arm/aac.h"
110 #endif
111
112 static VLC vlc_scalefactors;
113 static VLC vlc_spectral[11];
114
115 static const char overread_err[] = "Input buffer exhausted before END element found\n";
116
117 static int count_channels(uint8_t (*layout)[3], int tags)
118 {
119 int i, sum = 0;
120 for (i = 0; i < tags; i++) {
121 int syn_ele = layout[i][0];
122 int pos = layout[i][2];
123 sum += (1 + (syn_ele == TYPE_CPE)) *
124 (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
125 }
126 return sum;
127 }
128
129 /**
130 * Check for the channel element in the current channel position configuration.
131 * If it exists, make sure the appropriate element is allocated and map the
132 * channel order to match the internal Libav channel layout.
133 *
134 * @param che_pos current channel position configuration
135 * @param type channel element type
136 * @param id channel element id
137 * @param channels count of the number of channels in the configuration
138 *
139 * @return Returns error status. 0 - OK, !0 - error
140 */
141 static av_cold int che_configure(AACContext *ac,
142 enum ChannelPosition che_pos,
143 int type, int id, int *channels)
144 {
145 if (che_pos) {
146 if (!ac->che[type][id]) {
147 if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
148 return AVERROR(ENOMEM);
149 ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
150 }
151 if (type != TYPE_CCE) {
152 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
153 if (type == TYPE_CPE ||
154 (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
155 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
156 }
157 }
158 } else {
159 if (ac->che[type][id])
160 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
161 av_freep(&ac->che[type][id]);
162 }
163 return 0;
164 }
165
166 struct elem_to_channel {
167 uint64_t av_position;
168 uint8_t syn_ele;
169 uint8_t elem_id;
170 uint8_t aac_position;
171 };
172
173 static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
174 uint8_t (*layout_map)[3], int offset, int tags, uint64_t left,
175 uint64_t right, int pos)
176 {
177 if (layout_map[offset][0] == TYPE_CPE) {
178 e2c_vec[offset] = (struct elem_to_channel) {
179 .av_position = left | right, .syn_ele = TYPE_CPE,
180 .elem_id = layout_map[offset ][1], .aac_position = pos };
181 return 1;
182 } else {
183 e2c_vec[offset] = (struct elem_to_channel) {
184 .av_position = left, .syn_ele = TYPE_SCE,
185 .elem_id = layout_map[offset ][1], .aac_position = pos };
186 e2c_vec[offset + 1] = (struct elem_to_channel) {
187 .av_position = right, .syn_ele = TYPE_SCE,
188 .elem_id = layout_map[offset + 1][1], .aac_position = pos };
189 return 2;
190 }
191 }
192
193 static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, int *current) {
194 int num_pos_channels = 0;
195 int first_cpe = 0;
196 int sce_parity = 0;
197 int i;
198 for (i = *current; i < tags; i++) {
199 if (layout_map[i][2] != pos)
200 break;
201 if (layout_map[i][0] == TYPE_CPE) {
202 if (sce_parity) {
203 if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
204 sce_parity = 0;
205 } else {
206 return -1;
207 }
208 }
209 num_pos_channels += 2;
210 first_cpe = 1;
211 } else {
212 num_pos_channels++;
213 sce_parity ^= 1;
214 }
215 }
216 if (sce_parity &&
217 ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
218 return -1;
219 *current = i;
220 return num_pos_channels;
221 }
222
223 static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
224 {
225 int i, n, total_non_cc_elements;
226 struct elem_to_channel e2c_vec[4*MAX_ELEM_ID] = {{ 0 }};
227 int num_front_channels, num_side_channels, num_back_channels;
228 uint64_t layout;
229
230 if (FF_ARRAY_ELEMS(e2c_vec) < tags)
231 return 0;
232
233 i = 0;
234 num_front_channels =
235 count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
236 if (num_front_channels < 0)
237 return 0;
238 num_side_channels =
239 count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
240 if (num_side_channels < 0)
241 return 0;
242 num_back_channels =
243 count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
244 if (num_back_channels < 0)
245 return 0;
246
247 i = 0;
248 if (num_front_channels & 1) {
249 e2c_vec[i] = (struct elem_to_channel) {
250 .av_position = AV_CH_FRONT_CENTER, .syn_ele = TYPE_SCE,
251 .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_FRONT };
252 i++;
253 num_front_channels--;
254 }
255 if (num_front_channels >= 4) {
256 i += assign_pair(e2c_vec, layout_map, i, tags,
257 AV_CH_FRONT_LEFT_OF_CENTER,
258 AV_CH_FRONT_RIGHT_OF_CENTER,
259 AAC_CHANNEL_FRONT);
260 num_front_channels -= 2;
261 }
262 if (num_front_channels >= 2) {
263 i += assign_pair(e2c_vec, layout_map, i, tags,
264 AV_CH_FRONT_LEFT,
265 AV_CH_FRONT_RIGHT,
266 AAC_CHANNEL_FRONT);
267 num_front_channels -= 2;
268 }
269 while (num_front_channels >= 2) {
270 i += assign_pair(e2c_vec, layout_map, i, tags,
271 UINT64_MAX,
272 UINT64_MAX,
273 AAC_CHANNEL_FRONT);
274 num_front_channels -= 2;
275 }
276
277 if (num_side_channels >= 2) {
278 i += assign_pair(e2c_vec, layout_map, i, tags,
279 AV_CH_SIDE_LEFT,
280 AV_CH_SIDE_RIGHT,
281 AAC_CHANNEL_FRONT);
282 num_side_channels -= 2;
283 }
284 while (num_side_channels >= 2) {
285 i += assign_pair(e2c_vec, layout_map, i, tags,
286 UINT64_MAX,
287 UINT64_MAX,
288 AAC_CHANNEL_SIDE);
289 num_side_channels -= 2;
290 }
291
292 while (num_back_channels >= 4) {
293 i += assign_pair(e2c_vec, layout_map, i, tags,
294 UINT64_MAX,
295 UINT64_MAX,
296 AAC_CHANNEL_BACK);
297 num_back_channels -= 2;
298 }
299 if (num_back_channels >= 2) {
300 i += assign_pair(e2c_vec, layout_map, i, tags,
301 AV_CH_BACK_LEFT,
302 AV_CH_BACK_RIGHT,
303 AAC_CHANNEL_BACK);
304 num_back_channels -= 2;
305 }
306 if (num_back_channels) {
307 e2c_vec[i] = (struct elem_to_channel) {
308 .av_position = AV_CH_BACK_CENTER, .syn_ele = TYPE_SCE,
309 .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_BACK };
310 i++;
311 num_back_channels--;
312 }
313
314 if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
315 e2c_vec[i] = (struct elem_to_channel) {
316 .av_position = AV_CH_LOW_FREQUENCY, .syn_ele = TYPE_LFE,
317 .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
318 i++;
319 }
320 while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
321 e2c_vec[i] = (struct elem_to_channel) {
322 .av_position = UINT64_MAX, .syn_ele = TYPE_LFE,
323 .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
324 i++;
325 }
326
327 // Must choose a stable sort
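// The optimized bubble sort below is stable and cheap for these few elements.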
328 total_non_cc_elements = n = i;
329 do {
330 int next_n = 0;
331 for (i = 1; i < n; i++) {
332 if (e2c_vec[i-1].av_position > e2c_vec[i].av_position) {
333 FFSWAP(struct elem_to_channel, e2c_vec[i-1], e2c_vec[i]);
334 next_n = i;
335 }
336 }
337 n = next_n;
338 } while (n > 0);
339
340 layout = 0;
341 for (i = 0; i < total_non_cc_elements; i++) {
342 layout_map[i][0] = e2c_vec[i].syn_ele;
343 layout_map[i][1] = e2c_vec[i].elem_id;
344 layout_map[i][2] = e2c_vec[i].aac_position;
345 if (e2c_vec[i].av_position != UINT64_MAX) {
346 layout |= e2c_vec[i].av_position;
347 }
348 }
349
350 return layout;
351 }
352
353 /**
354 * Save current output configuration if and only if it has been locked.
355 */
356 static void push_output_configuration(AACContext *ac) {
357 if (ac->oc[1].status == OC_LOCKED) {
358 ac->oc[0] = ac->oc[1];
359 }
360 ac->oc[1].status = OC_NONE;
361 }
362
363 /**
364 * Restore the previous output configuration if and only if the current
365 * configuration is unlocked.
366 */
367 static void pop_output_configuration(AACContext *ac) {
368 if (ac->oc[1].status != OC_LOCKED) {
369 ac->oc[1] = ac->oc[0];
370 ac->avctx->channels = ac->oc[1].channels;
371 ac->avctx->channel_layout = ac->oc[1].channel_layout;
372 }
373 }
374
375 /**
376 * Configure output channel order based on the current program configuration element.
377 *
378 * @return Returns error status. 0 - OK, !0 - error
379 */
380 static int output_configure(AACContext *ac,
381 uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
382 int channel_config, enum OCStatus oc_type)
383 {
384 AVCodecContext *avctx = ac->avctx;
385 int i, channels = 0, ret;
386 uint64_t layout = 0;
387
388 if (ac->oc[1].layout_map != layout_map) {
389 memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
390 ac->oc[1].layout_map_tags = tags;
391 }
392
393 // Try to sniff a reasonable channel order, otherwise output the
394 // channels in the order the PCE declared them.
395 if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
396 layout = sniff_channel_order(layout_map, tags);
397 for (i = 0; i < tags; i++) {
398 int type = layout_map[i][0];
399 int id = layout_map[i][1];
400 int position = layout_map[i][2];
401 // Allocate or free elements depending on if they are in the
402 // current program configuration.
403 ret = che_configure(ac, position, type, id, &channels);
404 if (ret < 0)
405 return ret;
406 }
407
408 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
409 avctx->channel_layout = ac->oc[1].channel_layout = layout;
410 avctx->channels = ac->oc[1].channels = channels;
411 ac->oc[1].status = oc_type;
412
413 return 0;
414 }
415
416 /**
417 * Set up channel positions based on a default channel configuration
418 * as specified in table 1.17.
419 *
420 * @return Returns error status. 0 - OK, !0 - error
421 */
422 static int set_default_channel_config(AVCodecContext *avctx,
423 uint8_t (*layout_map)[3],
424 int *tags,
425 int channel_config)
426 {
427 if (channel_config < 1 || channel_config > 7) {
428 av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
429 channel_config);
430 return -1;
431 }
432 *tags = tags_per_config[channel_config];
433 memcpy(layout_map, aac_channel_layout_map[channel_config-1], *tags * sizeof(*layout_map));
434 return 0;
435 }
436
437 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
438 {
439 // For PCE based channel configurations map the channels solely based on tags.
440 if (!ac->oc[1].m4ac.chan_config) {
441 return ac->tag_che_map[type][elem_id];
442 }
443 // Allow single CPE stereo files to be signalled with mono configuration.
444 if (!ac->tags_mapped && type == TYPE_CPE && ac->oc[1].m4ac.chan_config == 1) {
445 uint8_t layout_map[MAX_ELEM_ID*4][3];
446 int layout_map_tags;
447 push_output_configuration(ac);
448
449 if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
450 2) < 0)
451 return NULL;
452 if (output_configure(ac, layout_map, layout_map_tags,
453 2, OC_TRIAL_FRAME) < 0)
454 return NULL;
455
456 ac->oc[1].m4ac.chan_config = 2;
457 }
458 // And vice-versa
459 if (!ac->tags_mapped && type == TYPE_SCE && ac->oc[1].m4ac.chan_config == 2) {
460 uint8_t layout_map[MAX_ELEM_ID*4][3];
461 int layout_map_tags;
462 push_output_configuration(ac);
463
464 if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
465 1) < 0)
466 return NULL;
467 if (output_configure(ac, layout_map, layout_map_tags,
468 1, OC_TRIAL_FRAME) < 0)
469 return NULL;
470
471 ac->oc[1].m4ac.chan_config = 1;
472 }
473 // For indexed channel configurations map the channels solely based on position.
474 switch (ac->oc[1].m4ac.chan_config) {
475 case 7:
476 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
477 ac->tags_mapped++;
478 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
479 }
480 case 6:
481 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
482 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
483 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
484 if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
485 ac->tags_mapped++;
486 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
487 }
488 case 5:
489 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
490 ac->tags_mapped++;
491 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
492 }
493 case 4:
494 if (ac->tags_mapped == 2 && ac->oc[1].m4ac.chan_config == 4 && type == TYPE_SCE) {
495 ac->tags_mapped++;
496 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
497 }
498 case 3:
499 case 2:
500 if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && type == TYPE_CPE) {
501 ac->tags_mapped++;
502 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
503 } else if (ac->oc[1].m4ac.chan_config == 2) {
504 return NULL;
505 }
506 case 1:
507 if (!ac->tags_mapped && type == TYPE_SCE) {
508 ac->tags_mapped++;
509 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
510 }
511 default:
512 return NULL;
513 }
514 }
515
516 /**
517 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
518 *
519 * @param type speaker type/position for these channels
520 */
521 static void decode_channel_map(uint8_t layout_map[][3],
522 enum ChannelPosition type,
523 GetBitContext *gb, int n)
524 {
525 while (n--) {
526 enum RawDataBlockType syn_ele;
527 switch (type) {
528 case AAC_CHANNEL_FRONT:
529 case AAC_CHANNEL_BACK:
530 case AAC_CHANNEL_SIDE:
531 syn_ele = get_bits1(gb);
532 break;
533 case AAC_CHANNEL_CC:
534 skip_bits1(gb);
535 syn_ele = TYPE_CCE;
536 break;
537 case AAC_CHANNEL_LFE:
538 syn_ele = TYPE_LFE;
539 break;
540 }
541 layout_map[0][0] = syn_ele;
542 layout_map[0][1] = get_bits(gb, 4);
543 layout_map[0][2] = type;
544 layout_map++;
545 }
546 }
547
548 /**
549 * Decode program configuration element; reference: table 4.2.
550 *
551 * @return Returns error status. 0 - OK, !0 - error
552 */
553 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
554 uint8_t (*layout_map)[3],
555 GetBitContext *gb)
556 {
557 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
558 int comment_len;
559 int tags;
560
561 skip_bits(gb, 2); // object_type
562
563 sampling_index = get_bits(gb, 4);
564 if (m4ac->sampling_index != sampling_index)
565 av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
566
567 num_front = get_bits(gb, 4);
568 num_side = get_bits(gb, 4);
569 num_back = get_bits(gb, 4);
570 num_lfe = get_bits(gb, 2);
571 num_assoc_data = get_bits(gb, 3);
572 num_cc = get_bits(gb, 4);
573
574 if (get_bits1(gb))
575 skip_bits(gb, 4); // mono_mixdown_tag
576 if (get_bits1(gb))
577 skip_bits(gb, 4); // stereo_mixdown_tag
578
579 if (get_bits1(gb))
580 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
581
582 decode_channel_map(layout_map , AAC_CHANNEL_FRONT, gb, num_front);
583 tags = num_front;
584 decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE, gb, num_side);
585 tags += num_side;
586 decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK, gb, num_back);
587 tags += num_back;
588 decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE, gb, num_lfe);
589 tags += num_lfe;
590
591 skip_bits_long(gb, 4 * num_assoc_data);
592
593 decode_channel_map(layout_map + tags, AAC_CHANNEL_CC, gb, num_cc);
594 tags += num_cc;
595
596 align_get_bits(gb);
597
598 /* comment field, first byte is length */
599 comment_len = get_bits(gb, 8) * 8;
600 if (get_bits_left(gb) < comment_len) {
601 av_log(avctx, AV_LOG_ERROR, overread_err);
602 return -1;
603 }
604 skip_bits_long(gb, comment_len);
605 return tags;
606 }
607
608 /**
609 * Decode GA "General Audio" specific configuration; reference: table 4.1.
610 *
611 * @param ac pointer to AACContext, may be null
611 * @param avctx pointer to AVCodecContext, used for logging
613 *
614 * @return Returns error status. 0 - OK, !0 - error
615 */
616 static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
617 GetBitContext *gb,
618 MPEG4AudioConfig *m4ac,
619 int channel_config)
620 {
621 int extension_flag, ret;
622 uint8_t layout_map[MAX_ELEM_ID*4][3];
623 int tags = 0;
624
625 if (get_bits1(gb)) { // frameLengthFlag
626 av_log_missing_feature(avctx, "960/120 MDCT window is", 1);
627 return -1;
628 }
629
630 if (get_bits1(gb)) // dependsOnCoreCoder
631 skip_bits(gb, 14); // coreCoderDelay
632 extension_flag = get_bits1(gb);
633
634 if (m4ac->object_type == AOT_AAC_SCALABLE ||
635 m4ac->object_type == AOT_ER_AAC_SCALABLE)
636 skip_bits(gb, 3); // layerNr
637
638 if (channel_config == 0) {
639 skip_bits(gb, 4); // element_instance_tag
640 tags = decode_pce(avctx, m4ac, layout_map, gb);
641 if (tags < 0)
642 return tags;
643 } else {
644 if ((ret = set_default_channel_config(avctx, layout_map, &tags, channel_config)))
645 return ret;
646 }
647
648 if (count_channels(layout_map, tags) > 1) {
649 m4ac->ps = 0;
650 } else if (m4ac->sbr == 1 && m4ac->ps == -1)
651 m4ac->ps = 1;
652
653 if (ac && (ret = output_configure(ac, layout_map, tags,
654 channel_config, OC_GLOBAL_HDR)))
655 return ret;
656
657 if (extension_flag) {
658 switch (m4ac->object_type) {
659 case AOT_ER_BSAC:
660 skip_bits(gb, 5); // numOfSubFrame
661 skip_bits(gb, 11); // layer_length
662 break;
663 case AOT_ER_AAC_LC:
664 case AOT_ER_AAC_LTP:
665 case AOT_ER_AAC_SCALABLE:
666 case AOT_ER_AAC_LD:
667 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
668 * aacScalefactorDataResilienceFlag
669 * aacSpectralDataResilienceFlag
670 */
671 break;
672 }
673 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
674 }
675 return 0;
676 }
677
678 /**
679 * Decode audio specific configuration; reference: table 1.13.
680 *
681 * @param ac pointer to AACContext, may be null
682 * @param avctx pointer to AVCodecContext, used for logging
683 * @param m4ac pointer to MPEG4AudioConfig, used for parsing
684 * @param data pointer to buffer holding an audio specific config
685 * @param bit_size size of audio specific config or data in bits
686 * @param sync_extension look for an appended sync extension
687 *
688 * @return Returns error status or number of consumed bits. <0 - error
689 */
690 static int decode_audio_specific_config(AACContext *ac,
691 AVCodecContext *avctx,
692 MPEG4AudioConfig *m4ac,
693 const uint8_t *data, int bit_size,
694 int sync_extension)
695 {
696 GetBitContext gb;
697 int i;
698
699 av_dlog(avctx, "extradata size %d\n", avctx->extradata_size);
700 for (i = 0; i < avctx->extradata_size; i++)
701 av_dlog(avctx, "%02x ", avctx->extradata[i]);
702 av_dlog(avctx, "\n");
703
704 init_get_bits(&gb, data, bit_size);
705
706 if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, sync_extension)) < 0)
707 return -1;
708 if (m4ac->sampling_index > 12) {
709 av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
710 return -1;
711 }
712
713 skip_bits_long(&gb, i);
714
715 switch (m4ac->object_type) {
716 case AOT_AAC_MAIN:
717 case AOT_AAC_LC:
718 case AOT_AAC_LTP:
719 if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
720 return -1;
721 break;
722 default:
723 av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
724 m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
725 return -1;
726 }
727
728 av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
729 m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
730 m4ac->sample_rate, m4ac->sbr, m4ac->ps);
731
732 return get_bits_count(&gb);
733 }
734
735 /**
736 * linear congruential pseudorandom number generator
737 *
738 * @param previous_val pointer to the current state of the generator
739 *
740 * @return Returns a 32-bit pseudorandom integer
741 */
742 static av_always_inline int lcg_random(int previous_val)
743 {
744 return previous_val * 1664525 + 1013904223;
745 }
746
747 static av_always_inline void reset_predict_state(PredictorState *ps)
748 {
749 ps->r0 = 0.0f;
750 ps->r1 = 0.0f;
751 ps->cor0 = 0.0f;
752 ps->cor1 = 0.0f;
753 ps->var0 = 1.0f;
754 ps->var1 = 1.0f;
755 }
756
757 static void reset_all_predictors(PredictorState *ps)
758 {
759 int i;
760 for (i = 0; i < MAX_PREDICTORS; i++)
761 reset_predict_state(&ps[i]);
762 }
763
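/* Map an arbitrary sample rate to the nearest standard MPEG-4
 * sampling_frequency_index; the thresholds are approximately the geometric
 * means of adjacent standard rates (96000, 88200, 64000, 48000, ...). */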
764 static int sample_rate_idx (int rate)
765 {
766 if (92017 <= rate) return 0;
767 else if (75132 <= rate) return 1;
768 else if (55426 <= rate) return 2;
769 else if (46009 <= rate) return 3;
770 else if (37566 <= rate) return 4;
771 else if (27713 <= rate) return 5;
772 else if (23004 <= rate) return 6;
773 else if (18783 <= rate) return 7;
774 else if (13856 <= rate) return 8;
775 else if (11502 <= rate) return 9;
776 else if (9391 <= rate) return 10;
777 else return 11;
778 }
779
780 static void reset_predictor_group(PredictorState *ps, int group_num)
781 {
782 int i;
783 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
784 reset_predict_state(&ps[i]);
785 }
786
787 #define AAC_INIT_VLC_STATIC(num, size) \
788 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
789 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
790 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
791 size);
792
793 static av_cold int aac_decode_init(AVCodecContext *avctx)
794 {
795 AACContext *ac = avctx->priv_data;
796 float output_scale_factor;
797
798 ac->avctx = avctx;
799 ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
800
801 if (avctx->extradata_size > 0) {
802 if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
803 avctx->extradata,
804 avctx->extradata_size*8, 1) < 0)
805 return -1;
806 } else {
807 int sr, i;
808 uint8_t layout_map[MAX_ELEM_ID*4][3];
809 int layout_map_tags;
810
811 sr = sample_rate_idx(avctx->sample_rate);
812 ac->oc[1].m4ac.sampling_index = sr;
813 ac->oc[1].m4ac.channels = avctx->channels;
814 ac->oc[1].m4ac.sbr = -1;
815 ac->oc[1].m4ac.ps = -1;
816
817 for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
818 if (ff_mpeg4audio_channels[i] == avctx->channels)
819 break;
820 if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
821 i = 0;
822 }
823 ac->oc[1].m4ac.chan_config = i;
824
825 if (ac->oc[1].m4ac.chan_config) {
826 int ret = set_default_channel_config(avctx, layout_map,
827 &layout_map_tags, ac->oc[1].m4ac.chan_config);
828 if (!ret)
829 output_configure(ac, layout_map, layout_map_tags,
830 ac->oc[1].m4ac.chan_config, OC_GLOBAL_HDR);
831 else if (avctx->err_recognition & AV_EF_EXPLODE)
832 return AVERROR_INVALIDDATA;
833 }
834 }
835
836 if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
837 avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
838 output_scale_factor = 1.0 / 32768.0;
839 } else {
840 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
841 output_scale_factor = 1.0;
842 }
843
844 AAC_INIT_VLC_STATIC( 0, 304);
845 AAC_INIT_VLC_STATIC( 1, 270);
846 AAC_INIT_VLC_STATIC( 2, 550);
847 AAC_INIT_VLC_STATIC( 3, 300);
848 AAC_INIT_VLC_STATIC( 4, 328);
849 AAC_INIT_VLC_STATIC( 5, 294);
850 AAC_INIT_VLC_STATIC( 6, 306);
851 AAC_INIT_VLC_STATIC( 7, 268);
852 AAC_INIT_VLC_STATIC( 8, 510);
853 AAC_INIT_VLC_STATIC( 9, 366);
854 AAC_INIT_VLC_STATIC(10, 462);
855
856 ff_aac_sbr_init();
857
858 ff_dsputil_init(&ac->dsp, avctx);
859 ff_fmt_convert_init(&ac->fmt_conv, avctx);
860
861 ac->random_state = 0x1f2e3d4c;
862
863 ff_aac_tableinit();
864
865 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
866 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
867 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
868 352);
869
870 ff_mdct_init(&ac->mdct, 11, 1, output_scale_factor/1024.0);
871 ff_mdct_init(&ac->mdct_small, 8, 1, output_scale_factor/128.0);
872 ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0/output_scale_factor);
873 // window initialization
874 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
875 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
876 ff_init_ff_sine_windows(10);
877 ff_init_ff_sine_windows( 7);
878
879 cbrt_tableinit();
880
881 avcodec_get_frame_defaults(&ac->frame);
882 avctx->coded_frame = &ac->frame;
883
884 return 0;
885 }
886
887 /**
888 * Skip data_stream_element; reference: table 4.10.
889 */
890 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
891 {
892 int byte_align = get_bits1(gb);
893 int count = get_bits(gb, 8);
894 if (count == 255)
895 count += get_bits(gb, 8);
896 if (byte_align)
897 align_get_bits(gb);
898
899 if (get_bits_left(gb) < 8 * count) {
900 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
901 return -1;
902 }
903 skip_bits_long(gb, 8 * count);
904 return 0;
905 }
906
907 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
908 GetBitContext *gb)
909 {
910 int sfb;
911 if (get_bits1(gb)) {
912 ics->predictor_reset_group = get_bits(gb, 5);
913 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
914 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
915 return -1;
916 }
917 }
918 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
919 ics->prediction_used[sfb] = get_bits1(gb);
920 }
921 return 0;
922 }
923
924 /**
925 * Decode Long Term Prediction data; reference: table 4.xx.
926 */
927 static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
928 GetBitContext *gb, uint8_t max_sfb)
929 {
930 int sfb;
931
932 ltp->lag = get_bits(gb, 11);
933 ltp->coef = ltp_coef[get_bits(gb, 3)];
934 for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
935 ltp->used[sfb] = get_bits1(gb);
936 }
937
938 /**
939 * Decode Individual Channel Stream info; reference: table 4.6.
940 */
941 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
942 GetBitContext *gb)
943 {
944 if (get_bits1(gb)) {
945 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
946 return AVERROR_INVALIDDATA;
947 }
948 ics->window_sequence[1] = ics->window_sequence[0];
949 ics->window_sequence[0] = get_bits(gb, 2);
950 ics->use_kb_window[1] = ics->use_kb_window[0];
951 ics->use_kb_window[0] = get_bits1(gb);
952 ics->num_window_groups = 1;
953 ics->group_len[0] = 1;
954 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
955 int i;
956 ics->max_sfb = get_bits(gb, 4);
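/* scale_factor_grouping: each set bit merges the next short window into the
 * current window group, a clear bit starts a new group. */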
957 for (i = 0; i < 7; i++) {
958 if (get_bits1(gb)) {
959 ics->group_len[ics->num_window_groups - 1]++;
960 } else {
961 ics->num_window_groups++;
962 ics->group_len[ics->num_window_groups - 1] = 1;
963 }
964 }
965 ics->num_windows = 8;
966 ics->swb_offset = ff_swb_offset_128[ac->oc[1].m4ac.sampling_index];
967 ics->num_swb = ff_aac_num_swb_128[ac->oc[1].m4ac.sampling_index];
968 ics->tns_max_bands = ff_tns_max_bands_128[ac->oc[1].m4ac.sampling_index];
969 ics->predictor_present = 0;
970 } else {
971 ics->max_sfb = get_bits(gb, 6);
972 ics->num_windows = 1;
973 ics->swb_offset = ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index];
974 ics->num_swb = ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index];
975 ics->tns_max_bands = ff_tns_max_bands_1024[ac->oc[1].m4ac.sampling_index];
976 ics->predictor_present = get_bits1(gb);
977 ics->predictor_reset_group = 0;
978 if (ics->predictor_present) {
979 if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
980 if (decode_prediction(ac, ics, gb)) {
981 return AVERROR_INVALIDDATA;
982 }
983 } else if (ac->oc[1].m4ac.object_type == AOT_AAC_LC) {
984 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
985 return AVERROR_INVALIDDATA;
986 } else {
987 if ((ics->ltp.present = get_bits(gb, 1)))
988 decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
989 }
990 }
991 }
992
993 if (ics->max_sfb > ics->num_swb) {
994 av_log(ac->avctx, AV_LOG_ERROR,
995 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
996 ics->max_sfb, ics->num_swb);
997 return AVERROR_INVALIDDATA;
998 }
999
1000 return 0;
1001 }
1002
1003 /**
1004 * Decode band types (section_data payload); reference: table 4.46.
1005 *
1006 * @param band_type array of the used band type
1007 * @param band_type_run_end array of the last scalefactor band of a band type run
1008 *
1009 * @return Returns error status. 0 - OK, !0 - error
1010 */
1011 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
1012 int band_type_run_end[120], GetBitContext *gb,
1013 IndividualChannelStream *ics)
1014 {
1015 int g, idx = 0;
1016 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
1017 for (g = 0; g < ics->num_window_groups; g++) {
1018 int k = 0;
1019 while (k < ics->max_sfb) {
1020 uint8_t sect_end = k;
1021 int sect_len_incr;
1022 int sect_band_type = get_bits(gb, 4);
1023 if (sect_band_type == 12) {
1024 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
1025 return -1;
1026 }
1027 do {
1028 sect_len_incr = get_bits(gb, bits);
1029 sect_end += sect_len_incr;
1030 if (get_bits_left(gb) < 0) {
1031 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
1032 return -1;
1033 }
1034 if (sect_end > ics->max_sfb) {
1035 av_log(ac->avctx, AV_LOG_ERROR,
1036 "Number of bands (%d) exceeds limit (%d).\n",
1037 sect_end, ics->max_sfb);
1038 return -1;
1039 }
1040 } while (sect_len_incr == (1 << bits) - 1);
1041 for (; k < sect_end; k++) {
1042 band_type [idx] = sect_band_type;
1043 band_type_run_end[idx++] = sect_end;
1044 }
1045 }
1046 }
1047 return 0;
1048 }
1049
1050 /**
1051 * Decode scalefactors; reference: table 4.47.
1052 *
1053 * @param global_gain first scalefactor value as scalefactors are differentially coded
1054 * @param band_type array of the used band type
1055 * @param band_type_run_end array of the last scalefactor band of a band type run
1056 * @param sf array of scalefactors or intensity stereo positions
1057 *
1058 * @return Returns error status. 0 - OK, !0 - error
1059 */
1060 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
1061 unsigned int global_gain,
1062 IndividualChannelStream *ics,
1063 enum BandType band_type[120],
1064 int band_type_run_end[120])
1065 {
1066 int g, i, idx = 0;
1067 int offset[3] = { global_gain, global_gain - 90, 0 };
1068 int clipped_offset;
1069 int noise_flag = 1;
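/* offset[0] tracks ordinary scalefactors (starting at global_gain), offset[1]
 * noise energies for PNS (starting at global_gain - 90), offset[2] intensity
 * stereo positions (starting at 0); all three are delta coded below. */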
1070 for (g = 0; g < ics->num_window_groups; g++) {
1071 for (i = 0; i < ics->max_sfb;) {
1072 int run_end = band_type_run_end[idx];
1073 if (band_type[idx] == ZERO_BT) {
1074 for (; i < run_end; i++, idx++)
1075 sf[idx] = 0.;
1076 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
1077 for (; i < run_end; i++, idx++) {
1078 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1079 clipped_offset = av_clip(offset[2], -155, 100);
1080 if (offset[2] != clipped_offset) {
1081 av_log_ask_for_sample(ac->avctx, "Intensity stereo "
1082 "position clipped (%d -> %d).\nIf you heard an "
1083 "audible artifact, there may be a bug in the "
1084 "decoder. ", offset[2], clipped_offset);
1085 }
1086 sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1087 }
1088 } else if (band_type[idx] == NOISE_BT) {
1089 for (; i < run_end; i++, idx++) {
1090 if (noise_flag-- > 0)
1091 offset[1] += get_bits(gb, 9) - 256;
1092 else
1093 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1094 clipped_offset = av_clip(offset[1], -100, 155);
1095 if (offset[1] != clipped_offset) {
1096 av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
1097 "(%d -> %d).\nIf you heard an audible "
1098 "artifact, there may be a bug in the decoder. ",
1099 offset[1], clipped_offset);
1100 }
1101 sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1102 }
1103 } else {
1104 for (; i < run_end; i++, idx++) {
1105 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1106 if (offset[0] > 255U) {
1107 av_log(ac->avctx, AV_LOG_ERROR,
1108 "Scalefactor (%d) out of range.\n", offset[0]);
1109 return -1;
1110 }
1111 sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1112 }
1113 }
1114 }
1115 }
1116 return 0;
1117 }
1118
1119 /**
1120 * Decode pulse data; reference: table 4.7.
1121 */
1122 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1123 const uint16_t *swb_offset, int num_swb)
1124 {
1125 int i, pulse_swb;
1126 pulse->num_pulse = get_bits(gb, 2) + 1;
1127 pulse_swb = get_bits(gb, 6);
1128 if (pulse_swb >= num_swb)
1129 return -1;
1130 pulse->pos[0] = swb_offset[pulse_swb];
1131 pulse->pos[0] += get_bits(gb, 5);
1132 if (pulse->pos[0] > 1023)
1133 return -1;
1134 pulse->amp[0] = get_bits(gb, 4);
1135 for (i = 1; i < pulse->num_pulse; i++) {
1136 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1137 if (pulse->pos[i] > 1023)
1138 return -1;
1139 pulse->amp[i] = get_bits(gb, 4);
1140 }
1141 return 0;
1142 }
1143
1144 /**
1145 * Decode Temporal Noise Shaping data; reference: table 4.48.
1146 *
1147 * @return Returns error status. 0 - OK, !0 - error
1148 */
1149 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
1150 GetBitContext *gb, const IndividualChannelStream *ics)
1151 {
1152 int w, filt, i, coef_len, coef_res, coef_compress;
1153 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1154 const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1155 for (w = 0; w < ics->num_windows; w++) {
1156 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1157 coef_res = get_bits1(gb);
1158
1159 for (filt = 0; filt < tns->n_filt[w]; filt++) {
1160 int tmp2_idx;
1161 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1162
1163 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1164 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
1165 tns->order[w][filt], tns_max_order);
1166 tns->order[w][filt] = 0;
1167 return -1;
1168 }
1169 if (tns->order[w][filt]) {
1170 tns->direction[w][filt] = get_bits1(gb);
1171 coef_compress = get_bits1(gb);
1172 coef_len = coef_res + 3 - coef_compress;
1173 tmp2_idx = 2 * coef_compress + coef_res;
1174
1175 for (i = 0; i < tns->order[w][filt]; i++)
1176 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1177 }
1178 }
1179 }
1180 }
1181 return 0;
1182 }
1183
1184 /**
1185 * Decode Mid/Side data; reference: table 4.54.
1186 *
1187 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1188 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1189 * [3] reserved for scalable AAC
1190 */
1191 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
1192 int ms_present)
1193 {
1194 int idx;
1195 if (ms_present == 1) {
1196 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
1197 cpe->ms_mask[idx] = get_bits1(gb);
1198 } else if (ms_present == 2) {
1199 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
1200 }
1201 }
1202
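/* The VMUL* helpers below expand Huffman-decoded codebook indices into scaled
 * spectral values. The #ifndef guards allow architecture-specific headers
 * (e.g. arm/aac.h included above) to provide optimized replacements. */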
1203 #ifndef VMUL2
1204 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
1205 const float *scale)
1206 {
1207 float s = *scale;
1208 *dst++ = v[idx & 15] * s;
1209 *dst++ = v[idx>>4 & 15] * s;
1210 return dst;
1211 }
1212 #endif
1213
1214 #ifndef VMUL4
1215 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
1216 const float *scale)
1217 {
1218 float s = *scale;
1219 *dst++ = v[idx & 3] * s;
1220 *dst++ = v[idx>>2 & 3] * s;
1221 *dst++ = v[idx>>4 & 3] * s;
1222 *dst++ = v[idx>>6 & 3] * s;
1223 return dst;
1224 }
1225 #endif
1226
1227 #ifndef VMUL2S
1228 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1229 unsigned sign, const float *scale)
1230 {
1231 union av_intfloat32 s0, s1;
1232
1233 s0.f = s1.f = *scale;
1234 s0.i ^= sign >> 1 << 31;
1235 s1.i ^= sign << 31;
1236
1237 *dst++ = v[idx & 15] * s0.f;
1238 *dst++ = v[idx>>4 & 15] * s1.f;
1239
1240 return dst;
1241 }
1242 #endif
1243
1244 #ifndef VMUL4S
1245 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1246 unsigned sign, const float *scale)
1247 {
1248 unsigned nz = idx >> 12;
1249 union av_intfloat32 s = { .f = *scale };
1250 union av_intfloat32 t;
1251
1252 t.i = s.i ^ (sign & 1U<<31);
1253 *dst++ = v[idx & 3] * t.f;
1254
1255 sign <<= nz & 1; nz >>= 1;
1256 t.i = s.i ^ (sign & 1U<<31);
1257 *dst++ = v[idx>>2 & 3] * t.f;
1258
1259 sign <<= nz & 1; nz >>= 1;
1260 t.i = s.i ^ (sign & 1U<<31);
1261 *dst++ = v[idx>>4 & 3] * t.f;
1262
1263 sign <<= nz & 1; nz >>= 1;
1264 t.i = s.i ^ (sign & 1U<<31);
1265 *dst++ = v[idx>>6 & 3] * t.f;
1266
1267 return dst;
1268 }
1269 #endif
1270
1271 /**
1272 * Decode spectral data; reference: table 4.50.
1273 * Dequantize and scale spectral data; reference: 4.6.3.3.
1274 *
1275 * @param coef array of dequantized, scaled spectral data
1276 * @param sf array of scalefactors or intensity stereo positions
1277 * @param pulse_present set if pulses are present
1278 * @param pulse pointer to pulse data struct
1279 * @param band_type array of the used band type
1280 *
1281 * @return Returns error status. 0 - OK, !0 - error
1282 */
1283 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
1284 GetBitContext *gb, const float sf[120],
1285 int pulse_present, const Pulse *pulse,
1286 const IndividualChannelStream *ics,
1287 enum BandType band_type[120])
1288 {
1289 int i, k, g, idx = 0;
1290 const int c = 1024 / ics->num_windows;
1291 const uint16_t *offsets = ics->swb_offset;
1292 float *coef_base = coef;
1293
1294 for (g = 0; g < ics->num_windows; g++)
1295 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1296
1297 for (g = 0; g < ics->num_window_groups; g++) {
1298 unsigned g_len = ics->group_len[g];
1299
1300 for (i = 0; i < ics->max_sfb; i++, idx++) {
1301 const unsigned cbt_m1 = band_type[idx] - 1;
1302 float *cfo = coef + offsets[i];
1303 int off_len = offsets[i + 1] - offsets[i];
1304 int group;
1305
1306 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1307 for (group = 0; group < g_len; group++, cfo+=128) {
1308 memset(cfo, 0, off_len * sizeof(float));
1309 }
1310 } else if (cbt_m1 == NOISE_BT - 1) {
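/* Perceptual Noise Substitution: fill the band with pseudorandom values, then
 * rescale by sf[idx] / sqrt(band_energy) so the noise level follows the
 * decoded noise energy. */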
1311 for (group = 0; group < g_len; group++, cfo+=128) {
1312 float scale;
1313 float band_energy;
1314
1315 for (k = 0; k < off_len; k++) {
1316 ac->random_state = lcg_random(ac->random_state);
1317 cfo[k] = ac->random_state;
1318 }
1319
1320 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1321 scale = sf[idx] / sqrtf(band_energy);
1322 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1323 }
1324 } else {
1325 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1326 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1327 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1328 OPEN_READER(re, gb);
1329
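/* Dispatch on (codebook - 1) >> 1: codebooks 1-2 are signed 4-tuples without
 * extra sign bits, 3-4 unsigned 4-tuples with sign bits, 5-6 signed pairs,
 * 7-10 unsigned pairs with sign bits, and 11 adds escape sequences. */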
1330 switch (cbt_m1 >> 1) {
1331 case 0:
1332 for (group = 0; group < g_len; group++, cfo+=128) {
1333 float *cf = cfo;
1334 int len = off_len;
1335
1336 do {
1337 int code;
1338 unsigned cb_idx;
1339
1340 UPDATE_CACHE(re, gb);
1341 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1342 cb_idx = cb_vector_idx[code];
1343 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1344 } while (len -= 4);
1345 }
1346 break;
1347
1348 case 1:
1349 for (group = 0; group < g_len; group++, cfo+=128) {
1350 float *cf = cfo;
1351 int len = off_len;
1352
1353 do {
1354 int code;
1355 unsigned nnz;
1356 unsigned cb_idx;
1357 uint32_t bits;
1358
1359 UPDATE_CACHE(re, gb);
1360 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1361 cb_idx = cb_vector_idx[code];
1362 nnz = cb_idx >> 8 & 15;
1363 bits = nnz ? GET_CACHE(re, gb) : 0;
1364 LAST_SKIP_BITS(re, gb, nnz);
1365 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1366 } while (len -= 4);
1367 }
1368 break;
1369
1370 case 2:
1371 for (group = 0; group < g_len; group++, cfo+=128) {
1372 float *cf = cfo;
1373 int len = off_len;
1374
1375 do {
1376 int code;
1377 unsigned cb_idx;
1378
1379 UPDATE_CACHE(re, gb);
1380 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1381 cb_idx = cb_vector_idx[code];
1382 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1383 } while (len -= 2);
1384 }
1385 break;
1386
1387 case 3:
1388 case 4:
1389 for (group = 0; group < g_len; group++, cfo+=128) {
1390 float *cf = cfo;
1391 int len = off_len;
1392
1393 do {
1394 int code;
1395 unsigned nnz;
1396 unsigned cb_idx;
1397 unsigned sign;
1398
1399 UPDATE_CACHE(re, gb);
1400 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1401 cb_idx = cb_vector_idx[code];
1402 nnz = cb_idx >> 8 & 15;
1403 sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
1404 LAST_SKIP_BITS(re, gb, nnz);
1405 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1406 } while (len -= 2);
1407 }
1408 break;
1409
1410 default:
1411 for (group = 0; group < g_len; group++, cfo+=128) {
1412 float *cf = cfo;
1413 uint32_t *icf = (uint32_t *) cf;
1414 int len = off_len;
1415
1416 do {
1417 int code;
1418 unsigned nzt, nnz;
1419 unsigned cb_idx;
1420 uint32_t bits;
1421 int j;
1422
1423 UPDATE_CACHE(re, gb);
1424 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1425
1426 if (!code) {
1427 *icf++ = 0;
1428 *icf++ = 0;
1429 continue;
1430 }
1431
1432 cb_idx = cb_vector_idx[code];
1433 nnz = cb_idx >> 12;
1434 nzt = cb_idx >> 8;
1435 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1436 LAST_SKIP_BITS(re, gb, nnz);
1437
1438 for (j = 0; j < 2; j++) {
1439 if (nzt & 1<<j) {
1440 uint32_t b;
1441 int n;
1442 /* The total length of escape_sequence must be < 22 bits according
1443 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1444 UPDATE_CACHE(re, gb);
1445 b = GET_CACHE(re, gb);
1446 b = 31 - av_log2(~b);
1447
1448 if (b > 8) {
1449 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1450 return -1;
1451 }
1452
1453 SKIP_BITS(re, gb, b + 1);
1454 b += 4;
1455 n = (1 << b) + SHOW_UBITS(re, gb, b);
1456 LAST_SKIP_BITS(re, gb, b);
1457 *icf++ = cbrt_tab[n] | (bits & 1U<<31);
1458 bits <<= 1;
1459 } else {
1460 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1461 *icf++ = (bits & 1U<<31) | v;
1462 bits <<= !!v;
1463 }
1464 cb_idx >>= 4;
1465 }
1466 } while (len -= 2);
1467
1468 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1469 }
1470 }
1471
1472 CLOSE_READER(re, gb);
1473 }
1474 }
1475 coef += g_len << 7;
1476 }
1477
1478 if (pulse_present) {
1479 idx = 0;
1480 for (i = 0; i < pulse->num_pulse; i++) {
1481 float co = coef_base[ pulse->pos[i] ];
1482 while (offsets[idx + 1] <= pulse->pos[i])
1483 idx++;
1484 if (band_type[idx] != NOISE_BT && sf[idx]) {
1485 float ico = -pulse->amp[i];
1486 if (co) {
1487 co /= sf[idx];
1488 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1489 }
1490 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1491 }
1492 }
1493 }
1494 return 0;
1495 }
1496
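/* The flt16_* helpers emulate arithmetic on floats whose bit pattern is reduced
 * to the top 16 bits (sign, exponent and 7 mantissa bits), using round-to-nearest,
 * round-half-to-even and truncation respectively. They keep the MAIN-profile
 * predictor state in the reduced precision the prediction tool is defined with. */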
1497 static av_always_inline float flt16_round(float pf)
1498 {
1499 union av_intfloat32 tmp;
1500 tmp.f = pf;
1501 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1502 return tmp.f;
1503 }
1504
1505 static av_always_inline float flt16_even(float pf)
1506 {
1507 union av_intfloat32 tmp;
1508 tmp.f = pf;
1509 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1510 return tmp.f;
1511 }
1512
1513 static av_always_inline float flt16_trunc(float pf)
1514 {
1515 union av_intfloat32 pun;
1516 pun.f = pf;
1517 pun.i &= 0xFFFF0000U;
1518 return pun.f;
1519 }
1520
1521 static av_always_inline void predict(PredictorState *ps, float *coef,
1522 int output_enable)
1523 {
1524 const float a = 0.953125; // 61.0 / 64
1525 const float alpha = 0.90625; // 29.0 / 32
1526 float e0, e1;
1527 float pv;
1528 float k1, k2;
1529 float r0 = ps->r0, r1 = ps->r1;
1530 float cor0 = ps->cor0, cor1 = ps->cor1;
1531 float var0 = ps->var0, var1 = ps->var1;
1532
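/* Two cascaded first-order backward-adaptive stages: k1 and k2 are re-derived
 * each call from the running correlation (cor*) and energy (var*) estimates,
 * pv is the predicted value added to the coefficient when prediction is
 * enabled for this scalefactor band, and the tail updates the estimator state. */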
1533 k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1534 k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1535
1536 pv = flt16_round(k1 * r0 + k2 * r1);
1537 if (output_enable)
1538 *coef += pv;
1539
1540 e0 = *coef;
1541 e1 = e0 - k1 * r0;
1542
1543 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1544 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1545 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1546 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1547
1548 ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1549 ps->r0 = flt16_trunc(a * e0);
1550 }
1551
1552 /**
1553 * Apply AAC-Main style frequency domain prediction.
1554 */
1555 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1556 {
1557 int sfb, k;
1558
1559 if (!sce->ics.predictor_initialized) {
1560 reset_all_predictors(sce->predictor_state);
1561 sce->ics.predictor_initialized = 1;
1562 }
1563
1564 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1565 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]; sfb++) {
1566 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1567 predict(&sce->predictor_state[k], &sce->coeffs[k],
1568 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1569 }
1570 }
1571 if (sce->ics.predictor_reset_group)
1572 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1573 } else
1574 reset_all_predictors(sce->predictor_state);
1575 }
1576
1577 /**
1578 * Decode an individual_channel_stream payload; reference: table 4.44.
1579 *
1580 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1581 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1582 *
1583 * @return Returns error status. 0 - OK, !0 - error
1584 */
1585 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1586 GetBitContext *gb, int common_window, int scale_flag)
1587 {
1588 Pulse pulse;
1589 TemporalNoiseShaping *tns = &sce->tns;
1590 IndividualChannelStream *ics = &sce->ics;
1591 float *out = sce->coeffs;
1592 int global_gain, pulse_present = 0;
1593
1594 /* This assignment is to silence a GCC warning about the variable being used
1595 * uninitialized when in fact it is always set before it is read.
1596 */
1597 pulse.num_pulse = 0;
1598
1599 global_gain = get_bits(gb, 8);
1600
1601 if (!common_window && !scale_flag) {
1602 if (decode_ics_info(ac, ics, gb) < 0)
1603 return AVERROR_INVALIDDATA;
1604 }
1605
1606 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1607 return -1;
1608 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1609 return -1;
1610
1611 pulse_present = 0;
1612 if (!scale_flag) {
1613 if ((pulse_present = get_bits1(gb))) {
1614 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1615 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1616 return -1;
1617 }
1618 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1619 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1620 return -1;
1621 }
1622 }
1623 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1624 return -1;
1625 if (get_bits1(gb)) {
1626 av_log_missing_feature(ac->avctx, "SSR", 1);
1627 return -1;
1628 }
1629 }
1630
1631 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1632 return -1;
1633
1634 if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
1635 apply_prediction(ac, sce);
1636
1637 return 0;
1638 }
1639
1640 /**
1641 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1642 */
1643 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1644 {
1645 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1646 float *ch0 = cpe->ch[0].coeffs;
1647 float *ch1 = cpe->ch[1].coeffs;
1648 int g, i, group, idx = 0;
1649 const uint16_t *offsets = ics->swb_offset;
1650 for (g = 0; g < ics->num_window_groups; g++) {
1651 for (i = 0; i < ics->max_sfb; i++, idx++) {
1652 if (cpe->ms_mask[idx] &&
1653 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1654 for (group = 0; group < ics->group_len[g]; group++) {
1655 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1656 ch1 + group * 128 + offsets[i],
1657 offsets[i+1] - offsets[i]);
1658 }
1659 }
1660 }
1661 ch0 += ics->group_len[g] * 128;
1662 ch1 += ics->group_len[g] * 128;
1663 }
1664 }
1665
1666 /**
1667 * intensity stereo decoding; reference: 4.6.8.2.3
1668 *
1669 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1670 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1671 * [3] reserved for scalable AAC
1672 */
1673 static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
1674 {
1675 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1676 SingleChannelElement *sce1 = &cpe->ch[1];
1677 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1678 const uint16_t *offsets = ics->swb_offset;
1679 int g, group, i, idx = 0;
1680 int c;
1681 float scale;
1682 for (g = 0; g < ics->num_window_groups; g++) {
1683 for (i = 0; i < ics->max_sfb;) {
1684 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1685 const int bt_run_end = sce1->band_type_run_end[idx];
1686 for (; i < bt_run_end; i++, idx++) {
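/* INTENSITY_BT (15) selects in-phase (+1), INTENSITY_BT2 (14) out-of-phase (-1);
 * a set M/S mask bit inverts the direction again. */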
1687 c = -1 + 2 * (sce1->band_type[idx] - 14);
1688 if (ms_present)
1689 c *= 1 - 2 * cpe->ms_mask[idx];
1690 scale = c * sce1->sf[idx];
1691 for (group = 0; group < ics->group_len[g]; group++)
1692 ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1693 coef0 + group * 128 + offsets[i],
1694 scale,
1695 offsets[i + 1] - offsets[i]);
1696 }
1697 } else {
1698 int bt_run_end = sce1->band_type_run_end[idx];
1699 idx += bt_run_end - i;
1700 i = bt_run_end;
1701 }
1702 }
1703 coef0 += ics->group_len[g] * 128;
1704 coef1 += ics->group_len[g] * 128;
1705 }
1706 }
1707
1708 /**
1709 * Decode a channel_pair_element; reference: table 4.4.
1710 *
1711 * @return Returns error status. 0 - OK, !0 - error
1712 */
1713 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1714 {
1715 int i, ret, common_window, ms_present = 0;
1716
1717 common_window = get_bits1(gb);
1718 if (common_window) {
1719 if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
1720 return AVERROR_INVALIDDATA;
1721 i = cpe->ch[1].ics.use_kb_window[0];
1722 cpe->ch[1].ics = cpe->ch[0].ics;
1723 cpe->ch[1].ics.use_kb_window[1] = i;
1724 if (cpe->ch[1].ics.predictor_present && (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
1725 if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1726 decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1727 ms_present = get_bits(gb, 2);
1728 if (ms_present == 3) {
1729 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1730 return -1;
1731 } else if (ms_present)
1732 decode_mid_side_stereo(cpe, gb, ms_present);
1733 }
1734 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1735 return ret;
1736 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1737 return ret;
1738
1739 if (common_window) {
1740 if (ms_present)
1741 apply_mid_side_stereo(ac, cpe);
1742 if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1743 apply_prediction(ac, &cpe->ch[0]);
1744 apply_prediction(ac, &cpe->ch[1]);
1745 }
1746 }
1747
1748 apply_intensity_stereo(ac, cpe, ms_present);
1749 return 0;
1750 }
1751
1752 static const float cce_scale[] = {
1753 1.09050773266525765921, //2^(1/8)
1754 1.18920711500272106672, //2^(1/4)
1755 M_SQRT2,
1756 2,
1757 };
1758
1759 /**
1760 * Decode coupling_channel_element; reference: table 4.8.
1761 *
1762 * @return Returns error status. 0 - OK, !0 - error
1763 */
1764 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1765 {
1766 int num_gain = 0;
1767 int c, g, sfb, ret;
1768 int sign;
1769 float scale;
1770 SingleChannelElement *sce = &che->ch[0];
1771 ChannelCoupling *coup = &che->coup;
1772
1773 coup->coupling_point = 2 * get_bits1(gb);
1774 coup->num_coupled = get_bits(gb, 3);
1775 for (c = 0; c <= coup->num_coupled; c++) {
1776 num_gain++;
1777 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1778 coup->id_select[c] = get_bits(gb, 4);
1779 if (coup->type[c] == TYPE_CPE) {
1780 coup->ch_select[c] = get_bits(gb, 2);
1781 if (coup->ch_select[c] == 3)
1782 num_gain++;
1783 } else
1784 coup->ch_select[c] = 2;
1785 }
1786 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1787
1788 sign = get_bits(gb, 1);
1789 scale = cce_scale[get_bits(gb, 2)];
1790
1791 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1792 return ret;
1793
1794 for (c = 0; c < num_gain; c++) {
1795 int idx = 0;
1796 int cge = 1;
1797 int gain = 0;
1798 float gain_cache = 1.;
1799 if (c) {
1800 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1801 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1802 gain_cache = powf(scale, -gain);
1803 }
1804 if (coup->coupling_point == AFTER_IMDCT) {
1805 coup->gain[c][0] = gain_cache;
1806 } else {
1807 for (g = 0; g < sce->ics.num_window_groups; g++) {
1808 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1809 if (sce->band_type[idx] != ZERO_BT) {
1810 if (!cge) {
1811 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1812 if (t) {
1813 int s = 1;
1814 t = gain += t;
1815 if (sign) {
1816 s -= 2 * (t & 0x1);
1817 t >>= 1;
1818 }
1819 gain_cache = powf(scale, -t) * s;
1820 }
1821 }
1822 coup->gain[c][idx] = gain_cache;
1823 }
1824 }
1825 }
1826 }
1827 }
1828 return 0;
1829 }
1830
1831 /**
1832 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1833 *
1834 * @return Returns number of bytes consumed.
1835 */
1836 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1837 GetBitContext *gb)
1838 {
1839 int i;
1840 int num_excl_chan = 0;
1841
1842 do {
1843 for (i = 0; i < 7; i++)
1844 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1845 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1846
1847 return num_excl_chan / 7;
1848 }
1849
1850 /**
1851 * Decode dynamic range information; reference: table 4.52.
1852 *
1853 * @param cnt length of TYPE_FIL syntactic element in bytes
1854 *
1855 * @return Returns number of bytes consumed.
1856 */
1857 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1858 GetBitContext *gb, int cnt)
1859 {
1860 int n = 1;
1861 int drc_num_bands = 1;
1862 int i;
1863
1864 /* pce_tag_present? */
1865 if (get_bits1(gb)) {
1866 che_drc->pce_instance_tag = get_bits(gb, 4);
1867 skip_bits(gb, 4); // tag_reserved_bits
1868 n++;
1869 }
1870
1871 /* excluded_chns_present? */
1872 if (get_bits1(gb)) {
1873 n += decode_drc_channel_exclusions(che_drc, gb);
1874 }
1875
1876 /* drc_bands_present? */
1877 if (get_bits1(gb)) {
1878 che_drc->band_incr = get_bits(gb, 4);
1879 che_drc->interpolation_scheme = get_bits(gb, 4);
1880 n++;
1881 drc_num_bands += che_drc->band_incr;
1882 for (i = 0; i < drc_num_bands; i++) {
1883 che_drc->band_top[i] = get_bits(gb, 8);
1884 n++;
1885 }
1886 }
1887
1888 /* prog_ref_level_present? */
1889 if (get_bits1(gb)) {
1890 che_drc->prog_ref_level = get_bits(gb, 7);
1891 skip_bits1(gb); // prog_ref_level_reserved_bits
1892 n++;
1893 }
1894
1895 for (i = 0; i < drc_num_bands; i++) {
1896 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1897 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1898 n++;
1899 }
1900
1901 return n;
1902 }
1903
1904 /**
1905 * Decode extension data (incomplete); reference: table 4.51.
1906 *
1907 * @param cnt length of TYPE_FIL syntactic element in bytes
1908 *
1909 * @return Returns number of bytes consumed
1910 */
1911 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1912 ChannelElement *che, enum RawDataBlockType elem_type)
1913 {
1914 int crc_flag = 0;
1915 int res = cnt;
1916 switch (get_bits(gb, 4)) { // extension type
1917 case EXT_SBR_DATA_CRC:
1918 crc_flag++;
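/* fall through */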
1919 case EXT_SBR_DATA:
1920 if (!che) {
1921 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1922 return res;
1923 } else if (!ac->oc[1].m4ac.sbr) {
1924 av_log(ac->avctx, AV_LOG_ERROR, "SBR was signaled as not present but was found in the bitstream.\n");
1925 skip_bits_long(gb, 8 * cnt - 4);
1926 return res;
1927 } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
1928 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1929 skip_bits_long(gb, 8 * cnt - 4);
1930 return res;
1931 } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
1932 ac->oc[1].m4ac.sbr = 1;
1933 ac->oc[1].m4ac.ps = 1;
1934 output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
1935 ac->oc[1].m4ac.chan_config, ac->oc[1].status);
1936 } else {
1937 ac->oc[1].m4ac.sbr = 1;
1938 }
1939 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1940 break;
1941 case EXT_DYNAMIC_RANGE:
1942 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1943 break;
1944 case EXT_FILL:
1945 case EXT_FILL_DATA:
1946 case EXT_DATA_ELEMENT:
1947 default:
1948 skip_bits_long(gb, 8 * cnt - 4);
1949 break;
1950 }
1951 return res;
1952 }
1953
1954 /**
1955 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1956 *
1957 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1958 * @param coef spectral coefficients
1959 */
1960 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1961 IndividualChannelStream *ics, int decode)
1962 {
1963 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1964 int w, filt, m, i;
1965 int bottom, top, order, start, end, size, inc;
1966 float lpc[TNS_MAX_ORDER];
1967 float tmp[TNS_MAX_ORDER];
1968
1969 for (w = 0; w < ics->num_windows; w++) {
1970 bottom = ics->num_swb;
1971 for (filt = 0; filt < tns->n_filt[w]; filt++) {
1972 top = bottom;
1973 bottom = FFMAX(0, top - tns->length[w][filt]);
1974 order = tns->order[w][filt];
1975 if (order == 0)
1976 continue;
1977
1978 // tns_decode_coef
1979 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1980
1981 start = ics->swb_offset[FFMIN(bottom, mmm)];
1982 end = ics->swb_offset[FFMIN( top, mmm)];
1983 if ((size = end - start) <= 0)
1984 continue;
1985 if (tns->direction[w][filt]) {
1986 inc = -1;
1987 start = end - 1;
1988 } else {
1989 inc = 1;
1990 }
1991 start += w * 128;
1992
1993 if (decode) {
1994 // ar filter
1995 for (m = 0; m < size; m++, start += inc)
1996 for (i = 1; i <= FFMIN(m, order); i++)
1997 coef[start] -= coef[start - i * inc] * lpc[i - 1];
1998 } else {
1999 // ma filter
2000 for (m = 0; m < size; m++, start += inc) {
2001 tmp[0] = coef[start];
2002 for (i = 1; i <= FFMIN(m, order); i++)
2003 coef[start] += tmp[i] * lpc[i - 1];
2004 for (i = order; i > 0; i--)
2005 tmp[i] = tmp[i - 1];
2006 }
2007 }
2008 }
2009 }
2010 }
2011
2012 /**
2013 * Apply windowing and MDCT to obtain the spectral
2014 * coefficients of the samples predicted by LTP.
2015 */
2016 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
2017 float *in, IndividualChannelStream *ics)
2018 {
2019 const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2020 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2021 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2022 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2023
2024 if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2025 ac->dsp.vector_fmul(in, in, lwindow_prev, 1024);
2026 } else {
2027 memset(in, 0, 448 * sizeof(float));
2028 ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
2029 }
2030 if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2031 ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
2032 } else {
2033 ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
2034 memset(in + 1024 + 576, 0, 448 * sizeof(float));
2035 }
2036 ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
2037 }
2038
2039 /**
2040 * Apply the long term prediction
2041 */
2042 static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
2043 {
2044 const LongTermPrediction *ltp = &sce->ics.ltp;
2045 const uint16_t *offsets = sce->ics.swb_offset;
2046 int i, sfb;
2047
2048 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2049 float *predTime = sce->ret;
2050 float *predFreq = ac->buf_mdct;
2051 int16_t num_samples = 2048;
2052
2053 if (ltp->lag < 1024)
2054 num_samples = ltp->lag + 1024;
2055 for (i = 0; i < num_samples; i++)
2056 predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
2057 memset(&predTime[i], 0, (2048 - i) * sizeof(float));
2058
2059 windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2060
2061 if (sce->tns.present)
2062 apply_tns(predFreq, &sce->tns, &sce->ics, 0);
2063
2064 for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2065 if (ltp->used[sfb])
2066 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2067 sce->coeffs[i] += predFreq[i];
2068 }
2069 }
2070
2071 /**
2072 * Update the LTP buffer for next frame
2073 */
2074 static void update_ltp(AACContext *ac, SingleChannelElement *sce)
2075 {
2076 IndividualChannelStream *ics = &sce->ics;
2077 float *saved = sce->saved;
2078 float *saved_ltp = sce->coeffs;
2079 const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2080 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2081 int i;
2082
2083 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2084 memcpy(saved_ltp, saved, 512 * sizeof(float));
2085 memset(saved_ltp + 576, 0, 448 * sizeof(float));
2086 ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
2087 for (i = 0; i < 64; i++)
2088 saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2089 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2090 memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float));
2091 memset(saved_ltp + 576, 0, 448 * sizeof(float));
2092 ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
2093 for (i = 0; i < 64; i++)
2094 saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2095 } else { // LONG_STOP or ONLY_LONG
2096 ac->dsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
2097 for (i = 0; i < 512; i++)
2098 saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
2099 }
2100
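/* Shift the 3 * 1024 sample LTP history: the two most recently output
 * frames followed by the windowed estimate of the yet-to-be-output
 * overlap computed above. */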
2101 memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
2102 memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state));
2103 memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state));
2104 }
2105
2106 /**
2107 * Conduct IMDCT and windowing.
2108 */
2109 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
2110 {
2111 IndividualChannelStream *ics = &sce->ics;
2112 float *in = sce->coeffs;
2113 float *out = sce->ret;
2114 float *saved = sce->saved;
2115 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2116 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2117 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2118 float *buf = ac->buf_mdct;
2119 float *temp = ac->temp;
2120 int i;
2121
2122 // imdct
2123 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2124 for (i = 0; i < 1024; i += 128)
2125 ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2126 } else
2127 ac->mdct.imdct_half(&ac->mdct, buf, in);
2128
2129 /* window overlapping
2130 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2131 * and long to short transitions are considered to be short to short
2132 * transitions. This leaves just two cases (long to long and short to short)
2133 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2134 */
2135 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2136 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
2137 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
2138 } else {
2139 memcpy( out, saved, 448 * sizeof(float));
2140
2141 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2142 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
2143 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
2144 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
2145 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
2146 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
2147 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
2148 } else {
2149 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
2150 memcpy( out + 576, buf + 64, 448 * sizeof(float));
2151 }
2152 }
2153
2154 // buffer update
2155 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2156 memcpy( saved, temp + 64, 64 * sizeof(float));
2157 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
2158 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2159 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2160 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
2161 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2162 memcpy( saved, buf + 512, 448 * sizeof(float));
2163 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
2164 } else { // LONG_STOP or ONLY_LONG
2165 memcpy( saved, buf + 512, 512 * sizeof(float));
2166 }
2167 }
2168
2169 /**
2170 * Apply dependent channel coupling (applied before IMDCT).
2171 *
2172 * @param index index into coupling gain array
2173 */
2174 static void apply_dependent_coupling(AACContext *ac,
2175 SingleChannelElement *target,
2176 ChannelElement *cce, int index)
2177 {
2178 IndividualChannelStream *ics = &cce->ch[0].ics;
2179 const uint16_t *offsets = ics->swb_offset;
2180 float *dest = target->coeffs;
2181 const float *src = cce->ch[0].coeffs;
2182 int g, i, group, k, idx = 0;
2183 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2184 av_log(ac->avctx, AV_LOG_ERROR,
2185 "Dependent coupling is not supported together with LTP\n");
2186 return;
2187 }
2188 for (g = 0; g < ics->num_window_groups; g++) {
2189 for (i = 0; i < ics->max_sfb; i++, idx++) {
2190 if (cce->ch[0].band_type[idx] != ZERO_BT) {
2191 const float gain = cce->coup.gain[index][idx];
2192 for (group = 0; group < ics->group_len[g]; group++) {
2193 for (k = offsets[i]; k < offsets[i + 1]; k++) {
2194 // XXX dsputil-ize
2195 dest[group * 128 + k] += gain * src[group * 128 + k];
2196 }
2197 }
2198 }
2199 }
2200 dest += ics->group_len[g] * 128;
2201 src += ics->group_len[g] * 128;
2202 }
2203 }
2204
2205 /**
2206 * Apply independent channel coupling (applied after IMDCT).
2207 *
2208 * @param index index into coupling gain array
2209 */
2210 static void apply_independent_coupling(AACContext *ac,
2211 SingleChannelElement *target,
2212 ChannelElement *cce, int index)
2213 {
2214 int i;
2215 const float gain = cce->coup.gain[index][0];
2216 const float *src = cce->ch[0].ret;
2217 float *dest = target->ret;
2218 const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
2219
2220 for (i = 0; i < len; i++)
2221 dest[i] += gain * src[i];
2222 }
2223
2224 /**
2225 * channel coupling transformation interface
2226 *
2227 * @param apply_coupling_method pointer to (in)dependent coupling function
2228 */
2229 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
2230 enum RawDataBlockType type, int elem_id,
2231 enum CouplingPoint coupling_point,
2232 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2233 {
2234 int i, c;
2235
2236 for (i = 0; i < MAX_ELEM_ID; i++) {
2237 ChannelElement *cce = ac->che[TYPE_CCE][i];
2238 int index = 0;
2239
2240 if (cce && cce->coup.coupling_point == coupling_point) {
2241 ChannelCoupling *coup = &cce->coup;
2242
2243 for (c = 0; c <= coup->num_coupled; c++) {
2244 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
2245 if (coup->ch_select[c] != 1) {
2246 apply_coupling_method(ac, &cc->ch[0], cce, index);
2247 if (coup->ch_select[c] != 0)
2248 index++;
2249 }
2250 if (coup->ch_select[c] != 2)
2251 apply_coupling_method(ac, &cc->ch[1], cce, index++);
2252 } else
2253 index += 1 + (coup->ch_select[c] == 3);
2254 }
2255 }
2256 }
2257 }
2258
2259 /**
2260 * Convert spectral data to float samples, applying all supported tools as appropriate.
2261 */
2262 static void spectral_to_sample(AACContext *ac)
2263 {
2264 int i, type;
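/* Only SCE, CPE, CCE and LFE elements (types 0 to 3) carry spectral data. */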
2265 for (type = 3; type >= 0; type--) {
2266 for (i = 0; i < MAX_ELEM_ID; i++) {
2267 ChannelElement *che = ac->che[type][i];
2268 if (che) {
2269 if (type <= TYPE_CPE)
2270 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
2271 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2272 if (che->ch[0].ics.predictor_present) {
2273 if (che->ch[0].ics.ltp.present)
2274 apply_ltp(ac, &che->ch[0]);
2275 if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2276 apply_ltp(ac, &che->ch[1]);
2277 }
2278 }
2279 if (che->ch[0].tns.present)
2280 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2281 if (che->ch[1].tns.present)
2282 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2283 if (type <= TYPE_CPE)
2284 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
2285 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2286 imdct_and_windowing(ac, &che->ch[0]);
2287 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2288 update_ltp(ac, &che->ch[0]);
2289 if (type == TYPE_CPE) {
2290 imdct_and_windowing(ac, &che->ch[1]);
2291 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2292 update_ltp(ac, &che->ch[1]);
2293 }
2294 if (ac->oc[1].m4ac.sbr > 0) {
2295 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2296 }
2297 }
2298 if (type <= TYPE_CCE)
2299 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
2300 }
2301 }
2302 }
2303 }
2304
2305 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
2306 {
2307 int size;
2308 AACADTSHeaderInfo hdr_info;
2309 uint8_t layout_map[MAX_ELEM_ID*4][3];
2310 int layout_map_tags;
2311
2312 size = avpriv_aac_parse_header(gb, &hdr_info);
2313 if (size > 0) {
2314 if (hdr_info.num_aac_frames != 1) {
2315 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
2316 return -1;
2317 }
2318 push_output_configuration(ac);
2319 if (hdr_info.chan_config) {
2320 ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
2321 if (set_default_channel_config(ac->avctx, layout_map,
2322 &layout_map_tags, hdr_info.chan_config))
2323 return -7;
2324 if (output_configure(ac, layout_map, layout_map_tags,
2325 hdr_info.chan_config,
2326 FFMAX(ac->oc[1].status, OC_TRIAL_FRAME)))
2327 return -7;
2328 } else {
2329 ac->oc[1].m4ac.chan_config = 0;
2330 }
2331 ac->oc[1].m4ac.sample_rate = hdr_info.sample_rate;
2332 ac->oc[1].m4ac.sampling_index = hdr_info.sampling_index;
2333 ac->oc[1].m4ac.object_type = hdr_info.object_type;
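/* If there is no locked configuration or the stream parameters changed,
 * implicit SBR/PS signalling has to be re-detected; -1 marks them as
 * undetermined. */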
2334 if (ac->oc[0].status != OC_LOCKED ||
2335 ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
2336 ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
2337 ac->oc[1].m4ac.sbr = -1;
2338 ac->oc[1].m4ac.ps = -1;
2339 }
2340 if (!hdr_info.crc_absent)
2341 skip_bits(gb, 16);
2342 }
2343 return size;
2344 }
2345
2346 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2347 int *got_frame_ptr, GetBitContext *gb)
2348 {
2349 AACContext *ac = avctx->priv_data;
2350 ChannelElement *che = NULL, *che_prev = NULL;
2351 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2352 int err, elem_id;
2353 int samples = 0, multiplier, audio_found = 0, pce_found = 0;
2354
2355 if (show_bits(gb, 12) == 0xfff) {
2356 if (parse_adts_frame_header(ac, gb) < 0) {
2357 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2358 err = -1;
2359 goto fail;
2360 }
2361 if (ac->oc[1].m4ac.sampling_index > 12) {
2362 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
2363 err = -1;
2364 goto fail;
2365 }
2366 }
2367
2368 ac->tags_mapped = 0;
2369 // parse
2370 while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2371 elem_id = get_bits(gb, 4);
2372
2373 if (elem_type < TYPE_DSE) {
2374 if (!(che=get_che(ac, elem_type, elem_id))) {
2375 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2376 elem_type, elem_id);
2377 err = -1;
2378 goto fail;
2379 }
2380 samples = 1024;
2381 }
2382
2383 switch (elem_type) {
2384
2385 case TYPE_SCE:
2386 err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2387 audio_found = 1;
2388 break;
2389
2390 case TYPE_CPE:
2391 err = decode_cpe(ac, gb, che);
2392 audio_found = 1;
2393 break;
2394
2395 case TYPE_CCE:
2396 err = decode_cce(ac, gb, che);
2397 break;
2398
2399 case TYPE_LFE:
2400 err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2401 audio_found = 1;
2402 break;
2403
2404 case TYPE_DSE:
2405 err = skip_data_stream_element(ac, gb);
2406 break;
2407
2408 case TYPE_PCE: {
2409 uint8_t layout_map[MAX_ELEM_ID*4][3];
2410 int tags;
2411 push_output_configuration(ac);
2412 tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
2413 if (tags < 0) {
2414 err = tags;
2415 break;
2416 }
2417 if (pce_found) {
2418 av_log(avctx, AV_LOG_ERROR,
2419 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2420 pop_output_configuration(ac);
2421 } else {
2422 err = output_configure(ac, layout_map, tags, 0, OC_TRIAL_PCE);
2423 pce_found = 1;
2424 }
2425 break;
2426 }
2427
2428 case TYPE_FIL:
2429 if (elem_id == 15)
2430 elem_id += get_bits(gb, 8) - 1;
2431 if (get_bits_left(gb) < 8 * elem_id) {
2432 av_log(avctx, AV_LOG_ERROR, overread_err);
2433 err = -1;
2434 goto fail;
2435 }
2436 while (elem_id > 0)
2437 elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2438 err = 0; /* FIXME */
2439 break;
2440
2441 default:
2442 err = -1; /* should not happen, but keeps compiler happy */
2443 break;
2444 }
2445
2446 che_prev = che;
2447 elem_type_prev = elem_type;
2448
2449 if (err)
2450 goto fail;
2451
2452 if (get_bits_left(gb) < 3) {
2453 av_log(avctx, AV_LOG_ERROR, overread_err);
2454 err = -1;
2455 goto fail;
2456 }
2457 }
2458
2459 spectral_to_sample(ac);
2460
2461 multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
2462 samples <<= multiplier;
2463
2464 if (samples) {
2465 /* get output buffer */
2466 ac->frame.nb_samples = samples;
2467 if ((err = avctx->get_buffer(avctx, &ac->frame)) < 0) {
2468 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2469 err = -1;
2470 goto fail;
2471 }
2472
2473 if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
2474 ac->fmt_conv.float_interleave((float *)ac->frame.data[0],
2475 (const float **)ac->output_data,
2476 samples, avctx->channels);
2477 else
2478 ac->fmt_conv.float_to_int16_interleave((int16_t *)ac->frame.data[0],
2479 (const float **)ac->output_data,
2480 samples, avctx->channels);
2481
2482 *(AVFrame *)data = ac->frame;
2483 }
2484 *got_frame_ptr = !!samples;
2485
2486 if (ac->oc[1].status && audio_found) {
2487 avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
2488 avctx->frame_size = samples;
2489 ac->oc[1].status = OC_LOCKED;
2490 }
2491
2492 return 0;
2493 fail:
2494 pop_output_configuration(ac);
2495 return err;
2496 }
2497
2498 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2499 int *got_frame_ptr, AVPacket *avpkt)
2500 {
2501 AACContext *ac = avctx->priv_data;
2502 const uint8_t *buf = avpkt->data;
2503 int buf_size = avpkt->size;
2504 GetBitContext gb;
2505 int buf_consumed;
2506 int buf_offset;
2507 int err;
2508 int new_extradata_size;
2509 const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
2510 AV_PKT_DATA_NEW_EXTRADATA,
2511 &new_extradata_size);
2512
2513 if (new_extradata) {
2514 av_free(avctx->extradata);
2515 avctx->extradata = av_mallocz(new_extradata_size +
2516 FF_INPUT_BUFFER_PADDING_SIZE);
2517 if (!avctx->extradata)
2518 return AVERROR(ENOMEM);
2519 avctx->extradata_size = new_extradata_size;
2520 memcpy(avctx->extradata, new_extradata, new_extradata_size);
2521 push_output_configuration(ac);
2522 if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
2523 avctx->extradata,
2524 avctx->extradata_size*8, 1) < 0) {
2525 pop_output_configuration(ac);
2526 return AVERROR_INVALIDDATA;
2527 }
2528 }
2529
2530 init_get_bits(&gb, buf, buf_size * 8);
2531
2532 if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb)) < 0)
2533 return err;
2534
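/* If nothing but zero padding follows the decoded frame, report the whole
 * packet as consumed. */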
2535 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2536 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2537 if (buf[buf_offset])
2538 break;
2539
2540 return buf_size > buf_offset ? buf_consumed : buf_size;
2541 }
2542
2543 static av_cold int aac_decode_close(AVCodecContext *avctx)
2544 {
2545 AACContext *ac = avctx->priv_data;
2546 int i, type;
2547
2548 for (i = 0; i < MAX_ELEM_ID; i++) {
2549 for (type = 0; type < 4; type++) {
2550 if (ac->che[type][i])
2551 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2552 av_freep(&ac->che[type][i]);
2553 }
2554 }
2555
2556 ff_mdct_end(&ac->mdct);
2557 ff_mdct_end(&ac->mdct_small);
2558 ff_mdct_end(&ac->mdct_ltp);
2559 return 0;
2560 }
2561
2562
2563 #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
2564
2565 struct LATMContext {
2566 AACContext aac_ctx; ///< containing AACContext
2567 int initialized; ///< initialized after a valid extradata was seen
2568
2569 // parser data
2570 int audio_mux_version_A; ///< LATM syntax version
2571 int frame_length_type; ///< 0/1 variable/fixed frame length
2572 int frame_length; ///< frame length for fixed frame length
2573 };
2574
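/* LatmGetValue(): a 2-bit size field followed by (size + 1) bytes of value. */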
2575 static inline uint32_t latm_get_value(GetBitContext *b)
2576 {
2577 int length = get_bits(b, 2);
2578
2579 return get_bits_long(b, (length+1)*8);
2580 }
2581
2582 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
2583 GetBitContext *gb, int asclen)
2584 {
2585 AACContext *ac = &latmctx->aac_ctx;
2586 AVCodecContext *avctx = ac->avctx;
2587 MPEG4AudioConfig m4ac = { 0 };
2588 int config_start_bit = get_bits_count(gb);
2589 int sync_extension = 0;
2590 int bits_consumed, esize;
2591
2592 if (asclen) {
2593 sync_extension = 1;
2594 asclen = FFMIN(asclen, get_bits_left(gb));
2595 } else
2596 asclen = get_bits_left(gb);
2597
2598 if (config_start_bit % 8) {
2599 av_log_missing_feature(latmctx->aac_ctx.avctx, "audio specific "
2600 "config not byte aligned.\n", 1);
2601 return AVERROR_INVALIDDATA;
2602 }
2603 if (asclen <= 0)
2604 return AVERROR_INVALIDDATA;
2605 bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
2606 gb->buffer + (config_start_bit / 8),
2607 asclen, sync_extension);
2608
2609 if (bits_consumed < 0)
2610 return AVERROR_INVALIDDATA;
2611
2612 if (ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
2613 ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
2614
2615 av_log(avctx, AV_LOG_INFO, "audio config changed\n");
2616 latmctx->initialized = 0;
2617
2618 esize = (bits_consumed+7) / 8;
2619
2620 if (avctx->extradata_size < esize) {
2621 av_free(avctx->extradata);
2622 avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
2623 if (!avctx->extradata)
2624 return AVERROR(ENOMEM);
2625 }
2626
2627 avctx->extradata_size = esize;
2628 memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2629 memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2630 }
2631 skip_bits_long(gb, bits_consumed);
2632
2633 return bits_consumed;
2634 }
2635
2636 static int read_stream_mux_config(struct LATMContext *latmctx,
2637 GetBitContext *gb)
2638 {
2639 int ret, audio_mux_version = get_bits(gb, 1);
2640
2641 latmctx->audio_mux_version_A = 0;
2642 if (audio_mux_version)
2643 latmctx->audio_mux_version_A = get_bits(gb, 1);
2644
2645 if (!latmctx->audio_mux_version_A) {
2646
2647 if (audio_mux_version)
2648 latm_get_value(gb); // taraBufferFullness
2649
2650 skip_bits(gb, 1); // allStreamSameTimeFraming
2651 skip_bits(gb, 6); // numSubFrames
2652 // numPrograms
2653 if (get_bits(gb, 4)) { // numPrograms
2654 av_log_missing_feature(latmctx->aac_ctx.avctx,
2655 "multiple programs are not supported\n", 1);
2656 return AVERROR_PATCHWELCOME;
2657 }
2658
2659 // for each program (of which there is only one in DVB)
2660
2661 // for each layer (of which there is only one in DVB)
2662 if (get_bits(gb, 3)) { // numLayer
2663 av_log_missing_feature(latmctx->aac_ctx.avctx,
2664 "multiple layers are not supported\n", 1);
2665 return AVERROR_PATCHWELCOME;
2666 }
2667
2668 // for all but first stream: use_same_config = get_bits(gb, 1);
2669 if (!audio_mux_version) {
2670 if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
2671 return ret;
2672 } else {
2673 int ascLen = latm_get_value(gb);
2674 if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
2675 return ret;
2676 ascLen -= ret;
2677 skip_bits_long(gb, ascLen);
2678 }
2679
2680 latmctx->frame_length_type = get_bits(gb, 3);
2681 switch (latmctx->frame_length_type) {
2682 case 0:
2683 skip_bits(gb, 8); // latmBufferFullness
2684 break;
2685 case 1:
2686 latmctx->frame_length = get_bits(gb, 9);
2687 break;
2688 case 3:
2689 case 4:
2690 case 5:
2691 skip_bits(gb, 6); // CELP frame length table index
2692 break;
2693 case 6:
2694 case 7:
2695 skip_bits(gb, 1); // HVXC frame length table index
2696 break;
2697 }
2698
2699 if (get_bits(gb, 1)) { // other data
2700 if (audio_mux_version) {
2701 latm_get_value(gb); // other_data_bits
2702 } else {
2703 int esc;
2704 do {
2705 esc = get_bits(gb, 1);
2706 skip_bits(gb, 8);
2707 } while (esc);
2708 }
2709 }
2710
2711 if (get_bits(gb, 1)) // crc present
2712 skip_bits(gb, 8); // config_crc
2713 }
2714
2715 return 0;
2716 }
2717
2718 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
2719 {
2720 uint8_t tmp;
2721
2722 if (ctx->frame_length_type == 0) {
2723 int mux_slot_length = 0;
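/* The payload length is coded as a run of 0xFF bytes terminated by a
 * byte below 255; all of them are summed. */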
2724 do {
2725 tmp = get_bits(gb, 8);
2726 mux_slot_length += tmp;
2727 } while (tmp == 255);
2728 return mux_slot_length;
2729 } else if (ctx->frame_length_type == 1) {
2730 return ctx->frame_length;
2731 } else if (ctx->frame_length_type == 3 ||
2732 ctx->frame_length_type == 5 ||
2733 ctx->frame_length_type == 7) {
2734 skip_bits(gb, 2); // mux_slot_length_coded
2735 }
2736 return 0;
2737 }
2738
2739 static int read_audio_mux_element(struct LATMContext *latmctx,
2740 GetBitContext *gb)
2741 {
2742 int err;
2743 uint8_t use_same_mux = get_bits(gb, 1);
2744 if (!use_same_mux) {
2745 if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2746 return err;
2747 } else if (!latmctx->aac_ctx.avctx->extradata) {
2748 av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2749 "no decoder config found\n");
2750 return AVERROR(EAGAIN);
2751 }
2752 if (latmctx->audio_mux_version_A == 0) {
2753 int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2754 if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
2755 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2756 return AVERROR_INVALIDDATA;
2757 } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
2758 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2759 "frame length mismatch %d << %d\n",
2760 mux_slot_length_bytes * 8, get_bits_left(gb));
2761 return AVERROR_INVALIDDATA;
2762 }
2763 }
2764 return 0;
2765 }
2766
2767
2768 static int latm_decode_frame(AVCodecContext *avctx, void *out,
2769 int *got_frame_ptr, AVPacket *avpkt)
2770 {
2771 struct LATMContext *latmctx = avctx->priv_data;
2772 int muxlength, err;
2773 GetBitContext gb;
2774
2775 init_get_bits(&gb, avpkt->data, avpkt->size * 8);
2776
2777 // check for LOAS sync word
2778 if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
2779 return AVERROR_INVALIDDATA;
2780
2781 muxlength = get_bits(&gb, 13) + 3;
2782 // not enough data, the parser should have sorted this
2783 if (muxlength > avpkt->size)
2784 return AVERROR_INVALIDDATA;
2785
2786 if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
2787 return err;
2788
2789 if (!latmctx->initialized) {
2790 if (!avctx->extradata) {
2791 *got_frame_ptr = 0;
2792 return avpkt->size;
2793 } else {
2794 push_output_configuration(&latmctx->aac_ctx);
2795 if ((err = decode_audio_specific_config(
2796 &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
2797 avctx->extradata, avctx->extradata_size*8, 1)) < 0) {
2798 pop_output_configuration(&latmctx->aac_ctx);
2799 return err;
2800 }
2801 latmctx->initialized = 1;
2802 }
2803 }
2804
2805 if (show_bits(&gb, 12) == 0xfff) {
2806 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2807 "ADTS header detected, probably as result of configuration "
2808 "misparsing\n");
2809 return AVERROR_INVALIDDATA;
2810 }
2811
2812 if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb)) < 0)
2813 return err;
2814
2815 return muxlength;
2816 }
2817
2818 static av_cold int latm_decode_init(AVCodecContext *avctx)
2819 {
2820 struct LATMContext *latmctx = avctx->priv_data;
2821 int ret = aac_decode_init(avctx);
2822
2823 if (avctx->extradata_size > 0)
2824 latmctx->initialized = !ret;
2825
2826 return ret;
2827 }
2828
2829
2830 AVCodec ff_aac_decoder = {
2831 .name = "aac",
2832 .type = AVMEDIA_TYPE_AUDIO,
2833 .id = CODEC_ID_AAC,
2834 .priv_data_size = sizeof(AACContext),
2835 .init = aac_decode_init,
2836 .close = aac_decode_close,
2837 .decode = aac_decode_frame,
2838 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2839 .sample_fmts = (const enum AVSampleFormat[]) {
2840 AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
2841 },
2842 .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
2843 .channel_layouts = aac_channel_layout,
2844 };
2845
2846 /*
2847 Note: This decoder filter is intended to decode LATM streams transferred
2848 in MPEG transport streams which only contain one program.
2849 To do a more complex LATM demuxing a separate LATM demuxer should be used.
2850 */
2851 AVCodec ff_aac_latm_decoder = {
2852 .name = "aac_latm",
2853 .type = AVMEDIA_TYPE_AUDIO,
2854 .id = CODEC_ID_AAC_LATM,
2855 .priv_data_size = sizeof(struct LATMContext),
2856 .init = latm_decode_init,
2857 .close = aac_decode_close,
2858 .decode = latm_decode_frame,
2859 .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
2860 .sample_fmts = (const enum AVSampleFormat[]) {
2861 AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
2862 },
2863 .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
2864 .channel_layouts = aac_channel_layout,
2865 };