aacdec: remove sf_scale and sf_offset.
[libav.git] / libavcodec / aac.h
CommitLineData
589ce6e6
RS
/*
 * AAC definitions and structures
 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
/**
 * @file
 * AAC definitions and structures
 * @author Oded Shimon ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 */
29
98790382
SS
30#ifndef AVCODEC_AAC_H
31#define AVCODEC_AAC_H
589ce6e6
RS
32
33#include "avcodec.h"
34#include "dsputil.h"
1429224b 35#include "fft.h"
589ce6e6 36#include "mpeg4audio.h"
ed492b61 37#include "sbr.h"
c73d99e6 38#include "fmtconvert.h"
589ce6e6
RS
39
40#include <stdint.h>
41
#define MAX_CHANNELS 64       ///< size of the per-channel arrays (DRC exclude mask, output pointers)
#define MAX_ELEM_ID 16        ///< number of element ids per element type in the channel-element tables

#define TNS_MAX_ORDER 20      ///< number of coefficients stored per TNS filter
#define MAX_LTP_LONG_SFB 40   ///< number of per-band LTP 'used' flags
9ffd5c1c 47
9cc04edf
RS
/**
 * Raw data block element types.
 *
 * The enumerators are numbered consecutively from 0; the first four
 * (SCE, CPE, CCE, LFE) are used as the first index of the channel
 * element tables in the main AAC context.
 */
enum RawDataBlockType {
    TYPE_SCE,
    TYPE_CPE,
    TYPE_CCE,
    TYPE_LFE,
    TYPE_DSE,
    TYPE_PCE,
    TYPE_FIL,
    TYPE_END,
};
58
cc0591da
RS
/**
 * Extension payload identifiers.
 *
 * The first three values are consecutive from 0; the remaining ones
 * are assigned explicitly.
 */
enum ExtensionPayloadID {
    EXT_FILL,
    EXT_FILL_DATA,
    EXT_DATA_ELEMENT,
    EXT_DYNAMIC_RANGE = 0xb,
    EXT_SBR_DATA      = 0xd,
    EXT_SBR_DATA_CRC  = 0xe,
};
67
589ce6e6
RS
/**
 * Window sequence of a frame; enumerators are numbered consecutively from 0.
 */
enum WindowSequence {
    ONLY_LONG_SEQUENCE,
    LONG_START_SEQUENCE,
    EIGHT_SHORT_SEQUENCE,
    LONG_STOP_SEQUENCE,
};
74
cc0591da
RS
/**
 * Band types. Only the values that need special handling are named;
 * the remaining values in between have no enumerator.
 */
enum BandType {
    ZERO_BT        = 0,     ///< Scalefactors and spectral data are all zero.
    FIRST_PAIR_BT  = 5,     ///< This and later band types encode two values (rather than four) with one code word.
    ESC_BT         = 11,    ///< Spectral data are coded with an escape sequence.
    NOISE_BT       = 13,    ///< Spectral data are scaled white noise not coded in the bitstream.
    INTENSITY_BT2  = 14,    ///< Scalefactor data are intensity stereo positions.
    INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions.
};
83
/**
 * Test bits 1 and 3 of (x - 1).
 *
 * Evaluates to a nonzero value (not normalized to 1) when either bit is
 * set, e.g. for x = 3, 4 and 7..11, and to 0 for x = 1, 2, 5, 6.
 * The argument is parenthesized so that an expression such as `a | b`
 * is evaluated as a whole before the subtraction.
 */
#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
85
/**
 * Channel position categories; values start at 1, so 0 is not a
 * named position.
 */
enum ChannelPosition {
    AAC_CHANNEL_FRONT = 1,
    AAC_CHANNEL_SIDE  = 2,
    AAC_CHANNEL_BACK  = 3,
    AAC_CHANNEL_LFE   = 4,
    AAC_CHANNEL_CC    = 5,
};
93
9cc04edf
RS
/**
 * The point during decoding at which channel coupling is applied.
 *
 * Note that the value 2 is intentionally left without an enumerator:
 * AFTER_IMDCT is explicitly 3.
 */
enum CouplingPoint {
    BEFORE_TNS,
    BETWEEN_TNS_AND_IMDCT,
    AFTER_IMDCT = 3,
};
102
/**
 * Output configuration status
 *
 * Enumerators are numbered consecutively from 0.
 */
enum OCStatus {
    OC_NONE,        ///< Output unconfigured
    OC_TRIAL_PCE,   ///< Output configuration under trial specified by an inband PCE
    OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header
    OC_GLOBAL_HDR,  ///< Output configuration set in a global header but not yet locked
    OC_LOCKED,      ///< Output configuration locked in place
};
113
/**
 * Predictor State
 *
 * Six float state variables kept per predictor.
 */
typedef struct {
    float cor0;
    float cor1;
    float var0;
    float var1;
    float r0;
    float r1;
} PredictorState;
125
#define MAX_PREDICTORS 672    ///< number of predictor states kept per channel

#define SCALE_DIV_512    36   ///< scalefactor difference that corresponds to a 512-fold scale difference
#define SCALE_ONE_POS   140   ///< scalefactor index that corresponds to scale=1.0
#define SCALE_MAX_POS   255   ///< scalefactor index maximum value
#define SCALE_MAX_DIFF   60   ///< maximum scalefactor difference allowed by standard
#define SCALE_DIFF_ZERO  60   ///< codebook index corresponding to zero scalefactor indices difference
#define POW_SF2_ZERO    200   ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0);
78e65cd7 134
7633a041 135/**
ead15f1d
YHL
136 * Long Term Prediction
137 */
138typedef struct {
139 int8_t present;
140 int16_t lag;
141 float coef;
142 int8_t used[MAX_LTP_LONG_SFB];
143} LongTermPrediction;
144
145/**
9cc04edf
RS
146 * Individual Channel Stream
147 */
62a57fae
RS
148typedef struct {
149 uint8_t max_sfb; ///< number of scalefactor bands per group
150 enum WindowSequence window_sequence[2];
151 uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window.
152 int num_window_groups;
153 uint8_t group_len[8];
ead15f1d 154 LongTermPrediction ltp;
62a57fae 155 const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
78e65cd7 156 const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window
62a57fae
RS
157 int num_swb; ///< number of scalefactor window bands
158 int num_windows;
159 int tns_max_bands;
7633a041
AC
160 int predictor_present;
161 int predictor_initialized;
162 int predictor_reset_group;
163 uint8_t prediction_used[41];
62a57fae 164} IndividualChannelStream;
9cc04edf
RS
165
166/**
1dece0d2
RS
167 * Temporal Noise Shaping
168 */
169typedef struct {
170 int present;
171 int n_filt[8];
172 int length[8][4];
173 int direction[8][4];
174 int order[8][4];
175 float coef[8][4][TNS_MAX_ORDER];
176} TemporalNoiseShaping;
177
178/**
9cc04edf
RS
179 * Dynamic Range Control - decoded from the bitstream but not processed further.
180 */
181typedef struct {
182 int pce_instance_tag; ///< Indicates with which program the DRC info is associated.
183 int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative
184 int dyn_rng_ctl[17]; ///< DRC magnitude information
185 int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing.
186 int band_incr; ///< Number of DRC bands greater than 1 having DRC info.
187 int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain.
188 int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
189 int prog_ref_level; /**< A reference level for the long-term program audio level for all
190 * channels combined.
191 */
192} DynamicRangeControl;
193
cc0591da
RS
/**
 * Pulse data: a pulse count, a start value and up to four
 * position/amplitude pairs.
 */
typedef struct {
    int num_pulse;
    int start;
    int pos[4];
    int amp[4];
} Pulse;
200
201/**
202 * coupling parameters
203 */
204typedef struct {
62a57fae
RS
205 enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied.
206 int num_coupled; ///< number of target elements
207 enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE.
208 int id_select[8]; ///< element id
5c2cb172
AC
209 int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel;
210 * [2] list of gains for left channel; [3] lists of gains for both channels
62a57fae
RS
211 */
212 float gain[16][120];
213} ChannelCoupling;
214
215/**
216 * Single Channel Element - used for both SCE and LFE elements.
217 */
218typedef struct {
219 IndividualChannelStream ics;
220 TemporalNoiseShaping tns;
78e65cd7 221 Pulse pulse;
ead15f1d
YHL
222 enum BandType band_type[128]; ///< band types
223 int band_type_run_end[120]; ///< band type run end points
224 float sf[120]; ///< scalefactors
225 int sf_idx[128]; ///< scalefactor indices (used by encoder)
226 uint8_t zeroes[128]; ///< band is not coded (used by encoder)
9d35fa52
VS
227 DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT
228 DECLARE_ALIGNED(32, float, saved)[1024]; ///< overlap
229 DECLARE_ALIGNED(32, float, ret)[2048]; ///< PCM output
ead15f1d 230 DECLARE_ALIGNED(16, int16_t, ltp_state)[3072]; ///< time signal for LTP
aab54133 231 PredictorState predictor_state[MAX_PREDICTORS];
62a57fae
RS
232} SingleChannelElement;
233
234/**
235 * channel element - generic struct for SCE/CPE/CCE/LFE
236 */
237typedef struct {
238 // CPE specific
78e65cd7
AC
239 int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
240 int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder)
241 uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band
62a57fae
RS
242 // shared
243 SingleChannelElement ch[2];
244 // CCE specific
245 ChannelCoupling coup;
ed492b61 246 SpectralBandReplication sbr;
62a57fae 247} ChannelElement;
cc0591da 248
589ce6e6
RS
249/**
250 * main AAC context
251 */
252typedef struct {
c909d482 253 AVCodecContext *avctx;
589ce6e6 254
cc0591da
RS
255 MPEG4AudioConfig m4ac;
256
257 int is_saved; ///< Set if elements have stored overlap from previous frame.
258 DynamicRangeControl che_drc;
259
9cc04edf 260 /**
b17f1a07 261 * @defgroup elements Channel element related data.
9cc04edf
RS
262 * @{
263 */
cc0591da
RS
264 enum ChannelPosition che_pos[4][MAX_ELEM_ID]; /**< channel element channel mapping with the
265 * first index as the first 4 raw data block types
266 */
c909d482
AC
267 ChannelElement *che[4][MAX_ELEM_ID];
268 ChannelElement *tag_che_map[4][MAX_ELEM_ID];
bb5c0988 269 int tags_mapped;
9cc04edf 270 /** @} */
cc0591da 271
589ce6e6 272 /**
c7f4d983
RS
273 * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
274 * @{
275 */
9d35fa52 276 DECLARE_ALIGNED(32, float, buf_mdct)[1024];
c7f4d983
RS
277 /** @} */
278
279 /**
589ce6e6
RS
280 * @defgroup tables Computed / set up during initialization.
281 * @{
282 */
01b22147
MR
283 FFTContext mdct;
284 FFTContext mdct_small;
ead15f1d 285 FFTContext mdct_ltp;
589ce6e6 286 DSPContext dsp;
c73d99e6 287 FmtConvertContext fmt_conv;
9cc04edf 288 int random_state;
589ce6e6
RS
289 /** @} */
290
291 /**
cc0591da 292 * @defgroup output Members used for output interleaving.
589ce6e6
RS
293 * @{
294 */
cc0591da 295 float *output_data[MAX_CHANNELS]; ///< Points to each element's 'ret' buffer (PCM output).
589ce6e6
RS
296 /** @} */
297
9d35fa52 298 DECLARE_ALIGNED(32, float, temp)[128];
6308765c 299
981b8fd7 300 enum OCStatus output_configured;
589ce6e6
RS
301} AACContext;
302
98790382 303#endif /* AVCODEC_AAC_H */