Commit | Line | Data |
---|---|---|
ad6b2c1f JR |
1 | /* |
2 | * The simplest AC-3 encoder | |
3 | * Copyright (c) 2000 Fabrice Bellard | |
4 | * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com> | |
5 | * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de> | |
6 | * | |
7 | * This file is part of FFmpeg. | |
8 | * | |
9 | * FFmpeg is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * FFmpeg is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with FFmpeg; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | */ | |
23 | ||
24 | /** | |
25 | * @file | |
26 | * fixed-point AC-3 encoder. | |
27 | */ | |
28 | ||
6fd96d1a | 29 | #undef CONFIG_AC3ENC_FLOAT |
ad6b2c1f JR |
30 | #include "ac3enc.c" |
31 | ||
32 | ||
33 | /** Scale a float value by 2^15, convert to an integer, and clip to range -32767..32767. */ | |
34 | #define FIX15(a) av_clip(SCALE_FLOAT(a, 15), -32767, 32767) | |
35 | ||
36 | ||
37 | /** | |
38 | * Finalize MDCT and free allocated memory. | |
39 | */ | |
40 | static av_cold void mdct_end(AC3MDCTContext *mdct) | |
41 | { | |
42 | mdct->nbits = 0; | |
43 | av_freep(&mdct->costab); | |
44 | av_freep(&mdct->sintab); | |
45 | av_freep(&mdct->xcos1); | |
46 | av_freep(&mdct->xsin1); | |
47 | av_freep(&mdct->rot_tmp); | |
48 | av_freep(&mdct->cplx_tmp); | |
49 | } | |
50 | ||
51 | ||
52 | /** | |
53 | * Initialize FFT tables. | |
54 | * @param ln log2(FFT size) | |
55 | */ | |
56 | static av_cold int fft_init(AVCodecContext *avctx, AC3MDCTContext *mdct, int ln) | |
57 | { | |
58 | int i, n, n2; | |
59 | float alpha; | |
60 | ||
61 | n = 1 << ln; | |
62 | n2 = n >> 1; | |
63 | ||
64 | FF_ALLOC_OR_GOTO(avctx, mdct->costab, n2 * sizeof(*mdct->costab), fft_alloc_fail); | |
65 | FF_ALLOC_OR_GOTO(avctx, mdct->sintab, n2 * sizeof(*mdct->sintab), fft_alloc_fail); | |
66 | ||
67 | for (i = 0; i < n2; i++) { | |
68 | alpha = 2.0 * M_PI * i / n; | |
69 | mdct->costab[i] = FIX15(cos(alpha)); | |
70 | mdct->sintab[i] = FIX15(sin(alpha)); | |
71 | } | |
72 | ||
73 | return 0; | |
74 | fft_alloc_fail: | |
75 | mdct_end(mdct); | |
76 | return AVERROR(ENOMEM); | |
77 | } | |
78 | ||
79 | ||
80 | /** | |
81 | * Initialize MDCT tables. | |
82 | * @param nbits log2(MDCT size) | |
83 | */ | |
84 | static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, | |
85 | int nbits) | |
86 | { | |
87 | int i, n, n4, ret; | |
88 | ||
89 | n = 1 << nbits; | |
90 | n4 = n >> 2; | |
91 | ||
92 | mdct->nbits = nbits; | |
93 | ||
94 | ret = fft_init(avctx, mdct, nbits - 2); | |
95 | if (ret) | |
96 | return ret; | |
97 | ||
98 | mdct->window = ff_ac3_window; | |
99 | ||
100 | FF_ALLOC_OR_GOTO(avctx, mdct->xcos1, n4 * sizeof(*mdct->xcos1), mdct_alloc_fail); | |
101 | FF_ALLOC_OR_GOTO(avctx, mdct->xsin1, n4 * sizeof(*mdct->xsin1), mdct_alloc_fail); | |
102 | FF_ALLOC_OR_GOTO(avctx, mdct->rot_tmp, n * sizeof(*mdct->rot_tmp), mdct_alloc_fail); | |
103 | FF_ALLOC_OR_GOTO(avctx, mdct->cplx_tmp, n4 * sizeof(*mdct->cplx_tmp), mdct_alloc_fail); | |
104 | ||
105 | for (i = 0; i < n4; i++) { | |
106 | float alpha = 2.0 * M_PI * (i + 1.0 / 8.0) / n; | |
107 | mdct->xcos1[i] = FIX15(-cos(alpha)); | |
108 | mdct->xsin1[i] = FIX15(-sin(alpha)); | |
109 | } | |
110 | ||
111 | return 0; | |
112 | mdct_alloc_fail: | |
113 | mdct_end(mdct); | |
114 | return AVERROR(ENOMEM); | |
115 | } | |
116 | ||
117 | ||
118 | /** Butterfly op */ | |
119 | #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ | |
120 | { \ | |
121 | int ax, ay, bx, by; \ | |
122 | bx = pre1; \ | |
123 | by = pim1; \ | |
124 | ax = qre1; \ | |
125 | ay = qim1; \ | |
126 | pre = (bx + ax) >> 1; \ | |
127 | pim = (by + ay) >> 1; \ | |
128 | qre = (bx - ax) >> 1; \ | |
129 | qim = (by - ay) >> 1; \ | |
130 | } | |
131 | ||
132 | ||
133 | /** Complex multiply */ | |
134 | #define CMUL(pre, pim, are, aim, bre, bim) \ | |
135 | { \ | |
136 | pre = (MUL16(are, bre) - MUL16(aim, bim)) >> 15; \ | |
137 | pim = (MUL16(are, bim) + MUL16(bre, aim)) >> 15; \ | |
138 | } | |
139 | ||
140 | ||
141 | /** | |
142 | * Calculate a 2^n point complex FFT on 2^ln points. | |
143 | * @param z complex input/output samples | |
144 | * @param ln log2(FFT size) | |
145 | */ | |
146 | static void fft(AC3MDCTContext *mdct, IComplex *z, int ln) | |
147 | { | |
148 | int j, l, np, np2; | |
149 | int nblocks, nloops; | |
150 | register IComplex *p,*q; | |
151 | int tmp_re, tmp_im; | |
152 | ||
153 | np = 1 << ln; | |
154 | ||
155 | /* reverse */ | |
156 | for (j = 0; j < np; j++) { | |
157 | int k = av_reverse[j] >> (8 - ln); | |
158 | if (k < j) | |
159 | FFSWAP(IComplex, z[k], z[j]); | |
160 | } | |
161 | ||
162 | /* pass 0 */ | |
163 | ||
164 | p = &z[0]; | |
165 | j = np >> 1; | |
166 | do { | |
167 | BF(p[0].re, p[0].im, p[1].re, p[1].im, | |
168 | p[0].re, p[0].im, p[1].re, p[1].im); | |
169 | p += 2; | |
170 | } while (--j); | |
171 | ||
172 | /* pass 1 */ | |
173 | ||
174 | p = &z[0]; | |
175 | j = np >> 2; | |
176 | do { | |
177 | BF(p[0].re, p[0].im, p[2].re, p[2].im, | |
178 | p[0].re, p[0].im, p[2].re, p[2].im); | |
179 | BF(p[1].re, p[1].im, p[3].re, p[3].im, | |
180 | p[1].re, p[1].im, p[3].im, -p[3].re); | |
181 | p+=4; | |
182 | } while (--j); | |
183 | ||
184 | /* pass 2 .. ln-1 */ | |
185 | ||
186 | nblocks = np >> 3; | |
187 | nloops = 1 << 2; | |
188 | np2 = np >> 1; | |
189 | do { | |
190 | p = z; | |
191 | q = z + nloops; | |
192 | for (j = 0; j < nblocks; j++) { | |
193 | BF(p->re, p->im, q->re, q->im, | |
194 | p->re, p->im, q->re, q->im); | |
195 | p++; | |
196 | q++; | |
197 | for(l = nblocks; l < np2; l += nblocks) { | |
198 | CMUL(tmp_re, tmp_im, mdct->costab[l], -mdct->sintab[l], q->re, q->im); | |
199 | BF(p->re, p->im, q->re, q->im, | |
200 | p->re, p->im, tmp_re, tmp_im); | |
201 | p++; | |
202 | q++; | |
203 | } | |
204 | p += nloops; | |
205 | q += nloops; | |
206 | } | |
207 | nblocks = nblocks >> 1; | |
208 | nloops = nloops << 1; | |
209 | } while (nblocks); | |
210 | } | |
211 | ||
212 | ||
213 | /** | |
214 | * Calculate a 512-point MDCT | |
215 | * @param out 256 output frequency coefficients | |
216 | * @param in 512 windowed input audio samples | |
217 | */ | |
218 | static void mdct512(AC3MDCTContext *mdct, int32_t *out, int16_t *in) | |
219 | { | |
220 | int i, re, im, n, n2, n4; | |
221 | int16_t *rot = mdct->rot_tmp; | |
222 | IComplex *x = mdct->cplx_tmp; | |
223 | ||
224 | n = 1 << mdct->nbits; | |
225 | n2 = n >> 1; | |
226 | n4 = n >> 2; | |
227 | ||
228 | /* shift to simplify computations */ | |
229 | for (i = 0; i <n4; i++) | |
230 | rot[i] = -in[i + 3*n4]; | |
231 | memcpy(&rot[n4], &in[0], 3*n4*sizeof(*in)); | |
232 | ||
233 | /* pre rotation */ | |
234 | for (i = 0; i < n4; i++) { | |
235 | re = ((int)rot[ 2*i] - (int)rot[ n-1-2*i]) >> 1; | |
236 | im = -((int)rot[n2+2*i] - (int)rot[n2-1-2*i]) >> 1; | |
237 | CMUL(x[i].re, x[i].im, re, im, -mdct->xcos1[i], mdct->xsin1[i]); | |
238 | } | |
239 | ||
240 | fft(mdct, x, mdct->nbits - 2); | |
241 | ||
242 | /* post rotation */ | |
243 | for (i = 0; i < n4; i++) { | |
244 | re = x[i].re; | |
245 | im = x[i].im; | |
246 | CMUL(out[n2-1-2*i], out[2*i], re, im, mdct->xsin1[i], mdct->xcos1[i]); | |
247 | } | |
248 | } | |
249 | ||
250 | ||
251 | /** | |
252 | * Apply KBD window to input samples prior to MDCT. | |
253 | */ | |
3b924294 | 254 | static void apply_window(DSPContext *dsp, int16_t *output, const int16_t *input, |
ad6b2c1f JR |
255 | const int16_t *window, int n) |
256 | { | |
257 | int i; | |
258 | int n2 = n >> 1; | |
259 | ||
260 | for (i = 0; i < n2; i++) { | |
261 | output[i] = MUL16(input[i], window[i]) >> 15; | |
262 | output[n-i-1] = MUL16(input[n-i-1], window[i]) >> 15; | |
263 | } | |
264 | } | |
265 | ||
266 | ||
267 | /** | |
268 | * Calculate the log2() of the maximum absolute value in an array. | |
269 | * @param tab input array | |
270 | * @param n number of values in the array | |
271 | * @return log2(max(abs(tab[]))) | |
272 | */ | |
273 | static int log2_tab(int16_t *tab, int n) | |
274 | { | |
275 | int i, v; | |
276 | ||
277 | v = 0; | |
278 | for (i = 0; i < n; i++) | |
279 | v |= abs(tab[i]); | |
280 | ||
281 | return av_log2(v); | |
282 | } | |
283 | ||
284 | ||
285 | /** | |
286 | * Left-shift each value in an array by a specified amount. | |
287 | * @param tab input array | |
288 | * @param n number of values in the array | |
289 | * @param lshift left shift amount. a negative value means right shift. | |
290 | */ | |
291 | static void lshift_tab(int16_t *tab, int n, int lshift) | |
292 | { | |
293 | int i; | |
294 | ||
295 | if (lshift > 0) { | |
296 | for (i = 0; i < n; i++) | |
297 | tab[i] <<= lshift; | |
298 | } else if (lshift < 0) { | |
299 | lshift = -lshift; | |
300 | for (i = 0; i < n; i++) | |
301 | tab[i] >>= lshift; | |
302 | } | |
303 | } | |
304 | ||
305 | ||
306 | /** | |
307 | * Normalize the input samples to use the maximum available precision. | |
308 | * This assumes signed 16-bit input samples. Exponents are reduced by 9 to | |
309 | * match the 24-bit internal precision for MDCT coefficients. | |
310 | * | |
311 | * @return exponent shift | |
312 | */ | |
313 | static int normalize_samples(AC3EncodeContext *s) | |
314 | { | |
315 | int v = 14 - log2_tab(s->windowed_samples, AC3_WINDOW_SIZE); | |
316 | v = FFMAX(0, v); | |
317 | lshift_tab(s->windowed_samples, AC3_WINDOW_SIZE, v); | |
318 | return v - 9; | |
319 | } | |
320 | ||
321 | ||
ac05f903 JR |
322 | /** |
323 | * Scale MDCT coefficients from float to fixed-point. | |
324 | */ | |
325 | static void scale_coefficients(AC3EncodeContext *s) | |
326 | { | |
327 | /* scaling/conversion is obviously not needed for the fixed-point encoder | |
328 | since the coefficients are already fixed-point. */ | |
329 | return; | |
330 | } | |
331 | ||
332 | ||
ad6b2c1f JR |
333 | #ifdef TEST |
334 | /*************************************************************************/ | |
335 | /* TEST */ | |
336 | ||
337 | #include "libavutil/lfg.h" | |
338 | ||
339 | #define MDCT_NBITS 9 | |
340 | #define MDCT_SAMPLES (1 << MDCT_NBITS) | |
341 | #define FN (MDCT_SAMPLES/4) | |
342 | ||
343 | ||
344 | static void fft_test(AC3MDCTContext *mdct, AVLFG *lfg) | |
345 | { | |
346 | IComplex in[FN], in1[FN]; | |
347 | int k, n, i; | |
348 | float sum_re, sum_im, a; | |
349 | ||
350 | for (i = 0; i < FN; i++) { | |
351 | in[i].re = av_lfg_get(lfg) % 65535 - 32767; | |
352 | in[i].im = av_lfg_get(lfg) % 65535 - 32767; | |
353 | in1[i] = in[i]; | |
354 | } | |
355 | fft(mdct, in, 7); | |
356 | ||
357 | /* do it by hand */ | |
358 | for (k = 0; k < FN; k++) { | |
359 | sum_re = 0; | |
360 | sum_im = 0; | |
361 | for (n = 0; n < FN; n++) { | |
362 | a = -2 * M_PI * (n * k) / FN; | |
363 | sum_re += in1[n].re * cos(a) - in1[n].im * sin(a); | |
364 | sum_im += in1[n].re * sin(a) + in1[n].im * cos(a); | |
365 | } | |
366 | av_log(NULL, AV_LOG_DEBUG, "%3d: %6d,%6d %6.0f,%6.0f\n", | |
367 | k, in[k].re, in[k].im, sum_re / FN, sum_im / FN); | |
368 | } | |
369 | } | |
370 | ||
371 | ||
372 | static void mdct_test(AC3MDCTContext *mdct, AVLFG *lfg) | |
373 | { | |
374 | int16_t input[MDCT_SAMPLES]; | |
375 | int32_t output[AC3_MAX_COEFS]; | |
376 | float input1[MDCT_SAMPLES]; | |
377 | float output1[AC3_MAX_COEFS]; | |
378 | float s, a, err, e, emax; | |
379 | int i, k, n; | |
380 | ||
381 | for (i = 0; i < MDCT_SAMPLES; i++) { | |
382 | input[i] = (av_lfg_get(lfg) % 65535 - 32767) * 9 / 10; | |
383 | input1[i] = input[i]; | |
384 | } | |
385 | ||
386 | mdct512(mdct, output, input); | |
387 | ||
388 | /* do it by hand */ | |
389 | for (k = 0; k < AC3_MAX_COEFS; k++) { | |
390 | s = 0; | |
391 | for (n = 0; n < MDCT_SAMPLES; n++) { | |
392 | a = (2*M_PI*(2*n+1+MDCT_SAMPLES/2)*(2*k+1) / (4 * MDCT_SAMPLES)); | |
393 | s += input1[n] * cos(a); | |
394 | } | |
395 | output1[k] = -2 * s / MDCT_SAMPLES; | |
396 | } | |
397 | ||
398 | err = 0; | |
399 | emax = 0; | |
400 | for (i = 0; i < AC3_MAX_COEFS; i++) { | |
401 | av_log(NULL, AV_LOG_DEBUG, "%3d: %7d %7.0f\n", i, output[i], output1[i]); | |
402 | e = output[i] - output1[i]; | |
403 | if (e > emax) | |
404 | emax = e; | |
405 | err += e * e; | |
406 | } | |
407 | av_log(NULL, AV_LOG_DEBUG, "err2=%f emax=%f\n", err / AC3_MAX_COEFS, emax); | |
408 | } | |
409 | ||
410 | ||
411 | int main(void) | |
412 | { | |
413 | AVLFG lfg; | |
414 | AC3MDCTContext mdct; | |
415 | ||
416 | mdct.avctx = NULL; | |
417 | av_log_set_level(AV_LOG_DEBUG); | |
418 | mdct_init(&mdct, 9); | |
419 | ||
420 | fft_test(&mdct, &lfg); | |
421 | mdct_test(&mdct, &lfg); | |
422 | ||
423 | return 0; | |
424 | } | |
425 | #endif /* TEST */ | |
426 | ||
427 | ||
6fd96d1a JR |
428 | AVCodec ac3_fixed_encoder = { |
429 | "ac3_fixed", | |
ad6b2c1f JR |
430 | AVMEDIA_TYPE_AUDIO, |
431 | CODEC_ID_AC3, | |
432 | sizeof(AC3EncodeContext), | |
433 | ac3_encode_init, | |
434 | ac3_encode_frame, | |
435 | ac3_encode_close, | |
436 | NULL, | |
437 | .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, | |
438 | .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"), | |
439 | .channel_layouts = ac3_channel_layouts, | |
440 | }; |