Initial revision
[libav.git] / libav / mpegaudio.c
1 /*
2 * The simplest mpeg audio layer 2 encoder
3 * Copyright (c) 2000 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <netinet/in.h>
23 #include <math.h>
24 #include "avcodec.h"
25 #include "mpegaudio.h"
26
27 #define NDEBUG
28 #include <assert.h>
29
30 /* define it to use floats in quantization (I don't like floats !) */
31 //#define USE_FLOATS
32
/* Channel-mode codes. Only MPA_MONO is referenced in this file: it is
   written into a 2-bit field of the frame header by encode_frame(). */
#define MPA_STEREO   0
#define MPA_JSTEREO  1
#define MPA_DUAL     2
#define MPA_MONO     3
37
38 #include "mpegaudiotab.h"
39
40 int MPA_encode_init(AVEncodeContext *avctx)
41 {
42 MpegAudioContext *s = avctx->priv_data;
43 int freq = avctx->rate;
44 int bitrate = avctx->bit_rate;
45 int channels = avctx->channels;
46 int i, v, table;
47 float a;
48
49 if (channels != 1)
50 return -1;
51
52 bitrate = bitrate / 1000;
53 s->freq = freq;
54 s->bit_rate = bitrate * 1000;
55 avctx->frame_size = MPA_FRAME_SIZE;
56 avctx->key_frame = 1; /* always key frame */
57
58 /* encoding freq */
59 s->lsf = 0;
60 for(i=0;i<3;i++) {
61 if (freq_tab[i] == freq)
62 break;
63 if ((freq_tab[i] / 2) == freq) {
64 s->lsf = 1;
65 break;
66 }
67 }
68 if (i == 3)
69 return -1;
70 s->freq_index = i;
71
72 /* encoding bitrate & frequency */
73 for(i=0;i<15;i++) {
74 if (bitrate_tab[1-s->lsf][i] == bitrate)
75 break;
76 }
77 if (i == 15)
78 return -1;
79 s->bitrate_index = i;
80
81 /* compute total header size & pad bit */
82
83 a = (float)(bitrate * 1000 * MPA_FRAME_SIZE) / (freq * 8.0);
84 s->frame_size = ((int)a) * 8;
85
86 /* frame fractional size to compute padding */
87 s->frame_frac = 0;
88 s->frame_frac_incr = (int)((a - floor(a)) * 65536.0);
89
90 /* select the right allocation table */
91 if (!s->lsf) {
92 if ((freq == 48000 && bitrate >= 56) ||
93 (bitrate >= 56 && bitrate <= 80))
94 table = 0;
95 else if (freq != 48000 && bitrate >= 96)
96 table = 1;
97 else if (freq != 32000 && bitrate <= 48)
98 table = 2;
99 else
100 table = 3;
101 } else {
102 table = 4;
103 }
104 /* number of used subbands */
105 s->sblimit = sblimit_table[table];
106 s->alloc_table = alloc_tables[table];
107
108 #ifdef DEBUG
109 printf("%d kb/s, %d Hz, frame_size=%d bits, table=%d, padincr=%x\n",
110 bitrate, freq, s->frame_size, table, s->frame_frac_incr);
111 #endif
112
113 s->samples_offset = 0;
114
115 for(i=0;i<512;i++) {
116 float a = enwindow[i] * 32768.0 * 16.0;
117 filter_bank[i] = (int)(a);
118 }
119 for(i=0;i<64;i++) {
120 v = (int)(pow(2.0, (3 - i) / 3.0) * (1 << 20));
121 if (v <= 0)
122 v = 1;
123 scale_factor_table[i] = v;
124 #ifdef USE_FLOATS
125 scale_factor_inv_table[i] = pow(2.0, -(3 - i) / 3.0) / (float)(1 << 20);
126 #else
127 #define P 15
128 scale_factor_shift[i] = 21 - P - (i / 3);
129 scale_factor_mult[i] = (1 << P) * pow(2.0, (i % 3) / 3.0);
130 #endif
131 }
132 for(i=0;i<128;i++) {
133 v = i - 64;
134 if (v <= -3)
135 v = 0;
136 else if (v < 0)
137 v = 1;
138 else if (v == 0)
139 v = 2;
140 else if (v < 3)
141 v = 3;
142 else
143 v = 4;
144 scale_diff_table[i] = v;
145 }
146
147 for(i=0;i<17;i++) {
148 v = quant_bits[i];
149 if (v < 0)
150 v = -v;
151 else
152 v = v * 3;
153 total_quant_bits[i] = 12 * v;
154 }
155
156 return 0;
157 }
158
159 /* 32 point floating point IDCT */
160 static void idct32(int *out, int *tab, int sblimit, int left_shift)
161 {
162 int i, j;
163 int *t, *t1, xr;
164 const int *xp = costab32;
165
166 for(j=31;j>=3;j-=2) tab[j] += tab[j - 2];
167
168 t = tab + 30;
169 t1 = tab + 2;
170 do {
171 t[0] += t[-4];
172 t[1] += t[1 - 4];
173 t -= 4;
174 } while (t != t1);
175
176 t = tab + 28;
177 t1 = tab + 4;
178 do {
179 t[0] += t[-8];
180 t[1] += t[1-8];
181 t[2] += t[2-8];
182 t[3] += t[3-8];
183 t -= 8;
184 } while (t != t1);
185
186 t = tab;
187 t1 = tab + 32;
188 do {
189 t[ 3] = -t[ 3];
190 t[ 6] = -t[ 6];
191
192 t[11] = -t[11];
193 t[12] = -t[12];
194 t[13] = -t[13];
195 t[15] = -t[15];
196 t += 16;
197 } while (t != t1);
198
199
200 t = tab;
201 t1 = tab + 8;
202 do {
203 int x1, x2, x3, x4;
204
205 x3 = MUL(t[16], FIX(SQRT2*0.5));
206 x4 = t[0] - x3;
207 x3 = t[0] + x3;
208
209 x2 = MUL(-(t[24] + t[8]), FIX(SQRT2*0.5));
210 x1 = MUL((t[8] - x2), xp[0]);
211 x2 = MUL((t[8] + x2), xp[1]);
212
213 t[ 0] = x3 + x1;
214 t[ 8] = x4 - x2;
215 t[16] = x4 + x2;
216 t[24] = x3 - x1;
217 t++;
218 } while (t != t1);
219
220 xp += 2;
221 t = tab;
222 t1 = tab + 4;
223 do {
224 xr = MUL(t[28],xp[0]);
225 t[28] = (t[0] - xr);
226 t[0] = (t[0] + xr);
227
228 xr = MUL(t[4],xp[1]);
229 t[ 4] = (t[24] - xr);
230 t[24] = (t[24] + xr);
231
232 xr = MUL(t[20],xp[2]);
233 t[20] = (t[8] - xr);
234 t[ 8] = (t[8] + xr);
235
236 xr = MUL(t[12],xp[3]);
237 t[12] = (t[16] - xr);
238 t[16] = (t[16] + xr);
239 t++;
240 } while (t != t1);
241 xp += 4;
242
243 for (i = 0; i < 4; i++) {
244 xr = MUL(tab[30-i*4],xp[0]);
245 tab[30-i*4] = (tab[i*4] - xr);
246 tab[ i*4] = (tab[i*4] + xr);
247
248 xr = MUL(tab[ 2+i*4],xp[1]);
249 tab[ 2+i*4] = (tab[28-i*4] - xr);
250 tab[28-i*4] = (tab[28-i*4] + xr);
251
252 xr = MUL(tab[31-i*4],xp[0]);
253 tab[31-i*4] = (tab[1+i*4] - xr);
254 tab[ 1+i*4] = (tab[1+i*4] + xr);
255
256 xr = MUL(tab[ 3+i*4],xp[1]);
257 tab[ 3+i*4] = (tab[29-i*4] - xr);
258 tab[29-i*4] = (tab[29-i*4] + xr);
259
260 xp += 2;
261 }
262
263 t = tab + 30;
264 t1 = tab + 1;
265 do {
266 xr = MUL(t1[0], *xp);
267 t1[0] = (t[0] - xr);
268 t[0] = (t[0] + xr);
269 t -= 2;
270 t1 += 2;
271 xp++;
272 } while (t >= tab);
273
274 for(i=0;i<32;i++) {
275 out[i] = tab[bitinv32[i]] << left_shift;
276 }
277 }
278
279 static void filter(MpegAudioContext *s, short *samples)
280 {
281 short *p, *q;
282 int sum, offset, i, j, norm, n;
283 short tmp[64];
284 int tmp1[32];
285 int *out;
286
287 // print_pow1(samples, 1152);
288
289 offset = s->samples_offset;
290 out = &s->sb_samples[0][0][0];
291 for(j=0;j<36;j++) {
292 /* 32 samples at once */
293 for(i=0;i<32;i++)
294 s->samples_buf[offset + (31 - i)] = samples[i];
295
296 /* filter */
297 p = s->samples_buf + offset;
298 q = filter_bank;
299 /* maxsum = 23169 */
300 for(i=0;i<64;i++) {
301 sum = p[0*64] * q[0*64];
302 sum += p[1*64] * q[1*64];
303 sum += p[2*64] * q[2*64];
304 sum += p[3*64] * q[3*64];
305 sum += p[4*64] * q[4*64];
306 sum += p[5*64] * q[5*64];
307 sum += p[6*64] * q[6*64];
308 sum += p[7*64] * q[7*64];
309 tmp[i] = sum >> 14;
310 p++;
311 q++;
312 }
313 tmp1[0] = tmp[16];
314 for( i=1; i<=16; i++ ) tmp1[i] = tmp[i+16]+tmp[16-i];
315 for( i=17; i<=31; i++ ) tmp1[i] = tmp[i+16]-tmp[80-i];
316
317 /* integer IDCT 32 with normalization. XXX: There may be some
318 overflow left */
319 norm = 0;
320 for(i=0;i<32;i++) {
321 norm |= abs(tmp1[i]);
322 }
323 n = log2(norm) - 12;
324 if (n > 0) {
325 for(i=0;i<32;i++)
326 tmp1[i] >>= n;
327 } else {
328 n = 0;
329 }
330
331 idct32(out, tmp1, s->sblimit, n);
332
333 /* advance of 32 samples */
334 samples += 32;
335 offset -= 32;
336 out += 32;
337 /* handle the wrap around */
338 if (offset < 0) {
339 memmove(s->samples_buf + SAMPLES_BUF_SIZE - (512 - 32),
340 s->samples_buf, (512 - 32) * 2);
341 offset = SAMPLES_BUF_SIZE - 512;
342 }
343 }
344 s->samples_offset = offset;
345
346 // print_pow(s->sb_samples, 1152);
347 }
348
349 static void compute_scale_factors(unsigned char scale_code[SBLIMIT],
350 unsigned char scale_factors[SBLIMIT][3],
351 int sb_samples[3][12][SBLIMIT],
352 int sblimit)
353 {
354 int *p, vmax, v, n, i, j, k, code;
355 int index, d1, d2;
356 unsigned char *sf = &scale_factors[0][0];
357
358 for(j=0;j<sblimit;j++) {
359 for(i=0;i<3;i++) {
360 /* find the max absolute value */
361 p = &sb_samples[i][0][j];
362 vmax = abs(*p);
363 for(k=1;k<12;k++) {
364 p += SBLIMIT;
365 v = abs(*p);
366 if (v > vmax)
367 vmax = v;
368 }
369 /* compute the scale factor index using log 2 computations */
370 if (vmax > 0) {
371 n = log2(vmax);
372 /* n is the position of the MSB of vmax. now
373 use at most 2 compares to find the index */
374 index = (21 - n) * 3 - 3;
375 if (index >= 0) {
376 while (vmax <= scale_factor_table[index+1])
377 index++;
378 } else {
379 index = 0; /* very unlikely case of overflow */
380 }
381 } else {
382 index = 63;
383 }
384
385 #if 0
386 printf("%2d:%d in=%x %x %d\n",
387 j, i, vmax, scale_factor_table[index], index);
388 #endif
389 /* store the scale factor */
390 assert(index >=0 && index <= 63);
391 sf[i] = index;
392 }
393
394 /* compute the transmission factor : look if the scale factors
395 are close enough to each other */
396 d1 = scale_diff_table[sf[0] - sf[1] + 64];
397 d2 = scale_diff_table[sf[1] - sf[2] + 64];
398
399 /* handle the 25 cases */
400 switch(d1 * 5 + d2) {
401 case 0*5+0:
402 case 0*5+4:
403 case 3*5+4:
404 case 4*5+0:
405 case 4*5+4:
406 code = 0;
407 break;
408 case 0*5+1:
409 case 0*5+2:
410 case 4*5+1:
411 case 4*5+2:
412 code = 3;
413 sf[2] = sf[1];
414 break;
415 case 0*5+3:
416 case 4*5+3:
417 code = 3;
418 sf[1] = sf[2];
419 break;
420 case 1*5+0:
421 case 1*5+4:
422 case 2*5+4:
423 code = 1;
424 sf[1] = sf[0];
425 break;
426 case 1*5+1:
427 case 1*5+2:
428 case 2*5+0:
429 case 2*5+1:
430 case 2*5+2:
431 code = 2;
432 sf[1] = sf[2] = sf[0];
433 break;
434 case 2*5+3:
435 case 3*5+3:
436 code = 2;
437 sf[0] = sf[1] = sf[2];
438 break;
439 case 3*5+0:
440 case 3*5+1:
441 case 3*5+2:
442 code = 2;
443 sf[0] = sf[2] = sf[1];
444 break;
445 case 1*5+3:
446 code = 2;
447 if (sf[0] > sf[2])
448 sf[0] = sf[2];
449 sf[1] = sf[2] = sf[0];
450 break;
451 default:
452 abort();
453 }
454
455 #if 0
456 printf("%d: %2d %2d %2d %d %d -> %d\n", j,
457 sf[0], sf[1], sf[2], d1, d2, code);
458 #endif
459 scale_code[j] = code;
460 sf += 3;
461 }
462 }
463
464 /* The most important function : psycho acoustic module. In this
465 encoder there is basically none, so this is the worst you can do,
466 but also this is the simpler. */
467 static void psycho_acoustic_model(MpegAudioContext *s, short smr[SBLIMIT])
468 {
469 int i;
470
471 for(i=0;i<s->sblimit;i++) {
472 smr[i] = (int)(fixed_smr[i] * 10);
473 }
474 }
475
476
477 #define SB_NOTALLOCATED 0
478 #define SB_ALLOCATED 1
479 #define SB_NOMORE 2
480
481 /* Try to maximize the smr while using a number of bits inferior to
482 the frame size. I tried to make the code simpler, faster and
483 smaller than other encoders :-) */
484 static void compute_bit_allocation(MpegAudioContext *s,
485 short smr1[SBLIMIT],
486 unsigned char bit_alloc[SBLIMIT],
487 int *padding)
488 {
489 int i, b, max_smr, max_sb, current_frame_size, max_frame_size;
490 int incr;
491 short smr[SBLIMIT];
492 unsigned char subband_status[SBLIMIT];
493 const unsigned char *alloc;
494
495 memcpy(smr, smr1, sizeof(short) * s->sblimit);
496 memset(subband_status, SB_NOTALLOCATED, s->sblimit);
497 memset(bit_alloc, 0, s->sblimit);
498
499 /* compute frame size and padding */
500 max_frame_size = s->frame_size;
501 s->frame_frac += s->frame_frac_incr;
502 if (s->frame_frac >= 65536) {
503 s->frame_frac -= 65536;
504 s->do_padding = 1;
505 max_frame_size += 8;
506 } else {
507 s->do_padding = 0;
508 }
509
510 /* compute the header + bit alloc size */
511 current_frame_size = 32;
512 alloc = s->alloc_table;
513 for(i=0;i<s->sblimit;i++) {
514 incr = alloc[0];
515 current_frame_size += incr;
516 alloc += 1 << incr;
517 }
518 for(;;) {
519 /* look for the subband with the largest signal to mask ratio */
520 max_sb = -1;
521 max_smr = 0x80000000;
522 for(i=0;i<s->sblimit;i++) {
523 if (smr[i] > max_smr && subband_status[i] != SB_NOMORE) {
524 max_smr = smr[i];
525 max_sb = i;
526 }
527 }
528 #if 0
529 printf("current=%d max=%d max_sb=%d alloc=%d\n",
530 current_frame_size, max_frame_size, max_sb,
531 bit_alloc[max_sb]);
532 #endif
533 if (max_sb < 0)
534 break;
535
536 /* find alloc table entry (XXX: not optimal, should use
537 pointer table) */
538 alloc = s->alloc_table;
539 for(i=0;i<max_sb;i++) {
540 alloc += 1 << alloc[0];
541 }
542
543 if (subband_status[max_sb] == SB_NOTALLOCATED) {
544 /* nothing was coded for this band: add the necessary bits */
545 incr = 2 + nb_scale_factors[s->scale_code[max_sb]] * 6;
546 incr += total_quant_bits[alloc[1]];
547 } else {
548 /* increments bit allocation */
549 b = bit_alloc[max_sb];
550 incr = total_quant_bits[alloc[b + 1]] -
551 total_quant_bits[alloc[b]];
552 }
553
554 if (current_frame_size + incr <= max_frame_size) {
555 /* can increase size */
556 b = ++bit_alloc[max_sb];
557 current_frame_size += incr;
558 /* decrease smr by the resolution we added */
559 smr[max_sb] = smr1[max_sb] - quant_snr[alloc[b]];
560 /* max allocation size reached ? */
561 if (b == ((1 << alloc[0]) - 1))
562 subband_status[max_sb] = SB_NOMORE;
563 else
564 subband_status[max_sb] = SB_ALLOCATED;
565 } else {
566 /* cannot increase the size of this subband */
567 subband_status[max_sb] = SB_NOMORE;
568 }
569 }
570 *padding = max_frame_size - current_frame_size;
571 assert(*padding >= 0);
572
573 #if 0
574 for(i=0;i<s->sblimit;i++) {
575 printf("%d ", bit_alloc[i]);
576 }
577 printf("\n");
578 #endif
579 }
580
581 /*
582 * Output the mpeg audio layer 2 frame. Note how the code is small
583 * compared to other encoders :-)
584 */
585 static void encode_frame(MpegAudioContext *s,
586 unsigned char bit_alloc[SBLIMIT],
587 int padding)
588 {
589 int i, j, k, l, bit_alloc_bits, b;
590 unsigned char *sf;
591 int q[3];
592 PutBitContext *p = &s->pb;
593
594 /* header */
595
596 put_bits(p, 12, 0xfff);
597 put_bits(p, 1, 1 - s->lsf); /* 1 = mpeg1 ID, 0 = mpeg2 lsf ID */
598 put_bits(p, 2, 4-2); /* layer 2 */
599 put_bits(p, 1, 1); /* no error protection */
600 put_bits(p, 4, s->bitrate_index);
601 put_bits(p, 2, s->freq_index);
602 put_bits(p, 1, s->do_padding); /* use padding */
603 put_bits(p, 1, 0); /* private_bit */
604 put_bits(p, 2, MPA_MONO);
605 put_bits(p, 2, 0); /* mode_ext */
606 put_bits(p, 1, 0); /* no copyright */
607 put_bits(p, 1, 1); /* original */
608 put_bits(p, 2, 0); /* no emphasis */
609
610 /* bit allocation */
611 j = 0;
612 for(i=0;i<s->sblimit;i++) {
613 bit_alloc_bits = s->alloc_table[j];
614 put_bits(p, bit_alloc_bits, bit_alloc[i]);
615 j += 1 << bit_alloc_bits;
616 }
617
618 /* scale codes */
619 for(i=0;i<s->sblimit;i++) {
620 if (bit_alloc[i])
621 put_bits(p, 2, s->scale_code[i]);
622 }
623
624 /* scale factors */
625 sf = &s->scale_factors[0][0];
626 for(i=0;i<s->sblimit;i++) {
627 if (bit_alloc[i]) {
628 switch(s->scale_code[i]) {
629 case 0:
630 put_bits(p, 6, sf[0]);
631 put_bits(p, 6, sf[1]);
632 put_bits(p, 6, sf[2]);
633 break;
634 case 3:
635 case 1:
636 put_bits(p, 6, sf[0]);
637 put_bits(p, 6, sf[2]);
638 break;
639 case 2:
640 put_bits(p, 6, sf[0]);
641 break;
642 }
643 }
644 sf += 3;
645 }
646
647 /* quantization & write sub band samples */
648
649 for(k=0;k<3;k++) {
650 for(l=0;l<12;l+=3) {
651 j = 0;
652 for(i=0;i<s->sblimit;i++) {
653 bit_alloc_bits = s->alloc_table[j];
654 b = bit_alloc[i];
655 if (b) {
656 int qindex, steps, m, sample, bits;
657 /* we encode 3 sub band samples of the same sub band at a time */
658 qindex = s->alloc_table[j+b];
659 steps = quant_steps[qindex];
660 for(m=0;m<3;m++) {
661 sample = s->sb_samples[k][l + m][i];
662 /* divide by scale factor */
663 #ifdef USE_FLOATS
664 {
665 float a;
666 a = (float)sample * scale_factor_inv_table[s->scale_factors[i][k]];
667 q[m] = (int)((a + 1.0) * steps * 0.5);
668 }
669 #else
670 {
671 int q1, e, shift, mult;
672 e = s->scale_factors[i][k];
673 shift = scale_factor_shift[e];
674 mult = scale_factor_mult[e];
675
676 /* normalize to P bits */
677 if (shift < 0)
678 q1 = sample << (-shift);
679 else
680 q1 = sample >> shift;
681 q1 = (q1 * mult) >> P;
682 q[m] = ((q1 + (1 << P)) * steps) >> (P + 1);
683 }
684 #endif
685 if (q[m] >= steps)
686 q[m] = steps - 1;
687 assert(q[m] >= 0 && q[m] < steps);
688 }
689 bits = quant_bits[qindex];
690 if (bits < 0) {
691 /* group the 3 values to save bits */
692 put_bits(p, -bits,
693 q[0] + steps * (q[1] + steps * q[2]));
694 #if 0
695 printf("%d: gr1 %d\n",
696 i, q[0] + steps * (q[1] + steps * q[2]));
697 #endif
698 } else {
699 #if 0
700 printf("%d: gr3 %d %d %d\n",
701 i, q[0], q[1], q[2]);
702 #endif
703 put_bits(p, bits, q[0]);
704 put_bits(p, bits, q[1]);
705 put_bits(p, bits, q[2]);
706 }
707 }
708 /* next subband in alloc table */
709 j += 1 << bit_alloc_bits;
710 }
711 }
712 }
713
714 /* padding */
715 for(i=0;i<padding;i++)
716 put_bits(p, 1, 0);
717
718 /* flush */
719 flush_put_bits(p);
720 }
721
722 int MPA_encode_frame(AVEncodeContext *avctx,
723 unsigned char *frame, int buf_size, void *data)
724 {
725 MpegAudioContext *s = avctx->priv_data;
726 short *samples = data;
727 short smr[SBLIMIT];
728 unsigned char bit_alloc[SBLIMIT];
729 int padding;
730
731 filter(s, samples);
732 compute_scale_factors(s->scale_code, s->scale_factors,
733 s->sb_samples, s->sblimit);
734 psycho_acoustic_model(s, smr);
735 compute_bit_allocation(s, smr, bit_alloc, &padding);
736
737 init_put_bits(&s->pb, frame, MPA_MAX_CODED_FRAME_SIZE, NULL, NULL);
738
739 encode_frame(s, bit_alloc, padding);
740
741 s->nb_samples += MPA_FRAME_SIZE;
742 return s->pb.buf_ptr - s->pb.buf;
743 }
744
745
746 AVEncoder mp2_encoder = {
747 "mp2",
748 CODEC_TYPE_AUDIO,
749 CODEC_ID_MP2,
750 sizeof(MpegAudioContext),
751 MPA_encode_init,
752 MPA_encode_frame,
753 NULL,
754 };