New open-source lossy/lossless audio codec based on speech compression techniques...
[libav.git] / libavcodec / sonic.c
1 /*
2 * Simple free lossless/lossy audio codec
3 * Copyright (c) 2004 Alex Beregszaszi
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19 #include "avcodec.h"
20 #include "golomb.h"
21
22 /**
23 * @file sonic.c
24 * Simple free lossless/lossy audio codec
25 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
26 * Written and designed by Alex Beregszaszi
27 *
28 * TODO:
29 * - CABAC put/get_symbol
30 * - independent quantizer for channels
31 * - >2 channels support
32 * - more decorrelation types
33 * - more tap_quant tests
34 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
35 */
36
/* Upper bound on the channel count; sizes the per-channel pointer arrays. */
#define MAX_CHANNELS 2

/**
 * Private state shared by the Sonic encoder and decoder.
 */
typedef struct SonicContext {
    /* stream flags */
    int lossless;
    int mid_side;

    /* predictor order and decimation factor */
    int num_taps;
    int downsampling;
    /* scale applied to the rate-controlled quantizer (0.0 for lossless) */
    double quantization;

    /* stream geometry */
    int channels;
    int samplerate;
    int block_align;   /* coded samples per channel per frame */
    int frame_size;    /* channels * block_align * downsampling */

    int *tap_quant;                      /* per-tap quantization step */
    int *int_samples;                    /* frame_size interleaved working samples */
    int *coded_samples[MAX_CHANNELS];    /* block_align residuals per channel */

    // for encoding
    int *tail;          /* last tail_size samples of the previous frame */
    int tail_size;
    int *window;        /* tail + frame + zero padding analysed per frame */
    int window_size;

    // for decoding
    int *predictor_k;                    /* num_taps lattice coefficients */
    int *predictor_state[MAX_CHANNELS];  /* num_taps filter state per channel */
} SonicContext;
61
/* Fixed-point precision of the lattice coefficients (k values). */
#define LATTICE_SHIFT 10
#define LATTICE_FACTOR (1 << LATTICE_SHIFT)

/* Extra fractional bits carried by samples when coding lossily. */
#define SAMPLE_SHIFT 4
#define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)

/* Tuning constants of the encoder's rate control. */
#define BASE_QUANT 0.6
#define RATE_VARIATION 3.0
69
/**
 * Divide a by b with rounding applied to the magnitude of a.
 *
 * Half of b is added to |a| before the integer division and the sign of a
 * is restored afterwards, so the result is symmetric around zero.
 */
static inline int divide(int a, int b)
{
    const int half = b / 2;

    return (a < 0) ? -((half - a) / b) : (a + half) / b;
}
77
/**
 * Right-shift a by b bits after adding half of the divisor (1 << (b-1)).
 * b must be at least 1.
 */
static inline int shift(int a, int b)
{
    const int rounding = 1 << (b - 1);

    return (a + rounding) >> b;
}
82
/**
 * Right-shift a by b bits and add one to the result when a is negative.
 */
static inline int shift_down(int a, int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
87
88 #if 1
89 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
90 {
91 int i;
92
93 for (i = 0; i < entries; i++)
94 set_se_golomb(pb, buf[i]);
95
96 return 1;
97 }
98
99 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
100 {
101 int i;
102
103 for (i = 0; i < entries; i++)
104 buf[i] = get_se_golomb(gb);
105
106 return 1;
107 }
108
109 #else
110
/* The run-length step grows or shrinks by step/ADAPT_LEVEL after each run. */
#define ADAPT_LEVEL 8

/**
 * Number of bits needed to represent x, i.e. the position of its highest
 * set bit plus one; 0 for x == 0.
 */
static int bits_to_store(uint64_t x)
{
    int count;

    for (count = 0; x != 0; x >>= 1)
        count++;

    return count;
}
124
125 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
126 {
127 int i, bits;
128
129 if (!max)
130 return;
131
132 bits = bits_to_store(max);
133
134 for (i = 0; i < bits-1; i++)
135 put_bits(pb, 1, value & (1 << i));
136
137 if ( (value | (1 << (bits-1))) <= max)
138 put_bits(pb, 1, value & (1 << (bits-1)));
139 }
140
141 static unsigned int read_uint_max(GetBitContext *gb, int max)
142 {
143 int i, bits, value = 0;
144
145 if (!max)
146 return 0;
147
148 bits = bits_to_store(max);
149
150 for (i = 0; i < bits-1; i++)
151 if (get_bits1(gb))
152 value += 1 << i;
153
154 if ( (value | (1<<(bits-1))) <= max)
155 if (get_bits1(gb))
156 value += 1 << (bits-1);
157
158 return value;
159 }
160
161 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
162 {
163 int i, j, x = 0, low_bits = 0, max = 0;
164 int step = 256, pos = 0, dominant = 0, any = 0;
165 int *copy, *bits;
166
167 copy = av_mallocz(4* entries);
168 if (!copy)
169 return -1;
170
171 if (base_2_part)
172 {
173 int energy = 0;
174
175 for (i = 0; i < entries; i++)
176 energy += abs(buf[i]);
177
178 low_bits = bits_to_store(energy / (entries * 2));
179 if (low_bits > 15)
180 low_bits = 15;
181
182 put_bits(pb, 4, low_bits);
183 }
184
185 for (i = 0; i < entries; i++)
186 {
187 put_bits(pb, low_bits, abs(buf[i]));
188 copy[i] = abs(buf[i]) >> low_bits;
189 if (copy[i] > max)
190 max = abs(copy[i]);
191 }
192
193 bits = av_mallocz(4* entries*max);
194 if (!bits)
195 {
196 // av_free(copy);
197 return -1;
198 }
199
200 for (i = 0; i <= max; i++)
201 {
202 for (j = 0; j < entries; j++)
203 if (copy[j] >= i)
204 bits[x++] = copy[j] > i;
205 }
206
207 // store bitstream
208 while (pos < x)
209 {
210 int steplet = step >> 8;
211
212 if (pos + steplet > x)
213 steplet = x - pos;
214
215 for (i = 0; i < steplet; i++)
216 if (bits[i+pos] != dominant)
217 any = 1;
218
219 put_bits(pb, 1, any);
220
221 if (!any)
222 {
223 pos += steplet;
224 step += step / ADAPT_LEVEL;
225 }
226 else
227 {
228 int interloper = 0;
229
230 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
231 interloper++;
232
233 // note change
234 write_uint_max(pb, interloper, (step >> 8) - 1);
235
236 pos += interloper + 1;
237 step -= step / ADAPT_LEVEL;
238 }
239
240 if (step < 256)
241 {
242 step = 65536 / step;
243 dominant = !dominant;
244 }
245 }
246
247 // store signs
248 for (i = 0; i < entries; i++)
249 if (buf[i])
250 put_bits(pb, 1, buf[i] < 0);
251
252 // av_free(bits);
253 // av_free(copy);
254
255 return 0;
256 }
257
258 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
259 {
260 int i, low_bits = 0, x = 0;
261 int n_zeros = 0, step = 256, dominant = 0;
262 int pos = 0, level = 0;
263 int *bits = av_mallocz(4* entries);
264
265 if (!bits)
266 return -1;
267
268 if (base_2_part)
269 {
270 low_bits = get_bits(gb, 4);
271
272 if (low_bits)
273 for (i = 0; i < entries; i++)
274 buf[i] = get_bits(gb, low_bits);
275 }
276
277 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
278
279 while (n_zeros < entries)
280 {
281 int steplet = step >> 8;
282
283 if (!get_bits1(gb))
284 {
285 for (i = 0; i < steplet; i++)
286 bits[x++] = dominant;
287
288 if (!dominant)
289 n_zeros += steplet;
290
291 step += step / ADAPT_LEVEL;
292 }
293 else
294 {
295 int actual_run = read_uint_max(gb, steplet-1);
296
297 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
298
299 for (i = 0; i < actual_run; i++)
300 bits[x++] = dominant;
301
302 bits[x++] = !dominant;
303
304 if (!dominant)
305 n_zeros += actual_run;
306 else
307 n_zeros++;
308
309 step -= step / ADAPT_LEVEL;
310 }
311
312 if (step < 256)
313 {
314 step = 65536 / step;
315 dominant = !dominant;
316 }
317 }
318
319 // reconstruct unsigned values
320 n_zeros = 0;
321 for (i = 0; n_zeros < entries; i++)
322 {
323 while(1)
324 {
325 if (pos >= entries)
326 {
327 pos = 0;
328 level += 1 << low_bits;
329 }
330
331 if (buf[pos] >= level)
332 break;
333
334 pos++;
335 }
336
337 if (bits[i])
338 buf[pos] += 1 << low_bits;
339 else
340 n_zeros++;
341
342 pos++;
343 }
344 // av_free(bits);
345
346 // read signs
347 for (i = 0; i < entries; i++)
348 if (buf[i] && get_bits1(gb))
349 buf[i] = -buf[i];
350
351 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
352
353 return 0;
354 }
355 #endif
356
357 static void predictor_init_state(int *k, int *state, int order)
358 {
359 int i;
360
361 for (i = order-2; i >= 0; i--)
362 {
363 int j, p, x = state[i];
364
365 for (j = 0, p = i+1; p < order; j++,p++)
366 {
367 int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
368 state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
369 x = tmp;
370 }
371 }
372 }
373
374 static int predictor_calc_error(int *k, int *state, int order, int error)
375 {
376 int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
377
378 #if 1
379 int *k_ptr = &(k[order-2]),
380 *state_ptr = &(state[order-2]);
381 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
382 {
383 int k_value = *k_ptr, state_value = *state_ptr;
384 x -= shift_down(k_value * state_value, LATTICE_SHIFT);
385 state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
386 }
387 #else
388 for (i = order-2; i >= 0; i--)
389 {
390 x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
391 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
392 }
393 #endif
394
395 // don't drift too far, to avoid overflows
396 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
397 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
398
399 state[0] = x;
400
401 return x;
402 }
403
404 // Heavily modified Levinson-Durbin algorithm which
405 // copes better with quantization, and calculates the
406 // actual whitened result as it goes.
407
408 static void modified_levinson_durbin(int *window, int window_entries,
409 int *out, int out_entries, int channels, int *tap_quant)
410 {
411 int i;
412 int *state = av_mallocz(4* window_entries);
413
414 memcpy(state, window, 4* window_entries);
415
416 for (i = 0; i < out_entries; i++)
417 {
418 int step = (i+1)*channels, k, j;
419 double xx = 0.0, xy = 0.0;
420 #if 1
421 int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
422 j = window_entries - step;
423 for (;j>=0;j--,x_ptr++,state_ptr++)
424 {
425 double x_value = *x_ptr, state_value = *state_ptr;
426 xx += state_value*state_value;
427 xy += x_value*state_value;
428 }
429 #else
430 for (j = 0; j <= (window_entries - step); j++);
431 {
432 double stepval = window[step+j], stateval = window[j];
433 // xx += (double)window[j]*(double)window[j];
434 // xy += (double)window[step+j]*(double)window[j];
435 xx += stateval*stateval;
436 xy += stepval*stateval;
437 }
438 #endif
439 if (xx == 0.0)
440 k = 0;
441 else
442 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
443
444 if (k > (LATTICE_FACTOR/tap_quant[i]))
445 k = LATTICE_FACTOR/tap_quant[i];
446 if (-k > (LATTICE_FACTOR/tap_quant[i]))
447 k = -(LATTICE_FACTOR/tap_quant[i]);
448
449 out[i] = k;
450 k *= tap_quant[i];
451
452 #if 1
453 x_ptr = &(window[step]);
454 state_ptr = &(state[0]);
455 j = window_entries - step;
456 for (;j>=0;j--,x_ptr++,state_ptr++)
457 {
458 int x_value = *x_ptr, state_value = *state_ptr;
459 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
460 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
461 }
462 #else
463 for (j=0; j <= (window_entries - step); j++)
464 {
465 int stepval = window[step+j], stateval=state[j];
466 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
467 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
468 }
469 #endif
470 }
471
472 av_free(state);
473 }
474
/* Sample rates in Hz, indexed by the sample-rate code of version 1 headers. */
static int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
477
478 #ifdef CONFIG_ENCODERS
479
/**
 * Map a sample rate in Hz to its header code.
 *
 * @param samplerate sample rate to look up
 * @return code in 0..8, or -1 if the rate has no code
 */
static inline int code_samplerate(int samplerate)
{
    static const int known_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    const int count = (int)(sizeof(known_rates) / sizeof(known_rates[0]));
    int code;

    for (code = 0; code < count; code++)
    {
        if (known_rates[code] == samplerate)
            return code;
    }

    return -1;
}
496
497 static int sonic_encode_init(AVCodecContext *avctx)
498 {
499 SonicContext *s = avctx->priv_data;
500 PutBitContext pb;
501 int i, version = 0;
502
503 if (avctx->channels > MAX_CHANNELS)
504 return -1; /* only stereo or mono for now */
505
506 if (avctx->channels == 2)
507 s->mid_side = 1;
508 if (avctx->codec->id == CODEC_ID_SONIC_LS)
509 {
510 s->lossless = 1;
511 s->num_taps = 32;
512 s->downsampling = 1;
513 s->quantization = 0.0;
514 }
515 else
516 {
517 s->num_taps = 128;
518 s->downsampling = 2;
519 s->quantization = 1.0;
520 }
521
522 // max tap 2048
523 if ((s->num_taps < 32) || (s->num_taps > 1024) ||
524 ((s->num_taps>>5)<<5 != s->num_taps))
525 {
526 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
527 return -1;
528 }
529
530 // generate taps
531 s->tap_quant = av_mallocz(4* s->num_taps);
532 for (i = 0; i < s->num_taps; i++)
533 s->tap_quant[i] = (int)(sqrt(i+1));
534
535 s->channels = avctx->channels;
536 s->samplerate = avctx->sample_rate;
537
538 s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
539 s->frame_size = s->channels*s->block_align*s->downsampling;
540
541 s->tail = av_mallocz(4* s->num_taps*s->channels);
542 if (!s->tail)
543 return -1;
544 s->tail_size = s->num_taps*s->channels;
545
546 s->predictor_k = av_mallocz(4 * s->num_taps);
547 if (!s->predictor_k)
548 return -1;
549
550 for (i = 0; i < s->channels; i++)
551 {
552 s->coded_samples[i] = av_mallocz(4* s->block_align);
553 if (!s->coded_samples[i])
554 return -1;
555 }
556
557 s->int_samples = av_mallocz(4* s->frame_size);
558
559 s->window_size = ((2*s->tail_size)+s->frame_size);
560 s->window = av_mallocz(4* s->window_size);
561 if (!s->window)
562 return -1;
563
564 avctx->extradata = av_mallocz(16);
565 if (!avctx->extradata)
566 return -1;
567 init_put_bits(&pb, avctx->extradata, 16*8);
568
569 put_bits(&pb, 2, version); // version
570 if (version == 1)
571 {
572 put_bits(&pb, 2, s->channels);
573 put_bits(&pb, 4, code_samplerate(s->samplerate));
574 }
575 put_bits(&pb, 1, s->lossless);
576 if (!s->lossless)
577 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
578 put_bits(&pb, 1, s->mid_side);
579 put_bits(&pb, 2, s->downsampling);
580 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
581 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
582
583 flush_put_bits(&pb);
584 avctx->extradata_size = put_bits_count(&pb)/8;
585
586 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d ms: %d taps: %d block: %d frame: %d downsamp: %d\n",
587 version, s->lossless, s->mid_side, s->num_taps, s->block_align, s->frame_size, s->downsampling);
588
589 avctx->coded_frame = avcodec_alloc_frame();
590 if (!avctx->coded_frame)
591 return -ENOMEM;
592 avctx->coded_frame->key_frame = 1;
593 avctx->frame_size = s->block_align*s->downsampling;
594
595 return 0;
596 }
597
598 static int sonic_encode_close(AVCodecContext *avctx)
599 {
600 SonicContext *s = avctx->priv_data;
601 int i;
602
603 av_freep(&avctx->coded_frame);
604
605 for (i = 0; i < s->channels; i++)
606 av_free(s->coded_samples[i]);
607
608 av_free(s->predictor_k);
609 av_free(s->tail);
610 av_free(s->tap_quant);
611 av_free(s->window);
612 av_free(s->int_samples);
613
614 return 0;
615 }
616
617 static int sonic_encode_frame(AVCodecContext *avctx,
618 uint8_t *buf, int buf_size, void *data)
619 {
620 SonicContext *s = avctx->priv_data;
621 PutBitContext pb;
622 int i, j, ch, quant = 0, x = 0;
623 short *samples = data;
624
625 init_put_bits(&pb, buf, buf_size*8);
626
627 // short -> internal
628 for (i = 0; i < s->frame_size; i++)
629 {
630 if (samples[i] < 0)
631 s->int_samples[i] = samples[i]+32768;
632 else
633 s->int_samples[i] = samples[i]-32768;
634 }
635
636 if (!s->lossless)
637 for (i = 0; i < s->frame_size; i++)
638 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
639
640 if (s->mid_side)
641 for (i = 0; i < s->frame_size; i += s->channels)
642 {
643 s->int_samples[i] += s->int_samples[i+1];
644 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
645 }
646
647 memset(s->window, 0, 4* s->window_size);
648
649 for (i = 0; i < s->tail_size; i++)
650 s->window[x++] = s->tail[i];
651
652 for (i = 0; i < s->frame_size; i++)
653 s->window[x++] = s->int_samples[i];
654
655 for (i = 0; i < s->tail_size; i++)
656 s->window[x++] = 0;
657
658 for (i = 0; i < s->tail_size; i++)
659 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
660
661 // generate taps
662 modified_levinson_durbin(s->window, s->window_size,
663 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
664 if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
665 return -1;
666
667 for (ch = 0; ch < s->channels; ch++)
668 {
669 x = s->tail_size+ch;
670 for (i = 0; i < s->block_align; i++)
671 {
672 int sum = 0;
673 for (j = 0; j < s->downsampling; j++, x += s->channels)
674 sum += s->window[x];
675 s->coded_samples[ch][i] = sum;
676 }
677 }
678
679 // simple rate control code
680 if (!s->lossless)
681 {
682 double energy1 = 0.0, energy2 = 0.0;
683 for (ch = 0; ch < s->channels; ch++)
684 {
685 for (i = 0; i < s->block_align; i++)
686 {
687 double sample = s->coded_samples[ch][i];
688 energy2 += sample*sample;
689 energy1 += fabs(sample);
690 }
691 }
692
693 energy2 = sqrt(energy2/(s->channels*s->block_align));
694 energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
695
696 // increase bitrate when samples are like a gaussian distribution
697 // reduce bitrate when samples are like a two-tailed exponential distribution
698
699 if (energy2 > energy1)
700 energy2 += (energy2-energy1)*RATE_VARIATION;
701
702 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
703 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
704
705 if (quant < 1)
706 quant = 1;
707 if (quant > 65535)
708 quant = 65535;
709
710 set_ue_golomb(&pb, quant);
711
712 quant *= SAMPLE_FACTOR;
713 }
714
715 // write out coded samples
716 for (ch = 0; ch < s->channels; ch++)
717 {
718 if (!s->lossless)
719 for (i = 0; i < s->block_align; i++)
720 s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
721
722 if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
723 return -1;
724 }
725
726 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
727
728 flush_put_bits(&pb);
729 return (put_bits_count(&pb)+7)/8;
730 }
731 #endif //CONFIG_ENCODERS
732
733 static int sonic_decode_init(AVCodecContext *avctx)
734 {
735 SonicContext *s = avctx->priv_data;
736 GetBitContext gb;
737 int i, version;
738
739 s->channels = avctx->channels;
740 s->samplerate = avctx->sample_rate;
741
742 if (!avctx->extradata)
743 {
744 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
745 return -1;
746 }
747
748 init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
749
750 version = get_bits(&gb, 2);
751 if (version > 1)
752 {
753 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
754 return -1;
755 }
756
757 if (version == 1)
758 {
759 s->channels = get_bits(&gb, 2);
760 s->samplerate = samplerate_table[get_bits(&gb, 4)];
761 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
762 s->channels, s->samplerate);
763 }
764
765 if (s->channels > MAX_CHANNELS)
766 {
767 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
768 return -1;
769 }
770
771 s->lossless = get_bits1(&gb);
772 if (!s->lossless)
773 skip_bits(&gb, 3); // XXX FIXME
774 s->mid_side = get_bits1(&gb);
775
776 s->downsampling = get_bits(&gb, 2);
777 s->num_taps = (get_bits(&gb, 5)+1)<<5;
778 if (get_bits1(&gb)) // XXX FIXME
779 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
780
781 s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling;
782 s->frame_size = s->channels*s->block_align*s->downsampling;
783 // avctx->frame_size = s->block_align;
784
785 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d ms: %d taps: %d block: %d frame: %d downsamp: %d\n",
786 version, s->lossless, s->mid_side, s->num_taps, s->block_align, s->frame_size, s->downsampling);
787
788 // generate taps
789 s->tap_quant = av_mallocz(4* s->num_taps);
790 for (i = 0; i < s->num_taps; i++)
791 s->tap_quant[i] = (int)(sqrt(i+1));
792
793 s->predictor_k = av_mallocz(4* s->num_taps);
794
795 for (i = 0; i < s->channels; i++)
796 {
797 s->predictor_state[i] = av_mallocz(4* s->num_taps);
798 if (!s->predictor_state[i])
799 return -1;
800 }
801
802 for (i = 0; i < s->channels; i++)
803 {
804 s->coded_samples[i] = av_mallocz(4* s->block_align);
805 if (!s->coded_samples[i])
806 return -1;
807 }
808 s->int_samples = av_mallocz(4* s->frame_size);
809
810 return 0;
811 }
812
813 static int sonic_decode_close(AVCodecContext *avctx)
814 {
815 SonicContext *s = avctx->priv_data;
816 int i;
817
818 av_free(s->int_samples);
819 av_free(s->tap_quant);
820 av_free(s->predictor_k);
821
822 for (i = 0; i < s->channels; i++)
823 {
824 av_free(s->predictor_state[i]);
825 av_free(s->coded_samples[i]);
826 }
827
828 return 0;
829 }
830
831 static int sonic_decode_frame(AVCodecContext *avctx,
832 int16_t *data, int *data_size,
833 uint8_t *buf, int buf_size)
834 {
835 SonicContext *s = avctx->priv_data;
836 GetBitContext gb;
837 int i, quant, ch, j;
838 short *samples = data;
839
840 if (buf_size == 0) return 0;
841
842 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
843
844 init_get_bits(&gb, buf, buf_size*8);
845
846 intlist_read(&gb, s->predictor_k, s->num_taps, 0);
847
848 // dequantize
849 for (i = 0; i < s->num_taps; i++)
850 s->predictor_k[i] *= s->tap_quant[i];
851
852 if (s->lossless)
853 quant = 1;
854 else
855 quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
856
857 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
858
859 for (ch = 0; ch < s->channels; ch++)
860 {
861 int x = ch;
862
863 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
864
865 intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
866
867 for (i = 0; i < s->block_align; i++)
868 {
869 for (j = 0; j < s->downsampling - 1; j++)
870 {
871 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
872 x += s->channels;
873 }
874
875 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
876 x += s->channels;
877 }
878
879 for (i = 0; i < s->num_taps; i++)
880 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
881 }
882
883 if (s->mid_side)
884 for (i = 0; i < s->frame_size; i += s->channels)
885 {
886 s->int_samples[i+1] += shift(s->int_samples[i], 1);
887 s->int_samples[i] -= s->int_samples[i+1];
888 }
889
890 if (!s->lossless)
891 for (i = 0; i < s->frame_size; i++)
892 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
893
894 // internal -> short
895 for (i = 0; i < s->frame_size; i++)
896 {
897 if (s->int_samples[i] > 32767)
898 samples[i] = 32767;
899 else if (s->int_samples[i] < -32768)
900 samples[i] = -32768;
901 else
902 samples[i] = s->int_samples[i];
903 }
904
905 align_get_bits(&gb);
906
907 // if (buf_size != (get_bits_count(&gb)+7)/8)
908 // av_log(NULL, AV_LOG_INFO, "buf_size (%d) and used bytes (%d) differs\n", buf_size, (get_bits_count(&gb)+7)/8);
909
910 *data_size = s->frame_size * 2;
911
912 return (get_bits_count(&gb)+7)/8;
913 }
914
915 #ifdef CONFIG_ENCODERS
/*
 * Codec descriptor for the lossy Sonic encoder (CODEC_ID_SONIC).
 * The initialiser is positional. NOTE(review): the fields presumably are
 * name, media type, codec id, private context size, init, encode, close
 * and decode, in that order - confirm against AVCodec in avcodec.h.
 */
916 AVCodec sonic_encoder = {
917 "sonic",
918 CODEC_TYPE_AUDIO,
919 CODEC_ID_SONIC,
920 sizeof(SonicContext),
921 sonic_encode_init,
922 sonic_encode_frame,
923 sonic_encode_close,
924 NULL, /* no decode callback on the encoder (presumably the decode slot) */
925 };
926
/*
 * Codec descriptor for the lossless Sonic encoder (CODEC_ID_SONIC_LS).
 * It shares the callbacks of the lossy encoder; sonic_encode_init()
 * switches to lossless parameters when it sees this codec id.
 * NOTE(review): positional initialiser - confirm the field order against
 * AVCodec in avcodec.h.
 */
927 AVCodec sonic_ls_encoder = {
928 "sonicls",
929 CODEC_TYPE_AUDIO,
930 CODEC_ID_SONIC_LS,
931 sizeof(SonicContext),
932 sonic_encode_init,
933 sonic_encode_frame,
934 sonic_encode_close,
935 NULL, /* no decode callback on the encoder (presumably the decode slot) */
936 };
937 #endif
938
939 #ifdef CONFIG_DECODERS
/*
 * Codec descriptor for the Sonic decoder (CODEC_ID_SONIC).
 * NOTE(review): positional initialiser - the NULL presumably fills the
 * encode slot and sonic_decode_frame the decode slot; confirm the field
 * order against AVCodec in avcodec.h.
 */
940 AVCodec sonic_decoder = {
941 "sonic",
942 CODEC_TYPE_AUDIO,
943 CODEC_ID_SONIC,
944 sizeof(SonicContext),
945 sonic_decode_init,
946 NULL, /* no encode callback on the decoder (presumably the encode slot) */
947 sonic_decode_close,
948 sonic_decode_frame,
949 };
950 #endif