96a2bd6535d912ec3389e7762b230d40b23525e5
[libav.git] / libavcodec / g722.c
1 /*
2 * G.722 ADPCM audio encoder/decoder
3 *
4 * Copyright (c) CMU 1993 Computer Science, Speech Group
5 * Chengxiang Lu and Alex Hauptmann
6 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
7 * Copyright (c) 2009 Kenan Gillet
8 * Copyright (c) 2010 Martin Storsjo
9 *
10 * This file is part of FFmpeg.
11 *
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27 /**
28 * @file
29 *
30 * G.722 ADPCM audio codec
31 *
32 * This G.722 decoder is a bit-exact implementation of the ITU G.722
33 * specification for all three specified bitrates - 64000bps, 56000bps
34 * and 48000bps. It passes the ITU tests.
35 *
36 * @note For the 56000bps and 48000bps bitrates, the lowest 1 or 2 bits
37 * respectively of each byte are ignored.
38 */
39
40 #include "avcodec.h"
41 #include "mathops.h"
42 #include "get_bits.h"
43
/** Number of int16_t slots in G722Context.prev_samples. */
#define PREV_SAMPLES_BUF_SIZE 1024

/**
 * Number of codewords the trellis encoder processes before it commits the
 * best path found so far to the output and restarts path numbering.
 */
#define FREEZE_INTERVAL 128

/**
 * Adaptive predictor and quantizer state of a single sub-band.
 */
struct G722Band {
    int16_t s_predictor;         ///< predictor output value
    int32_t s_zero;              ///< previous output signal from zero predictor
    int8_t  part_reconst_mem[2]; ///< signs of previous partially reconstructed signals
    int16_t prev_qtzd_reconst;   ///< previous quantized reconstructed signal (internal value, using low_inv_quant4)
    int16_t pole_mem[2];         ///< second-order pole section coefficient buffer
    int32_t diff_mem[6];         ///< quantizer difference signal memory
    int16_t zero_mem[6];         ///< zero section coefficient buffer (six taps)
    int16_t log_factor;          ///< delayed 2-logarithmic quantizer factor
    int16_t scale_factor;        ///< delayed quantizer scale factor
};

/**
 * One candidate state of the trellis search.
 */
struct TrellisNode {
    struct G722Band state;       ///< band state reached by following this candidate
    uint32_t ssd;                ///< accumulated sum of squared differences
    int path;                    ///< index of the matching entry in G722Context.paths
};

/**
 * One step of a candidate path through the trellis.
 */
struct TrellisPath {
    int value;                   ///< codeword chosen at this step
    int prev;                    ///< index of the preceding path entry
};

typedef struct {
    int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]; ///< memory of past decoded samples
    int     prev_samples_pos;                    ///< the number of values in prev_samples

    /**
     * The band[0] and band[1] correspond respectively to the lower band and higher band.
     */
    struct G722Band band[2];

    struct TrellisNode  *node_buf[2];            ///< per-band trellis node storage
    struct TrellisNode **nodep_buf[2];           ///< per-band arrays of pointers into node_buf

    struct TrellisPath  *paths[2];               ///< per-band trellis path storage
} G722Context;
78
79
/** Maps a boolean (0 or 1) to the sign -1 or +1. */
static const int8_t sign_lookup[2] = { -1, 1 };

/**
 * Mantissa table used by linear_scale_factor();
 * entry i is approximately 2048 * 2^(i/32).
 */
static const int16_t inv_log2_table[32] = {
    2048, 2093, 2139, 2186,
    2233, 2282, 2332, 2383,
    2435, 2489, 2543, 2599,
    2656, 2714, 2774, 2834,
    2896, 2960, 3025, 3091,
    3158, 3228, 3298, 3371,
    3444, 3520, 3597, 3676,
    3756, 3838, 3922, 4008
};

/** Higher band log factor adaptation step, indexed by ihigh & 1. */
static const int16_t high_log_factor_step[2] = { 798, -214 };

/** Higher band inverse quantizer output, indexed by the 2-bit codeword. */
static const int16_t high_inv_quant[4] = { -926, -202, 926, 202 };

/**
 * low_log_factor_step[index] == wl[rl42[index]]
 */
static const int16_t low_log_factor_step[16] = {
     -60, 3042, 1198,  538,
     334,  172,   58,  -30,
    3042, 1198,  538,  334,
     172,   58,  -30,  -60
};

/** Lower band inverse quantizer output for 4-bit indices. */
static const int16_t low_inv_quant4[16] = {
       0, -2557, -1612, -1121,
    -786,  -530,  -323,  -150,
    2557,  1612,  1121,   786,
     530,   323,   150,     0
};

/** Lower band inverse quantizer output for 6-bit indices. */
static const int16_t low_inv_quant6[64] = {
      -17,   -17,   -17,   -17,
    -3101, -2738, -2376, -2088,
    -1873, -1689, -1535, -1399,
    -1279, -1170, -1072,  -982,
     -899,  -822,  -750,  -682,
     -618,  -558,  -501,  -447,
     -396,  -347,  -300,  -254,
     -211,  -170,  -130,   -91,
     3101,  2738,  2376,  2088,
     1873,  1689,  1535,  1399,
     1279,  1170,  1072,   982,
      899,   822,   750,   682,
      618,   558,   501,   447,
      396,   347,   300,   254,
      211,   170,   130,    91,
       54,    17,   -54,   -17
};
111
/**
 * quadrature mirror filter (QMF) coefficients
 *
 * ITU-T G.722 Table 11
 *
 * apply_qmf() reads this table forwards for one output and backwards
 * for the other.
 */
static const int16_t qmf_coeffs[12] = {
       3,  -11,   12,   32,
    -210,  951, 3876, -805,
     362, -156,   53,  -11,
};
120
121
122 /**
123 * adaptive predictor
124 *
125 * @param cur_diff the dequantized and scaled delta calculated from the
126 * current codeword
127 */
128 static void do_adaptive_prediction(struct G722Band *band, const int cur_diff)
129 {
130 int sg[2], limit, i, cur_qtzd_reconst;
131
132 const int cur_part_reconst = band->s_zero + cur_diff < 0;
133
134 sg[0] = sign_lookup[cur_part_reconst != band->part_reconst_mem[0]];
135 sg[1] = sign_lookup[cur_part_reconst == band->part_reconst_mem[1]];
136 band->part_reconst_mem[1] = band->part_reconst_mem[0];
137 band->part_reconst_mem[0] = cur_part_reconst;
138
139 band->pole_mem[1] = av_clip((sg[0] * av_clip(band->pole_mem[0], -8191, 8191) >> 5) +
140 (sg[1] << 7) + (band->pole_mem[1] * 127 >> 7), -12288, 12288);
141
142 limit = 15360 - band->pole_mem[1];
143 band->pole_mem[0] = av_clip(-192 * sg[0] + (band->pole_mem[0] * 255 >> 8), -limit, limit);
144
145
146 if (cur_diff) {
147 for (i = 0; i < 6; i++)
148 band->zero_mem[i] = ((band->zero_mem[i]*255) >> 8) +
149 ((band->diff_mem[i]^cur_diff) < 0 ? -128 : 128);
150 } else
151 for (i = 0; i < 6; i++)
152 band->zero_mem[i] = (band->zero_mem[i]*255) >> 8;
153
154 for (i = 5; i > 0; i--)
155 band->diff_mem[i] = band->diff_mem[i-1];
156 band->diff_mem[0] = av_clip_int16(cur_diff << 1);
157
158 band->s_zero = 0;
159 for (i = 5; i >= 0; i--)
160 band->s_zero += (band->zero_mem[i]*band->diff_mem[i]) >> 15;
161
162
163 cur_qtzd_reconst = av_clip_int16((band->s_predictor + cur_diff) << 1);
164 band->s_predictor = av_clip_int16(band->s_zero +
165 (band->pole_mem[0] * cur_qtzd_reconst >> 15) +
166 (band->pole_mem[1] * band->prev_qtzd_reconst >> 15));
167 band->prev_qtzd_reconst = cur_qtzd_reconst;
168 }
169
170 static int inline linear_scale_factor(const int log_factor)
171 {
172 const int wd1 = inv_log2_table[(log_factor >> 6) & 31];
173 const int shift = log_factor >> 11;
174 return shift < 0 ? wd1 >> -shift : wd1 << shift;
175 }
176
177 static void update_low_predictor(struct G722Band *band, const int ilow)
178 {
179 do_adaptive_prediction(band,
180 band->scale_factor * low_inv_quant4[ilow] >> 10);
181
182 // quantizer adaptation
183 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
184 low_log_factor_step[ilow], 0, 18432);
185 band->scale_factor = linear_scale_factor(band->log_factor - (8 << 11));
186 }
187
188 static void update_high_predictor(struct G722Band *band, const int dhigh,
189 const int ihigh)
190 {
191 do_adaptive_prediction(band, dhigh);
192
193 // quantizer adaptation
194 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
195 high_log_factor_step[ihigh&1], 0, 22528);
196 band->scale_factor = linear_scale_factor(band->log_factor - (10 << 11));
197 }
198
199 static void apply_qmf(const int16_t *prev_samples, int *xout1, int *xout2)
200 {
201 int i;
202
203 *xout1 = 0;
204 *xout2 = 0;
205 for (i = 0; i < 12; i++) {
206 MAC16(*xout2, prev_samples[2*i ], qmf_coeffs[i ]);
207 MAC16(*xout1, prev_samples[2*i+1], qmf_coeffs[11-i]);
208 }
209 }
210
211 static av_cold int g722_init(AVCodecContext * avctx)
212 {
213 G722Context *c = avctx->priv_data;
214
215 if (avctx->channels != 1) {
216 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
217 return AVERROR_INVALIDDATA;
218 }
219 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
220
221 switch (avctx->bits_per_coded_sample) {
222 case 8:
223 case 7:
224 case 6:
225 break;
226 default:
227 av_log(avctx, AV_LOG_WARNING, "Unsupported bits_per_coded_sample [%d], "
228 "assuming 8\n",
229 avctx->bits_per_coded_sample);
230 case 0:
231 avctx->bits_per_coded_sample = 8;
232 break;
233 }
234
235 c->band[0].scale_factor = 8;
236 c->band[1].scale_factor = 2;
237 c->prev_samples_pos = 22;
238
239 if (avctx->lowres)
240 avctx->sample_rate /= 2;
241
242 if (avctx->trellis) {
243 int frontier = 1 << avctx->trellis;
244 int max_paths = frontier * FREEZE_INTERVAL;
245 int i;
246 for (i = 0; i < 2; i++) {
247 c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
248 c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
249 c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
250 }
251 }
252
253 return 0;
254 }
255
256 static av_cold int g722_close(AVCodecContext *avctx)
257 {
258 G722Context *c = avctx->priv_data;
259 int i;
260 for (i = 0; i < 2; i++) {
261 av_freep(&c->paths[i]);
262 av_freep(&c->node_buf[i]);
263 av_freep(&c->nodep_buf[i]);
264 }
265 return 0;
266 }
267
268 #if CONFIG_ADPCM_G722_DECODER
269 static const int16_t low_inv_quant5[32] = {
270 -35, -35, -2919, -2195, -1765, -1458, -1219, -1023,
271 -858, -714, -587, -473, -370, -276, -190, -110,
272 2919, 2195, 1765, 1458, 1219, 1023, 858, 714,
273 587, 473, 370, 276, 190, 110, 35, -35
274 };
275
276 static const int16_t *low_inv_quants[3] = { low_inv_quant6, low_inv_quant5,
277 low_inv_quant4 };
278
279 static int g722_decode_frame(AVCodecContext *avctx, void *data,
280 int *data_size, AVPacket *avpkt)
281 {
282 G722Context *c = avctx->priv_data;
283 int16_t *out_buf = data;
284 int j, out_len = 0;
285 const int skip = 8 - avctx->bits_per_coded_sample;
286 const int16_t *quantizer_table = low_inv_quants[skip];
287 GetBitContext gb;
288
289 init_get_bits(&gb, avpkt->data, avpkt->size * 8);
290
291 for (j = 0; j < avpkt->size; j++) {
292 int ilow, ihigh, rlow;
293
294 ihigh = get_bits(&gb, 2);
295 ilow = get_bits(&gb, 6 - skip);
296 skip_bits(&gb, skip);
297
298 rlow = av_clip((c->band[0].scale_factor * quantizer_table[ilow] >> 10)
299 + c->band[0].s_predictor, -16384, 16383);
300
301 update_low_predictor(&c->band[0], ilow >> (2 - skip));
302
303 if (!avctx->lowres) {
304 const int dhigh = c->band[1].scale_factor *
305 high_inv_quant[ihigh] >> 10;
306 const int rhigh = av_clip(dhigh + c->band[1].s_predictor,
307 -16384, 16383);
308 int xout1, xout2;
309
310 update_high_predictor(&c->band[1], dhigh, ihigh);
311
312 c->prev_samples[c->prev_samples_pos++] = rlow + rhigh;
313 c->prev_samples[c->prev_samples_pos++] = rlow - rhigh;
314 apply_qmf(c->prev_samples + c->prev_samples_pos - 24,
315 &xout1, &xout2);
316 out_buf[out_len++] = av_clip_int16(xout1 >> 12);
317 out_buf[out_len++] = av_clip_int16(xout2 >> 12);
318 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
319 memmove(c->prev_samples,
320 c->prev_samples + c->prev_samples_pos - 22,
321 22 * sizeof(c->prev_samples[0]));
322 c->prev_samples_pos = 22;
323 }
324 } else
325 out_buf[out_len++] = rlow;
326 }
327 *data_size = out_len << 1;
328 return avpkt->size;
329 }
330
331 AVCodec adpcm_g722_decoder = {
332 .name = "g722",
333 .type = AVMEDIA_TYPE_AUDIO,
334 .id = CODEC_ID_ADPCM_G722,
335 .priv_data_size = sizeof(G722Context),
336 .init = g722_init,
337 .decode = g722_decode_frame,
338 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
339 .max_lowres = 1,
340 };
341 #endif
342
343 #if CONFIG_ADPCM_G722_ENCODER
/**
 * Lower band quantizer decision levels searched by encode_low().
 *
 * Only the first 29 entries are initialized explicitly and encode_low()
 * never reads beyond index 28; the remaining entries are zero.
 */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
350
351 static inline void filter_samples(G722Context *c, const int16_t *samples,
352 int *xlow, int *xhigh)
353 {
354 int xout1, xout2;
355 c->prev_samples[c->prev_samples_pos++] = samples[0];
356 c->prev_samples[c->prev_samples_pos++] = samples[1];
357 apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
358 *xlow = xout1 + xout2 >> 13;
359 *xhigh = xout1 - xout2 >> 13;
360 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
361 memmove(c->prev_samples,
362 c->prev_samples + c->prev_samples_pos - 22,
363 22 * sizeof(c->prev_samples[0]));
364 c->prev_samples_pos = 22;
365 }
366 }
367
368 static inline int encode_high(const struct G722Band *state, int xhigh)
369 {
370 int diff = av_clip_int16(xhigh - state->s_predictor);
371 int pred = 141 * state->scale_factor >> 8;
372 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
373 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
374 }
375
376 static inline int encode_low(const struct G722Band* state, int xlow)
377 {
378 int diff = av_clip_int16(xlow - state->s_predictor);
379 /* = diff >= 0 ? diff : -(diff + 1) */
380 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
381 int i = 0;
382 limit = limit + 1 << 10;
383 if (limit > low_quant[8] * state->scale_factor)
384 i = 9;
385 while (i < 29 && limit > low_quant[i] * state->scale_factor)
386 i++;
387 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
388 }
389
390 static int g722_encode_trellis(AVCodecContext *avctx,
391 uint8_t *dst, int buf_size, void *data)
392 {
393 G722Context *c = avctx->priv_data;
394 const int16_t *samples = data;
395 int i, j, k;
396 int frontier = 1 << avctx->trellis;
397 struct TrellisNode **nodes[2];
398 struct TrellisNode **nodes_next[2];
399 int pathn[2] = {0, 0}, froze = -1;
400 struct TrellisPath *p[2];
401
402 for (i = 0; i < 2; i++) {
403 nodes[i] = c->nodep_buf[i];
404 nodes_next[i] = c->nodep_buf[i] + frontier;
405 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
406 nodes[i][0] = c->node_buf[i] + frontier;
407 nodes[i][0]->ssd = 0;
408 nodes[i][0]->path = 0;
409 nodes[i][0]->state = c->band[i];
410 }
411
412 for (i = 0; i < buf_size >> 1; i++) {
413 int xlow, xhigh;
414 struct TrellisNode *next[2];
415 int heap_pos[2] = {0, 0};
416
417 for (j = 0; j < 2; j++) {
418 next[j] = c->node_buf[j] + frontier*(i & 1);
419 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
420 }
421
422 filter_samples(c, &samples[2*i], &xlow, &xhigh);
423
424 for (j = 0; j < frontier && nodes[0][j]; j++) {
425 /* Only k >> 2 affects the future adaptive state, therefore testing
426 * small steps that don't change k >> 2 is useless, the orignal
427 * value from encode_low is better than them. Since we step k
428 * in steps of 4, make sure range is a multiple of 4, so that
429 * we don't miss the original value from encode_low. */
430 int range = j < frontier/2 ? 4 : 0;
431 struct TrellisNode *cur_node = nodes[0][j];
432
433 int ilow = encode_low(&cur_node->state, xlow);
434
435 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
436 int decoded, dec_diff, pos;
437 uint32_t ssd;
438 struct TrellisNode* node;
439
440 if (k < 0)
441 continue;
442
443 decoded = av_clip((cur_node->state.scale_factor *
444 low_inv_quant6[k] >> 10)
445 + cur_node->state.s_predictor, -16384, 16383);
446 dec_diff = xlow - decoded;
447
448 #define STORE_NODE(index, UPDATE, VALUE)\
449 ssd = cur_node->ssd + dec_diff*dec_diff;\
450 /* Check for wraparound. Using 64 bit ssd counters would \
451 * be simpler, but is slower on x86 32 bit. */\
452 if (ssd < cur_node->ssd)\
453 continue;\
454 if (heap_pos[index] < frontier) {\
455 pos = heap_pos[index]++;\
456 assert(pathn[index] < FREEZE_INTERVAL * frontier);\
457 node = nodes_next[index][pos] = next[index]++;\
458 node->path = pathn[index]++;\
459 } else {\
460 /* Try to replace one of the leaf nodes with the new \
461 * one, but not always testing the same leaf position */\
462 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
463 if (ssd >= nodes_next[index][pos]->ssd)\
464 continue;\
465 heap_pos[index]++;\
466 node = nodes_next[index][pos];\
467 }\
468 node->ssd = ssd;\
469 node->state = cur_node->state;\
470 UPDATE;\
471 c->paths[index][node->path].value = VALUE;\
472 c->paths[index][node->path].prev = cur_node->path;\
473 /* Sift the newly inserted node up in the heap to restore \
474 * the heap property */\
475 while (pos > 0) {\
476 int parent = (pos - 1) >> 1;\
477 if (nodes_next[index][parent]->ssd <= ssd)\
478 break;\
479 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
480 nodes_next[index][pos]);\
481 pos = parent;\
482 }
483 STORE_NODE(0, update_low_predictor(&node->state, k >> 2), k);
484 }
485 }
486
487 for (j = 0; j < frontier && nodes[1][j]; j++) {
488 int ihigh;
489 struct TrellisNode *cur_node = nodes[1][j];
490
491 /* We don't try to get any initial guess for ihigh via
492 * encode_high - since there's only 4 possible values, test
493 * them all. Testing all of these gives a much, much larger
494 * gain than testing a larger range around ilow. */
495 for (ihigh = 0; ihigh < 4; ihigh++) {
496 int dhigh, decoded, dec_diff, pos;
497 uint32_t ssd;
498 struct TrellisNode* node;
499
500 dhigh = cur_node->state.scale_factor *
501 high_inv_quant[ihigh] >> 10;
502 decoded = av_clip(dhigh + cur_node->state.s_predictor,
503 -16384, 16383);
504 dec_diff = xhigh - decoded;
505
506 STORE_NODE(1, update_high_predictor(&node->state, dhigh, ihigh), ihigh);
507 }
508 }
509
510 for (j = 0; j < 2; j++) {
511 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
512
513 if (nodes[j][0]->ssd > (1 << 16)) {
514 for (k = 1; k < frontier && nodes[j][k]; k++)
515 nodes[j][k]->ssd -= nodes[j][0]->ssd;
516 nodes[j][0]->ssd = 0;
517 }
518 }
519
520 if (i == froze + FREEZE_INTERVAL) {
521 p[0] = &c->paths[0][nodes[0][0]->path];
522 p[1] = &c->paths[1][nodes[1][0]->path];
523 for (j = i; j > froze; j--) {
524 dst[j] = p[1]->value << 6 | p[0]->value;
525 p[0] = &c->paths[0][p[0]->prev];
526 p[1] = &c->paths[1][p[1]->prev];
527 }
528 froze = i;
529 pathn[0] = pathn[1] = 0;
530 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
531 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
532 }
533 }
534
535 p[0] = &c->paths[0][nodes[0][0]->path];
536 p[1] = &c->paths[1][nodes[1][0]->path];
537 for (j = i; j > froze; j--) {
538 dst[j] = p[1]->value << 6 | p[0]->value;
539 p[0] = &c->paths[0][p[0]->prev];
540 p[1] = &c->paths[1][p[1]->prev];
541 }
542 c->band[0] = nodes[0][0]->state;
543 c->band[1] = nodes[1][0]->state;
544
545 return i;
546 }
547
548 static int g722_encode_frame(AVCodecContext *avctx,
549 uint8_t *dst, int buf_size, void *data)
550 {
551 G722Context *c = avctx->priv_data;
552 const int16_t *samples = data;
553 int i;
554
555 if (avctx->trellis)
556 return g722_encode_trellis(avctx, dst, buf_size, data);
557
558 for (i = 0; i < buf_size >> 1; i++) {
559 int xlow, xhigh, ihigh, ilow;
560 filter_samples(c, &samples[2*i], &xlow, &xhigh);
561 ihigh = encode_high(&c->band[1], xhigh);
562 ilow = encode_low(&c->band[0], xlow);
563 update_high_predictor(&c->band[1], c->band[1].scale_factor *
564 high_inv_quant[ihigh] >> 10, ihigh);
565 update_low_predictor(&c->band[0], ilow >> 2);
566 *dst++ = ihigh << 6 | ilow;
567 }
568 return i;
569 }
570
571 AVCodec adpcm_g722_encoder = {
572 .name = "g722",
573 .type = AVMEDIA_TYPE_AUDIO,
574 .id = CODEC_ID_ADPCM_G722,
575 .priv_data_size = sizeof(G722Context),
576 .init = g722_init,
577 .close = g722_close,
578 .encode = g722_encode_frame,
579 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
580 .sample_fmts = (enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
581 };
582 #endif
583