95ab22fa3ee66bfead5e75aec27d59b2f810572f
[libav.git] / libavcodec / nellymoserenc.c
1 /*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 /**
25 * @file libavcodec/nellymoserenc.c
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38 #include "nellymoser.h"
39 #include "avcodec.h"
40 #include "dsputil.h"
41 #include "fft.h"
42
43 #define BITSTREAM_WRITER_LE
44 #include "put_bits.h"
45
46 #define POW_TABLE_SIZE (1<<11)
47 #define POW_TABLE_OFFSET 3
48 #define OPT_SIZE ((1<<15) + 3000)
49
50 typedef struct NellyMoserEncodeContext {
51 AVCodecContext *avctx;
52 int last_frame;
53 int bufsel;
54 int have_saved;
55 DSPContext dsp;
56 FFTContext mdct_ctx;
57 DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
58 DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
59 DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer
60 float (*opt )[NELLY_BANDS];
61 uint8_t (*path)[NELLY_BANDS];
62 } NellyMoserEncodeContext;
63
64 static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0);
65
66 static const uint8_t sf_lut[96] = {
67 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
68 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
69 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
70 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
71 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
72 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
73 };
74
75 static const uint8_t sf_delta_lut[78] = {
76 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
77 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
78 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
79 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
80 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
81 };
82
83 static const uint8_t quant_lut[230] = {
84 0,
85
86 0, 1, 2,
87
88 0, 1, 2, 3, 4, 5, 6,
89
90 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
91 12, 13, 13, 13, 14,
92
93 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
94 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
95 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
96 30,
97
98 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
99 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
100 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
101 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
102 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
103 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
104 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
105 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
106 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
107 61, 61, 61, 61, 62,
108 };
109
110 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
111 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
112 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
113
114 static void apply_mdct(NellyMoserEncodeContext *s)
115 {
116 memcpy(s->in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
117 s->dsp.vector_fmul(s->in_buff, ff_sine_128, NELLY_BUF_LEN);
118 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
119 NELLY_BUF_LEN);
120 ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
121
122 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
123 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
124 NELLY_BUF_LEN);
125 ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
126 }
127
128 static av_cold int encode_init(AVCodecContext *avctx)
129 {
130 NellyMoserEncodeContext *s = avctx->priv_data;
131 int i;
132
133 if (avctx->channels != 1) {
134 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
135 return -1;
136 }
137
138 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
139 avctx->sample_rate != 11025 &&
140 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
141 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
142 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
143 return -1;
144 }
145
146 avctx->frame_size = NELLY_SAMPLES;
147 s->avctx = avctx;
148 ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
149 dsputil_init(&s->dsp, avctx);
150
151 /* Generate overlap window */
152 ff_sine_window_init(ff_sine_128, 128);
153 for (i = 0; i < POW_TABLE_SIZE; i++)
154 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
155
156 if (s->avctx->trellis) {
157 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
158 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
159 }
160
161 return 0;
162 }
163
164 static av_cold int encode_end(AVCodecContext *avctx)
165 {
166 NellyMoserEncodeContext *s = avctx->priv_data;
167
168 ff_mdct_end(&s->mdct_ctx);
169
170 if (s->avctx->trellis) {
171 av_free(s->opt);
172 av_free(s->path);
173 }
174
175 return 0;
176 }
177
178 #define find_best(val, table, LUT, LUT_add, LUT_size) \
179 best_idx = \
180 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
181 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
182 best_idx++;
183
184 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
185 {
186 int band, best_idx, power_idx = 0;
187 float power_candidate;
188
189 //base exponent
190 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
191 idx_table[0] = best_idx;
192 power_idx = ff_nelly_init_table[best_idx];
193
194 for (band = 1; band < NELLY_BANDS; band++) {
195 power_candidate = cand[band] - power_idx;
196 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
197 idx_table[band] = best_idx;
198 power_idx += ff_nelly_delta_table[best_idx];
199 }
200 }
201
202 static inline float distance(float x, float y, int band)
203 {
204 //return pow(fabs(x-y), 2.0);
205 float tmp = x - y;
206 return tmp * tmp;
207 }
208
209 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
210 {
211 int i, j, band, best_idx;
212 float power_candidate, best_val;
213
214 float (*opt )[NELLY_BANDS] = s->opt ;
215 uint8_t(*path)[NELLY_BANDS] = s->path;
216
217 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
218 opt[0][i] = INFINITY;
219 }
220
221 for (i = 0; i < 64; i++) {
222 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
223 path[0][ff_nelly_init_table[i]] = i;
224 }
225
226 for (band = 1; band < NELLY_BANDS; band++) {
227 int q, c = 0;
228 float tmp;
229 int idx_min, idx_max, idx;
230 power_candidate = cand[band];
231 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
232 idx_min = FFMAX(0, cand[band] - q);
233 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
234 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
235 if ( isinf(opt[band - 1][i]) )
236 continue;
237 for (j = 0; j < 32; j++) {
238 idx = i + ff_nelly_delta_table[j];
239 if (idx > idx_max)
240 break;
241 if (idx >= idx_min) {
242 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
243 if (opt[band][idx] > tmp) {
244 opt[band][idx] = tmp;
245 path[band][idx] = j;
246 c = 1;
247 }
248 }
249 }
250 }
251 }
252 assert(c); //FIXME
253 }
254
255 best_val = INFINITY;
256 best_idx = -1;
257 band = NELLY_BANDS - 1;
258 for (i = 0; i < OPT_SIZE; i++) {
259 if (best_val > opt[band][i]) {
260 best_val = opt[band][i];
261 best_idx = i;
262 }
263 }
264 for (band = NELLY_BANDS - 1; band >= 0; band--) {
265 idx_table[band] = path[band][best_idx];
266 if (band) {
267 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
268 }
269 }
270 }
271
272 /**
273 * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
274 * @param s encoder context
275 * @param output output buffer
276 * @param output_size size of output buffer
277 */
278 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
279 {
280 PutBitContext pb;
281 int i, j, band, block, best_idx, power_idx = 0;
282 float power_val, coeff, coeff_sum;
283 float pows[NELLY_FILL_LEN];
284 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
285 float cand[NELLY_BANDS];
286
287 apply_mdct(s);
288
289 init_put_bits(&pb, output, output_size * 8);
290
291 i = 0;
292 for (band = 0; band < NELLY_BANDS; band++) {
293 coeff_sum = 0;
294 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
295 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
296 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
297 }
298 cand[band] =
299 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
300 }
301
302 if (s->avctx->trellis) {
303 get_exponent_dynamic(s, cand, idx_table);
304 } else {
305 get_exponent_greedy(s, cand, idx_table);
306 }
307
308 i = 0;
309 for (band = 0; band < NELLY_BANDS; band++) {
310 if (band) {
311 power_idx += ff_nelly_delta_table[idx_table[band]];
312 put_bits(&pb, 5, idx_table[band]);
313 } else {
314 power_idx = ff_nelly_init_table[idx_table[0]];
315 put_bits(&pb, 6, idx_table[0]);
316 }
317 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
318 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
319 s->mdct_out[i] *= power_val;
320 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
321 pows[i] = power_idx;
322 }
323 }
324
325 ff_nelly_get_sample_bits(pows, bits);
326
327 for (block = 0; block < 2; block++) {
328 for (i = 0; i < NELLY_FILL_LEN; i++) {
329 if (bits[i] > 0) {
330 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
331 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
332 best_idx =
333 quant_lut[av_clip (
334 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
335 quant_lut_offset[bits[i]],
336 quant_lut_offset[bits[i]+1] - 1
337 )];
338 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
339 best_idx++;
340
341 put_bits(&pb, bits[i], best_idx);
342 }
343 }
344 if (!block)
345 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
346 }
347
348 flush_put_bits(&pb);
349 }
350
351 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
352 {
353 NellyMoserEncodeContext *s = avctx->priv_data;
354 int16_t *samples = data;
355 int i;
356
357 if (s->last_frame)
358 return 0;
359
360 if (data) {
361 for (i = 0; i < avctx->frame_size; i++) {
362 s->buf[s->bufsel][i] = samples[i];
363 }
364 for (; i < NELLY_SAMPLES; i++) {
365 s->buf[s->bufsel][i] = 0;
366 }
367 s->bufsel = 1 - s->bufsel;
368 if (!s->have_saved) {
369 s->have_saved = 1;
370 return 0;
371 }
372 } else {
373 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
374 s->bufsel = 1 - s->bufsel;
375 s->last_frame = 1;
376 }
377
378 if (s->have_saved) {
379 encode_block(s, frame, buf_size);
380 return NELLY_BLOCK_LEN;
381 }
382 return 0;
383 }
384
385 AVCodec nellymoser_encoder = {
386 .name = "nellymoser",
387 .type = CODEC_TYPE_AUDIO,
388 .id = CODEC_ID_NELLYMOSER,
389 .priv_data_size = sizeof(NellyMoserEncodeContext),
390 .init = encode_init,
391 .encode = encode_frame,
392 .close = encode_end,
393 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
394 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
395 };