Move sine windows to a separate file
[libav.git] / libavcodec / nellymoserenc.c
CommitLineData
53440c62
BW
1/*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
2912e87a 7 * This file is part of Libav.
53440c62 8 *
2912e87a 9 * Libav is free software; you can redistribute it and/or
53440c62
BW
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
2912e87a 14 * Libav is distributed in the hope that it will be useful,
53440c62
BW
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
2912e87a 20 * License along with Libav; if not, write to the Free Software
53440c62
BW
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
ba87f080 25 * @file
53440c62
BW
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
b8d62672 31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
53440c62
BW
32 * (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38#include "nellymoser.h"
39#include "avcodec.h"
40#include "dsputil.h"
1429224b 41#include "fft.h"
4538729a 42#include "sinewin.h"
53440c62
BW
43
44#define BITSTREAM_WRITER_LE
b2755007 45#include "put_bits.h"
53440c62
BW
46
/* Number of entries in pow_table; encode_block() indexes the table with the low 11 bits of the band exponent. */
47#define POW_TABLE_SIZE (1<<11)
/* Exponent bias added when pow_table is filled in encode_init() and compensated by the extra shift in encode_block(). */
48#define POW_TABLE_OFFSET 3
/* Number of exponent states per band in the trellis tables allocated in encode_init(). */
aa8d024c 49#define OPT_SIZE ((1<<15) + 3000)
53440c62
BW
50
51typedef struct NellyMoserEncodeContext {
52 AVCodecContext *avctx;
53 int last_frame;
755ba886
BW
54 int bufsel;
55 int have_saved;
748384a1 56 DSPContext dsp;
01b22147 57 FFTContext mdct_ctx;
84dc2d8a
MR
58 DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
59 DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
60 DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer
aa8d024c
BL
61 float (*opt )[NELLY_BANDS];
62 uint8_t (*path)[NELLY_BANDS];
53440c62
BW
63} NellyMoserEncodeContext;
64
65static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled in encode_init()
66
/**
 * First-guess lookup for the exponent of band 0.
 * get_exponent_greedy() indexes it with clip((lrintf(cand) >> 8) - 20, 0, 95)
 * and uses the entry as a starting index into ff_nelly_init_table; find_best
 * then also tries the following index.
 */
67static const uint8_t sf_lut[96] = {
68 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
69 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
70 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
71 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
72 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
73 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
74};
75
/**
 * First-guess lookup for the exponent delta of bands 1 and up.
 * get_exponent_greedy() indexes it with clip((lrintf(delta) >> 8) + 37, 0, 77)
 * and uses the entry as a starting index into ff_nelly_delta_table; find_best
 * then also tries the following index.
 */
76static const uint8_t sf_delta_lut[78] = {
77 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
78 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
79 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
80 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
81 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
82};
83
/**
 * First-guess quantizer indices, stored as six consecutive sub-tables of
 * 1, 3, 7, 21, 49 and 149 entries (one per coefficient bit count 1..6).
 * encode_block() selects the sub-table through quant_lut_offset[] and the
 * position inside it through quant_lut_mul[] / quant_lut_add[]; the entry is
 * a starting index into the dequantization table for that bit count.
 */
84static const uint8_t quant_lut[230] = {
85 0,
86
87 0, 1, 2,
88
89 0, 1, 2, 3, 4, 5, 6,
90
91 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
92 12, 13, 13, 13, 14,
93
94 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
95 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
96 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
97 30,
98
99 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
100 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
101 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
102 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
103 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
104 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
105 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
106 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
107 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
108 61, 61, 61, 61, 62,
109};
110
/* Per-bit-count scale applied to a coefficient before the quant_lut lookup in encode_block(). */
111static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
/* Per-bit-count offset added after scaling; the sum is the (unclipped) position inside quant_lut. */
112static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
/* quant_lut_offset[b] .. quant_lut_offset[b + 1] - 1 is the slice of quant_lut used for b-bit coefficients. */
113static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
114
c73ce8e2 115static void apply_mdct(NellyMoserEncodeContext *s)
755ba886 116{
6eabb0d3 117 s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
4cab1e49 118 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
755ba886 119 NELLY_BUF_LEN);
26f548bb 120 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
755ba886 121
6eabb0d3
JR
122 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
123 ff_sine_128, NELLY_BUF_LEN);
755ba886
BW
124 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
125 NELLY_BUF_LEN);
26f548bb 126 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
755ba886
BW
127}
128
53440c62
BW
129static av_cold int encode_init(AVCodecContext *avctx)
130{
131 NellyMoserEncodeContext *s = avctx->priv_data;
132 int i;
133
134 if (avctx->channels != 1) {
135 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
136 return -1;
137 }
138
c67d7255
AC
139 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
140 avctx->sample_rate != 11025 &&
748384a1
BW
141 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
142 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
c67d7255 143 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
748384a1
BW
144 return -1;
145 }
146
53440c62
BW
147 avctx->frame_size = NELLY_SAMPLES;
148 s->avctx = avctx;
7d485f16 149 ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
53440c62
BW
150 dsputil_init(&s->dsp, avctx);
151
152 /* Generate overlap window */
153 ff_sine_window_init(ff_sine_128, 128);
154 for (i = 0; i < POW_TABLE_SIZE; i++)
155 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
156
aa8d024c
BL
157 if (s->avctx->trellis) {
158 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
159 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
160 }
161
53440c62
BW
162 return 0;
163}
164
165static av_cold int encode_end(AVCodecContext *avctx)
166{
167 NellyMoserEncodeContext *s = avctx->priv_data;
168
169 ff_mdct_end(&s->mdct_ctx);
aa8d024c
BL
170
171 if (s->avctx->trellis) {
172 av_free(s->opt);
173 av_free(s->path);
174 }
175
53440c62
BW
176 return 0;
177}
178
748384a1
BW
179#define find_best(val, table, LUT, LUT_add, LUT_size) \
180 best_idx = \
181 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
182 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
183 best_idx++;
184
755ba886
BW
185static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
186{
187 int band, best_idx, power_idx = 0;
188 float power_candidate;
189
190 //base exponent
191 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
192 idx_table[0] = best_idx;
193 power_idx = ff_nelly_init_table[best_idx];
194
195 for (band = 1; band < NELLY_BANDS; band++) {
196 power_candidate = cand[band] - power_idx;
197 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
198 idx_table[band] = best_idx;
199 power_idx += ff_nelly_delta_table[best_idx];
200 }
201}
202
755ba886
BW
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (currently not used)
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
209
210static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
211{
212 int i, j, band, best_idx;
213 float power_candidate, best_val;
214
aa8d024c
BL
215 float (*opt )[NELLY_BANDS] = s->opt ;
216 uint8_t(*path)[NELLY_BANDS] = s->path;
755ba886
BW
217
218 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
219 opt[0][i] = INFINITY;
220 }
221
222 for (i = 0; i < 64; i++) {
223 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
224 path[0][ff_nelly_init_table[i]] = i;
225 }
226
227 for (band = 1; band < NELLY_BANDS; band++) {
228 int q, c = 0;
229 float tmp;
230 int idx_min, idx_max, idx;
231 power_candidate = cand[band];
232 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
233 idx_min = FFMAX(0, cand[band] - q);
234 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
235 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
236 if ( isinf(opt[band - 1][i]) )
237 continue;
238 for (j = 0; j < 32; j++) {
239 idx = i + ff_nelly_delta_table[j];
240 if (idx > idx_max)
241 break;
242 if (idx >= idx_min) {
243 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
244 if (opt[band][idx] > tmp) {
245 opt[band][idx] = tmp;
246 path[band][idx] = j;
247 c = 1;
248 }
249 }
250 }
251 }
252 }
253 assert(c); //FIXME
254 }
255
256 best_val = INFINITY;
257 best_idx = -1;
258 band = NELLY_BANDS - 1;
259 for (i = 0; i < OPT_SIZE; i++) {
260 if (best_val > opt[band][i]) {
261 best_val = opt[band][i];
262 best_idx = i;
263 }
264 }
265 for (band = NELLY_BANDS - 1; band >= 0; band--) {
266 idx_table[band] = path[band][best_idx];
267 if (band) {
268 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
269 }
270 }
271}
272
273/**
49bd8e4b 274 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
755ba886
BW
275 * @param s encoder context
276 * @param output output buffer
277 * @param output_size size of output buffer
278 */
279static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
280{
281 PutBitContext pb;
282 int i, j, band, block, best_idx, power_idx = 0;
283 float power_val, coeff, coeff_sum;
284 float pows[NELLY_FILL_LEN];
285 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
286 float cand[NELLY_BANDS];
287
288 apply_mdct(s);
289
290 init_put_bits(&pb, output, output_size * 8);
291
292 i = 0;
293 for (band = 0; band < NELLY_BANDS; band++) {
294 coeff_sum = 0;
295 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
296 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
297 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
298 }
299 cand[band] =
300 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
301 }
302
303 if (s->avctx->trellis) {
304 get_exponent_dynamic(s, cand, idx_table);
305 } else {
306 get_exponent_greedy(s, cand, idx_table);
307 }
308
309 i = 0;
310 for (band = 0; band < NELLY_BANDS; band++) {
311 if (band) {
312 power_idx += ff_nelly_delta_table[idx_table[band]];
313 put_bits(&pb, 5, idx_table[band]);
314 } else {
315 power_idx = ff_nelly_init_table[idx_table[0]];
316 put_bits(&pb, 6, idx_table[0]);
317 }
318 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
319 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
320 s->mdct_out[i] *= power_val;
321 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
322 pows[i] = power_idx;
323 }
324 }
325
326 ff_nelly_get_sample_bits(pows, bits);
327
328 for (block = 0; block < 2; block++) {
329 for (i = 0; i < NELLY_FILL_LEN; i++) {
330 if (bits[i] > 0) {
331 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
332 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
333 best_idx =
334 quant_lut[av_clip (
335 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
336 quant_lut_offset[bits[i]],
337 quant_lut_offset[bits[i]+1] - 1
338 )];
339 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
340 best_idx++;
341
342 put_bits(&pb, bits[i], best_idx);
343 }
344 }
345 if (!block)
346 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
347 }
f2e57867
RP
348
349 flush_put_bits(&pb);
755ba886
BW
350}
351
352static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
353{
354 NellyMoserEncodeContext *s = avctx->priv_data;
edac49da 355 const int16_t *samples = data;
755ba886
BW
356 int i;
357
358 if (s->last_frame)
359 return 0;
360
361 if (data) {
362 for (i = 0; i < avctx->frame_size; i++) {
363 s->buf[s->bufsel][i] = samples[i];
364 }
365 for (; i < NELLY_SAMPLES; i++) {
366 s->buf[s->bufsel][i] = 0;
367 }
368 s->bufsel = 1 - s->bufsel;
369 if (!s->have_saved) {
370 s->have_saved = 1;
371 return 0;
372 }
373 } else {
374 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
375 s->bufsel = 1 - s->bufsel;
376 s->last_frame = 1;
377 }
378
379 if (s->have_saved) {
380 encode_block(s, frame, buf_size);
381 return NELLY_BLOCK_LEN;
382 }
383 return 0;
384}
385
d36beb3f 386AVCodec ff_nellymoser_encoder = {
53440c62 387 .name = "nellymoser",
72415b2a 388 .type = AVMEDIA_TYPE_AUDIO,
53440c62
BW
389 .id = CODEC_ID_NELLYMOSER,
390 .priv_data_size = sizeof(NellyMoserEncodeContext),
391 .init = encode_init,
392 .encode = encode_frame,
393 .close = encode_end,
394 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
0ffbc258 395 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
5d6e4c16 396 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
53440c62 397};