cdb8d4da9e213e0f7eeb06e24cdf728fca227c12
3 * Copyright (c) 2002 Fabrice Bellard.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
25 int fft_init(FFTContext
*s
, int nbits
, int inverse
)
28 float alpha
, c1
, s1
, s2
;
33 s
->exptab
= av_malloc((n
/ 2) * sizeof(FFTComplex
));
36 s
->revtab
= av_malloc(n
* sizeof(uint16_t));
41 s2
= inverse ?
1.0 : -1.0;
43 for(i
=0;i
<(n
/2);i
++) {
44 alpha
= 2 * M_PI
* (float)i
/ (float)n
;
50 s
->fft_calc
= fft_calc_c
;
53 /* compute constant table for HAVE_SSE version */
54 #if defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)
55 if (mm_support() & MM_SSE
) {
56 int np
, nblocks
, np2
, l
;
62 s
->exptab1
= av_malloc(np
* 2 * sizeof(FFTComplex
));
67 for(l
= 0; l
< np2
; l
+= 2 * nblocks
) {
69 *q
++ = s
->exptab
[l
+ nblocks
];
71 q
->re
= -s
->exptab
[l
].im
;
72 q
->im
= s
->exptab
[l
].re
;
74 q
->re
= -s
->exptab
[l
+ nblocks
].im
;
75 q
->im
= s
->exptab
[l
+ nblocks
].re
;
78 nblocks
= nblocks
>> 1;
79 } while (nblocks
!= 0);
81 s
->fft_calc
= fft_calc_sse
;
85 /* compute bit reverse table */
89 for(j
=0;j
<nbits
;j
++) {
90 m
|= ((i
>> j
) & 1) << (nbits
-j
-1);
98 av_freep(&s
->exptab1
);
103 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
105 FFTSample ax, ay, bx, by;\
116 #define MUL16(a,b) ((a) * (b))
118 #define CMUL(pre, pim, are, aim, bre, bim) \
120 pre = (MUL16(are, bre) - MUL16(aim, bim));\
121 pim = (MUL16(are, bim) + MUL16(bre, aim));\
125 * Do a complex FFT with the parameters defined in fft_init(). The
126 * input data must be permuted before with s->revtab table. No
127 * 1.0/sqrt(n) normalization is done.
129 void fft_calc_c(FFTContext
*s
, FFTComplex
*z
)
134 register FFTComplex
*p
, *q
;
135 FFTComplex
*exptab
= s
->exptab
;
137 FFTSample tmp_re
, tmp_im
;
146 BF(p
[0].re
, p
[0].im
, p
[1].re
, p
[1].im
,
147 p
[0].re
, p
[0].im
, p
[1].re
, p
[1].im
);
158 BF(p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
,
159 p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
);
160 BF(p
[1].re
, p
[1].im
, p
[3].re
, p
[3].im
,
161 p
[1].re
, p
[1].im
, -p
[3].im
, p
[3].re
);
166 BF(p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
,
167 p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
);
168 BF(p
[1].re
, p
[1].im
, p
[3].re
, p
[3].im
,
169 p
[1].re
, p
[1].im
, p
[3].im
, -p
[3].re
);
181 for (j
= 0; j
< nblocks
; ++j
) {
182 BF(p
->re
, p
->im
, q
->re
, q
->im
,
183 p
->re
, p
->im
, q
->re
, q
->im
);
187 for(l
= nblocks
; l
< np2
; l
+= nblocks
) {
188 CMUL(tmp_re
, tmp_im
, exptab
[l
].re
, exptab
[l
].im
, q
->re
, q
->im
);
189 BF(p
->re
, p
->im
, q
->re
, q
->im
,
190 p
->re
, p
->im
, tmp_re
, tmp_im
);
198 nblocks
= nblocks
>> 1;
199 nloops
= nloops
<< 1;
200 } while (nblocks
!= 0);
204 * Do the permutation needed BEFORE calling fft_calc()
206 void fft_permute(FFTContext
*s
, FFTComplex
*z
)
210 const uint16_t *revtab
= s
->revtab
;
224 void fft_end(FFTContext
*s
)
226 av_freep(&s
->revtab
);
227 av_freep(&s
->exptab
);
228 av_freep(&s
->exptab1
);