Commit | Line | Data |
---|---|---|
ff4ec49e FB |
1 | /* |
2 | * DSP utils | |
406792e7 | 3 | * Copyright (c) 2000, 2001, 2002 Fabrice Bellard |
8f2ab833 | 4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
ff4ec49e | 5 | * |
b78e7197 DB |
6 | * This file is part of FFmpeg. |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
ff4ec49e FB |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either | |
b78e7197 | 11 | * version 2.1 of the License, or (at your option) any later version. |
ff4ec49e | 12 | * |
b78e7197 | 13 | * FFmpeg is distributed in the hope that it will be useful, |
ff4ec49e FB |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
b78e7197 | 19 | * License along with FFmpeg; if not, write to the Free Software |
5509bffa | 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
ff4ec49e | 21 | */ |
24641185 MN |
22 | |
23 | /** | |
bad5537e | 24 | * @file libavcodec/dsputil.h |
983e3246 | 25 | * DSP utils. |
5755c27f MN |
26 | * note, many functions in here may use MMX which trashes the FPU state, it is |
27 | * absolutely necessary to call emms_c() between dsp & float/double code | |
24641185 MN |
28 | */ |
29 | ||
98790382 SS |
30 | #ifndef AVCODEC_DSPUTIL_H |
31 | #define AVCODEC_DSPUTIL_H | |
de6d9b64 | 32 | |
6a5d31ac | 33 | #include "libavutil/intreadwrite.h" |
43f1708f | 34 | #include "avcodec.h" |
de6d9b64 | 35 | |
24641185 | 36 | |
44eb4951 | 37 | //#define DEBUG |
de6d9b64 FB |
38 | /* dct code */ |
39 | typedef short DCTELEM; | |
059715a4 | 40 | typedef int DWTELEM; |
d593e329 | 41 | typedef short IDWTELEM; |
de6d9b64 | 42 | |
03c94ede | 43 | void fdct_ifast (DCTELEM *data); |
48b1f800 | 44 | void fdct_ifast248 (DCTELEM *data); |
28db7fce | 45 | void ff_jpeg_fdct_islow (DCTELEM *data); |
10acc479 | 46 | void ff_fdct248_islow (DCTELEM *data); |
de6d9b64 FB |
47 | |
48 | void j_rev_dct (DCTELEM *data); | |
178fcca8 | 49 | void j_rev_dct4 (DCTELEM *data); |
9ca358b9 | 50 | void j_rev_dct2 (DCTELEM *data); |
1aa8c57b | 51 | void j_rev_dct1 (DCTELEM *data); |
9abc7e0f | 52 | void ff_wmv2_idct_c(DCTELEM *data); |
de6d9b64 | 53 | |
3f09f52a | 54 | void ff_fdct_mmx(DCTELEM *block); |
cf3bf5bb | 55 | void ff_fdct_mmx2(DCTELEM *block); |
8fd19ab2 | 56 | void ff_fdct_sse2(DCTELEM *block); |
de6d9b64 | 57 | |
43efd19a | 58 | void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); |
0fa8158d | 59 | void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); |
ef9d1d15 LM |
60 | void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); |
61 | void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); | |
0fa8158d MN |
62 | void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); |
63 | void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); | |
ac223859 MN |
64 | void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
65 | void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | |
66 | void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | |
67 | void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | |
0fa8158d | 68 | |
eb4825b5 LM |
69 | void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, |
70 | const float *src2, int src3, int blocksize, int step); | |
f27e1d64 LM |
71 | void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, |
72 | const float *win, float add_bias, int len); | |
560fa9bf | 73 | void ff_float_to_int16_c(int16_t *dst, const float *src, long len); |
8a37920c | 74 | void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels); |
eb4825b5 | 75 | |
e0eac44e | 76 | /* encoding scans */ |
0c1a9eda ZK |
77 | extern const uint8_t ff_alternate_horizontal_scan[64]; |
78 | extern const uint8_t ff_alternate_vertical_scan[64]; | |
79 | extern const uint8_t ff_zigzag_direct[64]; | |
10acc479 | 80 | extern const uint8_t ff_zigzag248_direct[64]; |
5a240838 | 81 | |
de6d9b64 | 82 | /* pixel operations */ |
f2e92ef2 | 83 | #define MAX_NEG_CROP 1024 |
de6d9b64 FB |
84 | |
85 | /* temporary */ | |
1d503957 | 86 | extern uint32_t ff_squareTbl[512]; |
55fde95e | 87 | extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; |
de6d9b64 | 88 | |
44cb64ee | 89 | /* VP3 DSP functions */ |
8b6103da MN |
90 | void ff_vp3_idct_c(DCTELEM *block/* align 16*/); |
91 | void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |
92 | void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |
de6d9b64 | 93 | |
9971331d DC |
94 | void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); |
95 | void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | |
96 | ||
26f3ae82 AJ |
97 | /* VP6 DSP functions */ |
98 | void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride, | |
99 | const int16_t *h_weights, const int16_t *v_weights); | |
100 | ||
54009d42 MN |
101 | /* 1/2^n downscaling functions from imgconvert.c */ |
102 | void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
103 | void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
104 | void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
105 | void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
703c8195 LM |
106 | |
107 | void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | |
108 | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |
54009d42 | 109 | |
b7c27ee6 | 110 | /* minimum alignment rules ;) |
7ce68923 DB |
111 | If you notice errors in the align stuff, need more alignment for some ASM code |
112 | for some CPU or need to use a function with less aligned data then send a mail | |
113 | to the ffmpeg-devel mailing list, ... | |
114 | ||
115 | !warning These alignments might not match reality, (missing attribute((align)) | |
116 | stuff somewhere possible). | |
2cab6401 | 117 | I (Michael) did not check them, these are just the alignments which I think |
7ce68923 | 118 | could be reached easily ... |
de6d9b64 | 119 | |
b7c27ee6 MN |
120 | !future video codecs might need functions with less strict alignment |
121 | */ | |
122 | ||
eb4b3dd3 | 123 | /* |
0c1a9eda ZK |
124 | void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size); |
125 | void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); | |
126 | void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | |
127 | void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | |
649c00c9 | 128 | void clear_blocks_c(DCTELEM *blocks); |
eb4b3dd3 | 129 | */ |
de6d9b64 FB |
130 | |
131 | /* add and put pixel (decoding) */ | |
b7c27ee6 | 132 | // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 |
7d67aa9b | 133 | //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4 |
0c1a9eda | 134 | typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); |
669ac79c | 135 | typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); |
0c1a9eda | 136 | typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); |
0da71265 | 137 | typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); |
9f2d1b4f | 138 | typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); |
e8b56208 | 139 | typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); |
b3184779 | 140 | |
db794953 | 141 | #define DEF_OLD_QPEL(name)\ |
0c1a9eda ZK |
142 | void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ |
143 | void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ | |
144 | void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | |
db794953 MN |
145 | |
146 | DEF_OLD_QPEL(qpel16_mc11_old_c) | |
147 | DEF_OLD_QPEL(qpel16_mc31_old_c) | |
148 | DEF_OLD_QPEL(qpel16_mc12_old_c) | |
149 | DEF_OLD_QPEL(qpel16_mc32_old_c) | |
150 | DEF_OLD_QPEL(qpel16_mc13_old_c) | |
151 | DEF_OLD_QPEL(qpel16_mc33_old_c) | |
152 | DEF_OLD_QPEL(qpel8_mc11_old_c) | |
153 | DEF_OLD_QPEL(qpel8_mc31_old_c) | |
154 | DEF_OLD_QPEL(qpel8_mc12_old_c) | |
155 | DEF_OLD_QPEL(qpel8_mc32_old_c) | |
156 | DEF_OLD_QPEL(qpel8_mc13_old_c) | |
157 | DEF_OLD_QPEL(qpel8_mc33_old_c) | |
b3184779 MN |
158 | |
159 | #define CALL_2X_PIXELS(a, b, n)\ | |
160 | static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
161 | b(block , pixels , line_size, h);\ | |
162 | b(block+n, pixels+n, line_size, h);\ | |
163 | } | |
44eb4951 | 164 | |
de6d9b64 | 165 | /* motion estimation */ |
7d67aa9b | 166 | // h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2 |
755bfeab | 167 | // although currently h<4 is not used as functions with width <8 are neither used nor implemented |
bb198e19 | 168 | typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; |
1457ab52 | 169 | |
0da71265 | 170 | |
059715a4 RE |
171 | // for snow slices |
172 | typedef struct slice_buffer_s slice_buffer; | |
173 | ||
24641185 | 174 | /** |
4c79b95c AJ |
175 | * Scantable. |
176 | */ | |
177 | typedef struct ScanTable{ | |
178 | const uint8_t *scantable; | |
179 | uint8_t permutated[64]; | |
180 | uint8_t raster_end[64]; | |
b250f9c6 | 181 | #if ARCH_PPC |
4c79b95c | 182 | /** Used by dct_quantize_altivec to find last-non-zero */ |
49c35f16 | 183 | DECLARE_ALIGNED(16, uint8_t, inverse[64]); |
4c79b95c AJ |
184 | #endif |
185 | } ScanTable; | |
186 | ||
187 | void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); | |
188 | ||
288a44fb AJ |
189 | void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, |
190 | int block_w, int block_h, | |
191 | int src_x, int src_y, int w, int h); | |
192 | ||
4c79b95c | 193 | /** |
24641185 MN |
194 | * DSPContext. |
195 | */ | |
eb4b3dd3 ZK |
196 | typedef struct DSPContext { |
197 | /* pixel ops : interface with DCT */ | |
0c1a9eda ZK |
198 | void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); |
199 | void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | |
200 | void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | |
f9ed9d85 | 201 | void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
0c1a9eda | 202 | void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
36940eca LM |
203 | void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size); |
204 | void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size); | |
1edbfe19 | 205 | int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/); |
d518aebd MN |
206 | /** |
207 | * translational global motion compensation. | |
208 | */ | |
0c1a9eda | 209 | void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); |
d518aebd MN |
210 | /** |
211 | * global motion compensation. | |
212 | */ | |
0c1a9eda | 213 | void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, |
bb270c08 | 214 | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); |
5fecfb7d | 215 | void (*clear_block)(DCTELEM *block/*align 16*/); |
eb4b3dd3 | 216 | void (*clear_blocks)(DCTELEM *blocks/*align 16*/); |
0c1a9eda ZK |
217 | int (*pix_sum)(uint8_t * pix, int line_size); |
218 | int (*pix_norm1)(uint8_t * pix, int line_size); | |
bb198e19 | 219 | // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 |
115329f1 | 220 | |
3899eb2f RS |
221 | me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ |
222 | me_cmp_func sse[6]; | |
223 | me_cmp_func hadamard8_diff[6]; | |
224 | me_cmp_func dct_sad[6]; | |
225 | me_cmp_func quant_psnr[6]; | |
226 | me_cmp_func bit[6]; | |
227 | me_cmp_func rd[6]; | |
228 | me_cmp_func vsad[6]; | |
229 | me_cmp_func vsse[6]; | |
230 | me_cmp_func nsse[6]; | |
231 | me_cmp_func w53[6]; | |
232 | me_cmp_func w97[6]; | |
233 | me_cmp_func dct_max[6]; | |
234 | me_cmp_func dct264_sad[6]; | |
235 | ||
236 | me_cmp_func me_pre_cmp[6]; | |
237 | me_cmp_func me_cmp[6]; | |
238 | me_cmp_func me_sub_cmp[6]; | |
239 | me_cmp_func mb_cmp[6]; | |
240 | me_cmp_func ildct_cmp[6]; //only width 16 used | |
241 | me_cmp_func frame_skip_cmp[6]; //only width 8 used | |
eb4b3dd3 | 242 | |
a00177a9 MR |
243 | int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, |
244 | int size); | |
59006372 | 245 | |
d518aebd MN |
246 | /** |
247 | * Halfpel motion compensation with rounding (a+b+1)>>1. | |
30f15053 | 248 | * this is an array[4][4] of motion compensation functions for 4 |
e5771f4f | 249 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
5755c27f | 250 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
d518aebd MN |
251 | * @param block destination where the result is stored |
252 | * @param pixels source | |
253 | * @param line_size number of bytes in a horizontal line of block | |
254 | * @param h height | |
255 | */ | |
669ac79c | 256 | op_pixels_func put_pixels_tab[4][4]; |
d518aebd MN |
257 | |
258 | /** | |
259 | * Halfpel motion compensation with rounding (a+b+1)>>1. | |
115329f1 | 260 | * This is an array[4][4] of motion compensation functions for 4 |
e5771f4f | 261 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
5755c27f | 262 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
d518aebd MN |
263 | * @param block destination into which the result is averaged (a+b+1)>>1 |
264 | * @param pixels source | |
265 | * @param line_size number of bytes in a horizontal line of block | |
266 | * @param h height | |
267 | */ | |
da3b9756 | 268 | op_pixels_func avg_pixels_tab[4][4]; |
d518aebd MN |
269 | |
270 | /** | |
271 | * Halfpel motion compensation with no rounding (a+b)>>1. | |
30f15053 | 272 | * this is an array[2][4] of motion compensation functions for 2 |
eb14c713 | 273 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
5755c27f | 274 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
d518aebd MN |
275 | * @param block destination where the result is stored |
276 | * @param pixels source | |
277 | * @param line_size number of bytes in a horizontal line of block | |
278 | * @param h height | |
279 | */ | |
dbc56b39 | 280 | op_pixels_func put_no_rnd_pixels_tab[4][4]; |
d518aebd MN |
281 | |
282 | /** | |
283 | * Halfpel motion compensation with no rounding (a+b)>>1. | |
30f15053 | 284 | * this is an array[2][4] of motion compensation functions for 2 |
eb14c713 | 285 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
5755c27f | 286 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
d518aebd MN |
287 | * @param block destination into which the result is averaged (a+b)>>1 |
288 | * @param pixels source | |
289 | * @param line_size number of bytes in a horizontal line of block | |
290 | * @param h height | |
291 | */ | |
dbc56b39 | 292 | op_pixels_func avg_no_rnd_pixels_tab[4][4]; |
115329f1 | 293 | |
c0a0170c | 294 | void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); |
115329f1 | 295 | |
669ac79c MN |
296 | /** |
297 | * Thirdpel motion compensation with rounding (a+b+1)>>1. | |
30f15053 LB |
298 | * this is an array[11] of motion compensation functions for the 9 thirdpel |
299 | * positions<br> | |
669ac79c MN |
300 | * *pixels_tab[ xthirdpel + 4*ythirdpel ] |
301 | * @param block destination where the result is stored | |
302 | * @param pixels source | |
303 | * @param line_size number of bytes in a horizontal line of block | |
304 | * @param h height | |
305 | */ | |
306 | tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width? | |
da3b9756 MM |
307 | tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width? |
308 | ||
eb4b3dd3 ZK |
309 | qpel_mc_func put_qpel_pixels_tab[2][16]; |
310 | qpel_mc_func avg_qpel_pixels_tab[2][16]; | |
311 | qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |
312 | qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | |
1457ab52 | 313 | qpel_mc_func put_mspel_pixels_tab[8]; |
115329f1 | 314 | |
0da71265 | 315 | /** |
30f15053 | 316 | * h264 Chroma MC |
0da71265 MN |
317 | */ |
318 | h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | |
319 | h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; | |
c374691b DC |
320 | /* This is really one func used in VC-1 decoding */ |
321 | h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]; | |
8013da73 | 322 | h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3]; |
eb4b3dd3 | 323 | |
80e44bc3 MN |
324 | qpel_mc_func put_h264_qpel_pixels_tab[4][16]; |
325 | qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; | |
115329f1 | 326 | |
2833fc46 LM |
327 | qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; |
328 | qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; | |
329 | ||
9f2d1b4f LM |
330 | h264_weight_func weight_h264_pixels_tab[10]; |
331 | h264_biweight_func biweight_h264_pixels_tab[10]; | |
115329f1 | 332 | |
b482e2d1 MN |
333 | /* AVS specific */ |
334 | qpel_mc_func put_cavs_qpel_pixels_tab[2][16]; | |
335 | qpel_mc_func avg_cavs_qpel_pixels_tab[2][16]; | |
336 | void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | |
337 | void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | |
338 | void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | |
339 | void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | |
340 | void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); | |
341 | ||
bb198e19 | 342 | me_cmp_func pix_abs[2][4]; |
115329f1 | 343 | |
11f18faf | 344 | /* huffyuv specific */ |
11f18faf | 345 | void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); |
4a9ca0a2 | 346 | void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w); |
1457ab52 | 347 | void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); |
84705403 MN |
348 | /** |
349 | * subtract huffyuv's variant of median prediction | |
350 | * note, this might read from src1[-1], src2[-1] | |
351 | */ | |
352 | void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); | |
3daa434a | 353 | void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top); |
4a9ca0a2 LM |
354 | /* this might write to dst[w] */ |
355 | void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); | |
96711ecf | 356 | void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); |
42251a2a | 357 | |
2272e04a LM |
358 | void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); |
359 | void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); | |
360 | /* v/h_loop_filter_luma_intra: align 16 */ | |
712ca84c JGG |
361 | void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); |
362 | void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); | |
2272e04a LM |
363 | void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); |
364 | void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); | |
365 | void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); | |
366 | void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); | |
3e20143e LM |
367 | // h264_loop_filter_strength: simd only. the C version is inlined in h264.c |
368 | void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], | |
4f20b45f | 369 | int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); |
115329f1 | 370 | |
332f9ac4 MN |
371 | void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); |
372 | void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | |
373 | ||
fdbbf2e0 | 374 | void (*h261_loop_filter)(uint8_t *src, int stride); |
c6148de2 | 375 | |
9abc7e0f MN |
376 | void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); |
377 | void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); | |
378 | ||
9971331d DC |
379 | void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); |
380 | void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); | |
381 | ||
26f3ae82 AJ |
382 | void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride, |
383 | const int16_t *h_weights,const int16_t *v_weights); | |
384 | ||
eb4825b5 | 385 | /* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
2dac4acf | 386 | void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); |
ac2e5564 | 387 | void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); |
6810b93a LM |
388 | /* no alignment needed */ |
389 | void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); | |
eb4825b5 | 390 | /* assume len is a multiple of 8, and arrays are 16-byte aligned */ |
06972056 | 391 | void (*vector_fmul)(float *dst, const float *src, int len); |
eb4825b5 LM |
392 | void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); |
393 | /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ | |
394 | void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step); | |
f27e1d64 LM |
395 | /* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
396 | void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); | |
911e21a3 LM |
397 | /* assume len is a multiple of 8, and arrays are 16-byte aligned */ |
398 | void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); | |
50e23ae9 | 399 | void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); |
eb4825b5 LM |
400 | |
401 | /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767] | |
98145875 | 402 | * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ |
560fa9bf | 403 | void (*float_to_int16)(int16_t *dst, const float *src, long len); |
5eb0f2a4 | 404 | void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels); |
2dac4acf | 405 | |
b0368839 MN |
406 | /* (I)DCT */ |
407 | void (*fdct)(DCTELEM *block/* align 16*/); | |
10acc479 | 408 | void (*fdct248)(DCTELEM *block/* align 16*/); |
115329f1 | 409 | |
4fb518c3 MN |
410 | /* IDCT really*/ |
411 | void (*idct)(DCTELEM *block/* align 16*/); | |
115329f1 | 412 | |
24641185 | 413 | /** |
77c92c2d | 414 | * block -> idct -> clip to unsigned 8 bit -> dest. |
24641185 | 415 | * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) |
9846cbdb | 416 | * @param line_size size in bytes of a horizontal line of dest |
24641185 | 417 | */ |
b0368839 | 418 | void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
115329f1 | 419 | |
24641185 MN |
420 | /** |
421 | * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. | |
9846cbdb | 422 | * @param line_size size in bytes of a horizontal line of dest |
24641185 | 423 | */ |
b0368839 | 424 | void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
115329f1 | 425 | |
24641185 | 426 | /** |
77c92c2d | 427 | * idct input permutation. |
05493021 MN |
428 | * several optimized IDCTs need a permutated input (relative to the normal order of the reference |
429 | * IDCT) | |
430 | * this permutation must be performed before the idct_put/add, note, normally this can be merged | |
431 | * with the zigzag/alternate scan<br> | |
24641185 MN |
432 | * an example to avoid confusion: |
433 | * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...) | |
434 | * - (x -> reference dct -> reference idct -> x) | |
435 | * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x) | |
436 | * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...) | |
437 | */ | |
b0368839 MN |
438 | uint8_t idct_permutation[64]; |
439 | int idct_permutation_type; | |
440 | #define FF_NO_IDCT_PERM 1 | |
441 | #define FF_LIBMPEG2_IDCT_PERM 2 | |
442 | #define FF_SIMPLE_IDCT_PERM 3 | |
443 | #define FF_TRANSPOSE_IDCT_PERM 4 | |
5773a746 | 444 | #define FF_PARTTRANS_IDCT_PERM 5 |
0e956ba2 | 445 | #define FF_SSE2_IDCT_PERM 6 |
b0368839 | 446 | |
364a1797 MN |
447 | int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); |
448 | void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | |
449 | #define BASIS_SHIFT 16 | |
450 | #define RECON_SHIFT 6 | |
115329f1 | 451 | |
5a6a9e78 | 452 | void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w); |
cbcd6c8c | 453 | #define EDGE_WIDTH 16 |
5a6a9e78 | 454 | |
ea9f5d6f | 455 | /* h264 functions */ |
6a1846eb MN |
456 | /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them |
457 | NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them | |
458 | The reason for above, is that no 2 out of one list may use a different permutation. | |
459 | */ | |
7fadc32d LM |
460 | void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); |
461 | void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); | |
462 | void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); | |
463 | void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); | |
f1862127 | 464 | void (*h264_dct)(DCTELEM block[4][4]); |
ac223859 MN |
465 | void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); |
466 | void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); | |
467 | void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); | |
468 | void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); | |
059715a4 RE |
469 | |
470 | /* snow wavelet */ | |
d593e329 MN |
471 | void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); |
472 | void (*horizontal_compose97i)(IDWTELEM *b, int width); | |
9dd6c804 | 473 | void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); |
513fbd8e LM |
474 | |
475 | void (*prefetch)(void *mem, int stride, int h); | |
54009d42 MN |
476 | |
477 | void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
64db55ae | 478 | |
bf4f19dc | 479 | /* mlp/truehd functions */ |
13bd2044 RP |
480 | void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff, |
481 | int firorder, int iirorder, | |
bf4f19dc RP |
482 | unsigned int filter_shift, int32_t mask, int blocksize, |
483 | int32_t *sample_buffer); | |
484 | ||
64db55ae KS |
485 | /* vc1 functions */ |
486 | void (*vc1_inv_trans_8x8)(DCTELEM *b); | |
d2e45f33 KS |
487 | void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); |
488 | void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); | |
489 | void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); | |
4f717c69 JGG |
490 | void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); |
491 | void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); | |
492 | void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); | |
493 | void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); | |
61f5b14a KS |
494 | void (*vc1_v_overlap)(uint8_t* src, int stride); |
495 | void (*vc1_h_overlap)(uint8_t* src, int stride); | |
3992526b DC |
496 | void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq); |
497 | void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq); | |
498 | void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq); | |
499 | void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq); | |
500 | void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq); | |
501 | void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq); | |
64db55ae KS |
502 | /* put 8x8 block with bicubic interpolation and quarterpel precision |
503 | * last argument is actually round value instead of height | |
504 | */ | |
505 | op_pixels_func put_vc1_mspel_pixels_tab[16]; | |
6cecd630 | 506 | op_pixels_func avg_vc1_mspel_pixels_tab[16]; |
9abc7e0f MN |
507 | |
508 | /* intrax8 functions */ | |
8a43317e DB |
509 | void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize); |
510 | void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize, | |
9abc7e0f MN |
511 | int * range, int * sum, int edges); |
512 | ||
88c0536a KS |
513 | /* ape functions */ |
514 | /** | |
515 | * Add contents of the second vector to the first one. | |
bb68f8a2 | 516 | * @param len length of vectors, should be multiple of 16 |
88c0536a KS |
517 | */ |
518 | void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); | |
519 | /** | |
520 | * Subtract contents of the second vector from the first one. | |
bb68f8a2 | 521 | * @param len length of vectors, should be multiple of 16 |
88c0536a KS |
522 | */ |
523 | void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); | |
524 | /** | |
525 | * Calculate scalar product of two vectors. | |
bb68f8a2 | 526 | * @param len length of vectors, should be multiple of 16 |
88c0536a KS |
527 | * @param shift number of bits to discard from product |
528 | */ | |
529 | int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift); | |
2d8a0815 | 530 | |
11c0f9ec KS |
531 | /* rv30 functions */ |
532 | qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; | |
533 | qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; | |
534 | ||
2d8a0815 KS |
535 | /* rv40 functions */ |
536 | qpel_mc_func put_rv40_qpel_pixels_tab[4][16]; | |
537 | qpel_mc_func avg_rv40_qpel_pixels_tab[4][16]; | |
538 | h264_chroma_mc_func put_rv40_chroma_pixels_tab[3]; | |
539 | h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3]; | |
eb4b3dd3 ZK |
540 | } DSPContext; |
541 | ||
486497e0 | 542 | void dsputil_static_init(void); |
b0368839 | 543 | void dsputil_init(DSPContext* p, AVCodecContext *avctx); |
de6d9b64 | 544 | |
6dc7d5da MN |
545 | int ff_check_alignment(void); |
546 | ||
7801d21d MN |
547 | /** |
548 | * permute block according to permutation. | |
549 | * @param last last non zero element in scantable order | |
550 | */ | |
0c1a9eda | 551 | void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); |
e0eac44e | 552 | |
622348f9 MN |
553 | void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); |
554 | ||
/* Multiply c by 0x01010101: for c in 0..255 this repeats the byte value in
 * each of the four bytes of a 32-bit word. */
bb270c08 | 555 | #define BYTE_VEC32(c) ((c)*0x01010101UL) |
d8085ea7 MN |
556 | |
/**
 * Average the four bytes packed in a with the four bytes packed in b,
 * rounding each byte result up.
 * Computed as (a | b) minus half of (a ^ b); the lowest bit of every byte of
 * the XOR is cleared before the shift so nothing leaks into the byte below.
 */
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t any_set   = a | b;
    const uint32_t differing = (a ^ b) & ~BYTE_VEC32(0x01);
    const uint32_t half_diff = differing >> 1;

    return any_set - half_diff;
}
561 | ||
/**
 * Average the four bytes packed in a with the four bytes packed in b,
 * rounding each byte result down.
 * Computed as (a & b) plus half of (a ^ b); the lowest bit of every byte of
 * the XOR is cleared before the shift so nothing leaks into the byte below.
 */
static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t both_set  = a & b;
    const uint32_t differing = (a ^ b) & ~BYTE_VEC32(0x01);
    const uint32_t half_diff = differing >> 1;

    return both_set + half_diff;
}
566 | ||
26efc54e MN |
567 | static inline int get_penalty_factor(int lambda, int lambda2, int type){ |
568 | switch(type&0xFF){ | |
569 | default: | |
570 | case FF_CMP_SAD: | |
571 | return lambda>>FF_LAMBDA_SHIFT; | |
572 | case FF_CMP_DCT: | |
573 | return (3*lambda)>>(FF_LAMBDA_SHIFT+1); | |
574 | case FF_CMP_W53: | |
575 | return (4*lambda)>>(FF_LAMBDA_SHIFT); | |
576 | case FF_CMP_W97: | |
577 | return (2*lambda)>>(FF_LAMBDA_SHIFT); | |
578 | case FF_CMP_SATD: | |
27c61ac5 | 579 | case FF_CMP_DCT264: |
26efc54e MN |
580 | return (2*lambda)>>FF_LAMBDA_SHIFT; |
581 | case FF_CMP_RD: | |
582 | case FF_CMP_PSNR: | |
583 | case FF_CMP_SSE: | |
584 | case FF_CMP_NSSE: | |
585 | return lambda2>>FF_LAMBDA_SHIFT; | |
586 | case FF_CMP_BIT: | |
587 | return 1; | |
588 | } | |
589 | } | |
590 | ||
24641185 | 591 | /** |
77c92c2d | 592 | * Empty mmx state. |
24641185 MN |
593 | * this must be called between any dsp function and float/double code. |
594 | * for example sin(); dsp->idct_put(); emms_c(); cos() | |
595 | */ | |
eb4b3dd3 ZK |
596 | #define emms_c() |
597 | ||
e629ab68 RD |
598 | /* should be defined by architectures supporting |
599 | one or more MultiMedia extension */ | |
600 | int mm_support(void); | |
e78516c6 | 601 | extern int mm_flags; |
e629ab68 | 602 | |
a1bee080 | 603 | void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); |
a2fc0f6a | 604 | void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); |
a1bee080 MR |
605 | void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); |
606 | void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); | |
607 | void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); | |
608 | void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); | |
609 | void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | |
610 | void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | |
611 | void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | |
612 | ||
43c7c7c7 | 613 | #define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v) |
5b1b0147 | 614 | #define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v) |
92a69cf8 | 615 | |
b250f9c6 | 616 | #if HAVE_MMX |
de6d9b64 | 617 | |
18f77016 | 618 | #undef emms_c |
eb4b3dd3 | 619 | |
de6d9b64 FB |
/* Issue the x86 "emms" instruction (empties the MMX state). The "memory"
 * clobber additionally tells the compiler not to move memory accesses
 * across this statement. */
620 | static inline void emms(void) | |
621 | { | |
be449fca | 622 | __asm__ volatile ("emms;":::"memory"); |
fb16b7e7 FB |
623 | } | |
624 | ||
1457ab52 | 625 | |
fb16b7e7 FB |
/*
 * MMX build of emms_c(): run emms() only when mm_flags reports FF_MM_MMX.
 * The body is wrapped in do { } while (0) so that "emms_c();" is exactly one
 * statement; a bare { } block followed by ';' breaks an unbraced
 * "if (...) emms_c(); else ...".
 */
#define emms_c() \
    do {\
        if (mm_flags & FF_MM_MMX)\
            emms();\
    } while (0)
631 | ||
b250f9c6 | 632 | #elif ARCH_ARM |
3d03c0a2 | 633 | |
b250f9c6 | 634 | #if HAVE_NEON |
f1424cfd MR |
635 | # define STRIDE_ALIGN 16 |
636 | #endif | |
637 | ||
b250f9c6 | 638 | #elif ARCH_PPC |
59925ef2 | 639 | |
3237f731 | 640 | #define STRIDE_ALIGN 16 |
59925ef2 | 641 | |
b250f9c6 | 642 | #elif HAVE_MMI |
d46aba26 | 643 | |
3237f731 | 644 | #define STRIDE_ALIGN 16 |
d46aba26 | 645 | |
f67a10cd AS |
646 | #else |
647 | ||
648 | #define mm_flags 0 | |
649 | #define mm_support() 0 | |
650 | ||
5c319d33 | 651 | #endif |
de6d9b64 | 652 | |
5c319d33 MR |
653 | #ifndef STRIDE_ALIGN |
654 | # define STRIDE_ALIGN 8 | |
de6d9b64 FB |
655 | #endif |
656 | ||
43f1708f | 657 | /* PSNR */ |
0c1a9eda | 658 | void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], |
43f1708f J |
659 | int orig_linesize[3], int coded_linesize, |
660 | AVCodecContext *avctx); | |
bb6f5690 FB |
661 | |
662 | /* FFT computation */ | |
663 | ||
664 | /* NOTE: soon integer code will be added, so you must use the | |
665 | FFTSample type */ | |
/* Scalar sample type used throughout the FFT/MDCT declarations. */
typedef float FFTSample;

/* Complex sample: real part first, imaginary part second. */
typedef struct FFTComplex {
    FFTSample re;
    FFTSample im;
} FFTComplex;
671 | ||
672 | typedef struct FFTContext { | |
673 | int nbits; | |
674 | int inverse; | |
675 | uint16_t *revtab; | |
676 | FFTComplex *exptab; | |
677 | FFTComplex *exptab1; /* only used by SSE code */ | |
5d0ddd1a | 678 | FFTComplex *tmp_buf; |
01b22147 MR |
679 | int mdct_size; /* size of MDCT (i.e. number of input data * 2) */ |
680 | int mdct_bits; /* n = 2^nbits */ | |
681 | /* pre/post rotation tables */ | |
682 | FFTSample *tcos; | |
683 | FFTSample *tsin; | |
5d0ddd1a | 684 | void (*fft_permute)(struct FFTContext *s, FFTComplex *z); |
bb6f5690 | 685 | void (*fft_calc)(struct FFTContext *s, FFTComplex *z); |
01b22147 MR |
686 | void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); |
687 | void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input); | |
688 | void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); | |
f4863213 | 689 | int split_radix; |
94274b82 MR |
690 | int permutation; |
691 | #define FF_MDCT_PERM_NONE 0 | |
692 | #define FF_MDCT_PERM_INTERLEAVE 1 | |
bb6f5690 FB |
693 | } FFTContext; |
694 | ||
faca5619 | 695 | extern FFTSample* const ff_cos_tabs[13]; |
68602540 | 696 | |
8f05c995 AC |
697 | /** |
698 | * Sets up a complex FFT. | |
699 | * @param nbits log2 of the length of the input array | |
700 | * @param inverse if 0 perform the forward transform, if 1 perform the inverse | |
701 | */ | |
68951ecf | 702 | int ff_fft_init(FFTContext *s, int nbits, int inverse); |
5d0ddd1a | 703 | void ff_fft_permute_c(FFTContext *s, FFTComplex *z); |
68951ecf | 704 | void ff_fft_calc_c(FFTContext *s, FFTComplex *z); |
f4863213 MR |
705 | |
706 | void ff_fft_init_altivec(FFTContext *s); | |
707 | void ff_fft_init_mmx(FFTContext *s); | |
68336ea8 | 708 | void ff_fft_init_arm(FFTContext *s); |
8d268a7d | 709 | |
8f05c995 AC |
710 | /** |
711 | * Do the permutation needed BEFORE calling ff_fft_calc(). | |
712 | */ | |
5d0ddd1a LM |
713 | static inline void ff_fft_permute(FFTContext *s, FFTComplex *z) |
714 | { | |
715 | s->fft_permute(s, z); | |
716 | } | |
8f05c995 AC |
717 | /** |
718 | * Do a complex FFT with the parameters defined in ff_fft_init(). The | |
719 | * input data must be permuted before. No 1.0/sqrt(n) normalization is done. | |
720 | */ | |
68951ecf | 721 | static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) |
bb6f5690 FB |
722 | { |
723 | s->fft_calc(s, z); | |
724 | } | |
68951ecf | 725 | void ff_fft_end(FFTContext *s); |
bb6f5690 FB |
726 | |
727 | /* MDCT computation */ | |
728 | ||
01b22147 | 729 | static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input) |
d46ac5bf | 730 | { |
01b22147 | 731 | s->imdct_calc(s, output, input); |
d46ac5bf | 732 | } |
01b22147 | 733 | static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input) |
d46ac5bf | 734 | { |
01b22147 | 735 | s->imdct_half(s, output, input); |
d46ac5bf LM |
736 | } |
737 | ||
01b22147 | 738 | static inline void ff_mdct_calc(FFTContext *s, FFTSample *output, |
46c32e26 MR |
739 | const FFTSample *input) |
740 | { | |
01b22147 | 741 | s->mdct_calc(s, output, input); |
46c32e26 MR |
742 | } |
743 | ||
4eb7a735 RS |
744 | /** |
745 | * Generate a Kaiser-Bessel Derived Window. | |
746 | * @param window pointer to half window | |
3ed546fe RS |
747 | * @param alpha determines window shape |
748 | * @param n size of half window | |
4eb7a735 | 749 | */ |
3ed546fe | 750 | void ff_kbd_window_init(float *window, float alpha, int n); |
4eb7a735 | 751 | |
9146e4d6 RS |
752 | /** |
753 | * Generate a sine window. | |
754 | * @param window pointer to half window | |
755 | * @param n size of half window | |
756 | */ | |
757 | void ff_sine_window_init(float *window, int n); | |
6776061b VS |
758 | extern float ff_sine_32 [ 32]; |
759 | extern float ff_sine_64 [ 64]; | |
69fc4da3 RS |
760 | extern float ff_sine_128 [ 128]; |
761 | extern float ff_sine_256 [ 256]; | |
762 | extern float ff_sine_512 [ 512]; | |
763 | extern float ff_sine_1024[1024]; | |
764 | extern float ff_sine_2048[2048]; | |
336c2ca5 | 765 | extern float ff_sine_4096[4096]; |
6776061b | 766 | extern float * const ff_sine_windows[13]; |
9146e4d6 | 767 | |
01b22147 MR |
768 | int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale); |
769 | void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
770 | void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
771 | void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
772 | void ff_mdct_end(FFTContext *s); | |
bb6f5690 | 773 | |
68602540 AC |
774 | /* Real Discrete Fourier Transform */ |
775 | ||
/* Transform selector passed to ff_rdft_init(). */
enum RDFTransformType {
    RDFT   = 0,
    IRDFT  = 1,
    RIDFT  = 2,
    IRIDFT = 3
};
782 | ||
783 | typedef struct { | |
784 | int nbits; | |
785 | int inverse; | |
786 | int sign_convention; | |
787 | ||
788 | /* pre/post rotation tables */ | |
789 | FFTSample *tcos; | |
790 | FFTSample *tsin; | |
791 | FFTContext fft; | |
792 | } RDFTContext; | |
793 | ||
794 | /** | |
795 | * Sets up a real FFT. | |
796 | * @param nbits log2 of the length of the input array | |
797 | * @param trans the type of transform | |
798 | */ | |
799 | int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans); | |
800 | void ff_rdft_calc(RDFTContext *s, FFTSample *data); | |
801 | void ff_rdft_end(RDFTContext *s); | |
802 | ||
/*
 * Define a static function name16 that calls name8 on the block at
 * dst/src and on the block 8 bytes to the right, forwarding stride and h,
 * and returns the sum of the two results.
 */
#define WRAPPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    const int left_part  = name8(s, dst    , src    , stride, h);\
    const int right_part = name8(s, dst + 8, src + 8, stride, h);\
    return left_part + right_part;\
}
808 | ||
/*
 * Define a static function name16 that sums name8 over 8-row pieces:
 * always the piece at dst/src and the one 8 bytes to the right, and, only
 * when h is 16, the same two pieces 8 rows further down. name8 is always
 * called with a height of 8.
 */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int total = 0;\
    total += name8(s, dst    , src    , stride, 8);\
    total += name8(s, dst + 8, src + 8, stride, 8);\
    if (h != 16)\
        return total;\
    dst += 8 * stride;\
    src += 8 * stride;\
    total += name8(s, dst    , src    , stride, 8);\
    total += name8(s, dst + 8, src + 8, stride, 8);\
    return total;\
}
822 | ||
49cef744 | 823 | |
/**
 * Copy h rows of 2 bytes from src to dst.
 * After each row dst advances by dstStride and src by srcStride.
 * Nothing is copied when h <= 0.
 */
static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--) {
        AV_WN16(dst, AV_RN16(src));
        src += srcStride;
        dst += dstStride;
    }
}
834 | ||
/**
 * Copy h rows of 4 bytes from src to dst.
 * After each row dst advances by dstStride and src by srcStride.
 * Nothing is copied when h <= 0.
 */
static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--) {
        AV_WN32(dst, AV_RN32(src));
        src += srcStride;
        dst += dstStride;
    }
}
845 | ||
/**
 * Copy h rows of 8 bytes from src to dst, as two 4-byte pieces per row.
 * After each row dst advances by dstStride and src by srcStride.
 * Nothing is copied when h <= 0.
 */
static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--) {
        AV_WN32(dst    , AV_RN32(src    ));
        AV_WN32(dst + 4, AV_RN32(src + 4));
        src += srcStride;
        dst += dstStride;
    }
}
857 | ||
/**
 * Copy h rows of 9 bytes from src to dst: two 4-byte pieces plus one
 * trailing byte per row.
 * After each row dst advances by dstStride and src by srcStride.
 * Nothing is copied when h <= 0.
 */
static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--) {
        AV_WN32(dst    , AV_RN32(src    ));
        AV_WN32(dst + 4, AV_RN32(src + 4));
        dst[8] = src[8];
        src += srcStride;
        dst += dstStride;
    }
}
870 | ||
/**
 * Copy h rows of 16 bytes from src to dst, as four 4-byte pieces per row.
 * After each row dst advances by dstStride and src by srcStride.
 * Nothing is copied when h <= 0.
 */
static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--) {
        AV_WN32(dst     , AV_RN32(src     ));
        AV_WN32(dst +  4, AV_RN32(src +  4));
        AV_WN32(dst +  8, AV_RN32(src +  8));
        AV_WN32(dst + 12, AV_RN32(src + 12));
        src += srcStride;
        dst += dstStride;
    }
}
884 | ||
/**
 * Copy h rows of 17 bytes from src to dst: four 4-byte pieces plus one
 * trailing byte per row.
 * After each row dst advances by dstStride and src by srcStride.
 * Nothing is copied when h <= 0.
 */
static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--) {
        AV_WN32(dst     , AV_RN32(src     ));
        AV_WN32(dst +  4, AV_RN32(src +  4));
        AV_WN32(dst +  8, AV_RN32(src +  8));
        AV_WN32(dst + 12, AV_RN32(src + 12));
        dst[16] = src[16];
        src += srcStride;
        dst += dstStride;
    }
}
899 | ||
98790382 | 900 | #endif /* AVCODEC_DSPUTIL_H */ |