Commit | Line | Data |
---|---|---|
ff4ec49e FB |
1 | /* |
2 | * DSP utils | |
3 | * Copyright (c) 2000, 2001, 2002 Fabrice Bellard. | |
4 | * | |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Lesser General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this library; if not, write to the Free Software | |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 | */ | |
de6d9b64 FB |
19 | #ifndef DSPUTIL_H |
20 | #define DSPUTIL_H | |
21 | ||
22 | #include "common.h" | |
43f1708f | 23 | #include "avcodec.h" |
de6d9b64 | 24 | |
44eb4951 | 25 | //#define DEBUG |
de6d9b64 FB |
26 | /* dct code */ |
27 | typedef short DCTELEM; | |
28 | ||
03c94ede | 29 | void fdct_ifast (DCTELEM *data); |
28db7fce | 30 | void ff_jpeg_fdct_islow (DCTELEM *data); |
de6d9b64 FB |
31 | |
32 | void j_rev_dct (DCTELEM *data); | |
33 | ||
3f09f52a | 34 | void ff_fdct_mmx(DCTELEM *block); |
de6d9b64 | 35 | |
e0eac44e FB |
36 | /* encoding scans */ |
37 | extern UINT8 ff_alternate_horizontal_scan[64]; | |
38 | extern UINT8 ff_alternate_vertical_scan[64]; | |
39 | extern UINT8 zigzag_direct[64]; | |
40 | ||
5a240838 MN |
41 | /* permutation table */ |
42 | extern UINT8 permutation[64]; | |
43 | ||
de6d9b64 FB |
44 | /* pixel operations */ |
45 | #define MAX_NEG_CROP 384 | |
46 | ||
47 | /* temporary */ | |
48 | extern UINT32 squareTbl[512]; | |
0cfa9713 | 49 | extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
de6d9b64 FB |
50 | |
51 | void dsputil_init(void); | |
52 | ||
b7c27ee6 MN |
53 | /* minimum alignment rules ;) |
54 | if you notice errors in the alignment annotations, need more alignment for some asm code on some cpu, | |
55 | or need to use a function with less aligned data, then send a mail to the ffmpeg-dev list, ... | |
56 | ||
57 | !warning: these alignments might not match reality (an attribute((align)) may be missing somewhere) | |
58 | I (michael) didn't check them; these are just the alignments which I think could be reached easily ... | |
de6d9b64 | 59 | |
b7c27ee6 MN |
60 | !future video codecs might need functions with less strict alignment |
61 | */ | |
62 | ||
63 | /* pixel ops : interface with DCT */ | |
64 | extern void (*ff_idct)(DCTELEM *block/*align 16*/); | |
65 | extern void (*ff_idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |
66 | extern void (*ff_idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |
67 | extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); | |
68 | extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); | |
69 | extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
70 | extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
71 | extern void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); | |
72 | extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | |
3aa102be MN |
73 | extern int (*pix_sum)(UINT8 * pix, int line_size); |
74 | extern int (*pix_norm1)(UINT8 * pix, int line_size); | |
75 | ||
44eb4951 | 76 | |
de6d9b64 FB |
77 | |
78 | void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); | |
9dbcbd92 | 79 | void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
de6d9b64 FB |
80 | void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); |
81 | void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | |
649c00c9 | 82 | void clear_blocks_c(DCTELEM *blocks); |
de6d9b64 FB |
83 | |
84 | /* add and put pixel (decoding) */ | |
b7c27ee6 MN |
85 | // block sizes for op_pixels_func are 8x4, 8x8, 16x8, 16x16 |
86 | typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); | |
87 | typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); | |
b3184779 MN |
88 | |
89 | extern op_pixels_func put_pixels_tab[2][4]; | |
90 | extern op_pixels_func avg_pixels_tab[2][4]; | |
91 | extern op_pixels_func put_no_rnd_pixels_tab[2][4]; | |
92 | extern op_pixels_func avg_no_rnd_pixels_tab[2][4]; | |
93 | extern qpel_mc_func put_qpel_pixels_tab[2][16]; | |
94 | extern qpel_mc_func avg_qpel_pixels_tab[2][16]; | |
95 | extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |
96 | extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | |
97 | ||
/*
 * CALL_2X_PIXELS(a, b, n): define a static function named `a` taking
 * (block, pixels, line_size, h) that calls `b` twice with the same
 * line_size and h: once on block/pixels as given, and once with both
 * pointers advanced by `n` bytes.
 *
 * `n` is parenthesized in the expansion so that an argument containing
 * low-precedence operators (e.g. a conditional expression) still acts as a
 * single offset.
 */
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block      , pixels      , line_size, h);\
    b(block + (n), pixels + (n), line_size, h);\
}
44eb4951 | 103 | |
de6d9b64 FB |
104 | /* motion estimation */ |
105 | ||
b7c27ee6 | 106 | typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); |
de6d9b64 FB |
107 | |
108 | extern op_pixels_abs_func pix_abs16x16; | |
109 | extern op_pixels_abs_func pix_abs16x16_x2; | |
110 | extern op_pixels_abs_func pix_abs16x16_y2; | |
111 | extern op_pixels_abs_func pix_abs16x16_xy2; | |
ba6802de MN |
112 | extern op_pixels_abs_func pix_abs8x8; |
113 | extern op_pixels_abs_func pix_abs8x8_x2; | |
114 | extern op_pixels_abs_func pix_abs8x8_y2; | |
115 | extern op_pixels_abs_func pix_abs8x8_xy2; | |
116 | ||
117 | int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
118 | int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
119 | int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
120 | int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
de6d9b64 | 121 | |
e0eac44e FB |
122 | static inline int block_permute_op(int j) |
123 | { | |
5a240838 | 124 | return permutation[j]; |
e0eac44e FB |
125 | } |
126 | ||
127 | void block_permute(INT16 *block); | |
128 | ||
3d03c0a2 | 129 | #if defined(HAVE_MMX) |
de6d9b64 FB |
130 | |
131 | #define MM_MMX 0x0001 /* standard MMX */ | |
132 | #define MM_3DNOW 0x0004 /* AMD 3DNOW */ | |
133 | #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |
134 | #define MM_SSE 0x0008 /* SSE functions */ | |
135 | #define MM_SSE2 0x0010 /* PIV SSE2 functions */ | |
136 | ||
137 | extern int mm_flags; | |
138 | ||
139 | int mm_support(void); | |
140 | ||
/*
 * Issue the "emms" instruction through inline assembly.
 * The statement is volatile and lists "memory" as clobbered, so the compiler
 * will neither drop it nor cache memory values in registers across it.
 */
static inline void emms(void)
{
    __asm__ __volatile__ ("emms;" : : : "memory");
}
145 | ||
/*
 * emms_c(): call emms() only when the MM_MMX bit is set in mm_flags.
 *
 * The body is wrapped in do { ... } while (0) so that "emms_c();" expands to
 * exactly one statement. A bare { ... } block followed by ';' would break an
 * unbraced "if (x) emms_c(); else ..." at the call site.
 * Call sites must terminate the invocation with a semicolon.
 */
#define emms_c() \
    do {\
        if (mm_flags & MM_MMX)\
            emms();\
    } while (0)
151 | ||
152 | #define __align8 __attribute__ ((aligned (8))) | |
153 | ||
154 | void dsputil_init_mmx(void); | |
57060b1e | 155 | void dsputil_set_bit_exact_mmx(void); |
de6d9b64 | 156 | |
3d03c0a2 FB |
157 | #elif defined(ARCH_ARMV4L) |
158 | ||
159 | #define emms_c() | |
160 | ||
161 | /* This is to use 4-byte reads on the IDCT pointers for some 'zero' | |
162 | line optimizations */ | |
163 | #define __align8 __attribute__ ((aligned (4))) | |
164 | ||
165 | void dsputil_init_armv4l(void); | |
166 | ||
c34270f5 FB |
167 | #elif defined(HAVE_MLIB) |
168 | ||
169 | #define emms_c() | |
170 | ||
171 | /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | |
172 | #define __align8 __attribute__ ((aligned (8))) | |
173 | ||
174 | void dsputil_init_mlib(void); | |
175 | ||
1e98dffb NK |
176 | #elif defined(ARCH_ALPHA) |
177 | ||
178 | #define emms_c() | |
179 | #define __align8 __attribute__ ((aligned (8))) | |
180 | ||
181 | void dsputil_init_alpha(void); | |
182 | ||
59925ef2 BF |
183 | #elif defined(ARCH_POWERPC) |
184 | ||
185 | #define emms_c() | |
186 | #define __align8 __attribute__ ((aligned (16))) | |
187 | ||
ab6c65f6 | 188 | void dsputil_init_ppc(void); |
59925ef2 | 189 | |
de6d9b64 FB |
190 | #else |
191 | ||
fb16b7e7 FB |
192 | #define emms_c() |
193 | ||
de6d9b64 FB |
194 | #define __align8 |
195 | ||
196 | #endif | |
197 | ||
6d4985bb FB |
/*
 * Helpers for 32-/64-bit loads and a 32-bit store through an arbitrary
 * pointer.
 *
 *   LD32(a), LD64(a) - read a uint32_t / uint64_t at address a.
 *   ST32(a, b)       - write the 32-bit value b at address a; the expression
 *                      yields the stored value.
 *
 * Every expansion is fully parenthesized so the macros behave as a single
 * expression in any context (e.g. as an operand of ?:).
 */
#ifdef __GNUC__

/* Packed single-member wrappers: accessing ->l through them tells the
   compiler not to assume natural alignment for the address. */
struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed));

#define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l)

#define ST32(a, b) ((((struct unaligned_32 *) (a))->l) = (b))

#else /* __GNUC__ */

/* Fallback without the packed attribute: plain pointer casts.
   Loads go through const-qualified pointers, matching the GNUC variant. */
#define LD32(a) (*((const uint32_t*)(a)))
#define LD64(a) (*((const uint64_t*)(a)))

#define ST32(a, b) (*((uint32_t*)(a)) = (b))

#endif /* !__GNUC__ */
216 | ||
43f1708f J |
217 | /* PSNR */ |
218 | void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], | |
219 | int orig_linesize[3], int coded_linesize, | |
220 | AVCodecContext *avctx); | |
221 | ||
de6d9b64 | 222 | #endif |