Commit | Line | Data |
---|---|---|
ff4ec49e FB |
1 | /* |
2 | * DSP utils | |
3 | * Copyright (c) 2000, 2001, 2002 Fabrice Bellard. | |
4 | * | |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Lesser General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this library; if not, write to the Free Software | |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 | */ | |
de6d9b64 FB |
19 | #ifndef DSPUTIL_H |
20 | #define DSPUTIL_H | |
21 | ||
22 | #include "common.h" | |
43f1708f | 23 | #include "avcodec.h" |
de6d9b64 | 24 | |
44eb4951 | 25 | //#define DEBUG |
de6d9b64 FB |
26 | /* dct code */ |
27 | typedef short DCTELEM; | |
28 | ||
03c94ede | 29 | void fdct_ifast (DCTELEM *data); |
28db7fce | 30 | void ff_jpeg_fdct_islow (DCTELEM *data); |
de6d9b64 FB |
31 | |
32 | void j_rev_dct (DCTELEM *data); | |
33 | ||
3f09f52a | 34 | void ff_fdct_mmx(DCTELEM *block); |
de6d9b64 | 35 | |
e0eac44e | 36 | /* encoding scans */ |
2ad1516a MN |
37 | extern const UINT8 ff_alternate_horizontal_scan[64]; |
38 | extern const UINT8 ff_alternate_vertical_scan[64]; | |
39 | extern const UINT8 ff_zigzag_direct[64]; | |
5a240838 | 40 | |
de6d9b64 FB |
41 | /* pixel operations */ |
42 | #define MAX_NEG_CROP 384 | |
43 | ||
44 | /* temporary */ | |
45 | extern UINT32 squareTbl[512]; | |
0cfa9713 | 46 | extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
de6d9b64 FB |
47 | |
48 | void dsputil_init(void); | |
49 | ||
b7c27ee6 MN |
50 | /* minimum alignment rules ;) |
51 | if you notice errors in the alignment annotations, need more alignment for some asm code on some CPU, | |
52 | or need to use a function with less-aligned data, then send a mail to the ffmpeg-dev list, ... | |
53 | ||
54 | !warning: these alignments might not match reality (an attribute((align)) may be missing somewhere) | |
55 | I (michael) didn't check them; these are just the alignments which I think could be reached easily ... | |
de6d9b64 | 56 | |
b7c27ee6 MN |
57 | !future video codecs might need functions with less strict alignment |
58 | */ | |
59 | ||
60 | /* pixel ops : interface with DCT */ | |
b7c27ee6 MN |
61 | extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); |
62 | extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); | |
63 | extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
64 | extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
073b013d MN |
65 | extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); |
66 | extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, | |
67 | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |
b7c27ee6 | 68 | extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/); |
3aa102be MN |
69 | extern int (*pix_sum)(UINT8 * pix, int line_size); |
70 | extern int (*pix_norm1)(UINT8 * pix, int line_size); | |
71 | ||
44eb4951 | 72 | |
de6d9b64 FB |
73 | |
74 | void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); | |
9dbcbd92 | 75 | void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
de6d9b64 FB |
76 | void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); |
77 | void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | |
649c00c9 | 78 | void clear_blocks_c(DCTELEM *blocks); |
de6d9b64 FB |
79 | |
80 | /* add and put pixel (decoding) */ | |
b7c27ee6 MN |
81 | // block sizes for op_pixels_func are 8x4, 8x8, 16x8, 16x16 |
82 | typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); | |
83 | typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); | |
b3184779 MN |
84 | |
85 | extern op_pixels_func put_pixels_tab[2][4]; | |
86 | extern op_pixels_func avg_pixels_tab[2][4]; | |
87 | extern op_pixels_func put_no_rnd_pixels_tab[2][4]; | |
88 | extern op_pixels_func avg_no_rnd_pixels_tab[2][4]; | |
89 | extern qpel_mc_func put_qpel_pixels_tab[2][16]; | |
90 | extern qpel_mc_func avg_qpel_pixels_tab[2][16]; | |
91 | extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |
92 | extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | |
93 | ||
94 | #define CALL_2X_PIXELS(a, b, n) /* defines a static function named a that invokes b twice */\ | |
95 | static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
96 | b(block , pixels , line_size, h); /* first call: pointers as passed in */\ | |
97 | b(block+n, pixels+n, line_size, h); /* second call: both pointers advanced by n bytes; line_size and h unchanged */\ | |
98 | } | |
44eb4951 | 99 | |
de6d9b64 FB |
100 | /* motion estimation */ |
101 | ||
b7c27ee6 | 102 | typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); |
de6d9b64 FB |
103 | |
104 | extern op_pixels_abs_func pix_abs16x16; | |
105 | extern op_pixels_abs_func pix_abs16x16_x2; | |
106 | extern op_pixels_abs_func pix_abs16x16_y2; | |
107 | extern op_pixels_abs_func pix_abs16x16_xy2; | |
ba6802de MN |
108 | extern op_pixels_abs_func pix_abs8x8; |
109 | extern op_pixels_abs_func pix_abs8x8_x2; | |
110 | extern op_pixels_abs_func pix_abs8x8_y2; | |
111 | extern op_pixels_abs_func pix_abs8x8_xy2; | |
112 | ||
113 | int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
114 | int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
115 | int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
116 | int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
de6d9b64 | 117 | |
2ad1516a | 118 | void block_permute(INT16 *block, UINT8 *permutation); |
e0eac44e | 119 | |
3d03c0a2 | 120 | #if defined(HAVE_MMX) |
de6d9b64 FB |
121 | |
122 | #define MM_MMX 0x0001 /* standard MMX */ | |
123 | #define MM_3DNOW 0x0004 /* AMD 3DNOW */ | |
124 | #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |
125 | #define MM_SSE 0x0008 /* SSE functions */ | |
126 | #define MM_SSE2 0x0010 /* PIV SSE2 functions */ | |
127 | ||
128 | extern int mm_flags; | |
129 | ||
130 | int mm_support(void); | |
131 | ||
132 | static inline void emms(void) /* issues the x86 "emms" instruction; takes no arguments and returns nothing */ | |
133 | { | |
fb16b7e7 FB |
134 | __asm __volatile ("emms;":::"memory"); /* no input or output operands; "memory" is declared as a clobber */ |
135 | } | |
136 | ||
137 | #define emms_c() /* calls emms() only when mm_flags has the MM_MMX bit set */ \ | |
138 | { /* NOTE(review): bare-brace body, so "if (c) emms_c(); else ..." would not parse; confirm callers before changing */\ | |
139 | if (mm_flags & MM_MMX)\ | |
140 | emms();\ | |
de6d9b64 FB |
141 | } |
142 | ||
143 | #define __align8 __attribute__ ((aligned (8))) | |
144 | ||
145 | void dsputil_init_mmx(void); | |
57060b1e | 146 | void dsputil_set_bit_exact_mmx(void); |
de6d9b64 | 147 | |
3d03c0a2 FB |
148 | #elif defined(ARCH_ARMV4L) |
149 | ||
150 | #define emms_c() | |
151 | ||
152 | /* This is to use 4-byte reads on the IDCT pointers for some 'zero' | |
153 | line optimizations */ | |
154 | #define __align8 __attribute__ ((aligned (4))) | |
155 | ||
156 | void dsputil_init_armv4l(void); | |
157 | ||
c34270f5 FB |
158 | #elif defined(HAVE_MLIB) |
159 | ||
160 | #define emms_c() | |
161 | ||
162 | /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | |
163 | #define __align8 __attribute__ ((aligned (8))) | |
164 | ||
165 | void dsputil_init_mlib(void); | |
166 | ||
1e98dffb NK |
167 | #elif defined(ARCH_ALPHA) |
168 | ||
169 | #define emms_c() | |
170 | #define __align8 __attribute__ ((aligned (8))) | |
171 | ||
172 | void dsputil_init_alpha(void); | |
173 | ||
59925ef2 BF |
174 | #elif defined(ARCH_POWERPC) |
175 | ||
176 | #define emms_c() | |
177 | #define __align8 __attribute__ ((aligned (16))) | |
178 | ||
ab6c65f6 | 179 | void dsputil_init_ppc(void); |
59925ef2 | 180 | |
d46aba26 LS |
181 | #elif defined(HAVE_MMI) |
182 | ||
183 | #define emms_c() | |
184 | ||
185 | #define __align8 __attribute__ ((aligned (16))) | |
186 | ||
187 | void dsputil_init_mmi(void); | |
188 | ||
de6d9b64 FB |
189 | #else |
190 | ||
fb16b7e7 FB |
191 | #define emms_c() |
192 | ||
de6d9b64 FB |
193 | #define __align8 |
194 | ||
195 | #endif | |
196 | ||
6d4985bb FB |
197 | #ifdef __GNUC__ |
198 | ||
199 | struct unaligned_64 { uint64_t l; } __attribute__((packed)); /* packed single-member wrapper around a uint64_t, used by LD64 */ | |
200 | struct unaligned_32 { uint32_t l; } __attribute__((packed)); /* packed single-member wrapper around a uint32_t, used by LD32 and ST32 */ | |
201 | ||
202 | #define LD32(a) (((const struct unaligned_32 *) (a))->l) /* reads a uint32_t at address a through the packed struct */ | |
203 | #define LD64(a) (((const struct unaligned_64 *) (a))->l) /* reads a uint64_t at address a through the packed struct */ | |
204 | ||
205 | #define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) /* stores b at address a; expands to a bare assignment, so use it as a statement */ | |
206 | ||
207 | #else /* __GNUC__ */ | |
208 | ||
209 | #define LD32(a) (*((uint32_t*)(a))) /* fallback: plain pointer-cast load of a uint32_t */ | |
210 | #define LD64(a) (*((uint64_t*)(a))) /* fallback: plain pointer-cast load of a uint64_t */ | |
211 | ||
212 | #define ST32(a, b) *((uint32_t*)(a)) = (b) /* fallback: plain pointer-cast store; also a bare assignment */ | |
213 | ||
214 | #endif /* !__GNUC__ */ | |
215 | ||
43f1708f J |
216 | /* PSNR */ |
217 | void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], | |
218 | int orig_linesize[3], int coded_linesize, | |
219 | AVCodecContext *avctx); | |
220 | ||
de6d9b64 | 221 | #endif |