arm optimizations
[libav.git] / libavcodec / dsputil.h
1 #ifndef DSPUTIL_H
2 #define DSPUTIL_H
3
4 #include "common.h"
5
6 /* dct code */
/* Element type of the blocks handed to the DCT routines declared in this
   header; it is a plain `short`. */
7 typedef short DCTELEM;
8
/* Takes a writable DCTELEM array and returns nothing; presumably a forward
   DCT performed in place -- confirm against the implementation. */
9 void jpeg_fdct_ifast (DCTELEM *data);
10
/* Takes a writable DCTELEM array and returns nothing; presumably the
   inverse ("reverse") DCT performed in place -- confirm. */
11 void j_rev_dct (DCTELEM *data);
12
/* Same signature as the routines above; presumably the MMX forward DCT --
   confirm. Note it is declared unconditionally, not under HAVE_MMX. */
13 void fdct_mmx(DCTELEM *block);
14
/* Pointer to the forward DCT routine to call.
   NOTE(review): unlike the other function pointers in this header this one
   has no `extern`, so every translation unit that includes the header emits
   a tentative definition of av_fdct. That links only when the toolchain
   merges common symbols (e.g. GCC with -fcommon). Consider declaring it
   `extern` here and defining it once in a .c file. */
15 void (*av_fdct)(DCTELEM *block);
16
17 /* encoding scans */
/* Three 64-entry UINT8 tables defined in another translation unit.
   Presumably coefficient scan orders for an 8x8 block (two alternate
   scans and the zigzag scan) -- confirm against the definitions. */
18 extern UINT8 ff_alternate_horizontal_scan[64];
19 extern UINT8 ff_alternate_vertical_scan[64];
20 extern UINT8 zigzag_direct[64];
21
22 /* pixel operations */
/* Sizing constant for cropTbl below: the table holds 256 entries plus
   MAX_NEG_CROP extra entries twice (1024 in total). */
23 #define MAX_NEG_CROP 384
24
25 /* temporary */
/* 512-entry UINT32 table defined elsewhere; presumably a table of squares
   -- confirm against the definition. */
26 extern UINT32 squareTbl[512];
/* UINT8 table defined elsewhere; presumably a clamp-to-byte lookup with
   MAX_NEG_CROP guard entries on each side of the 256 in-range values --
   confirm against the code that fills it. */
27 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
28
/* Module initialisation entry point, no arguments and no result.
   NOTE(review): presumably fills the tables above and selects the function
   pointers declared in this header -- confirm in the implementation. */
29 void dsputil_init(void);
30
31 /* pixel ops : interface with DCT */
32
/* Function pointers defined in another translation unit; callers invoke the
   operation through the pointer. */
/* Operates on a writable DCTELEM block; presumably the selected inverse DCT
   -- confirm. */
33 extern void (*ff_idct)(DCTELEM *block);
/* Writes `block` and only reads `pixels`; `line_size` is presumably the
   byte stride between pixel rows -- confirm. */
34 extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
/* The next two only read `block` and write `pixels`; the names suggest the
   results are clamped to the UINT8 range (store vs. accumulate) -- confirm. */
35 extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
36 extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
37
/* Functions with exactly the signatures of the pointers above; presumably
   the portable C implementations those pointers are set to by default --
   confirm in the implementation. */
38 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
39 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
40 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
41
42 /* add and put pixel (decoding) */
/* Signature shared by the pixel routines below: writes `block`, only reads
   `pixels`. `line_size` is presumably the row stride and `h` the number of
   rows to process -- confirm. */
43 typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
44
/* Four tables of four routines each, defined elsewhere. The names suggest
   store ("put") vs. average ("avg") variants, with and without rounding
   ("no_rnd"); the meaning of the index 0..3 is not visible here --
   presumably a half-pel selector like the `dxy` index used by
   sub_pixels_2 -- confirm. */
45 extern op_pixels_func put_pixels_tab[4];
46 extern op_pixels_func avg_pixels_tab[4];
47 extern op_pixels_func put_no_rnd_pixels_tab[4];
48 extern op_pixels_func avg_no_rnd_pixels_tab[4];
49
50 /* sub pixel (encoding) */
51 extern void (*sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_size, int h);
52
53 #define sub_pixels_2(block, pixels, line_size, dxy) \
54 sub_pixels_tab[dxy](block, pixels, line_size, 8)
55
56 /* motion estimation */
57
/* Signature of the motion-estimation comparison routines: takes two pixel
   blocks, a line size and a height `h`, and returns an int. Presumably the
   sum of absolute differences between the blocks -- confirm. */
58 typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size, int h);
59
/* Function pointers defined elsewhere, one per variant. The _x2/_y2/_xy2
   suffixes presumably select half-pel interpolation of one block in x, y or
   both -- confirm. */
60 extern op_pixels_abs_func pix_abs16x16;
61 extern op_pixels_abs_func pix_abs16x16_x2;
62 extern op_pixels_abs_func pix_abs16x16_y2;
63 extern op_pixels_abs_func pix_abs16x16_xy2;
64
/* Functions matching op_pixels_abs_func (the stride parameter is named `lx`
   here); presumably the portable C versions behind the pointers above --
   confirm. */
65 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
66 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
67 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
68 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
69
/*
 * Map a coefficient index to its permuted index.
 *
 * Bits 3..5 of `j` are kept as they are. The low three bits are rotated
 * right by one position (bit 0 moves to bit 2, bits 1..2 move to bits 0..1).
 * Any bits above bit 5 are discarded, so the result is always in 0..63.
 */
static inline int block_permute_op(int j)
{
    const int upper = j & 0x38;          /* bits 3..5, untouched */
    const int lower = j & 7;             /* bits 0..2, to be rotated */
    const int wrapped = (lower & 1) << 2; /* old bit 0 becomes bit 2 */

    return upper | (lower >> 1) | wrapped;
}
74
/* Operates on a writable INT16 block and returns nothing; presumably
   reorders the coefficients according to block_permute_op -- confirm in the
   implementation. */
75 void block_permute(INT16 *block);
76
77 #if defined(HAVE_MMX)
78
/* Bit flags for CPU SIMD capabilities, tested against mm_flags (emms_c in
   this header checks MM_MMX). Note the values are not listed in numeric
   order: MM_MMXEXT is 0x0002 and MM_3DNOW is 0x0004. */
79 #define MM_MMX 0x0001 /* standard MMX */
80 #define MM_3DNOW 0x0004 /* AMD 3DNOW */
81 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
82 #define MM_SSE 0x0008 /* SSE functions */
83 #define MM_SSE2 0x0010 /* PIV SSE2 functions */
84
/* Bitmask of the MM_* flags above, defined in another translation unit. */
85 extern int mm_flags;
86
/* Returns an int; presumably the detected MM_* capability mask that is
   stored in mm_flags -- confirm in the implementation. */
87 int mm_support(void);
88
/*
 * Issue the x86 `emms` instruction.
 *
 * The asm statement is volatile and carries a "memory" clobber, so the
 * compiler neither drops it nor moves memory accesses across it.
 */
static inline void emms(void)
{
    __asm__ __volatile__ ("emms;" : : : "memory");
}
93
/*
 * emms_c(): call emms() only when mm_flags reports MMX support (MM_MMX).
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement. With a bare { } block, `if (x) emms_c(); else ...`
 * fails to compile because the `;` after the closing brace ends the `if`
 * before the `else`. Use it as `emms_c();` with the trailing semicolon.
 */
#define emms_c() \
    do {\
        if (mm_flags & MM_MMX)\
            emms();\
    } while (0)
99
/* Declaration attribute requesting 8-byte alignment (GCC attribute syntax)
   in the HAVE_MMX configuration. */
100 #define __align8 __attribute__ ((aligned (8)))
101
/* MMX-specific initialisation hook, no arguments and no result; presumably
   installs the MMX routines into the function pointers of this header --
   confirm in the implementation. */
102 void dsputil_init_mmx(void);
103
104 #elif defined(ARCH_ARMV4L)
105
/* Expands to nothing on this target: emms() is only provided in the
   HAVE_MMX branch. */
106 #define emms_c()
107
108 /* This is to use 4-byte reads through the IDCT pointers for some 'zero'
109 line optimizations */
/* NOTE(review): despite its name, __align8 requests only 4-byte alignment
   in this branch. */
110 #define __align8 __attribute__ ((aligned (4)))
111
/* ARMv4L-specific initialisation hook, no arguments and no result;
   presumably installs the ARM routines into the function pointers of this
   header -- confirm in the implementation. */
112 void dsputil_init_armv4l(void);
113
114 #else
115
/* Generic fallback (neither HAVE_MMX nor ARCH_ARMV4L): emms_c() expands to
   nothing, so `emms_c();` is an empty statement. */
116 #define emms_c()
117
/* Generic fallback: no alignment attribute is applied. */
118 #define __align8
119
120 #endif
121
122 #endif