aacps: Adjust some const qualifiers to suppress warnings
[libav.git] / libavcodec / dsputil.h
CommitLineData
ff4ec49e
FB
1/*
2 * DSP utils
406792e7 3 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
ff4ec49e 5 *
2912e87a 6 * This file is part of Libav.
b78e7197 7 *
2912e87a 8 * Libav is free software; you can redistribute it and/or
ff4ec49e
FB
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
b78e7197 11 * version 2.1 of the License, or (at your option) any later version.
ff4ec49e 12 *
2912e87a 13 * Libav is distributed in the hope that it will be useful,
ff4ec49e
FB
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
2912e87a 19 * License along with Libav; if not, write to the Free Software
5509bffa 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
ff4ec49e 21 */
24641185
MN
22
23/**
ba87f080 24 * @file
983e3246 25 * DSP utils.
5755c27f
MN
26 * Note: many functions in here may use MMX, which trashes the FPU state; it is
27 * absolutely necessary to call emms_c() between DSP and float/double code.
24641185
MN
28 */
29
98790382
SS
30#ifndef AVCODEC_DSPUTIL_H
31#define AVCODEC_DSPUTIL_H
de6d9b64 32
6a5d31ac 33#include "libavutil/intreadwrite.h"
43f1708f 34#include "avcodec.h"
68d8238c 35#include "rnd_avg.h"
de6d9b64 36
e0eac44e 37/* encoding scans */
0c1a9eda
ZK
/* 64-entry byte tables, declared here and defined elsewhere.
 * Going by the names: alternate horizontal/vertical scans, the zigzag scan,
 * and a "248" zigzag variant -- presumably paired with the fdct248 transform
 * in DSPContext; confirm against the definitions. */
38extern const uint8_t ff_alternate_horizontal_scan[64];
39extern const uint8_t ff_alternate_vertical_scan[64];
40extern const uint8_t ff_zigzag_direct[64];
10acc479 41extern const uint8_t ff_zigzag248_direct[64];
5a240838 42
de6d9b64 43/* pixel operations */
/* Used below to dimension ff_cropTbl. */
f2e92ef2 44#define MAX_NEG_CROP 1024
de6d9b64
FB
45
46/* temporary */
/* 512-entry, non-const table defined elsewhere; presumably a table of squares,
 * going by the name -- confirm against the definition. */
1d503957 47extern uint32_t ff_squareTbl[512];
/* 256 + 2 * MAX_NEG_CROP = 2304 entries. Presumably a clamp-to-8-bit lookup
 * with MAX_NEG_CROP guard entries on each side -- confirm against the definition. */
d2ec6ea6 48extern const uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64 49
6d25c9db
RB
/* "put" and "avg" routines for 8x8 and 16x16 blocks (sizes taken from the
 * names). All four have the same (dst, src, stride) parameter list as
 * qpel_mc_func. The definitions are not in this header. */
50void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
51void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
52void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
53void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
3d1b1caa 54
d241f51e 55/* RV40 functions */
a8b60158
LB
/* put/avg variants of the "mc33" quarter-pel case for 16- and 8-wide blocks
 * (by name). Same (dst, src, stride) parameter list as qpel_mc_func.
 * NOTE(review): presumably the (3/4, 3/4) sub-pel position -- confirm. */
56void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
57void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
58void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
59void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
d241f51e 60
703c8195
LM
/* Parameter list is identical to the DSPContext.gmc function pointer
 * ("global motion compensation"). Presumably the plain-C implementation,
 * going by the _c suffix -- confirm. Note that stride is an int here,
 * unlike the ptrdiff_t stride of the put/avg prototypes in this header. */
61void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
62 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
54009d42 63
b7c27ee6 64/* minimum alignment rules ;)
7ce68923
DB
65If you notice errors in the alignment rules, need more alignment for some ASM code
66for some CPU, or need to use a function with less aligned data, then send a mail
21de9204 67to the libav-devel mailing list.
7ce68923
DB
68
69!warning These alignments might not match reality (an attribute((align)) may be
70missing somewhere).
2cab6401 71I (Michael) did not check them; these are just the alignments which I think
7ce68923 72could be reached easily ...
de6d9b64 73
b7c27ee6
MN
74!future video codecs might need functions with less strict alignment
75*/
76
de6d9b64 77/* add and put pixel (decoding) */
b7c27ee6 78// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
be73d76b 79//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
/* Third-pel motion compensation routine (by name). block must be aligned to
 * the block width (8 or 16); pixels has no alignment requirement.
 * w and h are presumably the block width and height -- confirm. */
669ac79c 80typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
/* Quarter-pel motion compensation routine (by name). dst must be aligned to
 * the block width (8 or 16); src has no alignment requirement. */
a8b60158 81typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);
b3184779 82
342c7dfd
KS
/* Block fill routine: takes the byte value to store. Presumably fills h rows
 * of block spaced line_size bytes apart -- confirm against an implementation. */
83typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
84
/* For a given name, declares three prototypes: ff_put_<name>,
 * ff_put_no_rnd_<name> and ff_avg_<name>. Each has the same
 * (dst, src, stride) parameter list as qpel_mc_func. */
db794953 85#define DEF_OLD_QPEL(name)\
a8b60158
LB
86void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);\
87void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);\
88void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);
db794953
MN
89
/* Declare the "old" C variants for sizes 16 and 8 at positions
 * mc11, mc31, mc12, mc32, mc13 and mc33 (36 prototypes in total). */
90DEF_OLD_QPEL(qpel16_mc11_old_c)
91DEF_OLD_QPEL(qpel16_mc31_old_c)
92DEF_OLD_QPEL(qpel16_mc12_old_c)
93DEF_OLD_QPEL(qpel16_mc32_old_c)
94DEF_OLD_QPEL(qpel16_mc13_old_c)
95DEF_OLD_QPEL(qpel16_mc33_old_c)
96DEF_OLD_QPEL(qpel8_mc11_old_c)
97DEF_OLD_QPEL(qpel8_mc31_old_c)
98DEF_OLD_QPEL(qpel8_mc12_old_c)
99DEF_OLD_QPEL(qpel8_mc32_old_c)
100DEF_OLD_QPEL(qpel8_mc13_old_c)
101DEF_OLD_QPEL(qpel8_mc33_old_c)
b3184779 102
de6d9b64 103/* motion estimation */
be73d76b 104// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
755bfeab 105// although currently h<4 is not used as functions with width <8 are neither used nor implemented
/* Block comparison function used by motion estimation; returns an int score
 * for blk1 versus blk2 over h rows. s is passed as void * but is annotated as
 * an MpegEncContext. blk1 must be aligned to the block width (8 or 16); blk2
 * has no alignment requirement. The trailing __attribute__((const)) is
 * commented out and therefore not applied. */
bb198e19 106typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
1457ab52 107
24641185 108/**
4c79b95c
AJ
109 * Scantable.
110 */
111typedef struct ScanTable{
112 const uint8_t *scantable; /* pointer to a read-only scan table; presumably the src_scantable passed to ff_init_scantable() -- confirm */
113 uint8_t permutated[64]; /* NOTE(review): presumably scantable[] with the IDCT permutation applied -- confirm in ff_init_scantable() */
114 uint8_t raster_end[64]; /* NOTE(review): meaning not visible in this header; see ff_init_scantable() */
4c79b95c
AJ
115} ScanTable;
116
/* Sets up *st from src_scantable. The unnamed first parameter is presumably
 * the IDCT permutation table (cf. DSPContext.idct_permutation) -- confirm
 * against the definition. */
117void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
92fb52d9
RB
/* Takes a permutation buffer and a permutation type. Presumably fills
 * idct_permutation according to idct_permutation_type, which is presumably
 * one of the FF_*_IDCT_PERM values defined inside DSPContext -- confirm. */
118void ff_init_scantable_permutation(uint8_t *idct_permutation,
119 int idct_permutation_type);
4c79b95c
AJ
120
121/**
24641185
MN
122 * DSPContext.
123 */
eb4b3dd3
ZK
124typedef struct DSPContext {
/* NOTE(review): the members are presumably filled in by ff_dsputil_init() and
 * the per-architecture ff_dsputil_init_*() functions declared in this header
 * -- confirm. The alignment annotations on parameters are in bytes. */
125 /* pixel ops : interface with DCT */
88bd7fdc
DB
126 void (*get_pixels)(int16_t *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
127 void (*diff_pixels)(int16_t *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
128 void (*put_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
129 void (*put_signed_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
130 void (*add_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
131 void (*add_pixels8)(uint8_t *pixels, int16_t *block, int line_size);
88bd7fdc 132 int (*sum_abs_dctelem)(int16_t *block/*align 16*/);
d518aebd
MN
133 /**
134 * translational global motion compensation.
135 */
0c1a9eda 136 void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
d518aebd
MN
137 /**
138 * global motion compensation.
139 */
0c1a9eda 140 void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
bb270c08 141 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
88bd7fdc
DB
142 void (*clear_block)(int16_t *block/*align 16*/);
143 void (*clear_blocks)(int16_t *blocks/*align 16*/);
0c1a9eda
ZK
144 int (*pix_sum)(uint8_t * pix, int line_size);
145 int (*pix_norm1)(uint8_t * pix, int line_size);
bb198e19 146// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
115329f1 147

/* Comparison function tables, six entries each. NOTE(review): the index is
 * presumably a block-size selector -- confirm against ff_set_cmp() and the
 * initializers. */
3899eb2f
RS
148 me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
149 me_cmp_func sse[6];
150 me_cmp_func hadamard8_diff[6];
151 me_cmp_func dct_sad[6];
152 me_cmp_func quant_psnr[6];
153 me_cmp_func bit[6];
154 me_cmp_func rd[6];
155 me_cmp_func vsad[6];
156 me_cmp_func vsse[6];
157 me_cmp_func nsse[6];
3899eb2f
RS
158 me_cmp_func dct_max[6];
159 me_cmp_func dct264_sad[6];
160
161 me_cmp_func me_pre_cmp[6];
162 me_cmp_func me_cmp[6];
163 me_cmp_func me_sub_cmp[6];
164 me_cmp_func mb_cmp[6];
165 me_cmp_func ildct_cmp[6]; //only width 16 used
166 me_cmp_func frame_skip_cmp[6]; //only width 8 used
eb4b3dd3 167
a00177a9
MR
168 int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
169 int size);
59006372 170
d518aebd 171 /**
669ac79c 172 * Thirdpel motion compensation with rounding (a+b+1)>>1.
30f15053
LB
173 * this is an array[11] of motion compensation functions for the 9 thirdpel
174 * positions; with the indexing below, slots 3 and 7 are unused<br>
669ac79c
MN
175 * *pixels_tab[ xthirdpel + 4*ythirdpel ]
176 * @param block destination where the result is stored
177 * @param pixels source
178 * @param line_size number of bytes in a horizontal line of block
 * @param w width
179 * @param h height
180 */
181 tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
da3b9756
MM
182 tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
183
/* Quarter-pel tables. NOTE(review): presumably [0] is the 16-wide and [1] the
 * 8-wide variant, with the second index being the sub-pel position -- confirm. */
eb4b3dd3
ZK
184 qpel_mc_func put_qpel_pixels_tab[2][16];
185 qpel_mc_func avg_qpel_pixels_tab[2][16];
186 qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
1457ab52 187 qpel_mc_func put_mspel_pixels_tab[8];
115329f1 188
bb198e19 189 me_cmp_func pix_abs[2][4];
115329f1 190
11f18faf 191 /* huffyuv specific */
11f18faf 192 void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
1457ab52 193 void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
84705403
MN
194 /**
195 * subtract huffyuv's variant of median prediction
196 * note, this might read from src1[-1], src2[-1]
197 */
e17ccf60
LM
198 void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top);
199 void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
2f77923d 200 int (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left);
f267d3ac 201 void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha);
96711ecf 202 void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
381d37fd 203 void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
42251a2a 204
911e21a3 205 /* assume len is a multiple of 8, and arrays are 16-byte aligned */
/* NOTE(review): the comment above says len is a multiple of 8, but the
 * parameter annotation below says "align 16" for len -- confirm which holds. */
50e23ae9 206 void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
eb4825b5 207
b0368839 208 /* (I)DCT */
88bd7fdc
DB
209 void (*fdct)(int16_t *block/* align 16*/);
210 void (*fdct248)(int16_t *block/* align 16*/);
115329f1 211
4fb518c3 212 /* IDCT really*/
88bd7fdc 213 void (*idct)(int16_t *block/* align 16*/);
115329f1 214
24641185 215 /**
77c92c2d 216 * block -> idct -> clip to unsigned 8 bit -> dest.
24641185 217 * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
9846cbdb 218 * @param line_size size in bytes of a horizontal line of dest
24641185 219 */
88bd7fdc 220 void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, int16_t *block/*align 16*/);
115329f1 221
24641185
MN
222 /**
223 * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
9846cbdb 224 * @param line_size size in bytes of a horizontal line of dest
24641185 225 */
88bd7fdc 226 void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, int16_t *block/*align 16*/);
115329f1 227
24641185 228 /**
77c92c2d 229 * idct input permutation.
05493021
MN
230 * Several optimized IDCTs need a permutated input (relative to the normal order of the reference
231 * IDCT).
232 * This permutation must be performed before the idct_put/add; note that normally this can be merged
233 * with the zigzag/alternate scan.<br>
24641185
MN
234 * an example to avoid confusion:
235 * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
da9cea77
DB
236 * - (x -> reference dct -> reference idct -> x)
237 * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
24641185
MN
238 * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
239 */
b0368839
MN
240 uint8_t idct_permutation[64];
241 int idct_permutation_type;
/* NOTE(review): presumably the valid values of idct_permutation_type -- confirm. */
242#define FF_NO_IDCT_PERM 1
243#define FF_LIBMPEG2_IDCT_PERM 2
244#define FF_SIMPLE_IDCT_PERM 3
245#define FF_TRANSPOSE_IDCT_PERM 4
5773a746 246#define FF_PARTTRANS_IDCT_PERM 5
0e956ba2 247#define FF_SSE2_IDCT_PERM 6
b0368839 248
364a1797
MN
249 int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
250 void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
251#define BASIS_SHIFT 16
252#define RECON_SHIFT 6
115329f1 253
c90b9442 254 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides);
cbcd6c8c 255#define EDGE_WIDTH 16
/* NOTE(review): EDGE_TOP and EDGE_BOTTOM are distinct bits, presumably OR-ed
 * together into the sides argument of draw_edges -- confirm. */
1500be13
AS
256#define EDGE_TOP 1
257#define EDGE_BOTTOM 2
5a6a9e78 258
54009d42 259 void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
64db55ae 260
88c0536a
KS
261 /**
262 * Calculate scalar product of two vectors.
bb68f8a2 263 * @param len length of vectors, should be multiple of 16
88c0536a 264 */
7e1ce6a6 265 int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len);
b1159ad9
LM
266 /* ape functions */
267 /**
268 * Calculate scalar product of v1 and v2,
269 * and v1[i] += v3[i] * mul
270 * @param len length of vectors, should be multiple of 16
271 */
b3858964 272 int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, const int16_t *v2, const int16_t *v3, int len, int mul);
2d8a0815 273
e6e98234
JR
274 /**
275 * Apply symmetric window in 16-bit fixed-point.
276 * @param output destination array
277 * constraints: 16-byte aligned
278 * @param input source array
279 * constraints: 16-byte aligned
280 * @param window window array
281 * constraints: 16-byte aligned, at least len/2 elements
282 * @param len full window length
283 * constraints: multiple of ? greater than zero
 * (TODO: the required multiple is not specified here;
 * check the implementations)
284 */
285 void (*apply_window_int16)(int16_t *output, const int16_t *input,
286 const int16_t *window, unsigned int len);
287
6054cd25
JR
288 /**
289 * Clip each element in an array of int32_t to a given minimum and maximum value.
290 * @param dst destination array
291 * constraints: 16-byte aligned
292 * @param src source array
293 * constraints: 16-byte aligned
294 * @param min minimum value
09f21198 295 * constraints: must be in the range [-(1 << 24), 1 << 24]
6054cd25 296 * @param max maximum value
09f21198 297 * constraints: must be in the range [-(1 << 24), 1 << 24]
6054cd25
JR
298 * @param len number of elements in the array
299 * constraints: multiple of 32 greater than zero
300 */
301 void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
302 int32_t max, unsigned int len);
303
/* NOTE(review): two fill routines, presumably one per block width
 * (op_fill_func documents widths 8 and 16) -- confirm which index is which. */
342c7dfd 304 op_fill_func fill_block_tab[2];
eb4b3dd3
ZK
305} DSPContext;
306
9cf0841e
MS
/* Initialization entry points; definitions are not in this header.
 * NOTE(review): going by the names, ff_dsputil_static_init() presumably sets
 * up the shared tables and ff_dsputil_init() presumably fills in *p for the
 * given codec context -- confirm. */
307void ff_dsputil_static_init(void);
308void ff_dsputil_init(DSPContext* p, AVCodecContext *avctx);
de6d9b64 309
6dc7d5da
MN
/* Takes no arguments and returns an int; the meaning of the return value is
 * not visible here. */
310int ff_check_alignment(void);
311
622348f9
MN
/* NOTE(review): presumably fills cmp[] with the comparison functions from c
 * selected by type -- confirm against the definition. */
312void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
313
9cf0841e
MS
/* Per-architecture initializers; each takes the same
 * (DSPContext *, AVCodecContext *) pair as ff_dsputil_init().
 * NOTE(review): presumably they install optimized implementations -- confirm. */
314void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
315void ff_dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
9cf0841e
MS
316void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
317void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
318void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
a64f6a04 319void ff_dsputil_init_x86(DSPContext* c, AVCodecContext *avctx);
a1bee080 320
98790382 321#endif /* AVCODEC_DSPUTIL_H */