Commit | Line | Data |
---|---|---|
e7a972e1 MR |
1 | /* |
2 | * Simple IDCT | |
3 | * | |
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 | * | |
6 | * This file is part of Libav. | |
7 | * | |
8 | * Libav is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * Libav is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with Libav; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | /** | |
24 | * @file | |
25 | * simpleidct in C. | |
26 | */ | |
27 | ||
28 | /* | |
29 | based upon some commented-out C code from mpeg2dec (idct_mmx.c | |
30 | written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
31 | */ | |
32 | ||
33 | #include "bit_depth_template.c" | |
34 | ||
35 | #undef W1 | |
36 | #undef W2 | |
37 | #undef W3 | |
38 | #undef W4 | |
39 | #undef W5 | |
40 | #undef W6 | |
41 | #undef W7 | |
42 | #undef ROW_SHIFT | |
43 | #undef COL_SHIFT | |
44 | #undef DC_SHIFT | |
45 | #undef MUL | |
46 | #undef MAC | |
47 | ||
48 | #if BIT_DEPTH == 8 | |
49 | ||
50 | #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
51 | #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
52 | #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
53 | #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
54 | #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
55 | #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
56 | #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
57 | ||
58 | #define ROW_SHIFT 11 | |
59 | #define COL_SHIFT 20 | |
60 | #define DC_SHIFT 3 | |
61 | ||
62 | #define MUL(a, b) MUL16(a, b) | |
63 | #define MAC(a, b, c) MAC16(a, b, c) | |
64 | ||
65 | #elif BIT_DEPTH == 10 | |
66 | ||
67 | #define W1 90901 | |
68 | #define W2 85627 | |
69 | #define W3 77062 | |
70 | #define W4 65535 | |
71 | #define W5 51491 | |
72 | #define W6 35468 | |
73 | #define W7 18081 | |
74 | ||
75 | #define ROW_SHIFT 15 | |
76 | #define COL_SHIFT 20 | |
77 | #define DC_SHIFT 1 | |
78 | ||
79 | #define MUL(a, b) ((a) * (b)) | |
80 | #define MAC(a, b, c) ((a) += (b) * (c)) | |
81 | ||
82 | #else | |
83 | ||
84 | #error "Unsupported bitdepth" | |
85 | ||
86 | #endif | |
87 | ||
88 | static inline void FUNC(idctRowCondDC)(DCTELEM *row) | |
89 | { | |
90 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
91 | ||
92 | #if HAVE_FAST_64BIT | |
93 | #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) | |
94 | if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { | |
95 | uint64_t temp = (row[0] << DC_SHIFT) & 0xffff; | |
96 | temp += temp << 16; | |
97 | temp += temp << 32; | |
98 | ((uint64_t *)row)[0] = temp; | |
99 | ((uint64_t *)row)[1] = temp; | |
100 | return; | |
101 | } | |
102 | #else | |
103 | if (!(((uint32_t*)row)[1] | | |
104 | ((uint32_t*)row)[2] | | |
105 | ((uint32_t*)row)[3] | | |
106 | row[1])) { | |
107 | uint32_t temp = (row[0] << DC_SHIFT) & 0xffff; | |
108 | temp += temp << 16; | |
109 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | |
110 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | |
111 | return; | |
112 | } | |
113 | #endif | |
114 | ||
115 | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); | |
116 | a1 = a0; | |
117 | a2 = a0; | |
118 | a3 = a0; | |
119 | ||
120 | /* no need to optimize : gcc does it */ | |
121 | a0 += W2 * row[2]; | |
122 | a1 += W6 * row[2]; | |
123 | a2 -= W6 * row[2]; | |
124 | a3 -= W2 * row[2]; | |
125 | ||
126 | b0 = MUL(W1, row[1]); | |
127 | MAC(b0, W3, row[3]); | |
128 | b1 = MUL(W3, row[1]); | |
129 | MAC(b1, -W7, row[3]); | |
130 | b2 = MUL(W5, row[1]); | |
131 | MAC(b2, -W1, row[3]); | |
132 | b3 = MUL(W7, row[1]); | |
133 | MAC(b3, -W5, row[3]); | |
134 | ||
135 | if (AV_RN64A(row + 4)) { | |
136 | a0 += W4*row[4] + W6*row[6]; | |
137 | a1 += - W4*row[4] - W2*row[6]; | |
138 | a2 += - W4*row[4] + W2*row[6]; | |
139 | a3 += W4*row[4] - W6*row[6]; | |
140 | ||
141 | MAC(b0, W5, row[5]); | |
142 | MAC(b0, W7, row[7]); | |
143 | ||
144 | MAC(b1, -W1, row[5]); | |
145 | MAC(b1, -W5, row[7]); | |
146 | ||
147 | MAC(b2, W7, row[5]); | |
148 | MAC(b2, W3, row[7]); | |
149 | ||
150 | MAC(b3, W3, row[5]); | |
151 | MAC(b3, -W1, row[7]); | |
152 | } | |
153 | ||
154 | row[0] = (a0 + b0) >> ROW_SHIFT; | |
155 | row[7] = (a0 - b0) >> ROW_SHIFT; | |
156 | row[1] = (a1 + b1) >> ROW_SHIFT; | |
157 | row[6] = (a1 - b1) >> ROW_SHIFT; | |
158 | row[2] = (a2 + b2) >> ROW_SHIFT; | |
159 | row[5] = (a2 - b2) >> ROW_SHIFT; | |
160 | row[3] = (a3 + b3) >> ROW_SHIFT; | |
161 | row[4] = (a3 - b3) >> ROW_SHIFT; | |
162 | } | |
163 | ||
164 | static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, | |
165 | DCTELEM *col) | |
166 | { | |
167 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
168 | INIT_CLIP; | |
169 | ||
170 | /* XXX: I did that only to give same values as previous code */ | |
171 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
172 | a1 = a0; | |
173 | a2 = a0; | |
174 | a3 = a0; | |
175 | ||
176 | a0 += + W2*col[8*2]; | |
177 | a1 += + W6*col[8*2]; | |
178 | a2 += - W6*col[8*2]; | |
179 | a3 += - W2*col[8*2]; | |
180 | ||
181 | b0 = MUL(W1, col[8*1]); | |
182 | b1 = MUL(W3, col[8*1]); | |
183 | b2 = MUL(W5, col[8*1]); | |
184 | b3 = MUL(W7, col[8*1]); | |
185 | ||
186 | MAC(b0, + W3, col[8*3]); | |
187 | MAC(b1, - W7, col[8*3]); | |
188 | MAC(b2, - W1, col[8*3]); | |
189 | MAC(b3, - W5, col[8*3]); | |
190 | ||
191 | if(col[8*4]){ | |
192 | a0 += + W4*col[8*4]; | |
193 | a1 += - W4*col[8*4]; | |
194 | a2 += - W4*col[8*4]; | |
195 | a3 += + W4*col[8*4]; | |
196 | } | |
197 | ||
198 | if (col[8*5]) { | |
199 | MAC(b0, + W5, col[8*5]); | |
200 | MAC(b1, - W1, col[8*5]); | |
201 | MAC(b2, + W7, col[8*5]); | |
202 | MAC(b3, + W3, col[8*5]); | |
203 | } | |
204 | ||
205 | if(col[8*6]){ | |
206 | a0 += + W6*col[8*6]; | |
207 | a1 += - W2*col[8*6]; | |
208 | a2 += + W2*col[8*6]; | |
209 | a3 += - W6*col[8*6]; | |
210 | } | |
211 | ||
212 | if (col[8*7]) { | |
213 | MAC(b0, + W7, col[8*7]); | |
214 | MAC(b1, - W5, col[8*7]); | |
215 | MAC(b2, + W3, col[8*7]); | |
216 | MAC(b3, - W1, col[8*7]); | |
217 | } | |
218 | ||
219 | dest[0] = CLIP((a0 + b0) >> COL_SHIFT); | |
220 | dest += line_size; | |
221 | dest[0] = CLIP((a1 + b1) >> COL_SHIFT); | |
222 | dest += line_size; | |
223 | dest[0] = CLIP((a2 + b2) >> COL_SHIFT); | |
224 | dest += line_size; | |
225 | dest[0] = CLIP((a3 + b3) >> COL_SHIFT); | |
226 | dest += line_size; | |
227 | dest[0] = CLIP((a3 - b3) >> COL_SHIFT); | |
228 | dest += line_size; | |
229 | dest[0] = CLIP((a2 - b2) >> COL_SHIFT); | |
230 | dest += line_size; | |
231 | dest[0] = CLIP((a1 - b1) >> COL_SHIFT); | |
232 | dest += line_size; | |
233 | dest[0] = CLIP((a0 - b0) >> COL_SHIFT); | |
234 | } | |
235 | ||
236 | static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, | |
237 | DCTELEM *col) | |
238 | { | |
239 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
240 | INIT_CLIP; | |
241 | ||
242 | /* XXX: I did that only to give same values as previous code */ | |
243 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
244 | a1 = a0; | |
245 | a2 = a0; | |
246 | a3 = a0; | |
247 | ||
248 | a0 += + W2*col[8*2]; | |
249 | a1 += + W6*col[8*2]; | |
250 | a2 += - W6*col[8*2]; | |
251 | a3 += - W2*col[8*2]; | |
252 | ||
253 | b0 = MUL(W1, col[8*1]); | |
254 | b1 = MUL(W3, col[8*1]); | |
255 | b2 = MUL(W5, col[8*1]); | |
256 | b3 = MUL(W7, col[8*1]); | |
257 | ||
258 | MAC(b0, + W3, col[8*3]); | |
259 | MAC(b1, - W7, col[8*3]); | |
260 | MAC(b2, - W1, col[8*3]); | |
261 | MAC(b3, - W5, col[8*3]); | |
262 | ||
263 | if(col[8*4]){ | |
264 | a0 += + W4*col[8*4]; | |
265 | a1 += - W4*col[8*4]; | |
266 | a2 += - W4*col[8*4]; | |
267 | a3 += + W4*col[8*4]; | |
268 | } | |
269 | ||
270 | if (col[8*5]) { | |
271 | MAC(b0, + W5, col[8*5]); | |
272 | MAC(b1, - W1, col[8*5]); | |
273 | MAC(b2, + W7, col[8*5]); | |
274 | MAC(b3, + W3, col[8*5]); | |
275 | } | |
276 | ||
277 | if(col[8*6]){ | |
278 | a0 += + W6*col[8*6]; | |
279 | a1 += - W2*col[8*6]; | |
280 | a2 += + W2*col[8*6]; | |
281 | a3 += - W6*col[8*6]; | |
282 | } | |
283 | ||
284 | if (col[8*7]) { | |
285 | MAC(b0, + W7, col[8*7]); | |
286 | MAC(b1, - W5, col[8*7]); | |
287 | MAC(b2, + W3, col[8*7]); | |
288 | MAC(b3, - W1, col[8*7]); | |
289 | } | |
290 | ||
291 | dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); | |
292 | dest += line_size; | |
293 | dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); | |
294 | dest += line_size; | |
295 | dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); | |
296 | dest += line_size; | |
297 | dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); | |
298 | dest += line_size; | |
299 | dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); | |
300 | dest += line_size; | |
301 | dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); | |
302 | dest += line_size; | |
303 | dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); | |
304 | dest += line_size; | |
305 | dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); | |
306 | } | |
307 | ||
308 | static inline void FUNC(idctSparseCol)(DCTELEM *col) | |
309 | { | |
310 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
311 | ||
312 | /* XXX: I did that only to give same values as previous code */ | |
313 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
314 | a1 = a0; | |
315 | a2 = a0; | |
316 | a3 = a0; | |
317 | ||
318 | a0 += + W2*col[8*2]; | |
319 | a1 += + W6*col[8*2]; | |
320 | a2 += - W6*col[8*2]; | |
321 | a3 += - W2*col[8*2]; | |
322 | ||
323 | b0 = MUL(W1, col[8*1]); | |
324 | b1 = MUL(W3, col[8*1]); | |
325 | b2 = MUL(W5, col[8*1]); | |
326 | b3 = MUL(W7, col[8*1]); | |
327 | ||
328 | MAC(b0, + W3, col[8*3]); | |
329 | MAC(b1, - W7, col[8*3]); | |
330 | MAC(b2, - W1, col[8*3]); | |
331 | MAC(b3, - W5, col[8*3]); | |
332 | ||
333 | if(col[8*4]){ | |
334 | a0 += + W4*col[8*4]; | |
335 | a1 += - W4*col[8*4]; | |
336 | a2 += - W4*col[8*4]; | |
337 | a3 += + W4*col[8*4]; | |
338 | } | |
339 | ||
340 | if (col[8*5]) { | |
341 | MAC(b0, + W5, col[8*5]); | |
342 | MAC(b1, - W1, col[8*5]); | |
343 | MAC(b2, + W7, col[8*5]); | |
344 | MAC(b3, + W3, col[8*5]); | |
345 | } | |
346 | ||
347 | if(col[8*6]){ | |
348 | a0 += + W6*col[8*6]; | |
349 | a1 += - W2*col[8*6]; | |
350 | a2 += + W2*col[8*6]; | |
351 | a3 += - W6*col[8*6]; | |
352 | } | |
353 | ||
354 | if (col[8*7]) { | |
355 | MAC(b0, + W7, col[8*7]); | |
356 | MAC(b1, - W5, col[8*7]); | |
357 | MAC(b2, + W3, col[8*7]); | |
358 | MAC(b3, - W1, col[8*7]); | |
359 | } | |
360 | ||
361 | col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
362 | col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
363 | col[16] = ((a2 + b2) >> COL_SHIFT); | |
364 | col[24] = ((a3 + b3) >> COL_SHIFT); | |
365 | col[32] = ((a3 - b3) >> COL_SHIFT); | |
366 | col[40] = ((a2 - b2) >> COL_SHIFT); | |
367 | col[48] = ((a1 - b1) >> COL_SHIFT); | |
368 | col[56] = ((a0 - b0) >> COL_SHIFT); | |
369 | } | |
370 | ||
371 | void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block) | |
372 | { | |
373 | pixel *dest = (pixel *)dest_; | |
374 | int i; | |
e7bcc5ba MR |
375 | |
376 | line_size /= sizeof(pixel); | |
377 | ||
e7a972e1 MR |
378 | for(i=0; i<8; i++) |
379 | FUNC(idctRowCondDC)(block + i*8); | |
380 | ||
381 | for(i=0; i<8; i++) | |
382 | FUNC(idctSparseColPut)(dest + i, line_size, block + i); | |
383 | } | |
384 | ||
385 | void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block) | |
386 | { | |
387 | pixel *dest = (pixel *)dest_; | |
388 | int i; | |
e7bcc5ba MR |
389 | |
390 | line_size /= sizeof(pixel); | |
391 | ||
e7a972e1 MR |
392 | for(i=0; i<8; i++) |
393 | FUNC(idctRowCondDC)(block + i*8); | |
394 | ||
395 | for(i=0; i<8; i++) | |
396 | FUNC(idctSparseColAdd)(dest + i, line_size, block + i); | |
397 | } | |
398 | ||
399 | void FUNC(ff_simple_idct)(DCTELEM *block) | |
400 | { | |
401 | int i; | |
402 | for(i=0; i<8; i++) | |
403 | FUNC(idctRowCondDC)(block + i*8); | |
404 | ||
405 | for(i=0; i<8; i++) | |
406 | FUNC(idctSparseCol)(block + i); | |
407 | } |