Commit | Line | Data |
---|---|---|
e7a972e1 MR |
1 | /* |
2 | * Simple IDCT | |
3 | * | |
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 | * | |
6 | * This file is part of Libav. | |
7 | * | |
8 | * Libav is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * Libav is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with Libav; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | /** | |
24 | * @file | |
25 | * simpleidct in C. | |
26 | */ | |
27 | ||
28 | /* | |
29 | based upon some commented-out C code from mpeg2dec (idct_mmx.c | |
30 | written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
31 | */ | |
32 | ||
33 | #include "bit_depth_template.c" | |
34 | ||
35 | #undef W1 | |
36 | #undef W2 | |
37 | #undef W3 | |
38 | #undef W4 | |
39 | #undef W5 | |
40 | #undef W6 | |
41 | #undef W7 | |
42 | #undef ROW_SHIFT | |
43 | #undef COL_SHIFT | |
44 | #undef DC_SHIFT | |
45 | #undef MUL | |
46 | #undef MAC | |
47 | ||
48 | #if BIT_DEPTH == 8 | |
49 | ||
50 | #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
51 | #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
52 | #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
53 | #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
54 | #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
55 | #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
56 | #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
57 | ||
58 | #define ROW_SHIFT 11 | |
59 | #define COL_SHIFT 20 | |
60 | #define DC_SHIFT 3 | |
61 | ||
62 | #define MUL(a, b) MUL16(a, b) | |
63 | #define MAC(a, b, c) MAC16(a, b, c) | |
64 | ||
65 | #elif BIT_DEPTH == 10 | |
66 | ||
67 | #define W1 90901 | |
68 | #define W2 85627 | |
69 | #define W3 77062 | |
70 | #define W4 65535 | |
71 | #define W5 51491 | |
72 | #define W6 35468 | |
73 | #define W7 18081 | |
74 | ||
75 | #define ROW_SHIFT 15 | |
76 | #define COL_SHIFT 20 | |
77 | #define DC_SHIFT 1 | |
78 | ||
79 | #define MUL(a, b) ((a) * (b)) | |
80 | #define MAC(a, b, c) ((a) += (b) * (c)) | |
81 | ||
82 | #else | |
83 | ||
84 | #error "Unsupported bitdepth" | |
85 | ||
86 | #endif | |
87 | ||
88 | static inline void FUNC(idctRowCondDC)(DCTELEM *row) | |
89 | { | |
90 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
91 | ||
92 | #if HAVE_FAST_64BIT | |
93 | #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) | |
94 | if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { | |
95 | uint64_t temp = (row[0] << DC_SHIFT) & 0xffff; | |
96 | temp += temp << 16; | |
97 | temp += temp << 32; | |
98 | ((uint64_t *)row)[0] = temp; | |
99 | ((uint64_t *)row)[1] = temp; | |
100 | return; | |
101 | } | |
102 | #else | |
103 | if (!(((uint32_t*)row)[1] | | |
104 | ((uint32_t*)row)[2] | | |
105 | ((uint32_t*)row)[3] | | |
106 | row[1])) { | |
107 | uint32_t temp = (row[0] << DC_SHIFT) & 0xffff; | |
108 | temp += temp << 16; | |
109 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | |
110 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | |
111 | return; | |
112 | } | |
113 | #endif | |
114 | ||
115 | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); | |
116 | a1 = a0; | |
117 | a2 = a0; | |
118 | a3 = a0; | |
119 | ||
120 | /* no need to optimize : gcc does it */ | |
121 | a0 += W2 * row[2]; | |
122 | a1 += W6 * row[2]; | |
123 | a2 -= W6 * row[2]; | |
124 | a3 -= W2 * row[2]; | |
125 | ||
126 | b0 = MUL(W1, row[1]); | |
127 | MAC(b0, W3, row[3]); | |
128 | b1 = MUL(W3, row[1]); | |
129 | MAC(b1, -W7, row[3]); | |
130 | b2 = MUL(W5, row[1]); | |
131 | MAC(b2, -W1, row[3]); | |
132 | b3 = MUL(W7, row[1]); | |
133 | MAC(b3, -W5, row[3]); | |
134 | ||
135 | if (AV_RN64A(row + 4)) { | |
136 | a0 += W4*row[4] + W6*row[6]; | |
137 | a1 += - W4*row[4] - W2*row[6]; | |
138 | a2 += - W4*row[4] + W2*row[6]; | |
139 | a3 += W4*row[4] - W6*row[6]; | |
140 | ||
141 | MAC(b0, W5, row[5]); | |
142 | MAC(b0, W7, row[7]); | |
143 | ||
144 | MAC(b1, -W1, row[5]); | |
145 | MAC(b1, -W5, row[7]); | |
146 | ||
147 | MAC(b2, W7, row[5]); | |
148 | MAC(b2, W3, row[7]); | |
149 | ||
150 | MAC(b3, W3, row[5]); | |
151 | MAC(b3, -W1, row[7]); | |
152 | } | |
153 | ||
154 | row[0] = (a0 + b0) >> ROW_SHIFT; | |
155 | row[7] = (a0 - b0) >> ROW_SHIFT; | |
156 | row[1] = (a1 + b1) >> ROW_SHIFT; | |
157 | row[6] = (a1 - b1) >> ROW_SHIFT; | |
158 | row[2] = (a2 + b2) >> ROW_SHIFT; | |
159 | row[5] = (a2 - b2) >> ROW_SHIFT; | |
160 | row[3] = (a3 + b3) >> ROW_SHIFT; | |
161 | row[4] = (a3 - b3) >> ROW_SHIFT; | |
162 | } | |
163 | ||
a402f109 MR |
/*
 * Shared arithmetic of the column pass.
 *
 * Expects `col` (column base, elements 8 apart) and the int locals
 * a0..a3 / b0..b3 to be in scope where it is expanded.  On exit a0..a3
 * hold the contribution of elements 0, 2, 4, 6 and b0..b3 that of
 * elements 1, 3, 5, 7; the caller combines them as (a_k +/- b_k) and
 * shifts by COL_SHIFT.  The rounding bias for that shift is added to the
 * DC term before it is multiplied by W4.  Elements 4..7 are skipped
 * individually when zero.
 */
#define IDCT_COLS do {                                                  \
        /* even part */                                                 \
        a0 = a1 = a2 = a3 =                                             \
            W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));                  \
                                                                        \
        a0 +=  W2*col[8*2];                                             \
        a1 +=  W6*col[8*2];                                             \
        a2 += -W6*col[8*2];                                             \
        a3 += -W2*col[8*2];                                             \
                                                                        \
        if (col[8*4]) {                                                 \
            a0 +=  W4*col[8*4];                                         \
            a1 += -W4*col[8*4];                                         \
            a2 += -W4*col[8*4];                                         \
            a3 +=  W4*col[8*4];                                         \
        }                                                               \
                                                                        \
        if (col[8*6]) {                                                 \
            a0 +=  W6*col[8*6];                                         \
            a1 += -W2*col[8*6];                                         \
            a2 +=  W2*col[8*6];                                         \
            a3 += -W6*col[8*6];                                         \
        }                                                               \
                                                                        \
        /* odd part */                                                  \
        b0 = MUL(W1, col[8*1]);                                         \
        MAC(b0,  W3, col[8*3]);                                         \
        b1 = MUL(W3, col[8*1]);                                         \
        MAC(b1, -W7, col[8*3]);                                         \
        b2 = MUL(W5, col[8*1]);                                         \
        MAC(b2, -W1, col[8*3]);                                         \
        b3 = MUL(W7, col[8*1]);                                         \
        MAC(b3, -W5, col[8*3]);                                         \
                                                                        \
        if (col[8*5]) {                                                 \
            MAC(b0,  W5, col[8*5]);                                     \
            MAC(b1, -W1, col[8*5]);                                     \
            MAC(b2,  W7, col[8*5]);                                     \
            MAC(b3,  W3, col[8*5]);                                     \
        }                                                               \
                                                                        \
        if (col[8*7]) {                                                 \
            MAC(b0,  W7, col[8*7]);                                     \
            MAC(b1, -W5, col[8*7]);                                     \
            MAC(b2,  W3, col[8*7]);                                     \
            MAC(b3, -W1, col[8*7]);                                     \
        }                                                               \
    } while (0)
213 | ||
e7a972e1 MR |
214 | static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, |
215 | DCTELEM *col) | |
216 | { | |
217 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
218 | INIT_CLIP; | |
219 | ||
a402f109 | 220 | IDCT_COLS; |
e7a972e1 MR |
221 | |
222 | dest[0] = CLIP((a0 + b0) >> COL_SHIFT); | |
223 | dest += line_size; | |
224 | dest[0] = CLIP((a1 + b1) >> COL_SHIFT); | |
225 | dest += line_size; | |
226 | dest[0] = CLIP((a2 + b2) >> COL_SHIFT); | |
227 | dest += line_size; | |
228 | dest[0] = CLIP((a3 + b3) >> COL_SHIFT); | |
229 | dest += line_size; | |
230 | dest[0] = CLIP((a3 - b3) >> COL_SHIFT); | |
231 | dest += line_size; | |
232 | dest[0] = CLIP((a2 - b2) >> COL_SHIFT); | |
233 | dest += line_size; | |
234 | dest[0] = CLIP((a1 - b1) >> COL_SHIFT); | |
235 | dest += line_size; | |
236 | dest[0] = CLIP((a0 - b0) >> COL_SHIFT); | |
237 | } | |
238 | ||
239 | static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, | |
240 | DCTELEM *col) | |
241 | { | |
242 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
243 | INIT_CLIP; | |
244 | ||
a402f109 | 245 | IDCT_COLS; |
e7a972e1 MR |
246 | |
247 | dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); | |
248 | dest += line_size; | |
249 | dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); | |
250 | dest += line_size; | |
251 | dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); | |
252 | dest += line_size; | |
253 | dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); | |
254 | dest += line_size; | |
255 | dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); | |
256 | dest += line_size; | |
257 | dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); | |
258 | dest += line_size; | |
259 | dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); | |
260 | dest += line_size; | |
261 | dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); | |
262 | } | |
263 | ||
264 | static inline void FUNC(idctSparseCol)(DCTELEM *col) | |
265 | { | |
266 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
267 | ||
a402f109 | 268 | IDCT_COLS; |
e7a972e1 MR |
269 | |
270 | col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
271 | col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
272 | col[16] = ((a2 + b2) >> COL_SHIFT); | |
273 | col[24] = ((a3 + b3) >> COL_SHIFT); | |
274 | col[32] = ((a3 - b3) >> COL_SHIFT); | |
275 | col[40] = ((a2 - b2) >> COL_SHIFT); | |
276 | col[48] = ((a1 - b1) >> COL_SHIFT); | |
277 | col[56] = ((a0 - b0) >> COL_SHIFT); | |
278 | } | |
279 | ||
280 | void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block) | |
281 | { | |
282 | pixel *dest = (pixel *)dest_; | |
283 | int i; | |
e7bcc5ba MR |
284 | |
285 | line_size /= sizeof(pixel); | |
286 | ||
e7a972e1 MR |
287 | for(i=0; i<8; i++) |
288 | FUNC(idctRowCondDC)(block + i*8); | |
289 | ||
290 | for(i=0; i<8; i++) | |
291 | FUNC(idctSparseColPut)(dest + i, line_size, block + i); | |
292 | } | |
293 | ||
294 | void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block) | |
295 | { | |
296 | pixel *dest = (pixel *)dest_; | |
297 | int i; | |
e7bcc5ba MR |
298 | |
299 | line_size /= sizeof(pixel); | |
300 | ||
e7a972e1 MR |
301 | for(i=0; i<8; i++) |
302 | FUNC(idctRowCondDC)(block + i*8); | |
303 | ||
304 | for(i=0; i<8; i++) | |
305 | FUNC(idctSparseColAdd)(dest + i, line_size, block + i); | |
306 | } | |
307 | ||
308 | void FUNC(ff_simple_idct)(DCTELEM *block) | |
309 | { | |
310 | int i; | |
311 | for(i=0; i<8; i++) | |
312 | FUNC(idctRowCondDC)(block + i*8); | |
313 | ||
314 | for(i=0; i<8; i++) | |
315 | FUNC(idctSparseCol)(block + i); | |
316 | } |