Commit | Line | Data |
---|---|---|
e7a972e1 MR |
1 | /* |
2 | * Simple IDCT | |
3 | * | |
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 | * | |
6 | * This file is part of Libav. | |
7 | * | |
8 | * Libav is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * Libav is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with Libav; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | /** | |
24 | * @file | |
25 | * simpleidct in C. | |
26 | */ | |
27 | ||
28 | /* | |
29 | based upon some commented-out C code from mpeg2dec (idct_mmx.c | |
30 | written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
31 | */ | |
32 | ||
33 | #include "bit_depth_template.c" | |
34 | ||
35 | #undef W1 | |
36 | #undef W2 | |
37 | #undef W3 | |
38 | #undef W4 | |
39 | #undef W5 | |
40 | #undef W6 | |
41 | #undef W7 | |
42 | #undef ROW_SHIFT | |
43 | #undef COL_SHIFT | |
44 | #undef DC_SHIFT | |
45 | #undef MUL | |
46 | #undef MAC | |
47 | ||
48 | #if BIT_DEPTH == 8 | |
49 | ||
50 | #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
51 | #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
52 | #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
53 | #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
54 | #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
55 | #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
56 | #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
57 | ||
58 | #define ROW_SHIFT 11 | |
59 | #define COL_SHIFT 20 | |
60 | #define DC_SHIFT 3 | |
61 | ||
62 | #define MUL(a, b) MUL16(a, b) | |
63 | #define MAC(a, b, c) MAC16(a, b, c) | |
64 | ||
65 | #elif BIT_DEPTH == 10 | |
66 | ||
67 | #define W1 90901 | |
68 | #define W2 85627 | |
69 | #define W3 77062 | |
70 | #define W4 65535 | |
71 | #define W5 51491 | |
72 | #define W6 35468 | |
73 | #define W7 18081 | |
74 | ||
75 | #define ROW_SHIFT 15 | |
76 | #define COL_SHIFT 20 | |
77 | #define DC_SHIFT 1 | |
78 | ||
79 | #define MUL(a, b) ((a) * (b)) | |
80 | #define MAC(a, b, c) ((a) += (b) * (c)) | |
81 | ||
82 | #else | |
83 | ||
84 | #error "Unsupported bitdepth" | |
85 | ||
86 | #endif | |
87 | ||
f78cd0c2 | 88 | static inline void FUNC(idctRowCondDC)(DCTELEM *row, int extra_shift) |
e7a972e1 | 89 | { |
2cc4f3b2 | 90 | int a0, a1, a2, a3, b0, b1, b2, b3; |
e7a972e1 MR |
91 | |
92 | #if HAVE_FAST_64BIT | |
93 | #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) | |
2cc4f3b2 | 94 | if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { |
f78cd0c2 RB |
95 | uint64_t temp; |
96 | if (DC_SHIFT - extra_shift > 0) { | |
97 | temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff; | |
98 | } else { | |
99 | temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff; | |
100 | } | |
2cc4f3b2 MR |
101 | temp += temp << 16; |
102 | temp += temp << 32; | |
103 | ((uint64_t *)row)[0] = temp; | |
104 | ((uint64_t *)row)[1] = temp; | |
105 | return; | |
106 | } | |
e7a972e1 | 107 | #else |
2cc4f3b2 MR |
108 | if (!(((uint32_t*)row)[1] | |
109 | ((uint32_t*)row)[2] | | |
110 | ((uint32_t*)row)[3] | | |
111 | row[1])) { | |
f78cd0c2 RB |
112 | uint32_t temp; |
113 | if (DC_SHIFT - extra_shift > 0) { | |
114 | temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff; | |
115 | } else { | |
116 | temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff; | |
117 | } | |
2cc4f3b2 MR |
118 | temp += temp << 16; |
119 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | |
120 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | |
121 | return; | |
122 | } | |
e7a972e1 MR |
123 | #endif |
124 | ||
2cc4f3b2 MR |
125 | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
126 | a1 = a0; | |
127 | a2 = a0; | |
128 | a3 = a0; | |
129 | ||
130 | a0 += W2 * row[2]; | |
131 | a1 += W6 * row[2]; | |
132 | a2 -= W6 * row[2]; | |
133 | a3 -= W2 * row[2]; | |
134 | ||
135 | b0 = MUL(W1, row[1]); | |
136 | MAC(b0, W3, row[3]); | |
137 | b1 = MUL(W3, row[1]); | |
138 | MAC(b1, -W7, row[3]); | |
139 | b2 = MUL(W5, row[1]); | |
140 | MAC(b2, -W1, row[3]); | |
141 | b3 = MUL(W7, row[1]); | |
142 | MAC(b3, -W5, row[3]); | |
143 | ||
144 | if (AV_RN64A(row + 4)) { | |
145 | a0 += W4*row[4] + W6*row[6]; | |
146 | a1 += - W4*row[4] - W2*row[6]; | |
147 | a2 += - W4*row[4] + W2*row[6]; | |
148 | a3 += W4*row[4] - W6*row[6]; | |
149 | ||
150 | MAC(b0, W5, row[5]); | |
151 | MAC(b0, W7, row[7]); | |
152 | ||
153 | MAC(b1, -W1, row[5]); | |
154 | MAC(b1, -W5, row[7]); | |
155 | ||
156 | MAC(b2, W7, row[5]); | |
157 | MAC(b2, W3, row[7]); | |
158 | ||
159 | MAC(b3, W3, row[5]); | |
160 | MAC(b3, -W1, row[7]); | |
161 | } | |
162 | ||
f78cd0c2 RB |
163 | row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift); |
164 | row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift); | |
165 | row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift); | |
166 | row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift); | |
167 | row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift); | |
168 | row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift); | |
169 | row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift); | |
170 | row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift); | |
e7a972e1 MR |
171 | } |
172 | ||
a402f109 MR |
/*
 * Shared arithmetic of the column pass.
 *
 * Expects the expanding scope to provide `col` (top coefficient of a column
 * whose rows are 8 elements apart) and the int variables a0..a3 and b0..b3.
 * On exit a0..a3 hold the sums built from the even-indexed inputs and b0..b3
 * the sums built from the odd-indexed inputs; the caller combines them as
 * a_k +/- b_k and shifts by COL_SHIFT.
 *
 * The rounding bias for that shift is folded into the first input as
 * (1 << (COL_SHIFT - 1)) / W4 before it is multiplied by W4. The terms of
 * inputs 4..7 are skipped individually when the input is zero.
 */
#define IDCT_COLS do {                                                  \
        const int in0 = col[8*0], in1 = col[8*1];                       \
        const int in2 = col[8*2], in3 = col[8*3];                       \
        const int in4 = col[8*4], in5 = col[8*5];                       \
        const int in6 = col[8*6], in7 = col[8*7];                       \
                                                                        \
        /* even-indexed inputs -> a0..a3 */                             \
        a0 = W4 * (in0 + ((1<<(COL_SHIFT-1))/W4));                      \
        a1 = a0;                                                        \
        a2 = a0;                                                        \
        a3 = a0;                                                        \
                                                                        \
        a0 +=  W2*in2;                                                  \
        a1 +=  W6*in2;                                                  \
        a2 += -W6*in2;                                                  \
        a3 += -W2*in2;                                                  \
                                                                        \
        if (in4) {                                                      \
            a0 +=  W4*in4;                                              \
            a1 += -W4*in4;                                              \
            a2 += -W4*in4;                                              \
            a3 +=  W4*in4;                                              \
        }                                                               \
                                                                        \
        if (in6) {                                                      \
            a0 +=  W6*in6;                                              \
            a1 += -W2*in6;                                              \
            a2 +=  W2*in6;                                              \
            a3 += -W6*in6;                                              \
        }                                                               \
                                                                        \
        /* odd-indexed inputs -> b0..b3 */                              \
        b0 = MUL(W1, in1);                                              \
        b1 = MUL(W3, in1);                                              \
        b2 = MUL(W5, in1);                                              \
        b3 = MUL(W7, in1);                                              \
                                                                        \
        MAC(b0,  W3, in3);                                              \
        MAC(b1, -W7, in3);                                              \
        MAC(b2, -W1, in3);                                              \
        MAC(b3, -W5, in3);                                              \
                                                                        \
        if (in5) {                                                      \
            MAC(b0,  W5, in5);                                          \
            MAC(b1, -W1, in5);                                          \
            MAC(b2,  W7, in5);                                          \
            MAC(b3,  W3, in5);                                          \
        }                                                               \
                                                                        \
        if (in7) {                                                      \
            MAC(b0,  W7, in7);                                          \
            MAC(b1, -W5, in7);                                          \
            MAC(b2,  W3, in7);                                          \
            MAC(b3, -W1, in7);                                          \
        }                                                               \
    } while (0)
222 | ||
e7a972e1 MR |
223 | static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, |
224 | DCTELEM *col) | |
225 | { | |
2cc4f3b2 MR |
226 | int a0, a1, a2, a3, b0, b1, b2, b3; |
227 | INIT_CLIP; | |
228 | ||
229 | IDCT_COLS; | |
230 | ||
231 | dest[0] = CLIP((a0 + b0) >> COL_SHIFT); | |
232 | dest += line_size; | |
233 | dest[0] = CLIP((a1 + b1) >> COL_SHIFT); | |
234 | dest += line_size; | |
235 | dest[0] = CLIP((a2 + b2) >> COL_SHIFT); | |
236 | dest += line_size; | |
237 | dest[0] = CLIP((a3 + b3) >> COL_SHIFT); | |
238 | dest += line_size; | |
239 | dest[0] = CLIP((a3 - b3) >> COL_SHIFT); | |
240 | dest += line_size; | |
241 | dest[0] = CLIP((a2 - b2) >> COL_SHIFT); | |
242 | dest += line_size; | |
243 | dest[0] = CLIP((a1 - b1) >> COL_SHIFT); | |
244 | dest += line_size; | |
245 | dest[0] = CLIP((a0 - b0) >> COL_SHIFT); | |
e7a972e1 MR |
246 | } |
247 | ||
248 | static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, | |
249 | DCTELEM *col) | |
250 | { | |
2cc4f3b2 MR |
251 | int a0, a1, a2, a3, b0, b1, b2, b3; |
252 | INIT_CLIP; | |
253 | ||
254 | IDCT_COLS; | |
255 | ||
256 | dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); | |
257 | dest += line_size; | |
258 | dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); | |
259 | dest += line_size; | |
260 | dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); | |
261 | dest += line_size; | |
262 | dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); | |
263 | dest += line_size; | |
264 | dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); | |
265 | dest += line_size; | |
266 | dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); | |
267 | dest += line_size; | |
268 | dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); | |
269 | dest += line_size; | |
270 | dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); | |
e7a972e1 MR |
271 | } |
272 | ||
273 | static inline void FUNC(idctSparseCol)(DCTELEM *col) | |
274 | { | |
2cc4f3b2 MR |
275 | int a0, a1, a2, a3, b0, b1, b2, b3; |
276 | ||
277 | IDCT_COLS; | |
278 | ||
279 | col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
280 | col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
281 | col[16] = ((a2 + b2) >> COL_SHIFT); | |
282 | col[24] = ((a3 + b3) >> COL_SHIFT); | |
283 | col[32] = ((a3 - b3) >> COL_SHIFT); | |
284 | col[40] = ((a2 - b2) >> COL_SHIFT); | |
285 | col[48] = ((a1 - b1) >> COL_SHIFT); | |
286 | col[56] = ((a0 - b0) >> COL_SHIFT); | |
e7a972e1 MR |
287 | } |
288 | ||
289 | void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block) | |
290 | { | |
291 | pixel *dest = (pixel *)dest_; | |
292 | int i; | |
e7bcc5ba MR |
293 | |
294 | line_size /= sizeof(pixel); | |
295 | ||
2cc4f3b2 | 296 | for (i = 0; i < 8; i++) |
f78cd0c2 | 297 | FUNC(idctRowCondDC)(block + i*8, 0); |
e7a972e1 | 298 | |
2cc4f3b2 | 299 | for (i = 0; i < 8; i++) |
e7a972e1 MR |
300 | FUNC(idctSparseColPut)(dest + i, line_size, block + i); |
301 | } | |
302 | ||
303 | void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block) | |
304 | { | |
305 | pixel *dest = (pixel *)dest_; | |
306 | int i; | |
e7bcc5ba MR |
307 | |
308 | line_size /= sizeof(pixel); | |
309 | ||
2cc4f3b2 | 310 | for (i = 0; i < 8; i++) |
f78cd0c2 | 311 | FUNC(idctRowCondDC)(block + i*8, 0); |
e7a972e1 | 312 | |
2cc4f3b2 | 313 | for (i = 0; i < 8; i++) |
e7a972e1 MR |
314 | FUNC(idctSparseColAdd)(dest + i, line_size, block + i); |
315 | } | |
316 | ||
317 | void FUNC(ff_simple_idct)(DCTELEM *block) | |
318 | { | |
319 | int i; | |
2cc4f3b2 MR |
320 | |
321 | for (i = 0; i < 8; i++) | |
f78cd0c2 | 322 | FUNC(idctRowCondDC)(block + i*8, 0); |
e7a972e1 | 323 | |
2cc4f3b2 | 324 | for (i = 0; i < 8; i++) |
e7a972e1 MR |
325 | FUNC(idctSparseCol)(block + i); |
326 | } |