Commit | Line | Data |
---|---|---|
64db55ae KS |
1 | /* |
2 | * VC-1 and WMV3 decoder - DSP functions | |
3 | * Copyright (c) 2006 Konstantin Shishkov | |
4 | * | |
b78e7197 DB |
5 | * This file is part of FFmpeg. |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
64db55ae KS |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either | |
b78e7197 | 10 | * version 2.1 of the License, or (at your option) any later version. |
64db55ae | 11 | * |
b78e7197 | 12 | * FFmpeg is distributed in the hope that it will be useful, |
64db55ae KS |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
b78e7197 | 18 | * License along with FFmpeg; if not, write to the Free Software |
64db55ae | 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
64db55ae KS |
20 | */ |
21 | ||
22 | /** | |
bad5537e | 23 | * @file libavcodec/vc1dsp.c |
64db55ae KS |
24 | * VC-1 and WMV3 decoder |
25 | * | |
26 | */ | |
27 | ||
28 | #include "dsputil.h" | |
29 | ||
30 | ||
/**
 * Apply the overlap transform across a horizontal edge.
 *
 * For each of 8 adjacent columns, the four pixels at rows -2, -1, 0 and +1
 * relative to src are read and rewritten in place.
 *
 * @param src    pointer to the first pixel of the row just below the edge
 * @param stride distance in bytes between vertically adjacent pixels
 */
static void vc1_v_overlap_c(uint8_t* src, int stride)
{
    int col;

    for (col = 0; col < 8; col++) {
        uint8_t *px = src + col;
        /* rounding term alternates 1, 0, 1, 0, ... starting with 1 */
        const int rnd   = !(col & 1);
        const int p0    = px[-2 * stride];
        const int p1    = px[-stride];
        const int p2    = px[0];
        const int p3    = px[stride];
        const int outer = (p0 - p3 + 3 + rnd) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - rnd) >> 3;

        px[-2 * stride] = p0 - outer;
        px[-stride]     = av_clip_uint8(p1 - inner);
        px[0]           = av_clip_uint8(p2 + inner);
        px[stride]      = p3 + outer;
    }
}
55 | ||
/**
 * Apply the overlap transform across a vertical edge.
 *
 * For each of 8 consecutive rows, the four pixels at columns -2, -1, 0 and +1
 * relative to src are read and rewritten in place.
 *
 * @param src    pointer to the first pixel of the column just right of the edge
 * @param stride distance in bytes between vertically adjacent pixels
 */
static void vc1_h_overlap_c(uint8_t* src, int stride)
{
    uint8_t *px = src;
    int row;

    for (row = 0; row < 8; row++, px += stride) {
        /* rounding term alternates 1, 0, 1, 0, ... starting with 1 */
        const int rnd   = !(row & 1);
        const int p0    = px[-2];
        const int p1    = px[-1];
        const int p2    = px[0];
        const int p3    = px[1];
        const int outer = (p0 - p3 + 3 + rnd) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - rnd) >> 3;

        px[-2] = p0 - outer;
        px[-1] = av_clip_uint8(p1 - inner);
        px[0]  = av_clip_uint8(p2 + inner);
        px[1]  = p3 + outer;
    }
}
80 | ||
0e58865d DC |
81 | /** |
82 | * VC-1 in-loop deblocking filter for one line | |
83 | * @param src source block type | |
84 | * @param stride block stride | |
85 | * @param pq block quantizer | |
86 | * @return whether other 3 pairs should be filtered or not | |
87 | * @see 8.6 | |
88 | */ | |
89 | static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){ | |
90 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
91 | ||
92 | int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3; | |
93 | int a0_sign = a0 >> 31; /* Store sign */ | |
94 | a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ | |
95 | if(a0 < pq){ | |
96 | int a1 = FFABS((2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3); | |
97 | int a2 = FFABS((2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3); | |
98 | if(a1 < a0 || a2 < a0){ | |
99 | int clip = src[-1*stride] - src[ 0*stride]; | |
100 | int clip_sign = clip >> 31; | |
101 | clip = ((clip ^ clip_sign) - clip_sign)>>1; | |
102 | if(clip){ | |
103 | int a3 = FFMIN(a1, a2); | |
104 | int d = 5 * (a3 - a0); | |
105 | int d_sign = (d >> 31); | |
106 | d = ((d ^ d_sign) - d_sign) >> 3; | |
107 | d_sign ^= a0_sign; | |
108 | ||
109 | if( d_sign ^ clip_sign ) | |
110 | d = 0; | |
111 | else{ | |
112 | d = FFMIN(d, clip); | |
113 | d = (d ^ d_sign) - d_sign; /* Restore sign */ | |
114 | src[-1*stride] = cm[src[-1*stride] - d]; | |
115 | src[ 0*stride] = cm[src[ 0*stride] + d]; | |
116 | } | |
117 | return 1; | |
118 | } | |
119 | } | |
120 | } | |
121 | return 0; | |
122 | } | |
123 | ||
/**
 * VC-1 in-loop deblocking filter
 *
 * The edge is processed in groups of 4 lines. In each group the third line
 * is filtered first; the remaining three lines are filtered only when that
 * call returns nonzero.
 *
 * @param src    pointer to the first pixel after the edge on the first line
 * @param step   distance between consecutive lines along the edge
 * @param stride distance between successive pixels across the edge
 * @param len    edge length to filter (the callers in this file pass 4, 8 or 16)
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t* src, int step, int stride, int len, int pq)
{
    int done;

    for(done = 0; done < len; done += 4, src += 4*step){
        if(!vc1_filter_line(src + 2*step, stride, pq))
            continue;

        vc1_filter_line(src,          stride, pq);
        vc1_filter_line(src +   step, stride, pq);
        vc1_filter_line(src + 3*step, stride, pq);
    }
}
64db55ae | 148 | |
3992526b DC |
/** Deblock 4 pixels along a horizontal edge (filtering runs vertically). */
static void vc1_v_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int along_edge = 1; /* neighbouring lines are adjacent bytes */

    vc1_loop_filter(src, along_edge, stride, 4, pq);
}
153 | ||
/** Deblock 4 pixels along a vertical edge (filtering runs horizontally). */
static void vc1_h_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int across_edge = 1; /* pixels across the edge are adjacent bytes */

    vc1_loop_filter(src, stride, across_edge, 4, pq);
}
158 | ||
/** Deblock 8 pixels along a horizontal edge (filtering runs vertically). */
static void vc1_v_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int along_edge = 1; /* neighbouring lines are adjacent bytes */

    vc1_loop_filter(src, along_edge, stride, 8, pq);
}
163 | ||
/** Deblock 8 pixels along a vertical edge (filtering runs horizontally). */
static void vc1_h_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int across_edge = 1; /* pixels across the edge are adjacent bytes */

    vc1_loop_filter(src, stride, across_edge, 8, pq);
}
168 | ||
/** Deblock 16 pixels along a horizontal edge (filtering runs vertically). */
static void vc1_v_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int along_edge = 1; /* neighbouring lines are adjacent bytes */

    vc1_loop_filter(src, along_edge, stride, 16, pq);
}
173 | ||
/** Deblock 16 pixels along a vertical edge (filtering runs horizontally). */
static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int across_edge = 1; /* pixels across the edge are adjacent bytes */

    vc1_loop_filter(src, stride, across_edge, 16, pq);
}
178 | ||
64db55ae KS |
179 | /** Do inverse transform on 8x8 block |
180 | */ | |
4f717c69 JGG |
181 | static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
182 | { | |
183 | int i; | |
184 | int dc = block[0]; | |
185 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
186 | dc = (3 * dc + 1) >> 1; | |
187 | dc = (3 * dc + 16) >> 5; | |
188 | for(i = 0; i < 8; i++){ | |
189 | dest[0] = cm[dest[0]+dc]; | |
190 | dest[1] = cm[dest[1]+dc]; | |
191 | dest[2] = cm[dest[2]+dc]; | |
192 | dest[3] = cm[dest[3]+dc]; | |
193 | dest[4] = cm[dest[4]+dc]; | |
194 | dest[5] = cm[dest[5]+dc]; | |
195 | dest[6] = cm[dest[6]+dc]; | |
196 | dest[7] = cm[dest[7]+dc]; | |
197 | dest += linesize; | |
198 | } | |
199 | } | |
200 | ||
64db55ae KS |
201 | static void vc1_inv_trans_8x8_c(DCTELEM block[64]) |
202 | { | |
203 | int i; | |
204 | register int t1,t2,t3,t4,t5,t6,t7,t8; | |
205 | DCTELEM *src, *dst; | |
206 | ||
207 | src = block; | |
208 | dst = block; | |
209 | for(i = 0; i < 8; i++){ | |
f3647f59 KS |
210 | t1 = 12 * (src[0] + src[4]) + 4; |
211 | t2 = 12 * (src[0] - src[4]) + 4; | |
64db55ae KS |
212 | t3 = 16 * src[2] + 6 * src[6]; |
213 | t4 = 6 * src[2] - 16 * src[6]; | |
214 | ||
215 | t5 = t1 + t3; | |
216 | t6 = t2 + t4; | |
217 | t7 = t2 - t4; | |
218 | t8 = t1 - t3; | |
219 | ||
220 | t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | |
221 | t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | |
222 | t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | |
223 | t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | |
224 | ||
f3647f59 KS |
225 | dst[0] = (t5 + t1) >> 3; |
226 | dst[1] = (t6 + t2) >> 3; | |
227 | dst[2] = (t7 + t3) >> 3; | |
228 | dst[3] = (t8 + t4) >> 3; | |
229 | dst[4] = (t8 - t4) >> 3; | |
230 | dst[5] = (t7 - t3) >> 3; | |
231 | dst[6] = (t6 - t2) >> 3; | |
232 | dst[7] = (t5 - t1) >> 3; | |
64db55ae KS |
233 | |
234 | src += 8; | |
235 | dst += 8; | |
236 | } | |
237 | ||
238 | src = block; | |
239 | dst = block; | |
240 | for(i = 0; i < 8; i++){ | |
f3647f59 KS |
241 | t1 = 12 * (src[ 0] + src[32]) + 64; |
242 | t2 = 12 * (src[ 0] - src[32]) + 64; | |
64db55ae KS |
243 | t3 = 16 * src[16] + 6 * src[48]; |
244 | t4 = 6 * src[16] - 16 * src[48]; | |
245 | ||
246 | t5 = t1 + t3; | |
247 | t6 = t2 + t4; | |
248 | t7 = t2 - t4; | |
249 | t8 = t1 - t3; | |
250 | ||
251 | t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
252 | t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
253 | t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
254 | t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
255 | ||
f3647f59 KS |
256 | dst[ 0] = (t5 + t1) >> 7; |
257 | dst[ 8] = (t6 + t2) >> 7; | |
258 | dst[16] = (t7 + t3) >> 7; | |
259 | dst[24] = (t8 + t4) >> 7; | |
260 | dst[32] = (t8 - t4 + 1) >> 7; | |
261 | dst[40] = (t7 - t3 + 1) >> 7; | |
262 | dst[48] = (t6 - t2 + 1) >> 7; | |
263 | dst[56] = (t5 - t1 + 1) >> 7; | |
64db55ae KS |
264 | |
265 | src++; | |
266 | dst++; | |
267 | } | |
268 | } | |
269 | ||
270 | /** Do inverse transform on 8x4 part of block | |
271 | */ | |
4f717c69 JGG |
272 | static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
273 | { | |
274 | int i; | |
275 | int dc = block[0]; | |
276 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
277 | dc = ( 3 * dc + 1) >> 1; | |
278 | dc = (17 * dc + 64) >> 7; | |
279 | for(i = 0; i < 4; i++){ | |
280 | dest[0] = cm[dest[0]+dc]; | |
281 | dest[1] = cm[dest[1]+dc]; | |
282 | dest[2] = cm[dest[2]+dc]; | |
283 | dest[3] = cm[dest[3]+dc]; | |
284 | dest[4] = cm[dest[4]+dc]; | |
285 | dest[5] = cm[dest[5]+dc]; | |
286 | dest[6] = cm[dest[6]+dc]; | |
287 | dest[7] = cm[dest[7]+dc]; | |
288 | dest += linesize; | |
289 | } | |
290 | } | |
291 | ||
d2e45f33 | 292 | static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) |
64db55ae KS |
293 | { |
294 | int i; | |
295 | register int t1,t2,t3,t4,t5,t6,t7,t8; | |
296 | DCTELEM *src, *dst; | |
d2e45f33 | 297 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
64db55ae | 298 | |
d2e45f33 KS |
299 | src = block; |
300 | dst = block; | |
64db55ae | 301 | for(i = 0; i < 4; i++){ |
f3647f59 KS |
302 | t1 = 12 * (src[0] + src[4]) + 4; |
303 | t2 = 12 * (src[0] - src[4]) + 4; | |
64db55ae KS |
304 | t3 = 16 * src[2] + 6 * src[6]; |
305 | t4 = 6 * src[2] - 16 * src[6]; | |
306 | ||
307 | t5 = t1 + t3; | |
308 | t6 = t2 + t4; | |
309 | t7 = t2 - t4; | |
310 | t8 = t1 - t3; | |
311 | ||
312 | t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | |
313 | t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | |
314 | t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | |
315 | t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | |
316 | ||
f3647f59 KS |
317 | dst[0] = (t5 + t1) >> 3; |
318 | dst[1] = (t6 + t2) >> 3; | |
319 | dst[2] = (t7 + t3) >> 3; | |
320 | dst[3] = (t8 + t4) >> 3; | |
321 | dst[4] = (t8 - t4) >> 3; | |
322 | dst[5] = (t7 - t3) >> 3; | |
323 | dst[6] = (t6 - t2) >> 3; | |
324 | dst[7] = (t5 - t1) >> 3; | |
64db55ae KS |
325 | |
326 | src += 8; | |
327 | dst += 8; | |
328 | } | |
329 | ||
d2e45f33 | 330 | src = block; |
64db55ae | 331 | for(i = 0; i < 8; i++){ |
f3647f59 KS |
332 | t1 = 17 * (src[ 0] + src[16]) + 64; |
333 | t2 = 17 * (src[ 0] - src[16]) + 64; | |
d1cc6e46 KS |
334 | t3 = 22 * src[ 8] + 10 * src[24]; |
335 | t4 = 22 * src[24] - 10 * src[ 8]; | |
64db55ae | 336 | |
d1cc6e46 KS |
337 | dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; |
338 | dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; | |
339 | dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; | |
340 | dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; | |
64db55ae KS |
341 | |
342 | src ++; | |
d2e45f33 | 343 | dest++; |
64db55ae KS |
344 | } |
345 | } | |
346 | ||
347 | /** Do inverse transform on 4x8 parts of block | |
348 | */ | |
4f717c69 JGG |
349 | static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
350 | { | |
351 | int i; | |
352 | int dc = block[0]; | |
353 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
354 | dc = (17 * dc + 4) >> 3; | |
355 | dc = (12 * dc + 64) >> 7; | |
356 | for(i = 0; i < 8; i++){ | |
357 | dest[0] = cm[dest[0]+dc]; | |
358 | dest[1] = cm[dest[1]+dc]; | |
359 | dest[2] = cm[dest[2]+dc]; | |
360 | dest[3] = cm[dest[3]+dc]; | |
361 | dest += linesize; | |
362 | } | |
363 | } | |
364 | ||
d2e45f33 | 365 | static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) |
64db55ae KS |
366 | { |
367 | int i; | |
368 | register int t1,t2,t3,t4,t5,t6,t7,t8; | |
369 | DCTELEM *src, *dst; | |
d2e45f33 | 370 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
64db55ae | 371 | |
d2e45f33 KS |
372 | src = block; |
373 | dst = block; | |
64db55ae | 374 | for(i = 0; i < 8; i++){ |
f3647f59 KS |
375 | t1 = 17 * (src[0] + src[2]) + 4; |
376 | t2 = 17 * (src[0] - src[2]) + 4; | |
d1cc6e46 KS |
377 | t3 = 22 * src[1] + 10 * src[3]; |
378 | t4 = 22 * src[3] - 10 * src[1]; | |
64db55ae | 379 | |
d1cc6e46 KS |
380 | dst[0] = (t1 + t3) >> 3; |
381 | dst[1] = (t2 - t4) >> 3; | |
382 | dst[2] = (t2 + t4) >> 3; | |
383 | dst[3] = (t1 - t3) >> 3; | |
64db55ae KS |
384 | |
385 | src += 8; | |
386 | dst += 8; | |
387 | } | |
388 | ||
d2e45f33 | 389 | src = block; |
64db55ae | 390 | for(i = 0; i < 4; i++){ |
f3647f59 KS |
391 | t1 = 12 * (src[ 0] + src[32]) + 64; |
392 | t2 = 12 * (src[ 0] - src[32]) + 64; | |
64db55ae KS |
393 | t3 = 16 * src[16] + 6 * src[48]; |
394 | t4 = 6 * src[16] - 16 * src[48]; | |
395 | ||
396 | t5 = t1 + t3; | |
397 | t6 = t2 + t4; | |
398 | t7 = t2 - t4; | |
399 | t8 = t1 - t3; | |
400 | ||
401 | t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
402 | t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
403 | t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
404 | t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
405 | ||
f3647f59 KS |
406 | dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)]; |
407 | dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)]; | |
408 | dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)]; | |
409 | dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)]; | |
410 | dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)]; | |
411 | dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)]; | |
412 | dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)]; | |
413 | dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)]; | |
64db55ae | 414 | |
d2e45f33 KS |
415 | src ++; |
416 | dest++; | |
64db55ae KS |
417 | } |
418 | } | |
419 | ||
420 | /** Do inverse transform on 4x4 part of block | |
421 | */ | |
4f717c69 JGG |
422 | static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
423 | { | |
424 | int i; | |
425 | int dc = block[0]; | |
426 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
427 | dc = (17 * dc + 4) >> 3; | |
428 | dc = (17 * dc + 64) >> 7; | |
429 | for(i = 0; i < 4; i++){ | |
430 | dest[0] = cm[dest[0]+dc]; | |
431 | dest[1] = cm[dest[1]+dc]; | |
432 | dest[2] = cm[dest[2]+dc]; | |
433 | dest[3] = cm[dest[3]+dc]; | |
434 | dest += linesize; | |
435 | } | |
436 | } | |
437 | ||
d2e45f33 | 438 | static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) |
64db55ae KS |
439 | { |
440 | int i; | |
d1cc6e46 | 441 | register int t1,t2,t3,t4; |
64db55ae | 442 | DCTELEM *src, *dst; |
d2e45f33 | 443 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
64db55ae | 444 | |
d2e45f33 KS |
445 | src = block; |
446 | dst = block; | |
64db55ae | 447 | for(i = 0; i < 4; i++){ |
f3647f59 KS |
448 | t1 = 17 * (src[0] + src[2]) + 4; |
449 | t2 = 17 * (src[0] - src[2]) + 4; | |
d1cc6e46 KS |
450 | t3 = 22 * src[1] + 10 * src[3]; |
451 | t4 = 22 * src[3] - 10 * src[1]; | |
64db55ae | 452 | |
d1cc6e46 KS |
453 | dst[0] = (t1 + t3) >> 3; |
454 | dst[1] = (t2 - t4) >> 3; | |
455 | dst[2] = (t2 + t4) >> 3; | |
456 | dst[3] = (t1 - t3) >> 3; | |
64db55ae KS |
457 | |
458 | src += 8; | |
459 | dst += 8; | |
460 | } | |
461 | ||
d2e45f33 | 462 | src = block; |
64db55ae | 463 | for(i = 0; i < 4; i++){ |
f3647f59 KS |
464 | t1 = 17 * (src[ 0] + src[16]) + 64; |
465 | t2 = 17 * (src[ 0] - src[16]) + 64; | |
d1cc6e46 KS |
466 | t3 = 22 * src[ 8] + 10 * src[24]; |
467 | t4 = 22 * src[24] - 10 * src[ 8]; | |
468 | ||
469 | dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; | |
470 | dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; | |
471 | dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; | |
472 | dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; | |
64db55ae KS |
473 | |
474 | src ++; | |
d2e45f33 | 475 | dest++; |
64db55ae KS |
476 | } |
477 | } | |
478 | ||
479 | /* motion compensation functions */ | |
7919d10c CG |
480 | /** Filter in case of 2 filters */ |
481 | #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \ | |
482 | static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \ | |
483 | { \ | |
484 | switch(mode){ \ | |
485 | case 0: /* no shift - should not occur */ \ | |
486 | return 0; \ | |
487 | case 1: /* 1/4 shift */ \ | |
488 | return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \ | |
489 | case 2: /* 1/2 shift */ \ | |
490 | return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \ | |
491 | case 3: /* 3/4 shift */ \ | |
492 | return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \ | |
493 | } \ | |
494 | return 0; /* should not occur */ \ | |
495 | } | |
496 | ||
497 | VC1_MSPEL_FILTER_16B(ver, uint8_t); | |
498 | VC1_MSPEL_FILTER_16B(hor, int16_t); | |
499 | ||
64db55ae KS |
500 | |
501 | /** Filter used to interpolate fractional pel values | |
502 | */ | |
849f1035 | 503 | static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) |
64db55ae KS |
504 | { |
505 | switch(mode){ | |
506 | case 0: //no shift | |
507 | return src[0]; | |
508 | case 1: // 1/4 shift | |
509 | return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6; | |
510 | case 2: // 1/2 shift | |
511 | return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4; | |
512 | case 3: // 3/4 shift | |
513 | return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6; | |
514 | } | |
515 | return 0; //should not occur | |
516 | } | |
517 | ||
/**
 * Function used to do motion compensation with bicubic interpolation
 *
 * Generates OPNAME##vc1_mspel_mc(), which processes an 8x8 block.
 * - vmode and hmode both nonzero: a vertical pass writes 8 rows of 11
 *   intermediate 16-bit samples (columns -1..9 of the source block) into a
 *   temporary buffer, then a horizontal pass over that buffer produces the
 *   8x8 output.
 * - only vmode nonzero: a single vertical pass.
 * - otherwise: a single horizontal pass using hmode.
 *
 * @param OP     store operation applied to each output pixel (op_put/op_avg)
 * @param OPNAME prefix of the generated function name
 */
#define VC1_MSPEL_MC(OP, OPNAME)\
static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
{\
    int i, j;\
\
    if (vmode) { /* Vertical filter to apply */\
        int r;\
\
        if (hmode) { /* Horizontal filter to apply too: vertical pass goes to tmp */\
            static const int shift_value[] = { 0, 5, 1, 5 };\
            int shift = (shift_value[hmode]+shift_value[vmode])>>1;\
            int16_t tmp[11*8], *tptr = tmp;\
\
            r = (1<<(shift-1)) + rnd-1;\
\
            /* start one column to the left: the horizontal taps need it */\
            src -= 1;\
            for(j = 0; j < 8; j++) {\
                for(i = 0; i < 11; i++)\
                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;\
                src += stride;\
                tptr += 11;\
            }\
\
            r = 64-rnd;\
            tptr = tmp+1;\
            for(j = 0; j < 8; j++) {\
                for(i = 0; i < 8; i++)\
                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);\
                dst += stride;\
                tptr += 11;\
            }\
\
            return;\
        }\
        else { /* No horizontal filter, output 8 lines to dst */\
            r = 1-rnd;\
\
            for(j = 0; j < 8; j++) {\
                for(i = 0; i < 8; i++)\
                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));\
                src += stride;\
                dst += stride;\
            }\
            return;\
        }\
    }\
\
    /* Horizontal mode with no vertical mode */\
    for(j = 0; j < 8; j++) {\
        for(i = 0; i < 8; i++)\
            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));\
        dst += stride;\
        src += stride;\
    }\
}

/* Store operations: overwrite the destination, or average with it (rounding up). */
#define op_put(a, b) (a) = av_clip_uint8(b)
#define op_avg(a, b) (a) = ((a) + av_clip_uint8(b) + 1) >> 1

VC1_MSPEL_MC(op_put, put_)
VC1_MSPEL_MC(op_avg, avg_)
581 | ||
64db55ae KS |
/* pixel functions - really are entry points to vc1_mspel_mc */

/**
 * Generate the put_ and avg_ entry points for one sub-pel position.
 * Each wrapper forwards to the generic routine with hmode = a, vmode = b.
 */
#define PUT_VC1_MSPEL(a, b) \
static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) \
{ \
    put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
} \
static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) \
{ \
    avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}

/* vmode 0 (position 0,0 has no wrapper here) */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

/* vmode 1 */
PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

/* vmode 2 */
PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

/* vmode 3 */
PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
64db55ae | 610 | |
0752cd39 | 611 | av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { |
64db55ae KS |
612 | dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; |
613 | dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; | |
614 | dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; | |
615 | dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; | |
4f717c69 JGG |
616 | dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c; |
617 | dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c; | |
618 | dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c; | |
619 | dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c; | |
64db55ae KS |
620 | dsp->vc1_h_overlap = vc1_h_overlap_c; |
621 | dsp->vc1_v_overlap = vc1_v_overlap_c; | |
3992526b DC |
622 | dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c; |
623 | dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c; | |
624 | dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c; | |
625 | dsp->vc1_h_loop_filter8 = vc1_h_loop_filter8_c; | |
626 | dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c; | |
627 | dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c; | |
64db55ae KS |
628 | |
629 | dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c; | |
e95c953b CG |
630 | dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c; |
631 | dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c; | |
632 | dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c; | |
633 | dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_c; | |
634 | dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_c; | |
635 | dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_c; | |
636 | dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_c; | |
637 | dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_c; | |
638 | dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_c; | |
639 | dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c; | |
640 | dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c; | |
641 | dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c; | |
642 | dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c; | |
643 | dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c; | |
644 | dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c; | |
6cecd630 DC |
645 | |
646 | dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_c; | |
647 | dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c; | |
648 | dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c; | |
649 | dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c; | |
650 | dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_c; | |
651 | dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_c; | |
652 | dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_c; | |
653 | dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_c; | |
654 | dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_c; | |
655 | dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_c; | |
656 | dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c; | |
657 | dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c; | |
658 | dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c; | |
659 | dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c; | |
660 | dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c; | |
661 | dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c; | |
64db55ae | 662 | } |