/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include "avcodec.h"
#include "dsputil.h"

#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)

struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int padtop, padbottom, padleft, padright;
    int pad_owidth, pad_oheight;
    int h_incr, v_incr;
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;
};

void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);

static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}
/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            const uint8_t *s1 = s; /* const added to match the constified s */

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"

#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif

#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }
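
    /*
     * Illustrative arithmetic for the pre-shift above (added comment, not in
     * the original): vec_madds computes roughly (a*b + 0x4000) >> 15. A
     * scalar result should be (pixel * coef) >> FILTER_BITS with
     * FILTER_BITS = 8, so each coefficient is pre-shifted left by
     * 15 - 8 = 7; then (pixel * (coef << 7)) >> 15 == (pixel * coef) >> 8,
     * up to rounding.
     */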
311 | ||
312 | zero = vec_splat_u8(0); | |
313 | zeros = vec_splat_s16(0); | |
314 | ||
315 | ||
316 | /* | |
317 | When we're resampling, we'd ideally like both our input buffers, | |
318 | and output buffers to be 16-byte aligned, so we can do both aligned | |
319 | reads and writes. Sadly we can't always have this at the moment, so | |
320 | we opt for aligned writes, as unaligned writes have a huge overhead. | |
321 | To do this, do enough scalar resamples to get dst 16-byte aligned. | |
322 | */ | |
9e4e1659 | 323 | i = (-(int)dst) & 0xf; |
404d2241 BF |
324 | while(i>0) { |
325 | sum = s[0 * wrap] * filter[0] + | |
326 | s[1 * wrap] * filter[1] + | |
327 | s[2 * wrap] * filter[2] + | |
328 | s[3 * wrap] * filter[3]; | |
329 | sum = sum >> FILTER_BITS; | |
330 | if (sum<0) sum = 0; else if (sum>255) sum=255; | |
331 | dst[0] = sum; | |
332 | dst++; | |
333 | s++; | |
334 | dst_width--; | |
335 | i--; | |
336 | } | |
337 | ||
338 | /* Do our altivec resampling on 16 pixels at once. */ | |
339 | while(dst_width>=16) { | |
340 | /* | |
341 | Read 16 (potentially unaligned) bytes from each of | |
342 | 4 lines into 4 vectors, and split them into shorts. | |
343 | Interleave the multipy/accumulate for the resample | |
344 | filter with the loads to hide the 3 cycle latency | |
345 | the vec_madds have. | |
346 | */ | |
347 | tv = (vector unsigned char *) &s[0 * wrap]; | |
348 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); | |
349 | srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); | |
350 | srclv[0].v = (vector signed short) vec_mergel(zero, tmp); | |
351 | sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); | |
352 | sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); | |
353 | ||
354 | tv = (vector unsigned char *) &s[1 * wrap]; | |
355 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); | |
356 | srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); | |
357 | srclv[1].v = (vector signed short) vec_mergel(zero, tmp); | |
358 | sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); | |
359 | sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); | |
360 | ||
361 | tv = (vector unsigned char *) &s[2 * wrap]; | |
362 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); | |
363 | srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); | |
364 | srclv[2].v = (vector signed short) vec_mergel(zero, tmp); | |
365 | sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); | |
366 | sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); | |
367 | ||
368 | tv = (vector unsigned char *) &s[3 * wrap]; | |
369 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); | |
370 | srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); | |
371 | srclv[3].v = (vector signed short) vec_mergel(zero, tmp); | |
372 | sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); | |
373 | sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); | |
374 | ||
375 | /* | |
376 | Pack the results into our destination vector, | |
377 | and do an aligned write of that back to memory. | |
378 | */ | |
379 | dstv = vec_packsu(sumhv, sumlv) ; | |
380 | vec_st(dstv, 0, (vector unsigned char *) dst); | |
381 | ||
382 | dst+=16; | |
383 | s+=16; | |
384 | dst_width-=16; | |
385 | } | |
386 | ||
387 | /* | |
388 | If there are any leftover pixels, resample them | |
389 | with the slow scalar method. | |
390 | */ | |
391 | while(dst_width>0) { | |
392 | sum = s[0 * wrap] * filter[0] + | |
393 | s[1 * wrap] * filter[1] + | |
394 | s[2 * wrap] * filter[2] + | |
395 | s[3 * wrap] * filter[3]; | |
396 | sum = sum >> FILTER_BITS; | |
397 | if (sum<0) sum = 0; else if (sum>255) sum=255; | |
398 | dst[0] = sum; | |
399 | dst++; | |
400 | s++; | |
401 | dst_width--; | |
402 | } | |
403 | } | |
404 | #endif | |
405 | ||
/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width,
                            const uint8_t *src, int src_width,
                            int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase, sum, j, v, i;
    const uint8_t *s, *src_end;
    int16_t *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}

static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions: replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* deactivated MMX because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}

ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
                                  0, 0, 0, 0, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                           int iwidth, int iheight,
                                           int topBand, int bottomBand,
                                           int leftBand, int rightBand,
                                           int padtop, int padbottom,
                                           int padleft, int padright)
{
    ImgReSampleContext *s;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;

    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->padtop = padtop;
    s->padbottom = padbottom;
    s->padleft = padleft;
    s->padright = padright;

    s->pad_owidth = owidth - (padleft + padright);
    s->pad_oheight = oheight - (padtop + padbottom);

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;

    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
                    (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
                    (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);

    return s;
fail:
    av_free(s);
    return NULL;
}

void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;

        optr = output->data[i] + (((output->linesize[i] *
                s->padtop) + s->padleft) >> shift);

        component_resample(s, optr, output->linesize[i],
                           s->pad_owidth >> shift, s->pad_oheight >> shift,
                           input->data[i] + (input->linesize[i] *
                           (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i], ((s->iwidth - s->leftBand -
                           s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}

#ifdef TEST

void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    memset(ptr, 0, size);
    return ptr;
}

void av_free(void *ptr)
{
    /* XXX: this test should not be needed on most libcs */
    if (ptr)
        free(ptr);
}

/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;
    f=fopen(filename,"w");
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
}

static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        printf("%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        printf("\n");
    }
}

#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            /* mirror the pattern; "- 1" keeps the index inside img[] */
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        /* explicit zero padding arguments added to match the current
           12-argument img_resample_full_init() prototype */
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0,
                                   0, 0, 0, 0);
        printf("Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        sprintf(buf, "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
#ifdef HAVE_MMX
    printf("MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
        fprintf(stderr, "mmx error\n");
        exit(1);
    }
    printf("MMX OK\n");
#endif
    return 0;
}

#endif