Practically disable the AltiVec resampling code (some people said it's broken); patch by...
[libav.git] / libavcodec / imgresample.c
1 /*
2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19 #include "avcodec.h"
20 #include "dsputil.h"
21
22 #ifdef USE_FASTMEMCPY
23 #include "fastmemcpy.h"
24 #endif
25 extern int mm_flags;
26
27 #define NB_COMPONENTS 3
28
29 #define PHASE_BITS 4
30 #define NB_PHASES (1 << PHASE_BITS)
31 #define NB_TAPS 4
32 #define FCENTER 1 /* index of the center of the filter */
33 //#define TEST 1 /* Test it */
34
35 #define POS_FRAC_BITS 16
36 #define POS_FRAC (1 << POS_FRAC_BITS)
37 /* 6 bits precision is needed for MMX */
38 #define FILTER_BITS 8
39
40 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
41
struct ImgReSampleContext {
    /* input (iwidth/iheight) and output (owidth/oheight) sizes, plus the
       number of pixels cropped from each edge of the input image */
    int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
    /* source step per output pixel, in POS_FRAC fixed-point units */
    int h_incr, v_incr;
    INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    UINT8 *line_buf; /* ring buffer of horizontally resampled lines */
};
49
50 static inline int get_phase(int pos)
51 {
52 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
53 }
54
55 /* This function must be optimized */
56 static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
57 int src_start, int src_incr, INT16 *filters)
58 {
59 int src_pos, phase, sum, i;
60 UINT8 *s;
61 INT16 *filter;
62
63 src_pos = src_start;
64 for(i=0;i<dst_width;i++) {
65 #ifdef TEST
66 /* test */
67 if ((src_pos >> POS_FRAC_BITS) < 0 ||
68 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
69 av_abort();
70 #endif
71 s = src + (src_pos >> POS_FRAC_BITS);
72 phase = get_phase(src_pos);
73 filter = filters + phase * NB_TAPS;
74 #if NB_TAPS == 4
75 sum = s[0] * filter[0] +
76 s[1] * filter[1] +
77 s[2] * filter[2] +
78 s[3] * filter[3];
79 #else
80 {
81 int j;
82 sum = 0;
83 for(j=0;j<NB_TAPS;j++)
84 sum += s[j] * filter[j];
85 }
86 #endif
87 sum = sum >> FILTER_BITS;
88 if (sum < 0)
89 sum = 0;
90 else if (sum > 255)
91 sum = 255;
92 dst[0] = sum;
93 src_pos += src_incr;
94 dst++;
95 }
96 }
97
98 /* This function must be optimized */
99 static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
100 INT16 *filter)
101 {
102 int sum, i;
103 UINT8 *s;
104
105 s = src;
106 for(i=0;i<dst_width;i++) {
107 #if NB_TAPS == 4
108 sum = s[0 * wrap] * filter[0] +
109 s[1 * wrap] * filter[1] +
110 s[2 * wrap] * filter[2] +
111 s[3 * wrap] * filter[3];
112 #else
113 {
114 int j;
115 UINT8 *s1 = s;
116
117 sum = 0;
118 for(j=0;j<NB_TAPS;j++) {
119 sum += s1[0] * filter[j];
120 s1 += wrap;
121 }
122 }
123 #endif
124 sum = sum >> FILTER_BITS;
125 if (sum < 0)
126 sum = 0;
127 else if (sum > 255)
128 sum = 255;
129 dst[0] = sum;
130 dst++;
131 s++;
132 }
133 }
134
135 #ifdef HAVE_MMX
136
137 #include "i386/mmx.h"
138
/* Compute one horizontally filtered output pixel into MMX register 'reg':
   load 4 source bytes, widen to 16-bit words (mm7 must be zero), multiply-
   accumulate with the 4 filter taps (pmaddwd yields two 32-bit partial
   sums which are then folded together), and shift down by FILTER_BITS.
   Side effects: clobbers mm6 and updates s, phase, filter and src_pos,
   which must all be in scope at the expansion site. */
#define FILTER4(reg) \
{\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
}

/* debugging helper: print an MMX register as a 64-bit hex value */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
156
/* XXX: do four pixels at a time */
/* MMX horizontal resampler; same contract as h_resample_fast().
   Requires NB_TAPS == 4.  mm7 is kept zero for FILTER4's unpacking. */
static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                                 int src_start, int src_incr, INT16 *filters)
{
    int src_pos, phase;
    UINT8 *s;
    INT16 *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7); /* mm7 = 0 */

    /* main loop: 4 output pixels per iteration */
    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        /* saturate each 32-bit result to an unsigned byte */
        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    /* tail: remaining 1..3 pixels, one at a time */
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms(); /* leave the FPU state usable again */
}
201
/* MMX vertical resampler; same contract as v_resample().  Requires
   NB_TAPS == 4.  Processes 4 output pixels per iteration using 16-bit
   pmullw arithmetic, which loses precision vs. the C version — see the
   comment in component_resample() where this path is disabled. */
static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
                            INT16 *filter)
{
    int sum, i, v;
    UINT8 *s;
    mmx_t tmp;
    mmx_t coefs[4];

    /* broadcast each filter tap across the 4 words of an MMX operand */
    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7); /* mm7 = 0 for byte->word unpacking */
    s = src;
    while (dst_width >= 4) {
        /* load 4 bytes from each of the 4 source lines, widen to words */
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        /* multiply by the taps and sum the 4 lines */
        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        /* saturate to bytes and store 4 pixels with one 32-bit write */
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(UINT32 *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    /* scalar tail for the remaining pixels */
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
265 #endif
266
267 #ifdef HAVE_ALTIVEC
/* unions to access AltiVec vector elements from scalar code */
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;
277
278 void v_resample16_altivec(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
279 INT16 *filter)
280 {
281 int sum, i;
282 uint8_t *s;
283 vector unsigned char *tv, tmp, dstv, zero;
284 vec_ss_t srchv[4], srclv[4], fv[4];
285 vector signed short zeros, sumhv, sumlv;
286 s = src;
287
288 for(i=0;i<4;i++)
289 {
290 /*
291 The vec_madds later on does an implicit >>15 on the result.
292 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
293 a signed short, we have just enough bits to pre-shift our
294 filter constants <<7 to compensate for vec_madds.
295 */
296 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
297 fv[i].v = vec_splat(fv[i].v, 0);
298 }
299
300 zero = vec_splat_u8(0);
301 zeros = vec_splat_s16(0);
302
303
304 /*
305 When we're resampling, we'd ideally like both our input buffers,
306 and output buffers to be 16-byte aligned, so we can do both aligned
307 reads and writes. Sadly we can't always have this at the moment, so
308 we opt for aligned writes, as unaligned writes have a huge overhead.
309 To do this, do enough scalar resamples to get dst 16-byte aligned.
310 */
311 i = (-(int)dst) & 0xf;
312 while(i>0) {
313 sum = s[0 * wrap] * filter[0] +
314 s[1 * wrap] * filter[1] +
315 s[2 * wrap] * filter[2] +
316 s[3 * wrap] * filter[3];
317 sum = sum >> FILTER_BITS;
318 if (sum<0) sum = 0; else if (sum>255) sum=255;
319 dst[0] = sum;
320 dst++;
321 s++;
322 dst_width--;
323 i--;
324 }
325
326 /* Do our altivec resampling on 16 pixels at once. */
327 while(dst_width>=16) {
328 /*
329 Read 16 (potentially unaligned) bytes from each of
330 4 lines into 4 vectors, and split them into shorts.
331 Interleave the multipy/accumulate for the resample
332 filter with the loads to hide the 3 cycle latency
333 the vec_madds have.
334 */
335 tv = (vector unsigned char *) &s[0 * wrap];
336 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
337 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
338 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
339 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
340 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
341
342 tv = (vector unsigned char *) &s[1 * wrap];
343 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
344 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
345 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
346 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
347 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
348
349 tv = (vector unsigned char *) &s[2 * wrap];
350 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
351 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
352 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
353 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
354 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
355
356 tv = (vector unsigned char *) &s[3 * wrap];
357 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
358 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
359 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
360 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
361 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
362
363 /*
364 Pack the results into our destination vector,
365 and do an aligned write of that back to memory.
366 */
367 dstv = vec_packsu(sumhv, sumlv) ;
368 vec_st(dstv, 0, (vector unsigned char *) dst);
369
370 dst+=16;
371 s+=16;
372 dst_width-=16;
373 }
374
375 /*
376 If there are any leftover pixels, resample them
377 with the slow scalar method.
378 */
379 while(dst_width>0) {
380 sum = s[0 * wrap] * filter[0] +
381 s[1 * wrap] * filter[1] +
382 s[2 * wrap] * filter[2] +
383 s[3 * wrap] * filter[3];
384 sum = sum >> FILTER_BITS;
385 if (sum<0) sum = 0; else if (sum>255) sum=255;
386 dst[0] = sum;
387 dst++;
388 s++;
389 dst_width--;
390 }
391 }
392 #endif
393
394 /* slow version to handle limit cases. Does not need optimisation */
395 static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
396 int src_start, int src_incr, INT16 *filters)
397 {
398 int src_pos, phase, sum, j, v, i;
399 UINT8 *s, *src_end;
400 INT16 *filter;
401
402 src_end = src + src_width;
403 src_pos = src_start;
404 for(i=0;i<dst_width;i++) {
405 s = src + (src_pos >> POS_FRAC_BITS);
406 phase = get_phase(src_pos);
407 filter = filters + phase * NB_TAPS;
408 sum = 0;
409 for(j=0;j<NB_TAPS;j++) {
410 if (s < src)
411 v = src[0];
412 else if (s >= src_end)
413 v = src_end[-1];
414 else
415 v = s[0];
416 sum += v * filter[j];
417 s++;
418 }
419 sum = sum >> FILTER_BITS;
420 if (sum < 0)
421 sum = 0;
422 else if (sum > 255)
423 sum = 255;
424 dst[0] = sum;
425 src_pos += src_incr;
426 dst++;
427 }
428 }
429
/* Horizontal resampling dispatcher: splits the output line into an
   optional left-border part (filter window starts before the source), a
   fast middle part, and an optional right-border part, and resamples each
   with the appropriate routine. */
static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                       int src_start, int src_incr, INT16 *filters)
{
    int n, src_end;

    if (src_start < 0) {
        /* number of output pixels whose window starts before the line */
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    /* fixed-point source position just past the last output pixel */
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        /* count of pixels whose whole tap window still fits in the line */
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        /* right border: window would run past the end of the line */
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}
465
/* Resample one image plane.  Input lines are filtered horizontally into a
   ring buffer of LINE_BUF_HEIGHT + NB_TAPS lines; the vertical filter then
   reads NB_TAPS consecutive lines from that buffer for each output line. */
static void component_resample(ImgReSampleContext *s,
                               UINT8 *output, int owrap, int owidth, int oheight,
                               UINT8 *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    UINT8 *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wraping: mirror the line at the start of
               the buffer so the vertical filter can always read NB_TAPS
               contiguous lines */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* desactivated MMX because loss of precision (the trailing "&& 0"
           deliberately disables this branch) */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        /* NOTE: FILTER_BITS is 8, so "FILTER_BITS <= 6" is always false;
           the AltiVec path is effectively disabled (reported broken) */
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;
        output += owrap;
    }
}
529
530 /* XXX: the following filter is quite naive, but it seems to suffice
531 for 4 taps */
532 static void build_filter(INT16 *filter, float factor)
533 {
534 int ph, i, v;
535 float x, y, tab[NB_TAPS], norm, mult;
536
537 /* if upsampling, only need to interpolate, no filter */
538 if (factor > 1.0)
539 factor = 1.0;
540
541 for(ph=0;ph<NB_PHASES;ph++) {
542 norm = 0;
543 for(i=0;i<NB_TAPS;i++) {
544
545 x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
546 if (x == 0)
547 y = 1.0;
548 else
549 y = sin(x) / x;
550 tab[i] = y;
551 norm += y;
552 }
553
554 /* normalize so that an uniform color remains the same */
555 mult = (float)(1 << FILTER_BITS) / norm;
556 for(i=0;i<NB_TAPS;i++) {
557 v = (int)(tab[i] * mult);
558 filter[ph * NB_TAPS + i] = v;
559 }
560 }
561 }
562
/* Create a context that resamples a full iwidth x iheight image to
   owidth x oheight (no border cropping).  Returns NULL on failure;
   free with img_resample_close(). */
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
}
568
569 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
570 int iwidth, int iheight,
571 int topBand, int bottomBand,
572 int leftBand, int rightBand)
573 {
574 ImgReSampleContext *s;
575
576 s = av_mallocz(sizeof(ImgReSampleContext));
577 if (!s)
578 return NULL;
579 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
580 if (!s->line_buf)
581 goto fail;
582
583 s->owidth = owidth;
584 s->oheight = oheight;
585 s->iwidth = iwidth;
586 s->iheight = iheight;
587 s->topBand = topBand;
588 s->bottomBand = bottomBand;
589 s->leftBand = leftBand;
590 s->rightBand = rightBand;
591
592 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
593 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
594
595 build_filter(&s->h_filters[0][0], (float) owidth / (float) (iwidth - leftBand - rightBand));
596 build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));
597
598 return s;
599 fail:
600 av_free(s);
601 return NULL;
602 }
603
604 void img_resample(ImgReSampleContext *s,
605 AVPicture *output, AVPicture *input)
606 {
607 int i, shift;
608
609 for(i=0;i<3;i++) {
610 shift = (i == 0) ? 0 : 1;
611 component_resample(s, output->data[i], output->linesize[i],
612 s->owidth >> shift, s->oheight >> shift,
613 input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
614 input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
615 (s->iheight - s->topBand - s->bottomBand) >> shift);
616 }
617 }
618
619 void img_resample_close(ImgReSampleContext *s)
620 {
621 av_free(s->line_buf);
622 av_free(s);
623 }
624
625 #ifdef TEST
626
/* Test-build stub: allocate 'size' bytes, zero-filled.  Returns NULL on
   allocation failure (the original passed an unchecked malloc() result
   straight to memset(), which is undefined behavior on OOM). */
void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    if (ptr)
        memset(ptr, 0, size);
    return ptr;
}
634
/* Test-build stub: free wrapper.  free(NULL) is a guaranteed no-op in
   ISO C, so the old "if (ptr)" guard was redundant and has been removed. */
void av_free(void *ptr)
{
    free(ptr);
}
641
642 /* input */
643 #define XSIZE 256
644 #define YSIZE 256
645 UINT8 img[XSIZE * YSIZE];
646
647 /* output */
648 #define XSIZE1 512
649 #define YSIZE1 512
650 UINT8 img1[XSIZE1 * YSIZE1];
651 UINT8 img2[XSIZE1 * YSIZE1];
652
653 void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize)
654 {
655 FILE *f;
656 f=fopen(filename,"w");
657 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
658 fwrite(img,1, xsize * ysize,f);
659 fclose(f);
660 }
661
662 static void dump_filter(INT16 *filter)
663 {
664 int i, ph;
665
666 for(ph=0;ph<NB_PHASES;ph++) {
667 printf("%2d: ", ph);
668 for(i=0;i<NB_TAPS;i++) {
669 printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
670 }
671 printf("\n");
672 }
673 }
674
675 #ifdef HAVE_MMX
676 int mm_flags;
677 #endif
678
679 int main(int argc, char **argv)
680 {
681 int x, y, v, i, xsize, ysize;
682 ImgReSampleContext *s;
683 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
684 char buf[256];
685
686 /* build test image */
687 for(y=0;y<YSIZE;y++) {
688 for(x=0;x<XSIZE;x++) {
689 if (x < XSIZE/2 && y < YSIZE/2) {
690 if (x < XSIZE/4 && y < YSIZE/4) {
691 if ((x % 10) <= 6 &&
692 (y % 10) <= 6)
693 v = 0xff;
694 else
695 v = 0x00;
696 } else if (x < XSIZE/4) {
697 if (x & 1)
698 v = 0xff;
699 else
700 v = 0;
701 } else if (y < XSIZE/4) {
702 if (y & 1)
703 v = 0xff;
704 else
705 v = 0;
706 } else {
707 if (y < YSIZE*3/8) {
708 if ((y+x) & 1)
709 v = 0xff;
710 else
711 v = 0;
712 } else {
713 if (((x+3) % 4) <= 1 &&
714 ((y+3) % 4) <= 1)
715 v = 0xff;
716 else
717 v = 0x00;
718 }
719 }
720 } else if (x < XSIZE/2) {
721 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
722 } else if (y < XSIZE/2) {
723 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
724 } else {
725 v = ((x + y - XSIZE) * 255) / XSIZE;
726 }
727 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
728 }
729 }
730 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
731 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
732 fact = factors[i];
733 xsize = (int)(XSIZE * fact);
734 ysize = (int)((YSIZE - 100) * fact);
735 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
736 printf("Factor=%0.2f\n", fact);
737 dump_filter(&s->h_filters[0][0]);
738 component_resample(s, img1, xsize, xsize, ysize,
739 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
740 img_resample_close(s);
741
742 sprintf(buf, "/tmp/out%d.pgm", i);
743 save_pgm(buf, img1, xsize, ysize);
744 }
745
746 /* mmx test */
747 #ifdef HAVE_MMX
748 printf("MMX test\n");
749 fact = 0.72;
750 xsize = (int)(XSIZE * fact);
751 ysize = (int)(YSIZE * fact);
752 mm_flags = MM_MMX;
753 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
754 component_resample(s, img1, xsize, xsize, ysize,
755 img, XSIZE, XSIZE, YSIZE);
756
757 mm_flags = 0;
758 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
759 component_resample(s, img2, xsize, xsize, ysize,
760 img, XSIZE, XSIZE, YSIZE);
761 if (memcmp(img1, img2, xsize * ysize) != 0) {
762 fprintf(stderr, "mmx error\n");
763 exit(1);
764 }
765 printf("MMX OK\n");
766 #endif
767 return 0;
768 }
769
770 #endif