/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include "avcodec.h"
#include "dsputil.h"

#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES (1 << PHASE_BITS)
#define NB_TAPS 4
#define FCENTER 1 /* index of the center of the filter */
//#define TEST 1 /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS 8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)

struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int padtop, padbottom, padleft, padright;
    int pad_owidth, pad_oheight;
    int h_incr, v_incr;
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;
};

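/*
 * Source positions are kept in 16.16 fixed point (POS_FRAC_BITS fractional
 * bits): the integer part selects the first source sample of the filter
 * window, and the top PHASE_BITS of the fraction select one of the NB_PHASES
 * precomputed filter phases. For example, a position of 0x18000 (1.5) reads
 * from sample 1 with phase 8 of 16.
 */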
static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}

/* This function must be optimized */
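/* Horizontal NB_TAPS-tap polyphase filter for the easy case: the caller must
   guarantee that the integer source position stays within
   [0, src_width - NB_TAPS], so no edge clipping is done here. */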
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
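/* Vertical NB_TAPS-tap filter: for each output pixel it reads NB_TAPS source
   samples spaced `wrap` bytes apart (i.e. NB_TAPS consecutive lines of a
   horizontally filtered buffer) and applies a single filter phase to the
   whole output line. */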
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            const uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"

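/* FILTER4 computes one horizontally filtered output pixel in `reg`: it
   unpacks 4 source bytes to words, multiplies them by the 4 filter taps with
   pmaddwd (giving two 32-bit partial sums), adds the high partial sum to the
   low one and shifts the result down by FILTER_BITS. mm7 must hold zero and
   mm6 is used as a scratch register. */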
#define FILTER4(reg) \
{\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
}

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

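/* MMX vertical filter: each of the 4 filter taps is broadcast to 4 16-bit
   lanes so that 4 output pixels can be computed per iteration with
   pmullw/paddw. Note that pmullw and paddw keep only 16-bit intermediates,
   which can overflow for large coefficients; this is presumably the loss of
   precision that keeps this path disabled in component_resample() below. */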
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif

#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }
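    /* For example, with FILTER_BITS == 8 a tap of 64 is stored as
       64 << 7 = 8192, and vec_madds then yields (up to rounding)
       (pixel * 8192) >> 15 == (pixel * 64) >> 8, matching the scalar
       (pixel * tap) >> FILTER_BITS of v_resample(). */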

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers,
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }
    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif

/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width,
                            const uint8_t *src, int src_width,
                            int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase, sum, j, v, i;
    const uint8_t *s, *src_end;
    int16_t *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

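/* Dispatch one output line: the parts of the line whose filter window would
   read outside the source are handled by h_resample_slow() (left edge first,
   right edge last), and the in-bounds middle section goes through the fast
   (possibly MMX) path. */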
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}

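/* Resample one plane. Horizontally filtered lines are cached in a ring
   buffer of LINE_BUF_HEIGHT + NB_TAPS lines; a line stored in the last
   NB_TAPS slots of the ring is also copied to the start, so the vertical
   filter can always read its NB_TAPS input lines from consecutive memory. */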
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX version deactivated because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}

/* XXX: the following filter is quite naive, but it seems to suffice
   for 4 taps */
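/* The #if 1 branch below is a cubic interpolation kernel (the slope
   parameter d = -0.5 gives a Catmull-Rom-like spline); the disabled #else
   branch is a plain sinc. `factor` is the output/input size ratio: it is
   clamped to 1.0 for upsampling, and for downsampling it stretches the
   kernel so that it also acts as a low-pass filter. */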
static void build_filter(int16_t *filter, float factor)
{
    int ph, i, v;
    float x, y, tab[NB_TAPS], norm, mult, target;

    /* if upsampling, only need to interpolate, no filter */
    if (factor > 1.0)
        factor = 1.0;

    for(ph=0;ph<NB_PHASES;ph++) {
        norm = 0;
        for(i=0;i<NB_TAPS;i++) {
#if 1
            const float d= -0.5; //first order derivative = -0.5
            x = fabs(((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor);
            if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*( -x*x + x*x*x);
            else y= d*(-4 + 8*x - 5*x*x + x*x*x);
#else
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
            if (x == 0)
                y = 1.0;
            else
                y = sin(x) / x;
#endif
            tab[i] = y;
            norm += y;
        }

        /* normalize so that a uniform color remains the same */
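        /* The rounding error of each quantized tap is folded into the
           remaining taps (by updating norm and target), so the NB_TAPS
           coefficients of every phase sum to exactly 1 << FILTER_BITS. */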
        target= 1 << FILTER_BITS;
        for(i=0;i<NB_TAPS;i++) {
            mult = target / norm;
            v = lrintf(tab[i] * mult);
            filter[ph * NB_TAPS + i] = v;
            norm -= tab[i];
            target -= v;
        }
    }
}

ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
                                  0, 0, 0, 0, 0, 0, 0, 0);
}

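/* The *Band parameters crop the input (in pixels on each side) and the pad*
   parameters reserve borders in the output; only the remaining
   pad_owidth x pad_oheight area is actually resampled. h_incr and v_incr are
   the source step per output pixel, in 16.16 fixed point. */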
ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                           int iwidth, int iheight,
                                           int topBand, int bottomBand,
                                           int leftBand, int rightBand,
                                           int padtop, int padbottom,
                                           int padleft, int padright)
{
    ImgReSampleContext *s;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;

    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->padtop = padtop;
    s->padbottom = padbottom;
    s->padleft = padleft;
    s->padright = padright;

    s->pad_owidth = owidth - (padleft + padright);
    s->pad_oheight = oheight - (padtop + padbottom);

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;

    build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
                 (float) (iwidth - leftBand - rightBand));
    build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
                 (float) (iheight - topBand - bottomBand));

    return s;
fail:
    av_free(s);
    return NULL;
}

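/* Resample all three planes of a YUV picture; planes 1 and 2 are assumed to
   be chroma subsampled by 2 both horizontally and vertically (4:2:0), hence
   the shift by 1 applied to their widths, heights and band/pad offsets. */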
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;

        optr = output->data[i] + (((output->linesize[i] *
                                    s->padtop) + s->padleft) >> shift);

        component_resample(s, optr, output->linesize[i],
                           s->pad_owidth >> shift, s->pad_oheight >> shift,
                           input->data[i] + (input->linesize[i] *
                               (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i], ((s->iwidth - s->leftBand -
                               s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}

#ifdef TEST

void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    memset(ptr, 0, size);
    return ptr;
}

void av_free(void *ptr)
{
    /* XXX: this test should not be needed on most libcs */
    if (ptr)
        free(ptr);
}

/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;
    f=fopen(filename,"w");
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
}

static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        printf("%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        printf("\n");
    }
}

#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0,
                                   0, 0, 0, 0);
        printf("Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        sprintf(buf, "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
#ifdef HAVE_MMX
    printf("MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
        fprintf(stderr, "mmx error\n");
        exit(1);
    }
    printf("MMX OK\n");
#endif
    return 0;
}

#endif