polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample...
[libav.git] / libavcodec / imgresample.c
CommitLineData
de6d9b64
FB
1/*
2 * High quality image resampling with polyphase filters
ff4ec49e 3 * Copyright (c) 2001 Fabrice Bellard.
de6d9b64 4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
de6d9b64 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
de6d9b64 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
de6d9b64 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
de6d9b64 18 */
983e3246
MN
19
20/**
21 * @file imgresample.c
 * High quality image resampling with polyphase filters.
23 */
24
de6d9b64 25#include "avcodec.h"
6000abfa 26#include "dsputil.h"
de6d9b64 27
54329dd5
NK
28#ifdef USE_FASTMEMCPY
29#include "fastmemcpy.h"
30#endif
54329dd5 31
de6d9b64
FB
#define NB_COMPONENTS 3

/* Sub-pixel phase resolution of the polyphase filter bank:
   2^PHASE_BITS distinct filter phases. */
#define PHASE_BITS 4
#define NB_PHASES (1 << PHASE_BITS)
/* Number of filter taps combined per output sample. */
#define NB_TAPS 4
#define FCENTER 1 /* index of the center of the filter */
//#define TEST 1 /* Test it */

/* Fixed-point source position format: 16 fractional bits. */
#define POS_FRAC_BITS 16
#define POS_FRAC (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS 8

/* Height of the ring buffer of horizontally filtered lines. */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
46
/* State for one rescaling operation: geometry, per-direction fixed-point
   increments, the two polyphase filter banks, and the line ring buffer. */
struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;         /* input/output dimensions */
    int topBand, bottomBand, leftBand, rightBand; /* source cropping bands */
    int padtop, padbottom, padleft, padright;     /* destination padding */
    int pad_owidth, pad_oheight;                  /* output size minus padding */
    int h_incr, v_incr;                           /* fixed-point source step per output pixel */
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;                            /* ring buffer of filtered lines */
};
57
aaaf1635
MN
58void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
59
de6d9b64
FB
60static inline int get_phase(int pos)
61{
62 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
63}
64
65/* This function must be optimized */
da64ecc3
DH
66static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
67 int src_width, int src_start, int src_incr,
68 int16_t *filters)
de6d9b64
FB
69{
70 int src_pos, phase, sum, i;
da64ecc3 71 const uint8_t *s;
0c1a9eda 72 int16_t *filter;
de6d9b64
FB
73
74 src_pos = src_start;
75 for(i=0;i<dst_width;i++) {
76#ifdef TEST
77 /* test */
78 if ((src_pos >> POS_FRAC_BITS) < 0 ||
79 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
02ac3136 80 av_abort();
de6d9b64
FB
81#endif
82 s = src + (src_pos >> POS_FRAC_BITS);
83 phase = get_phase(src_pos);
84 filter = filters + phase * NB_TAPS;
85#if NB_TAPS == 4
86 sum = s[0] * filter[0] +
87 s[1] * filter[1] +
88 s[2] * filter[2] +
89 s[3] * filter[3];
90#else
91 {
92 int j;
93 sum = 0;
94 for(j=0;j<NB_TAPS;j++)
95 sum += s[j] * filter[j];
96 }
97#endif
98 sum = sum >> FILTER_BITS;
99 if (sum < 0)
100 sum = 0;
101 else if (sum > 255)
102 sum = 255;
103 dst[0] = sum;
104 src_pos += src_incr;
105 dst++;
106 }
107}
108
109/* This function must be optimized */
da64ecc3
DH
110static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
111 int wrap, int16_t *filter)
de6d9b64
FB
112{
113 int sum, i;
da64ecc3 114 const uint8_t *s;
de6d9b64
FB
115
116 s = src;
117 for(i=0;i<dst_width;i++) {
118#if NB_TAPS == 4
119 sum = s[0 * wrap] * filter[0] +
120 s[1 * wrap] * filter[1] +
121 s[2 * wrap] * filter[2] +
122 s[3 * wrap] * filter[3];
123#else
124 {
125 int j;
0c1a9eda 126 uint8_t *s1 = s;
de6d9b64
FB
127
128 sum = 0;
129 for(j=0;j<NB_TAPS;j++) {
130 sum += s1[0] * filter[j];
131 s1 += wrap;
132 }
133 }
134#endif
135 sum = sum >> FILTER_BITS;
136 if (sum < 0)
137 sum = 0;
138 else if (sum > 255)
139 sum = 255;
140 dst[0] = sum;
141 dst++;
142 s++;
143 }
144}
145
980fc7b8 146#ifdef HAVE_MMX
de6d9b64
FB
147
148#include "i386/mmx.h"
149
/* Compute one horizontally filtered output pixel into `reg`:
   load 4 source bytes, widen them to 16 bits, multiply-accumulate with
   the current phase's 4 taps (pmaddwd), fold the two dword partial sums
   together, and scale down by FILTER_BITS.  Advances src_pos.
   Relies on locals s, src, src_pos, src_incr, phase, filter, filters
   and on mm7 == 0 in the enclosing function. */
#define FILTER4(reg) \
{\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
}

/* Debug helper: dump an MMX register's 64-bit contents to stdout. */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
167
/* XXX: do four pixels at a time */
/* MMX horizontal fast path (NB_TAPS == 4).  Same contract as
   h_resample_fast(): all taps are assumed to lie inside the source
   line. */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);          /* mm7 = 0, used by FILTER4 for unpacking */

    /* main loop: 4 output pixels per iteration */
    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        /* saturate each dword result to an unsigned byte */
        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    /* scalar tail: remaining (< 4) pixels, one at a time */
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}
213
/* MMX vertical resample: 4 output pixels per iteration.
   NOTE(review): the caller keeps this path disabled ("loss of
   precision") — pmullw keeps only the low 16 bits of each product,
   which can overflow with FILTER_BITS == 8. */
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    /* broadcast each of the 4 taps across a vector of 4 words */
    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);          /* mm7 = 0, for byte -> word unpacking */
    s = src;
    while (dst_width >= 4) {
        /* load 4 bytes from each of the 4 tap lines, widen to words */
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        /* multiply by the taps and accumulate the four products */
        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);  /* saturate to 0..255 */
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    /* scalar tail for the remaining (< 4) pixels */
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
277#endif
278
404d2241
BF
279#ifdef HAVE_ALTIVEC
/* View an AltiVec vector as 16 individual bytes for scalar access. */
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

/* View an AltiVec vector as 8 signed shorts for scalar access. */
typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;
289
/* AltiVec vertical resample: 16 output pixels per iteration.
   The caller only selects this path when FILTER_BITS <= 6, since the
   taps are pre-shifted by (15 - FILTER_BITS) to compensate for the
   implicit >>15 of vec_madds. */
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers,
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multipy/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        /* NOTE(review): `i` is always 0 at this point (the alignment loop
           above counts it down to 0), so this is effectively
           &s[0 * wrap]; spelling it with `i` looks like a leftover. */
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv) ;
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
404#endif
405
de6d9b64 406/* slow version to handle limit cases. Does not need optimisation */
da64ecc3
DH
407static void h_resample_slow(uint8_t *dst, int dst_width,
408 const uint8_t *src, int src_width,
0c1a9eda 409 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
410{
411 int src_pos, phase, sum, j, v, i;
da64ecc3 412 const uint8_t *s, *src_end;
0c1a9eda 413 int16_t *filter;
de6d9b64
FB
414
415 src_end = src + src_width;
416 src_pos = src_start;
417 for(i=0;i<dst_width;i++) {
418 s = src + (src_pos >> POS_FRAC_BITS);
419 phase = get_phase(src_pos);
420 filter = filters + phase * NB_TAPS;
421 sum = 0;
422 for(j=0;j<NB_TAPS;j++) {
423 if (s < src)
424 v = src[0];
425 else if (s >= src_end)
426 v = src_end[-1];
427 else
428 v = s[0];
429 sum += v * filter[j];
430 s++;
431 }
432 sum = sum >> FILTER_BITS;
433 if (sum < 0)
434 sum = 0;
435 else if (sum > 255)
436 sum = 255;
437 dst[0] = sum;
438 src_pos += src_incr;
439 dst++;
440 }
441}
442
/* Horizontal resample dispatcher.  Splits the output line into three
   parts: a leading part whose taps start before the source line (slow
   path with edge replication), a middle part fully inside the source
   (fast path, MMX when available), and a trailing part whose taps run
   past the end (slow path again). */
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        /* number of output pixels whose source position is still negative */
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        /* only the first n pixels keep all taps inside the line */
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
    h_resample_fast(dst, n,
                    src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        /* trailing edge: taps would read past the end of the line */
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}
479
/* Resample one colour plane.  For every output line, horizontally
   filter exactly the source lines the vertical filter needs (caching
   them in a small ring buffer so each source line is filtered once,
   except for edge replication), then apply the vertical filter. */
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wraping: mirror the line into the low
               part so the vertical filter can always read NB_TAPS
               consecutive lines */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* desactivated MMX because loss of precision (note the "&& 0") */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                     s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                     &s->v_filters[phase_y][0]);
            else
#endif
                v_resample(output, owidth,
                           s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                           &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}
544
de6d9b64
FB
545ImgReSampleContext *img_resample_init(int owidth, int oheight,
546 int iwidth, int iheight)
547{
1ff93ffc
TK
548 return img_resample_full_init(owidth, oheight, iwidth, iheight,
549 0, 0, 0, 0, 0, 0, 0, 0);
ab6d194a
MN
550}
551
552ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
553 int iwidth, int iheight,
554 int topBand, int bottomBand,
1ff93ffc
TK
555 int leftBand, int rightBand,
556 int padtop, int padbottom,
557 int padleft, int padright)
ab6d194a 558{
de6d9b64
FB
559 ImgReSampleContext *s;
560
561 s = av_mallocz(sizeof(ImgReSampleContext));
562 if (!s)
563 return NULL;
564 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
565 if (!s->line_buf)
566 goto fail;
567
568 s->owidth = owidth;
569 s->oheight = oheight;
570 s->iwidth = iwidth;
571 s->iheight = iheight;
1ff93ffc 572
ab6d194a
MN
573 s->topBand = topBand;
574 s->bottomBand = bottomBand;
575 s->leftBand = leftBand;
576 s->rightBand = rightBand;
de6d9b64 577
1ff93ffc
TK
578 s->padtop = padtop;
579 s->padbottom = padbottom;
580 s->padleft = padleft;
581 s->padright = padright;
582
583 s->pad_owidth = owidth - (padleft + padright);
584 s->pad_oheight = oheight - (padtop + padbottom);
585
586 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
587 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
588
aaaf1635
MN
589 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
590 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
591 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
592 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
de6d9b64
FB
593
594 return s;
1ff93ffc 595fail:
6000abfa 596 av_free(s);
de6d9b64
FB
597 return NULL;
598}
599
600void img_resample(ImgReSampleContext *s,
da64ecc3 601 AVPicture *output, const AVPicture *input)
de6d9b64
FB
602{
603 int i, shift;
1ff93ffc 604 uint8_t* optr;
de6d9b64 605
1ff93ffc 606 for (i=0;i<3;i++) {
de6d9b64 607 shift = (i == 0) ? 0 : 1;
1ff93ffc
TK
608
609 optr = output->data[i] + (((output->linesize[i] *
610 s->padtop) + s->padleft) >> shift);
611
612 component_resample(s, optr, output->linesize[i],
613 s->pad_owidth >> shift, s->pad_oheight >> shift,
614 input->data[i] + (input->linesize[i] *
615 (s->topBand >> shift)) + (s->leftBand >> shift),
616 input->linesize[i], ((s->iwidth - s->leftBand -
617 s->rightBand) >> shift),
ab6d194a 618 (s->iheight - s->topBand - s->bottomBand) >> shift);
de6d9b64
FB
619 }
620}
621
622void img_resample_close(ImgReSampleContext *s)
623{
6000abfa
FB
624 av_free(s->line_buf);
625 av_free(s);
de6d9b64
FB
626}
627
628#ifdef TEST
629
/* Minimal stand-in for the library allocator when building the test
   program: zero-filled allocation, NULL on failure.  The previous
   version passed an unchecked malloc() result to memset(), which is
   undefined behaviour when the allocation fails. */
void *av_mallocz(int size)
{
    void *ptr;

    ptr = malloc(size);
    if (!ptr)
        return NULL;
    memset(ptr, 0, size);
    return ptr;
}
637
/* Test-build stand-in for av_free().  free(NULL) is guaranteed to be a
   no-op by the C standard (C89 and later), so the former NULL guard —
   which the original comment already flagged as unnecessary — is gone. */
void av_free(void *ptr)
{
    free(ptr);
}
644
de6d9b64
FB
/* input test image (filled by main()) */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output images; sized for the largest test factor (2.0) */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
de6d9b64 655
/* Write an 8-bit grayscale image to `filename` as a binary (P5) PGM.
   Errors are reported on stderr; the function is best-effort.
   Fixes: fopen() was unchecked (NULL deref on failure) and the file
   was opened in text mode, which corrupts raw pixel data on platforms
   that translate line endings. */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;

    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "save_pgm: could not open '%s' for writing\n",
                filename);
        return;
    }
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
}
664
0c1a9eda 665static void dump_filter(int16_t *filter)
de6d9b64
FB
666{
667 int i, ph;
668
669 for(ph=0;ph<NB_PHASES;ph++) {
670 printf("%2d: ", ph);
671 for(i=0;i<NB_TAPS;i++) {
672 printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
673 }
674 printf("\n");
675 }
676}
677
980fc7b8 678#ifdef HAVE_MMX
6acce86b 679int mm_flags;
de6d9b64
FB
680#endif
681
682int main(int argc, char **argv)
683{
684 int x, y, v, i, xsize, ysize;
685 ImgReSampleContext *s;
686 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
687 char buf[256];
688
689 /* build test image */
690 for(y=0;y<YSIZE;y++) {
691 for(x=0;x<XSIZE;x++) {
692 if (x < XSIZE/2 && y < YSIZE/2) {
693 if (x < XSIZE/4 && y < YSIZE/4) {
694 if ((x % 10) <= 6 &&
695 (y % 10) <= 6)
696 v = 0xff;
697 else
698 v = 0x00;
699 } else if (x < XSIZE/4) {
700 if (x & 1)
701 v = 0xff;
702 else
703 v = 0;
704 } else if (y < XSIZE/4) {
705 if (y & 1)
706 v = 0xff;
707 else
708 v = 0;
709 } else {
710 if (y < YSIZE*3/8) {
711 if ((y+x) & 1)
712 v = 0xff;
713 else
714 v = 0;
715 } else {
716 if (((x+3) % 4) <= 1 &&
717 ((y+3) % 4) <= 1)
718 v = 0xff;
719 else
720 v = 0x00;
721 }
722 }
723 } else if (x < XSIZE/2) {
724 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
725 } else if (y < XSIZE/2) {
726 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
727 } else {
728 v = ((x + y - XSIZE) * 255) / XSIZE;
729 }
ab6d194a 730 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
de6d9b64
FB
731 }
732 }
733 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
734 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
735 fact = factors[i];
736 xsize = (int)(XSIZE * fact);
ab6d194a 737 ysize = (int)((YSIZE - 100) * fact);
6acce86b 738 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
de6d9b64
FB
739 printf("Factor=%0.2f\n", fact);
740 dump_filter(&s->h_filters[0][0]);
741 component_resample(s, img1, xsize, xsize, ysize,
ab6d194a 742 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
de6d9b64
FB
743 img_resample_close(s);
744
745 sprintf(buf, "/tmp/out%d.pgm", i);
746 save_pgm(buf, img1, xsize, ysize);
747 }
748
749 /* mmx test */
980fc7b8 750#ifdef HAVE_MMX
de6d9b64
FB
751 printf("MMX test\n");
752 fact = 0.72;
753 xsize = (int)(XSIZE * fact);
754 ysize = (int)(YSIZE * fact);
755 mm_flags = MM_MMX;
756 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
757 component_resample(s, img1, xsize, xsize, ysize,
758 img, XSIZE, XSIZE, YSIZE);
759
760 mm_flags = 0;
761 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
762 component_resample(s, img2, xsize, xsize, ysize,
763 img, XSIZE, XSIZE, YSIZE);
764 if (memcmp(img1, img2, xsize * ysize) != 0) {
765 fprintf(stderr, "mmx error\n");
766 exit(1);
767 }
768 printf("MMX OK\n");
769#endif
770 return 0;
771}
772
773#endif