2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Gerard Lantau.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 #define NB_COMPONENTS 3
29 #define NB_PHASES (1 << PHASE_BITS)
31 #define FCENTER 1 /* index of the center of the filter */
33 #define POS_FRAC_BITS 16
34 #define POS_FRAC (1 << POS_FRAC_BITS)
35 /* 6 bits precision is needed for MMX */
38 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
40 struct ImgReSampleContext
{
41 int iwidth
, iheight
, owidth
, oheight
;
43 INT16 h_filters
[NB_PHASES
][NB_TAPS
] __align8
; /* horizontal filters */
44 INT16 v_filters
[NB_PHASES
][NB_TAPS
] __align8
; /* vertical filters */
48 static inline int get_phase(int pos
)
50 return ((pos
) >> (POS_FRAC_BITS
- PHASE_BITS
)) & ((1 << PHASE_BITS
) - 1);
53 /* This function must be optimized */
54 static void h_resample_fast(UINT8
*dst
, int dst_width
, UINT8
*src
, int src_width
,
55 int src_start
, int src_incr
, INT16
*filters
)
57 int src_pos
, phase
, sum
, i
;
62 for(i
=0;i
<dst_width
;i
++) {
65 if ((src_pos
>> POS_FRAC_BITS
) < 0 ||
66 (src_pos
>> POS_FRAC_BITS
) > (src_width
- NB_TAPS
))
69 s
= src
+ (src_pos
>> POS_FRAC_BITS
);
70 phase
= get_phase(src_pos
);
71 filter
= filters
+ phase
* NB_TAPS
;
73 sum
= s
[0] * filter
[0] +
81 for(j
=0;j
<NB_TAPS
;j
++)
82 sum
+= s
[j
] * filter
[j
];
85 sum
= sum
>> FILTER_BITS
;
96 /* This function must be optimized */
97 static void v_resample(UINT8
*dst
, int dst_width
, UINT8
*src
, int wrap
,
104 for(i
=0;i
<dst_width
;i
++) {
106 sum
= s
[0 * wrap
] * filter
[0] +
107 s
[1 * wrap
] * filter
[1] +
108 s
[2 * wrap
] * filter
[2] +
109 s
[3 * wrap
] * filter
[3];
116 for(j
=0;j
<NB_TAPS
;j
++) {
117 sum
+= s1
[0] * filter
[j
];
122 sum
= sum
>> FILTER_BITS
;
135 #include "i386/mmx.h"
137 #define FILTER4(reg) \
139 s = src + (src_pos >> POS_FRAC_BITS);\
140 phase = get_phase(src_pos);\
141 filter = filters + phase * NB_TAPS;\
143 punpcklbw_r2r(mm7, reg);\
144 movq_m2r(*filter, mm6);\
145 pmaddwd_r2r(reg, mm6);\
148 paddd_r2r(mm6, reg);\
149 psrad_i2r(FILTER_BITS, reg);\
150 src_pos += src_incr;\
153 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
155 /* XXX: do four pixels at a time */
156 static void h_resample_fast4_mmx(UINT8
*dst
, int dst_width
, UINT8
*src
, int src_width
,
157 int src_start
, int src_incr
, INT16
*filters
)
167 while (dst_width
>= 4) {
174 packuswb_r2r(mm7
, mm0
);
175 packuswb_r2r(mm7
, mm1
);
176 packuswb_r2r(mm7
, mm3
);
177 packuswb_r2r(mm7
, mm2
);
189 while (dst_width
> 0) {
191 packuswb_r2r(mm7
, mm0
);
200 static void v_resample4_mmx(UINT8
*dst
, int dst_width
, UINT8
*src
, int wrap
,
218 while (dst_width
>= 4) {
219 movq_m2r(s
[0 * wrap
], mm0
);
220 punpcklbw_r2r(mm7
, mm0
);
221 movq_m2r(s
[1 * wrap
], mm1
);
222 punpcklbw_r2r(mm7
, mm1
);
223 movq_m2r(s
[2 * wrap
], mm2
);
224 punpcklbw_r2r(mm7
, mm2
);
225 movq_m2r(s
[3 * wrap
], mm3
);
226 punpcklbw_r2r(mm7
, mm3
);
228 pmullw_m2r(coefs
[0], mm0
);
229 pmullw_m2r(coefs
[1], mm1
);
230 pmullw_m2r(coefs
[2], mm2
);
231 pmullw_m2r(coefs
[3], mm3
);
236 psraw_i2r(FILTER_BITS
, mm0
);
238 packuswb_r2r(mm7
, mm0
);
241 *(UINT32
*)dst
= tmp
.ud
[0];
246 while (dst_width
> 0) {
247 sum
= s
[0 * wrap
] * filter
[0] +
248 s
[1 * wrap
] * filter
[1] +
249 s
[2 * wrap
] * filter
[2] +
250 s
[3 * wrap
] * filter
[3];
251 sum
= sum
>> FILTER_BITS
;
265 /* slow version to handle limit cases. Does not need optimisation */
266 static void h_resample_slow(UINT8
*dst
, int dst_width
, UINT8
*src
, int src_width
,
267 int src_start
, int src_incr
, INT16
*filters
)
269 int src_pos
, phase
, sum
, j
, v
, i
;
273 src_end
= src
+ src_width
;
275 for(i
=0;i
<dst_width
;i
++) {
276 s
= src
+ (src_pos
>> POS_FRAC_BITS
);
277 phase
= get_phase(src_pos
);
278 filter
= filters
+ phase
* NB_TAPS
;
280 for(j
=0;j
<NB_TAPS
;j
++) {
283 else if (s
>= src_end
)
287 sum
+= v
* filter
[j
];
290 sum
= sum
>> FILTER_BITS
;
301 static void h_resample(UINT8
*dst
, int dst_width
, UINT8
*src
, int src_width
,
302 int src_start
, int src_incr
, INT16
*filters
)
307 n
= (0 - src_start
+ src_incr
- 1) / src_incr
;
308 h_resample_slow(dst
, n
, src
, src_width
, src_start
, src_incr
, filters
);
311 src_start
+= n
* src_incr
;
313 src_end
= src_start
+ dst_width
* src_incr
;
314 if (src_end
> ((src_width
- NB_TAPS
) << POS_FRAC_BITS
)) {
315 n
= (((src_width
- NB_TAPS
+ 1) << POS_FRAC_BITS
) - 1 - src_start
) /
321 if ((mm_flags
& MM_MMX
) && NB_TAPS
== 4)
322 h_resample_fast4_mmx(dst
, n
,
323 src
, src_width
, src_start
, src_incr
, filters
);
326 h_resample_fast(dst
, n
,
327 src
, src_width
, src_start
, src_incr
, filters
);
331 src_start
+= n
* src_incr
;
332 h_resample_slow(dst
, dst_width
,
333 src
, src_width
, src_start
, src_incr
, filters
);
337 static void component_resample(ImgReSampleContext
*s
,
338 UINT8
*output
, int owrap
, int owidth
, int oheight
,
339 UINT8
*input
, int iwrap
, int iwidth
, int iheight
)
341 int src_y
, src_y1
, last_src_y
, ring_y
, phase_y
, y1
, y
;
342 UINT8
*new_line
, *src_line
;
344 last_src_y
= - FCENTER
- 1;
345 /* position of the bottom of the filter in the source image */
346 src_y
= (last_src_y
+ NB_TAPS
) * POS_FRAC
;
347 ring_y
= NB_TAPS
; /* position in ring buffer */
348 for(y
=0;y
<oheight
;y
++) {
349 /* apply horizontal filter on new lines from input if needed */
350 src_y1
= src_y
>> POS_FRAC_BITS
;
351 while (last_src_y
< src_y1
) {
352 if (++ring_y
>= LINE_BUF_HEIGHT
+ NB_TAPS
)
355 /* handle limit conditions : replicate line (slighly
356 inefficient because we filter multiple times */
360 } else if (y1
>= iheight
) {
363 src_line
= input
+ y1
* iwrap
;
364 new_line
= s
->line_buf
+ ring_y
* owidth
;
365 /* apply filter and handle limit cases correctly */
366 h_resample(new_line
, owidth
,
367 src_line
, iwidth
, - FCENTER
* POS_FRAC
, s
->h_incr
,
368 &s
->h_filters
[0][0]);
369 /* handle ring buffer wraping */
370 if (ring_y
>= LINE_BUF_HEIGHT
) {
371 memcpy(s
->line_buf
+ (ring_y
- LINE_BUF_HEIGHT
) * owidth
,
375 /* apply vertical filter */
376 phase_y
= get_phase(src_y
);
378 /* desactivated MMX because loss of precision */
379 if ((mm_flags
& MM_MMX
) && NB_TAPS
== 4 && 0)
380 v_resample4_mmx(output
, owidth
,
381 s
->line_buf
+ (ring_y
- NB_TAPS
+ 1) * owidth
, owidth
,
382 &s
->v_filters
[phase_y
][0]);
385 v_resample(output
, owidth
,
386 s
->line_buf
+ (ring_y
- NB_TAPS
+ 1) * owidth
, owidth
,
387 &s
->v_filters
[phase_y
][0]);
394 /* XXX: the following filter is quite naive, but it seems to suffice
396 static void build_filter(INT16
*filter
, float factor
)
399 float x
, y
, tab
[NB_TAPS
], norm
, mult
;
401 /* if upsampling, only need to interpolate, no filter */
405 for(ph
=0;ph
<NB_PHASES
;ph
++) {
407 for(i
=0;i
<NB_TAPS
;i
++) {
409 x
= M_PI
* ((float)(i
- FCENTER
) - (float)ph
/ NB_PHASES
) * factor
;
418 /* normalize so that an uniform color remains the same */
419 mult
= (float)(1 << FILTER_BITS
) / norm
;
420 for(i
=0;i
<NB_TAPS
;i
++) {
421 v
= (int)(tab
[i
] * mult
);
422 filter
[ph
* NB_TAPS
+ i
] = v
;
427 ImgReSampleContext
*img_resample_init(int owidth
, int oheight
,
428 int iwidth
, int iheight
)
430 ImgReSampleContext
*s
;
432 s
= av_mallocz(sizeof(ImgReSampleContext
));
435 s
->line_buf
= av_mallocz(owidth
* (LINE_BUF_HEIGHT
+ NB_TAPS
));
440 s
->oheight
= oheight
;
442 s
->iheight
= iheight
;
444 s
->h_incr
= (iwidth
* POS_FRAC
) / owidth
;
445 s
->v_incr
= (iheight
* POS_FRAC
) / oheight
;
447 build_filter(&s
->h_filters
[0][0], (float)owidth
/ (float)iwidth
);
448 build_filter(&s
->v_filters
[0][0], (float)oheight
/ (float)iheight
);
456 void img_resample(ImgReSampleContext
*s
,
457 AVPicture
*output
, AVPicture
*input
)
462 shift
= (i
== 0) ?
0 : 1;
463 component_resample(s
, output
->data
[i
], output
->linesize
[i
],
464 s
->owidth
>> shift
, s
->oheight
>> shift
,
465 input
->data
[i
], input
->linesize
[i
],
466 s
->iwidth
>> shift
, s
->iheight
>> shift
);
470 void img_resample_close(ImgReSampleContext
*s
)
478 void *av_mallocz(int size
)
482 memset(ptr
, 0, size
);
489 UINT8 img
[XSIZE
* YSIZE
];
494 UINT8 img1
[XSIZE1
* YSIZE1
];
495 UINT8 img2
[XSIZE1
* YSIZE1
];
497 void save_pgm(const char *filename
, UINT8
*img
, int xsize
, int ysize
)
500 f
=fopen(filename
,"w");
501 fprintf(f
,"P5\n%d %d\n%d\n", xsize
, ysize
, 255);
502 fwrite(img
,1, xsize
* ysize
,f
);
506 static void dump_filter(INT16
*filter
)
510 for(ph
=0;ph
<NB_PHASES
;ph
++) {
512 for(i
=0;i
<NB_TAPS
;i
++) {
513 printf(" %5.2f", filter
[ph
* NB_TAPS
+ i
] / 256.0);
523 int main(int argc
, char **argv
)
525 int x
, y
, v
, i
, xsize
, ysize
;
526 ImgReSampleContext
*s
;
527 float fact
, factors
[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
530 /* build test image */
531 for(y
=0;y
<YSIZE
;y
++) {
532 for(x
=0;x
<XSIZE
;x
++) {
533 if (x
< XSIZE
/2 && y
< YSIZE
/2) {
534 if (x
< XSIZE
/4 && y
< YSIZE
/4) {
540 } else if (x
< XSIZE
/4) {
545 } else if (y
< XSIZE
/4) {
557 if (((x
+3) % 4) <= 1 &&
564 } else if (x
< XSIZE
/2) {
565 v
= ((x
- (XSIZE
/2)) * 255) / (XSIZE
/2);
566 } else if (y
< XSIZE
/2) {
567 v
= ((y
- (XSIZE
/2)) * 255) / (XSIZE
/2);
569 v
= ((x
+ y
- XSIZE
) * 255) / XSIZE
;
571 img
[y
* XSIZE
+ x
] = v
;
574 save_pgm("/tmp/in.pgm", img
, XSIZE
, YSIZE
);
575 for(i
=0;i
<sizeof(factors
)/sizeof(float);i
++) {
577 xsize
= (int)(XSIZE
* fact
);
578 ysize
= (int)(YSIZE
* fact
);
579 s
= img_resample_init(xsize
, ysize
, XSIZE
, YSIZE
);
580 printf("Factor=%0.2f\n", fact
);
581 dump_filter(&s
->h_filters
[0][0]);
582 component_resample(s
, img1
, xsize
, xsize
, ysize
,
583 img
, XSIZE
, XSIZE
, YSIZE
);
584 img_resample_close(s
);
586 sprintf(buf
, "/tmp/out%d.pgm", i
);
587 save_pgm(buf
, img1
, xsize
, ysize
);
592 printf("MMX test\n");
594 xsize
= (int)(XSIZE
* fact
);
595 ysize
= (int)(YSIZE
* fact
);
597 s
= img_resample_init(xsize
, ysize
, XSIZE
, YSIZE
);
598 component_resample(s
, img1
, xsize
, xsize
, ysize
,
599 img
, XSIZE
, XSIZE
, YSIZE
);
602 s
= img_resample_init(xsize
, ysize
, XSIZE
, YSIZE
);
603 component_resample(s
, img2
, xsize
, xsize
, ysize
,
604 img
, XSIZE
, XSIZE
, YSIZE
);
605 if (memcmp(img1
, img2
, xsize
* ysize
) != 0) {
606 fprintf(stderr
, "mmx error\n");