1 /*
2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file imgresample.c
24 * High quality image resampling with polyphase filters.
25 */
26
27 #include "avcodec.h"
28 #include "swscale.h"
29 #include "dsputil.h"
30
31 #ifdef USE_FASTMEMCPY
32 #include "libvo/fastmemcpy.h"
33 #endif
34
35 #define NB_COMPONENTS 3
36
37 #define PHASE_BITS 4
38 #define NB_PHASES (1 << PHASE_BITS)
39 #define NB_TAPS 4
40 #define FCENTER 1 /* index of the center of the filter */
41 //#define TEST 1 /* Test it */
42
43 #define POS_FRAC_BITS 16
44 #define POS_FRAC (1 << POS_FRAC_BITS)
45 /* 6 bits of precision are needed for MMX */
46 #define FILTER_BITS 8
47
48 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
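/* Descriptive note (not in the original source): line_buf in ImgReSampleContext
   holds LINE_BUF_HEIGHT + NB_TAPS horizontally resampled lines of owidth bytes
   each; component_resample() below uses it as a ring buffer so that each source
   line is horizontally filtered only once (except when border lines have to be
   replicated). */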
49
50 struct ImgReSampleContext {
51 int iwidth, iheight, owidth, oheight;
52 int topBand, bottomBand, leftBand, rightBand;
53 int padtop, padbottom, padleft, padright;
54 int pad_owidth, pad_oheight;
55 int h_incr, v_incr;
56 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
57 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
58 uint8_t *line_buf;
59 };
60
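/* av_build_filter() is only declared here; it is implemented elsewhere in
   libavcodec (it is shared with the audio resampler). */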
61 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
62
63 static inline int get_phase(int pos)
64 {
65 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
66 }
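/*
 * Source positions are kept in 16.16 fixed point (POS_FRAC_BITS): the integer
 * part selects the first source sample of the filter window and the top
 * PHASE_BITS of the fractional part select one of the NB_PHASES precomputed
 * filter phases.  Illustrative example (not from the original source): a
 * position of 0x28000 means source index 2 with fraction 0.5, so get_phase()
 * returns 8, the middle phase of the 16-phase filter bank.
 */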
67
68 /* This function must be optimized */
69 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
70 int src_width, int src_start, int src_incr,
71 int16_t *filters)
72 {
73 int src_pos, phase, sum, i;
74 const uint8_t *s;
75 int16_t *filter;
76
77 src_pos = src_start;
78 for(i=0;i<dst_width;i++) {
79 #ifdef TEST
80 /* test */
81 if ((src_pos >> POS_FRAC_BITS) < 0 ||
82 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
83 av_abort();
84 #endif
85 s = src + (src_pos >> POS_FRAC_BITS);
86 phase = get_phase(src_pos);
87 filter = filters + phase * NB_TAPS;
88 #if NB_TAPS == 4
89 sum = s[0] * filter[0] +
90 s[1] * filter[1] +
91 s[2] * filter[2] +
92 s[3] * filter[3];
93 #else
94 {
95 int j;
96 sum = 0;
97 for(j=0;j<NB_TAPS;j++)
98 sum += s[j] * filter[j];
99 }
100 #endif
101 sum = sum >> FILTER_BITS;
102 if (sum < 0)
103 sum = 0;
104 else if (sum > 255)
105 sum = 255;
106 dst[0] = sum;
107 src_pos += src_incr;
108 dst++;
109 }
110 }
111
112 /* This function must be optimized */
113 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
114 int wrap, int16_t *filter)
115 {
116 int sum, i;
117 const uint8_t *s;
118
119 s = src;
120 for(i=0;i<dst_width;i++) {
121 #if NB_TAPS == 4
122 sum = s[0 * wrap] * filter[0] +
123 s[1 * wrap] * filter[1] +
124 s[2 * wrap] * filter[2] +
125 s[3 * wrap] * filter[3];
126 #else
127 {
128 int j;
129 uint8_t *s1 = s;
130
131 sum = 0;
132 for(j=0;j<NB_TAPS;j++) {
133 sum += s1[0] * filter[j];
134 s1 += wrap;
135 }
136 }
137 #endif
138 sum = sum >> FILTER_BITS;
139 if (sum < 0)
140 sum = 0;
141 else if (sum > 255)
142 sum = 255;
143 dst[0] = sum;
144 dst++;
145 s++;
146 }
147 }
148
149 #ifdef HAVE_MMX
150
151 #include "i386/mmx.h"
152
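/*
 * Descriptive note (not in the original source): FILTER4 computes one
 * horizontally filtered output pixel with MMX.  It loads 8 source bytes at
 * the current position, zero-extends the low four to 16-bit words (mm7 is
 * zero), multiplies them pairwise with the four 16-bit filter taps using
 * pmaddwd, adds the two resulting dwords together, and shifts right by
 * FILTER_BITS.  The result ends up in the low 32 bits of 'reg'; clamping to
 * 0..255 is done later by packuswb in the caller.
 */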
153 #define FILTER4(reg) \
154 {\
155 s = src + (src_pos >> POS_FRAC_BITS);\
156 phase = get_phase(src_pos);\
157 filter = filters + phase * NB_TAPS;\
158 movq_m2r(*s, reg);\
159 punpcklbw_r2r(mm7, reg);\
160 movq_m2r(*filter, mm6);\
161 pmaddwd_r2r(reg, mm6);\
162 movq_r2r(mm6, reg);\
163 psrlq_i2r(32, reg);\
164 paddd_r2r(mm6, reg);\
165 psrad_i2r(FILTER_BITS, reg);\
166 src_pos += src_incr;\
167 }
168
169 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
170
171 /* XXX: do four pixels at a time */
172 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
173 const uint8_t *src, int src_width,
174 int src_start, int src_incr, int16_t *filters)
175 {
176 int src_pos, phase;
177 const uint8_t *s;
178 int16_t *filter;
179 mmx_t tmp;
180
181 src_pos = src_start;
182 pxor_r2r(mm7, mm7);
183
184 while (dst_width >= 4) {
185
186 FILTER4(mm0);
187 FILTER4(mm1);
188 FILTER4(mm2);
189 FILTER4(mm3);
190
191 packuswb_r2r(mm7, mm0);
192 packuswb_r2r(mm7, mm1);
193 packuswb_r2r(mm7, mm3);
194 packuswb_r2r(mm7, mm2);
195 movq_r2m(mm0, tmp);
196 dst[0] = tmp.ub[0];
197 movq_r2m(mm1, tmp);
198 dst[1] = tmp.ub[0];
199 movq_r2m(mm2, tmp);
200 dst[2] = tmp.ub[0];
201 movq_r2m(mm3, tmp);
202 dst[3] = tmp.ub[0];
203 dst += 4;
204 dst_width -= 4;
205 }
206 while (dst_width > 0) {
207 FILTER4(mm0);
208 packuswb_r2r(mm7, mm0);
209 movq_r2m(mm0, tmp);
210 dst[0] = tmp.ub[0];
211 dst++;
212 dst_width--;
213 }
214 emms();
215 }
216
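/*
 * Descriptive note (not in the original source): MMX vertical filter.  The
 * four 16-bit filter taps are first replicated into four 64-bit constants so
 * that pmullw can apply one tap to four horizontally adjacent pixels at a
 * time.  Each loop iteration reads 4 pixels from each of the NB_TAPS source
 * lines, multiplies, accumulates in 16-bit words, shifts by FILTER_BITS and
 * writes 4 clamped output pixels.  The 16-bit accumulation can lose
 * precision, which is why the call in component_resample() is currently
 * disabled.
 */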
217 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
218 int wrap, int16_t *filter)
219 {
220 int sum, i, v;
221 const uint8_t *s;
222 mmx_t tmp;
223 mmx_t coefs[4];
224
225 for(i=0;i<4;i++) {
226 v = filter[i];
227 coefs[i].uw[0] = v;
228 coefs[i].uw[1] = v;
229 coefs[i].uw[2] = v;
230 coefs[i].uw[3] = v;
231 }
232
233 pxor_r2r(mm7, mm7);
234 s = src;
235 while (dst_width >= 4) {
236 movq_m2r(s[0 * wrap], mm0);
237 punpcklbw_r2r(mm7, mm0);
238 movq_m2r(s[1 * wrap], mm1);
239 punpcklbw_r2r(mm7, mm1);
240 movq_m2r(s[2 * wrap], mm2);
241 punpcklbw_r2r(mm7, mm2);
242 movq_m2r(s[3 * wrap], mm3);
243 punpcklbw_r2r(mm7, mm3);
244
245 pmullw_m2r(coefs[0], mm0);
246 pmullw_m2r(coefs[1], mm1);
247 pmullw_m2r(coefs[2], mm2);
248 pmullw_m2r(coefs[3], mm3);
249
250 paddw_r2r(mm1, mm0);
251 paddw_r2r(mm3, mm2);
252 paddw_r2r(mm2, mm0);
253 psraw_i2r(FILTER_BITS, mm0);
254
255 packuswb_r2r(mm7, mm0);
256 movq_r2m(mm0, tmp);
257
258 *(uint32_t *)dst = tmp.ud[0];
259 dst += 4;
260 s += 4;
261 dst_width -= 4;
262 }
263 while (dst_width > 0) {
264 sum = s[0 * wrap] * filter[0] +
265 s[1 * wrap] * filter[1] +
266 s[2 * wrap] * filter[2] +
267 s[3 * wrap] * filter[3];
268 sum = sum >> FILTER_BITS;
269 if (sum < 0)
270 sum = 0;
271 else if (sum > 255)
272 sum = 255;
273 dst[0] = sum;
274 dst++;
275 s++;
276 dst_width--;
277 }
278 emms();
279 }
280 #endif
281
282 #ifdef HAVE_ALTIVEC
283 typedef union {
284 vector unsigned char v;
285 unsigned char c[16];
286 } vec_uc_t;
287
288 typedef union {
289 vector signed short v;
290 signed short s[8];
291 } vec_ss_t;
292
293 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
294 int wrap, int16_t *filter)
295 {
296 int sum, i;
297 const uint8_t *s;
298 vector unsigned char *tv, tmp, dstv, zero;
299 vec_ss_t srchv[4], srclv[4], fv[4];
300 vector signed short zeros, sumhv, sumlv;
301 s = src;
302
303 for(i=0;i<4;i++)
304 {
305 /*
306 The vec_madds later on does an implicit >>15 on the result.
307 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
308 a signed short, we have just enough bits to pre-shift our
309 filter constants <<7 to compensate for vec_madds.
310 */
311 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
312 fv[i].v = vec_splat(fv[i].v, 0);
313 }
314
315 zero = vec_splat_u8(0);
316 zeros = vec_splat_s16(0);
317
318
319 /*
320 When we're resampling, we'd ideally like both our input buffers,
321 and output buffers to be 16-byte aligned, so we can do both aligned
322 reads and writes. Sadly we can't always have this at the moment, so
323 we opt for aligned writes, as unaligned writes have a huge overhead.
324 To do this, do enough scalar resamples to get dst 16-byte aligned.
325 */
326 i = (-(int)dst) & 0xf;
327 while(i>0) {
328 sum = s[0 * wrap] * filter[0] +
329 s[1 * wrap] * filter[1] +
330 s[2 * wrap] * filter[2] +
331 s[3 * wrap] * filter[3];
332 sum = sum >> FILTER_BITS;
333 if (sum<0) sum = 0; else if (sum>255) sum=255;
334 dst[0] = sum;
335 dst++;
336 s++;
337 dst_width--;
338 i--;
339 }
340
341 /* Do our altivec resampling on 16 pixels at once. */
342 while(dst_width>=16) {
343 /*
344 Read 16 (potentially unaligned) bytes from each of
345 4 lines into 4 vectors, and split them into shorts.
346 Interleave the multiply/accumulate for the resample
347 filter with the loads to hide the 3 cycle latency
348 the vec_madds have.
349 */
350 tv = (vector unsigned char *) &s[0 * wrap];
351 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
352 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
353 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
354 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
355 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
356
357 tv = (vector unsigned char *) &s[1 * wrap];
358 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
359 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
360 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
361 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
362 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
363
364 tv = (vector unsigned char *) &s[2 * wrap];
365 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
366 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
367 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
368 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
369 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
370
371 tv = (vector unsigned char *) &s[3 * wrap];
372 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
373 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
374 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
375 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
376 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
377
378 /*
379 Pack the results into our destination vector,
380 and do an aligned write of that back to memory.
381 */
382 dstv = vec_packsu(sumhv, sumlv);
383 vec_st(dstv, 0, (vector unsigned char *) dst);
384
385 dst+=16;
386 s+=16;
387 dst_width-=16;
388 }
389
390 /*
391 If there are any leftover pixels, resample them
392 with the slow scalar method.
393 */
394 while(dst_width>0) {
395 sum = s[0 * wrap] * filter[0] +
396 s[1 * wrap] * filter[1] +
397 s[2 * wrap] * filter[2] +
398 s[3 * wrap] * filter[3];
399 sum = sum >> FILTER_BITS;
400 if (sum<0) sum = 0; else if (sum>255) sum=255;
401 dst[0] = sum;
402 dst++;
403 s++;
404 dst_width--;
405 }
406 }
407 #endif
408
409 /* slow version to handle limit cases. Does not need optimisation */
410 static void h_resample_slow(uint8_t *dst, int dst_width,
411 const uint8_t *src, int src_width,
412 int src_start, int src_incr, int16_t *filters)
413 {
414 int src_pos, phase, sum, j, v, i;
415 const uint8_t *s, *src_end;
416 int16_t *filter;
417
418 src_end = src + src_width;
419 src_pos = src_start;
420 for(i=0;i<dst_width;i++) {
421 s = src + (src_pos >> POS_FRAC_BITS);
422 phase = get_phase(src_pos);
423 filter = filters + phase * NB_TAPS;
424 sum = 0;
425 for(j=0;j<NB_TAPS;j++) {
426 if (s < src)
427 v = src[0];
428 else if (s >= src_end)
429 v = src_end[-1];
430 else
431 v = s[0];
432 sum += v * filter[j];
433 s++;
434 }
435 sum = sum >> FILTER_BITS;
436 if (sum < 0)
437 sum = 0;
438 else if (sum > 255)
439 sum = 255;
440 dst[0] = sum;
441 src_pos += src_incr;
442 dst++;
443 }
444 }
445
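/*
 * Descriptive note (not in the original source): h_resample() splits each
 * output line into up to three segments.  Output pixels whose filter window
 * would start before the first source pixel are handled by h_resample_slow(),
 * the bulk of the line where the whole NB_TAPS window lies inside the source
 * goes through the fast (possibly MMX) path, and pixels whose window runs
 * past the last source pixel fall back to h_resample_slow() again.
 */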
446 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
447 int src_width, int src_start, int src_incr,
448 int16_t *filters)
449 {
450 int n, src_end;
451
452 if (src_start < 0) {
453 n = (0 - src_start + src_incr - 1) / src_incr;
454 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
455 dst += n;
456 dst_width -= n;
457 src_start += n * src_incr;
458 }
459 src_end = src_start + dst_width * src_incr;
460 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
461 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
462 src_incr;
463 } else {
464 n = dst_width;
465 }
466 #ifdef HAVE_MMX
467 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
468 h_resample_fast4_mmx(dst, n,
469 src, src_width, src_start, src_incr, filters);
470 else
471 #endif
472 h_resample_fast(dst, n,
473 src, src_width, src_start, src_incr, filters);
474 if (n < dst_width) {
475 dst += n;
476 dst_width -= n;
477 src_start += n * src_incr;
478 h_resample_slow(dst, dst_width,
479 src, src_width, src_start, src_incr, filters);
480 }
481 }
482
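/*
 * Descriptive note (not in the original source): component_resample()
 * resamples one plane.  For every output line it first makes sure that all
 * source lines covered by the vertical filter window have been horizontally
 * resampled into the line_buf ring buffer (replicating the first/last line at
 * the borders), then runs the vertical filter over the NB_TAPS buffered lines
 * selected by the current ring position and phase.
 */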
483 static void component_resample(ImgReSampleContext *s,
484 uint8_t *output, int owrap, int owidth, int oheight,
485 uint8_t *input, int iwrap, int iwidth, int iheight)
486 {
487 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
488 uint8_t *new_line, *src_line;
489
490 last_src_y = - FCENTER - 1;
491 /* position of the bottom of the filter in the source image */
492 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
493 ring_y = NB_TAPS; /* position in ring buffer */
494 for(y=0;y<oheight;y++) {
495 /* apply horizontal filter on new lines from input if needed */
496 src_y1 = src_y >> POS_FRAC_BITS;
497 while (last_src_y < src_y1) {
498 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
499 ring_y = NB_TAPS;
500 last_src_y++;
501 /* handle limit conditions: replicate line (slightly
502 inefficient because we filter multiple times) */
503 y1 = last_src_y;
504 if (y1 < 0) {
505 y1 = 0;
506 } else if (y1 >= iheight) {
507 y1 = iheight - 1;
508 }
509 src_line = input + y1 * iwrap;
510 new_line = s->line_buf + ring_y * owidth;
511 /* apply filter and handle limit cases correctly */
512 h_resample(new_line, owidth,
513 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
514 &s->h_filters[0][0]);
515 /* handle ring buffer wrapping */
516 if (ring_y >= LINE_BUF_HEIGHT) {
517 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
518 new_line, owidth);
519 }
520 }
521 /* apply vertical filter */
522 phase_y = get_phase(src_y);
523 #ifdef HAVE_MMX
524 /* MMX version deactivated because of loss of precision */
525 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
526 v_resample4_mmx(output, owidth,
527 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
528 &s->v_filters[phase_y][0]);
529 else
530 #endif
531 #ifdef HAVE_ALTIVEC
532 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
533 v_resample16_altivec(output, owidth,
534 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
535 &s->v_filters[phase_y][0]);
536 else
537 #endif
538 v_resample(output, owidth,
539 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
540 &s->v_filters[phase_y][0]);
541
542 src_y += s->v_incr;
543
544 output += owrap;
545 }
546 }
547
548 ImgReSampleContext *img_resample_init(int owidth, int oheight,
549 int iwidth, int iheight)
550 {
551 return img_resample_full_init(owidth, oheight, iwidth, iheight,
552 0, 0, 0, 0, 0, 0, 0, 0);
553 }
554
555 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
556 int iwidth, int iheight,
557 int topBand, int bottomBand,
558 int leftBand, int rightBand,
559 int padtop, int padbottom,
560 int padleft, int padright)
561 {
562 ImgReSampleContext *s;
563
564 if (!owidth || !oheight || !iwidth || !iheight)
565 return NULL;
566
567 s = av_mallocz(sizeof(ImgReSampleContext));
568 if (!s)
569 return NULL;
570 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
571 goto fail;
572 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
573 if (!s->line_buf)
574 goto fail;
575
576 s->owidth = owidth;
577 s->oheight = oheight;
578 s->iwidth = iwidth;
579 s->iheight = iheight;
580
581 s->topBand = topBand;
582 s->bottomBand = bottomBand;
583 s->leftBand = leftBand;
584 s->rightBand = rightBand;
585
586 s->padtop = padtop;
587 s->padbottom = padbottom;
588 s->padleft = padleft;
589 s->padright = padright;
590
591 s->pad_owidth = owidth - (padleft + padright);
592 s->pad_oheight = oheight - (padtop + padbottom);
593
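/* Source-position increments per output pixel, in POS_FRAC (16.16) fixed
   point: the cropped input size divided by the padded output size.
   Illustrative example (not from the original source): scaling a 640-pixel
   wide input with no crop to 320 output pixels with no padding gives
   h_incr = 2 * POS_FRAC, i.e. the source position advances two input pixels
   per output pixel. */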
594 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
595 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
596
597 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
598 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
599 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
600 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
601
602 return s;
603 fail:
604 av_free(s);
605 return NULL;
606 }
607
608 void img_resample(ImgReSampleContext *s,
609 AVPicture *output, const AVPicture *input)
610 {
611 int i, shift;
612 uint8_t* optr;
613
614 for (i=0;i<3;i++) {
615 shift = (i == 0) ? 0 : 1;
616
617 optr = output->data[i] + (((output->linesize[i] *
618 s->padtop) + s->padleft) >> shift);
619
620 component_resample(s, optr, output->linesize[i],
621 s->pad_owidth >> shift, s->pad_oheight >> shift,
622 input->data[i] + (input->linesize[i] *
623 (s->topBand >> shift)) + (s->leftBand >> shift),
624 input->linesize[i], ((s->iwidth - s->leftBand -
625 s->rightBand) >> shift),
626 (s->iheight - s->topBand - s->bottomBand) >> shift);
627 }
628 }
629
630 void img_resample_close(ImgReSampleContext *s)
631 {
632 av_free(s->line_buf);
633 av_free(s);
634 }
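/*
 * Minimal usage sketch of this legacy API (illustrative only; names such as
 * out_w, in_pic and out_pic are hypothetical, picture setup and error
 * handling are omitted, and both pictures are assumed to already be valid
 * YUV420P AVPictures of the stated sizes):
 *
 *     ImgReSampleContext *s = img_resample_init(out_w, out_h, in_w, in_h);
 *     if (s) {
 *         img_resample(s, &out_pic, &in_pic);
 *         img_resample_close(s);
 *     }
 */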
635
636 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
637 int dstW, int dstH, int dstFormat,
638 int flags, SwsFilter *srcFilter,
639 SwsFilter *dstFilter, double *param)
640 {
641 struct SwsContext *ctx;
642
643 ctx = av_malloc(sizeof(struct SwsContext));
644 if (ctx == NULL) {
645 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
646
647 return NULL;
648 }
649
650 if ((srcH != dstH) || (srcW != dstW)) {
651 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
652 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
653 }
654 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
655 } else {
656 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
657 ctx->resampling_ctx->iheight = srcH;
658 ctx->resampling_ctx->iwidth = srcW;
659 ctx->resampling_ctx->oheight = dstH;
660 ctx->resampling_ctx->owidth = dstW;
661 }
662 ctx->src_pix_fmt = srcFormat;
663 ctx->dst_pix_fmt = dstFormat;
664
665 return ctx;
666 }
667
668 void sws_freeContext(struct SwsContext *ctx)
669 {
670 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
671 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
672 img_resample_close(ctx->resampling_ctx);
673 } else {
674 av_free(ctx->resampling_ctx);
675 }
676 av_free(ctx);
677 }
678
679
680 /**
681 * Checks whether the given context can be reused; otherwise reallocates a new one.
682 * If context is NULL, just calls sws_getContext() to get a new one.
683 * Otherwise, checks whether the parameters are the same as those already saved
684 * in context. If that is the case, returns the current context.
685 * Otherwise, frees context and gets a new one.
686 *
687 * Be warned that srcFilter and dstFilter are not checked; they are
688 * assumed to remain valid.
689 */
690 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
691 int srcW, int srcH, int srcFormat,
692 int dstW, int dstH, int dstFormat, int flags,
693 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
694 {
695 if (ctx != NULL) {
696 if ((ctx->resampling_ctx->iwidth != srcW) ||
697 (ctx->resampling_ctx->iheight != srcH) ||
698 (ctx->src_pix_fmt != srcFormat) ||
699 (ctx->resampling_ctx->owidth != dstW) ||
700 (ctx->resampling_ctx->oheight != dstH) ||
701 (ctx->dst_pix_fmt != dstFormat))
702 {
703 sws_freeContext(ctx);
704 ctx = NULL;
705 }
706 }
707 if (ctx == NULL) {
708 return sws_getContext(srcW, srcH, srcFormat,
709 dstW, dstH, dstFormat, flags,
710 srcFilter, dstFilter, param);
711 }
712 return ctx;
713 }
714
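/*
 * Descriptive note (not in the original source): sws_scale() here is only a
 * thin compatibility wrapper.  If the sizes differ, it converts the input to
 * PIX_FMT_YUV420P when necessary, rescales with img_resample(), and finally
 * converts or copies into the destination format; when the sizes match it
 * just converts or copies the picture.
 */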
715 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
716 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
717 {
718 AVPicture src_pict, dst_pict;
719 int i, res = 0;
720 AVPicture picture_format_temp;
721 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
722 uint8_t *buf1 = NULL, *buf2 = NULL;
723 enum PixelFormat current_pix_fmt;
724
725 for (i = 0; i < 3; i++) {
726 src_pict.data[i] = src[i];
727 src_pict.linesize[i] = srcStride[i];
728 dst_pict.data[i] = dst[i];
729 dst_pict.linesize[i] = dstStride[i];
730 }
731 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
732 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
733 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
734
735 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
736 int size;
737
738 /* create temporary picture for rescaling input*/
739 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
740 buf1 = av_malloc(size);
741 if (!buf1) {
742 res = -1;
743 goto the_end;
744 }
745 formatted_picture = &picture_format_temp;
746 avpicture_fill((AVPicture*)formatted_picture, buf1,
747 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
748
749 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
750 &src_pict, ctx->src_pix_fmt,
751 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
752
753 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
754 res = -1;
755 goto the_end;
756 }
757 } else {
758 formatted_picture = &src_pict;
759 }
760
761 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
762 int size;
763
764 /* create temporary picture for rescaling output*/
765 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
766 buf2 = av_malloc(size);
767 if (!buf2) {
768 res = -1;
769 goto the_end;
770 }
771 resampled_picture = &picture_resample_temp;
772 avpicture_fill((AVPicture*)resampled_picture, buf2,
773 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
774
775 } else {
776 resampled_picture = &dst_pict;
777 }
778
779 /* ...and finally rescale!!! */
780 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
781 current_pix_fmt = PIX_FMT_YUV420P;
782 } else {
783 resampled_picture = &src_pict;
784 current_pix_fmt = ctx->src_pix_fmt;
785 }
786
787 if (current_pix_fmt != ctx->dst_pix_fmt) {
788 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
789 resampled_picture, current_pix_fmt,
790 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
791
792 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
793
794 res = -1;
795 goto the_end;
796 }
797 } else if (resampled_picture != &dst_pict) {
798 img_copy(&dst_pict, resampled_picture, current_pix_fmt,
799 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
800 }
801
802 the_end:
803 av_free(buf1);
804 av_free(buf2);
805 return res;
806 }
807
808
809 #ifdef TEST
810 #include <stdio.h>
811
812 /* input */
813 #define XSIZE 256
814 #define YSIZE 256
815 uint8_t img[XSIZE * YSIZE];
816
817 /* output */
818 #define XSIZE1 512
819 #define YSIZE1 512
820 uint8_t img1[XSIZE1 * YSIZE1];
821 uint8_t img2[XSIZE1 * YSIZE1];
822
823 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
824 {
825 #undef fprintf
826 FILE *f;
827 f=fopen(filename,"w");
828 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
829 fwrite(img,1, xsize * ysize,f);
830 fclose(f);
831 #define fprintf please_use_av_log
832 }
833
834 static void dump_filter(int16_t *filter)
835 {
836 int i, ph;
837
838 for(ph=0;ph<NB_PHASES;ph++) {
839 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
840 for(i=0;i<NB_TAPS;i++) {
841 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
842 }
843 av_log(NULL, AV_LOG_INFO, "\n");
844 }
845 }
846
847 #ifdef HAVE_MMX
848 int mm_flags;
849 #endif
850
851 int main(int argc, char **argv)
852 {
853 int x, y, v, i, xsize, ysize;
854 ImgReSampleContext *s;
855 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
856 char buf[256];
857
858 /* build test image */
859 for(y=0;y<YSIZE;y++) {
860 for(x=0;x<XSIZE;x++) {
861 if (x < XSIZE/2 && y < YSIZE/2) {
862 if (x < XSIZE/4 && y < YSIZE/4) {
863 if ((x % 10) <= 6 &&
864 (y % 10) <= 6)
865 v = 0xff;
866 else
867 v = 0x00;
868 } else if (x < XSIZE/4) {
869 if (x & 1)
870 v = 0xff;
871 else
872 v = 0;
873 } else if (y < XSIZE/4) {
874 if (y & 1)
875 v = 0xff;
876 else
877 v = 0;
878 } else {
879 if (y < YSIZE*3/8) {
880 if ((y+x) & 1)
881 v = 0xff;
882 else
883 v = 0;
884 } else {
885 if (((x+3) % 4) <= 1 &&
886 ((y+3) % 4) <= 1)
887 v = 0xff;
888 else
889 v = 0x00;
890 }
891 }
892 } else if (x < XSIZE/2) {
893 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
894 } else if (y < XSIZE/2) {
895 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
896 } else {
897 v = ((x + y - XSIZE) * 255) / XSIZE;
898 }
899 img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
900 }
901 }
902 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
903 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
904 fact = factors[i];
905 xsize = (int)(XSIZE * fact);
906 ysize = (int)((YSIZE - 100) * fact);
907 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0, 0, 0, 0, 0);
908 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
909 dump_filter(&s->h_filters[0][0]);
910 component_resample(s, img1, xsize, xsize, ysize,
911 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
912 img_resample_close(s);
913
914 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
915 save_pgm(buf, img1, xsize, ysize);
916 }
917
918 /* mmx test */
919 #ifdef HAVE_MMX
920 av_log(NULL, AV_LOG_INFO, "MMX test\n");
921 fact = 0.72;
922 xsize = (int)(XSIZE * fact);
923 ysize = (int)(YSIZE * fact);
924 mm_flags = MM_MMX;
925 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
926 component_resample(s, img1, xsize, xsize, ysize,
927 img, XSIZE, XSIZE, YSIZE);
928
929 mm_flags = 0;
930 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
931 component_resample(s, img2, xsize, xsize, ysize,
932 img, XSIZE, XSIZE, YSIZE);
933 if (memcmp(img1, img2, xsize * ysize) != 0) {
934 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
935 exit(1);
936 }
937 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
938 #endif
939 return 0;
940 }
941
942 #endif