Do not use a fake libavcodec/swscale.h, but always use the real one
[libav.git] / libavcodec / imgresample.c
1 /*
2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file imgresample.c
24 * High quality image resampling with polyphase filters .
25 */
26
27 #include "avcodec.h"
28 #include "swscale.h"
29 #include "dsputil.h"
30
31 #ifdef USE_FASTMEMCPY
32 #include "libvo/fastmemcpy.h"
33 #endif
34
35 #define NB_COMPONENTS 3
36
37 #define PHASE_BITS 4
38 #define NB_PHASES (1 << PHASE_BITS)
39 #define NB_TAPS 4
40 #define FCENTER 1 /* index of the center of the filter */
41 //#define TEST 1 /* Test it */
42
43 #define POS_FRAC_BITS 16
44 #define POS_FRAC (1 << POS_FRAC_BITS)
45 /* 6 bits precision is needed for MMX */
46 #define FILTER_BITS 8
47
48 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
49
/* Minimal swscale-compatible context: pairs the legacy polyphase
   resampler state with the pixel formats the caller asked for. */
struct SwsContext {
    struct ImgReSampleContext *resampling_ctx;
    enum PixelFormat src_pix_fmt, dst_pix_fmt;
};
54
/* State for polyphase image resampling of one picture. */
struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;          /* input / output sizes */
    int topBand, bottomBand, leftBand, rightBand;  /* bands cropped from the input */
    int padtop, padbottom, padleft, padright;      /* padding added around the output */
    int pad_owidth, pad_oheight;                   /* output size minus padding */
    int h_incr, v_incr;  /* source step per output pixel, POS_FRAC fixed point */
    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
    uint8_t *line_buf;   /* ring buffer of horizontally filtered lines */
};
65
66 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
67
68 static inline int get_phase(int pos)
69 {
70 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
71 }
72
/* Horizontal resampling of one line with a NB_TAPS polyphase FIR filter.
   Fast path: assumes every filter window lies fully inside the source
   line (edges are handled by h_resample_slow).
   This function must be optimized. */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        /* integer part of the position selects the source pixels,
           fractional part selects the filter phase */
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        /* drop the FILTER_BITS fixed point scale and clamp to 8 bits */
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}
116
/* Vertical resampling: combine NB_TAPS consecutive buffered lines
   (each 'wrap' bytes apart) into one output line with a single filter
   phase. This function must be optimized. */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        /* drop the FILTER_BITS fixed point scale and clamp to 8 bits */
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}
153
154 #ifdef HAVE_MMX
155
156 #include "i386/mmx.h"
157
158 #define FILTER4(reg) \
159 {\
160 s = src + (src_pos >> POS_FRAC_BITS);\
161 phase = get_phase(src_pos);\
162 filter = filters + phase * NB_TAPS;\
163 movq_m2r(*s, reg);\
164 punpcklbw_r2r(mm7, reg);\
165 movq_m2r(*filter, mm6);\
166 pmaddwd_r2r(reg, mm6);\
167 movq_r2r(mm6, reg);\
168 psrlq_i2r(32, reg);\
169 paddd_r2r(mm6, reg);\
170 psrad_i2r(FILTER_BITS, reg);\
171 src_pos += src_incr;\
172 }
173
174 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
175
/* XXX: do four pixels at a time */
/* MMX horizontal resampler: each FILTER4 invocation computes one
   output pixel via a 4-tap multiply-accumulate in one MMX register. */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);   /* mm7 = 0, used to unpack bytes into words */

    /* main loop: four output pixels per iteration */
    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        /* saturate each 16/32 bit result down to an unsigned byte */
        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    /* remaining 1..3 pixels, one at a time */
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}
221
/* MMX vertical resampler: processes four horizontally adjacent pixels
   per iteration, each combined from 4 lines spaced 'wrap' bytes apart. */
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    /* broadcast each filter tap across the four 16 bit lanes */
    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);   /* mm7 = 0, used to unpack bytes into words */
    s = src;
    while (dst_width >= 4) {
        /* load 4 pixels from each of the 4 source lines */
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        /* per-lane multiply by the tap weights, then accumulate */
        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);   /* drop the fixed point scale */

        packuswb_r2r(mm7, mm0);        /* saturate to unsigned bytes */
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    /* scalar tail for the remaining 1..3 pixels */
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
285 #endif
286
287 #ifdef HAVE_ALTIVEC
288 typedef union {
289 vector unsigned char v;
290 unsigned char c[16];
291 } vec_uc_t;
292
293 typedef union {
294 vector signed short v;
295 signed short s[8];
296 } vec_ss_t;
297
/* AltiVec vertical resampler: 16 pixels per iteration, with scalar
   head/tail loops to keep the vector stores 16-byte aligned. */
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers,
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multipy/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        /* NOTE(review): 'i' is always 0 here (the alignment loop above
           exits with i == 0), so this is effectively &s[0 * wrap];
           presumably '0' was intended — confirm before changing. */
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv) ;
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
412 #endif
413
414 /* slow version to handle limit cases. Does not need optimisation */
415 static void h_resample_slow(uint8_t *dst, int dst_width,
416 const uint8_t *src, int src_width,
417 int src_start, int src_incr, int16_t *filters)
418 {
419 int src_pos, phase, sum, j, v, i;
420 const uint8_t *s, *src_end;
421 int16_t *filter;
422
423 src_end = src + src_width;
424 src_pos = src_start;
425 for(i=0;i<dst_width;i++) {
426 s = src + (src_pos >> POS_FRAC_BITS);
427 phase = get_phase(src_pos);
428 filter = filters + phase * NB_TAPS;
429 sum = 0;
430 for(j=0;j<NB_TAPS;j++) {
431 if (s < src)
432 v = src[0];
433 else if (s >= src_end)
434 v = src_end[-1];
435 else
436 v = s[0];
437 sum += v * filter[j];
438 s++;
439 }
440 sum = sum >> FILTER_BITS;
441 if (sum < 0)
442 sum = 0;
443 else if (sum > 255)
444 sum = 255;
445 dst[0] = sum;
446 src_pos += src_incr;
447 dst++;
448 }
449 }
450
/* Horizontal resample of one full line: dispatches between the
   border-clamping slow path and the fast inner path (MMX when
   available), splitting the line into head / middle / tail. */
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    /* leading pixels whose filter window starts before the line */
    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    /* how many pixels the fast path can produce without reading past
       the end of the source line */
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
    h_resample_fast(dst, n,
                    src, src_width, src_start, src_incr, filters);
    /* trailing pixels whose filter window reaches past the line */
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}
487
/* Resample one image plane: input lines are horizontally filtered into
   a ring buffer on demand, then NB_TAPS buffered lines are vertically
   filtered to produce each output line. */
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wraping: duplicate the line at the start
               so a filter window crossing the wrap point stays contiguous */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* deactivated MMX because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
        v_resample(output, owidth,
                   s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                   &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}
552
/* Create a resampling context scaling iwidth x iheight to
   owidth x oheight, with no cropping bands and no padding.
   Returns NULL on invalid sizes or allocation failure. */
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
                                  0, 0, 0, 0, 0, 0, 0, 0);
}
559
560 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
561 int iwidth, int iheight,
562 int topBand, int bottomBand,
563 int leftBand, int rightBand,
564 int padtop, int padbottom,
565 int padleft, int padright)
566 {
567 ImgReSampleContext *s;
568
569 if (!owidth || !oheight || !iwidth || !iheight)
570 return NULL;
571
572 s = av_mallocz(sizeof(ImgReSampleContext));
573 if (!s)
574 return NULL;
575 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
576 return NULL;
577 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
578 if (!s->line_buf)
579 goto fail;
580
581 s->owidth = owidth;
582 s->oheight = oheight;
583 s->iwidth = iwidth;
584 s->iheight = iheight;
585
586 s->topBand = topBand;
587 s->bottomBand = bottomBand;
588 s->leftBand = leftBand;
589 s->rightBand = rightBand;
590
591 s->padtop = padtop;
592 s->padbottom = padbottom;
593 s->padleft = padleft;
594 s->padright = padright;
595
596 s->pad_owidth = owidth - (padleft + padright);
597 s->pad_oheight = oheight - (padtop + padbottom);
598
599 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
600 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
601
602 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
603 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
604 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
605 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
606
607 return s;
608 fail:
609 av_free(s);
610 return NULL;
611 }
612
/* Resample all three planes of a YUV 4:2:0 picture. Plane 0 is luma;
   planes 1 and 2 are chroma at half resolution, hence shift = 1. */
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;

        /* start of the output plane, past the requested padding */
        optr = output->data[i] + (((output->linesize[i] *
                        s->padtop) + s->padleft) >> shift);

        /* feed only the cropped region of the input plane */
        component_resample(s, optr, output->linesize[i],
                s->pad_owidth >> shift, s->pad_oheight >> shift,
                input->data[i] + (input->linesize[i] *
                    (s->topBand >> shift)) + (s->leftBand >> shift),
                input->linesize[i], ((s->iwidth - s->leftBand -
                        s->rightBand) >> shift),
                (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}
634
635 void img_resample_close(ImgReSampleContext *s)
636 {
637 av_free(s->line_buf);
638 av_free(s);
639 }
640
641 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
642 int dstW, int dstH, int dstFormat,
643 int flags, SwsFilter *srcFilter,
644 SwsFilter *dstFilter, double *param)
645 {
646 struct SwsContext *ctx;
647
648 ctx = av_malloc(sizeof(struct SwsContext));
649 if (ctx == NULL) {
650 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
651
652 return NULL;
653 }
654
655 if ((srcH != dstH) || (srcW != dstW)) {
656 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
657 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
658 }
659 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
660 } else {
661 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
662 ctx->resampling_ctx->iheight = srcH;
663 ctx->resampling_ctx->iwidth = srcW;
664 ctx->resampling_ctx->oheight = dstH;
665 ctx->resampling_ctx->owidth = dstW;
666 }
667 ctx->src_pix_fmt = srcFormat;
668 ctx->dst_pix_fmt = dstFormat;
669
670 return ctx;
671 }
672
673 void sws_freeContext(struct SwsContext *ctx)
674 {
675 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
676 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
677 img_resample_close(ctx->resampling_ctx);
678 } else {
679 av_free(ctx->resampling_ctx);
680 }
681 av_free(ctx);
682 }
683
684
685 /**
686 * Checks if context is valid or reallocs a new one instead.
687 * If context is NULL, just calls sws_getContext() to get a new one.
688 * Otherwise, checks if the parameters are the same already saved in context.
689 * If that is the case, returns the current context.
690 * Otherwise, frees context and gets a new one.
691 *
692 * Be warned that srcFilter, dstFilter are not checked, they are
693 * asumed to remain valid.
694 */
695 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
696 int srcW, int srcH, int srcFormat,
697 int dstW, int dstH, int dstFormat, int flags,
698 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
699 {
700 if (ctx != NULL) {
701 if ((ctx->resampling_ctx->iwidth != srcW) ||
702 (ctx->resampling_ctx->iheight != srcH) ||
703 (ctx->src_pix_fmt != srcFormat) ||
704 (ctx->resampling_ctx->owidth != dstW) ||
705 (ctx->resampling_ctx->oheight != dstH) ||
706 (ctx->dst_pix_fmt != dstFormat))
707 {
708 sws_freeContext(ctx);
709 ctx = NULL;
710 }
711 }
712 if (ctx == NULL) {
713 return sws_getContext(srcW, srcH, srcFormat,
714 dstW, dstH, dstFormat, flags,
715 srcFilter, dstFilter, param);
716 }
717 return ctx;
718 }
719
/* swscale-compatible scaling entry point: converts the source to
   PIX_FMT_YUV420P if necessary, rescales with the polyphase resampler,
   then converts/copies into the destination format.
   Returns 0 on success, -1 on allocation or conversion failure.
   NOTE(review): srcSliceY/srcSliceH are ignored by this legacy
   implementation — the whole picture is processed. */
int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
    AVPicture src_pict, dst_pict;
    int i, res = 0;
    AVPicture picture_format_temp;
    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    enum PixelFormat current_pix_fmt;

    /* wrap the caller's plane pointers and strides into AVPictures */
    for (i = 0; i < 3; i++) {
        src_pict.data[i] = src[i];
        src_pict.linesize[i] = srcStride[i];
        dst_pict.data[i] = dst[i];
        dst_pict.linesize[i] = dstStride[i];
    }
    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        /* We have to rescale the picture, but only YUV420P rescaling is supported... */

        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling input*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
            buf1 = av_malloc(size);
            if (!buf1) {
                res = -1;
                goto the_end;
            }
            formatted_picture = &picture_format_temp;
            avpicture_fill((AVPicture*)formatted_picture, buf1,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);

            if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
                            &src_pict, ctx->src_pix_fmt,
                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {

                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
                res = -1;
                goto the_end;
            }
        } else {
            formatted_picture = &src_pict;
        }

        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling output*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
            buf2 = av_malloc(size);
            if (!buf2) {
                res = -1;
                goto the_end;
            }
            resampled_picture = &picture_resample_temp;
            avpicture_fill((AVPicture*)resampled_picture, buf2,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);

        } else {
            resampled_picture = &dst_pict;
        }

        /* ...and finally rescale!!! */
        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
        current_pix_fmt = PIX_FMT_YUV420P;
    } else {
        /* same size: no rescaling, possibly only a format conversion */
        resampled_picture = &src_pict;
        current_pix_fmt = ctx->src_pix_fmt;
    }

    if (current_pix_fmt != ctx->dst_pix_fmt) {
        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
                        resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {

            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");

            res = -1;
            goto the_end;
        }
    } else if (resampled_picture != &dst_pict) {
        /* formats already match, but the data must still reach dst */
        img_copy(&dst_pict, resampled_picture, current_pix_fmt,
                 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
    }

the_end:
    av_free(buf1);
    av_free(buf2);
    return res;
}
812
813
814 #ifdef TEST
815 #include <stdio.h>
816
817 /* input */
818 #define XSIZE 256
819 #define YSIZE 256
820 uint8_t img[XSIZE * YSIZE];
821
822 /* output */
823 #define XSIZE1 512
824 #define YSIZE1 512
825 uint8_t img1[XSIZE1 * YSIZE1];
826 uint8_t img2[XSIZE1 * YSIZE1];
827
/* Write an 8-bit grayscale image as a binary PGM (P5) file.
   Silently returns if the file cannot be created.
   Fix vs. original: the fopen() result was used unchecked, so an
   unwritable path dereferenced a NULL FILE*. */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;
    f=fopen(filename,"w");
    if (!f)
        return;
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
#define fprintf please_use_av_log
}
838
839 static void dump_filter(int16_t *filter)
840 {
841 int i, ph;
842
843 for(ph=0;ph<NB_PHASES;ph++) {
844 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
845 for(i=0;i<NB_TAPS;i++) {
846 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
847 }
848 av_log(NULL, AV_LOG_INFO, "\n");
849 }
850 }
851
852 #ifdef HAVE_MMX
853 int mm_flags;
854 #endif
855
856 int main(int argc, char **argv)
857 {
858 int x, y, v, i, xsize, ysize;
859 ImgReSampleContext *s;
860 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
861 char buf[256];
862
863 /* build test image */
864 for(y=0;y<YSIZE;y++) {
865 for(x=0;x<XSIZE;x++) {
866 if (x < XSIZE/2 && y < YSIZE/2) {
867 if (x < XSIZE/4 && y < YSIZE/4) {
868 if ((x % 10) <= 6 &&
869 (y % 10) <= 6)
870 v = 0xff;
871 else
872 v = 0x00;
873 } else if (x < XSIZE/4) {
874 if (x & 1)
875 v = 0xff;
876 else
877 v = 0;
878 } else if (y < XSIZE/4) {
879 if (y & 1)
880 v = 0xff;
881 else
882 v = 0;
883 } else {
884 if (y < YSIZE*3/8) {
885 if ((y+x) & 1)
886 v = 0xff;
887 else
888 v = 0;
889 } else {
890 if (((x+3) % 4) <= 1 &&
891 ((y+3) % 4) <= 1)
892 v = 0xff;
893 else
894 v = 0x00;
895 }
896 }
897 } else if (x < XSIZE/2) {
898 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
899 } else if (y < XSIZE/2) {
900 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
901 } else {
902 v = ((x + y - XSIZE) * 255) / XSIZE;
903 }
904 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
905 }
906 }
907 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
908 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
909 fact = factors[i];
910 xsize = (int)(XSIZE * fact);
911 ysize = (int)((YSIZE - 100) * fact);
912 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
913 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
914 dump_filter(&s->h_filters[0][0]);
915 component_resample(s, img1, xsize, xsize, ysize,
916 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
917 img_resample_close(s);
918
919 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
920 save_pgm(buf, img1, xsize, ysize);
921 }
922
923 /* mmx test */
924 #ifdef HAVE_MMX
925 av_log(NULL, AV_LOG_INFO, "MMX test\n");
926 fact = 0.72;
927 xsize = (int)(XSIZE * fact);
928 ysize = (int)(YSIZE * fact);
929 mm_flags = MM_MMX;
930 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
931 component_resample(s, img1, xsize, xsize, ysize,
932 img, XSIZE, XSIZE, YSIZE);
933
934 mm_flags = 0;
935 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
936 component_resample(s, img2, xsize, xsize, ysize,
937 img, XSIZE, XSIZE, YSIZE);
938 if (memcmp(img1, img2, xsize * ysize) != 0) {
939 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
940 exit(1);
941 }
942 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
943 #endif
944 return 0;
945 }
946
947 #endif