Explicitly include fastmemcpy.h from libvo/.
[libav.git] / libavcodec / imgresample.c
1 /*
2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 /**
21 * @file imgresample.c
22 * High quality image resampling with polyphase filters.
23 */
24
25 #include "avcodec.h"
26 #include "swscale.h"
27 #include "dsputil.h"
28
29 #ifdef USE_FASTMEMCPY
30 #include "libvo/fastmemcpy.h"
31 #endif
32
#define NB_COMPONENTS 3

/* Polyphase filter geometry: NB_PHASES filters of NB_TAPS coefficients each. */
#define PHASE_BITS      4
#define NB_PHASES       (1 << PHASE_BITS)
#define NB_TAPS         4
#define FCENTER         1   /* index of the center of the filter */
//#define TEST 1 /* Test it */

/* Source positions are fixed point with POS_FRAC_BITS fractional bits. */
#define POS_FRAC_BITS   16
#define POS_FRAC        (1 << POS_FRAC_BITS)

/* Filter sums are scaled back with a right shift by FILTER_BITS. */
/* 6 bits precision is needed for MMX */
#define FILTER_BITS     8

/* Number of horizontally filtered rows kept in the ring buffer
   (NB_TAPS more rows are allocated on top for wrap-around copies). */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
47
48 struct ImgReSampleContext {
49 int iwidth, iheight, owidth, oheight;
50 int topBand, bottomBand, leftBand, rightBand;
51 int padtop, padbottom, padleft, padright;
52 int pad_owidth, pad_oheight;
53 int h_incr, v_incr;
54 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
55 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
56 uint8_t *line_buf;
57 };
58
/* Filter table builder implemented outside this file; img_resample_full_init()
   uses it to fill the horizontal and vertical filter banks. */
void av_build_filter(int16_t *filter, double factor, int tap_count,
                     int phase_count, int scale, int type);
60
61 static inline int get_phase(int pos)
62 {
63 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
64 }
65
66 /* This function must be optimized */
67 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
68 int src_width, int src_start, int src_incr,
69 int16_t *filters)
70 {
71 int src_pos, phase, sum, i;
72 const uint8_t *s;
73 int16_t *filter;
74
75 src_pos = src_start;
76 for(i=0;i<dst_width;i++) {
77 #ifdef TEST
78 /* test */
79 if ((src_pos >> POS_FRAC_BITS) < 0 ||
80 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
81 av_abort();
82 #endif
83 s = src + (src_pos >> POS_FRAC_BITS);
84 phase = get_phase(src_pos);
85 filter = filters + phase * NB_TAPS;
86 #if NB_TAPS == 4
87 sum = s[0] * filter[0] +
88 s[1] * filter[1] +
89 s[2] * filter[2] +
90 s[3] * filter[3];
91 #else
92 {
93 int j;
94 sum = 0;
95 for(j=0;j<NB_TAPS;j++)
96 sum += s[j] * filter[j];
97 }
98 #endif
99 sum = sum >> FILTER_BITS;
100 if (sum < 0)
101 sum = 0;
102 else if (sum > 255)
103 sum = 255;
104 dst[0] = sum;
105 src_pos += src_incr;
106 dst++;
107 }
108 }
109
110 /* This function must be optimized */
111 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
112 int wrap, int16_t *filter)
113 {
114 int sum, i;
115 const uint8_t *s;
116
117 s = src;
118 for(i=0;i<dst_width;i++) {
119 #if NB_TAPS == 4
120 sum = s[0 * wrap] * filter[0] +
121 s[1 * wrap] * filter[1] +
122 s[2 * wrap] * filter[2] +
123 s[3 * wrap] * filter[3];
124 #else
125 {
126 int j;
127 uint8_t *s1 = s;
128
129 sum = 0;
130 for(j=0;j<NB_TAPS;j++) {
131 sum += s1[0] * filter[j];
132 s1 += wrap;
133 }
134 }
135 #endif
136 sum = sum >> FILTER_BITS;
137 if (sum < 0)
138 sum = 0;
139 else if (sum > 255)
140 sum = 255;
141 dst[0] = sum;
142 dst++;
143 s++;
144 }
145 }
146
147 #ifdef HAVE_MMX
148
149 #include "i386/mmx.h"
150
/*
 * Compute one horizontally filtered pixel into the low dword of 'reg'.
 *
 * Expands inside a function that provides src, src_pos, src_incr, filters
 * and the scratch locals s, phase and filter, and that has cleared mm7.
 * Steps: load the source bytes, widen the low four to words, multiply-add
 * them with the four coefficients (pmaddwd leaves two partial sums), fold
 * the two partial sums together and shift the result by FILTER_BITS.
 * mm6 is used as a temporary; src_pos is advanced by src_incr.
 */
#define FILTER4(reg)                                \
{                                                   \
    s = src + (src_pos >> POS_FRAC_BITS);           \
    phase = get_phase(src_pos);                     \
    filter = filters + phase * NB_TAPS;             \
    movq_m2r(*s, reg);                              \
    punpcklbw_r2r(mm7, reg);                        \
    movq_m2r(*filter, mm6);                         \
    pmaddwd_r2r(reg, mm6);                          \
    movq_r2r(mm6, reg);                             \
    psrlq_i2r(32, reg);                             \
    paddd_r2r(mm6, reg);                            \
    psrad_i2r(FILTER_BITS, reg);                    \
    src_pos += src_incr;                            \
}

/* Debug helper: print an MMX register through a local mmx_t named 'tmp'. */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
168
169 /* XXX: do four pixels at a time */
170 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
171 const uint8_t *src, int src_width,
172 int src_start, int src_incr, int16_t *filters)
173 {
174 int src_pos, phase;
175 const uint8_t *s;
176 int16_t *filter;
177 mmx_t tmp;
178
179 src_pos = src_start;
180 pxor_r2r(mm7, mm7);
181
182 while (dst_width >= 4) {
183
184 FILTER4(mm0);
185 FILTER4(mm1);
186 FILTER4(mm2);
187 FILTER4(mm3);
188
189 packuswb_r2r(mm7, mm0);
190 packuswb_r2r(mm7, mm1);
191 packuswb_r2r(mm7, mm3);
192 packuswb_r2r(mm7, mm2);
193 movq_r2m(mm0, tmp);
194 dst[0] = tmp.ub[0];
195 movq_r2m(mm1, tmp);
196 dst[1] = tmp.ub[0];
197 movq_r2m(mm2, tmp);
198 dst[2] = tmp.ub[0];
199 movq_r2m(mm3, tmp);
200 dst[3] = tmp.ub[0];
201 dst += 4;
202 dst_width -= 4;
203 }
204 while (dst_width > 0) {
205 FILTER4(mm0);
206 packuswb_r2r(mm7, mm0);
207 movq_r2m(mm0, tmp);
208 dst[0] = tmp.ub[0];
209 dst++;
210 dst_width--;
211 }
212 emms();
213 }
214
215 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
216 int wrap, int16_t *filter)
217 {
218 int sum, i, v;
219 const uint8_t *s;
220 mmx_t tmp;
221 mmx_t coefs[4];
222
223 for(i=0;i<4;i++) {
224 v = filter[i];
225 coefs[i].uw[0] = v;
226 coefs[i].uw[1] = v;
227 coefs[i].uw[2] = v;
228 coefs[i].uw[3] = v;
229 }
230
231 pxor_r2r(mm7, mm7);
232 s = src;
233 while (dst_width >= 4) {
234 movq_m2r(s[0 * wrap], mm0);
235 punpcklbw_r2r(mm7, mm0);
236 movq_m2r(s[1 * wrap], mm1);
237 punpcklbw_r2r(mm7, mm1);
238 movq_m2r(s[2 * wrap], mm2);
239 punpcklbw_r2r(mm7, mm2);
240 movq_m2r(s[3 * wrap], mm3);
241 punpcklbw_r2r(mm7, mm3);
242
243 pmullw_m2r(coefs[0], mm0);
244 pmullw_m2r(coefs[1], mm1);
245 pmullw_m2r(coefs[2], mm2);
246 pmullw_m2r(coefs[3], mm3);
247
248 paddw_r2r(mm1, mm0);
249 paddw_r2r(mm3, mm2);
250 paddw_r2r(mm2, mm0);
251 psraw_i2r(FILTER_BITS, mm0);
252
253 packuswb_r2r(mm7, mm0);
254 movq_r2m(mm0, tmp);
255
256 *(uint32_t *)dst = tmp.ud[0];
257 dst += 4;
258 s += 4;
259 dst_width -= 4;
260 }
261 while (dst_width > 0) {
262 sum = s[0 * wrap] * filter[0] +
263 s[1 * wrap] * filter[1] +
264 s[2 * wrap] * filter[2] +
265 s[3 * wrap] * filter[3];
266 sum = sum >> FILTER_BITS;
267 if (sum < 0)
268 sum = 0;
269 else if (sum > 255)
270 sum = 255;
271 dst[0] = sum;
272 dst++;
273 s++;
274 dst_width--;
275 }
276 emms();
277 }
278 #endif
279
280 #ifdef HAVE_ALTIVEC
281 typedef union {
282 vector unsigned char v;
283 unsigned char c[16];
284 } vec_uc_t;
285
286 typedef union {
287 vector signed short v;
288 signed short s[8];
289 } vec_ss_t;
290
291 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
292 int wrap, int16_t *filter)
293 {
294 int sum, i;
295 const uint8_t *s;
296 vector unsigned char *tv, tmp, dstv, zero;
297 vec_ss_t srchv[4], srclv[4], fv[4];
298 vector signed short zeros, sumhv, sumlv;
299 s = src;
300
301 for(i=0;i<4;i++)
302 {
303 /*
304 The vec_madds later on does an implicit >>15 on the result.
305 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
306 a signed short, we have just enough bits to pre-shift our
307 filter constants <<7 to compensate for vec_madds.
308 */
309 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
310 fv[i].v = vec_splat(fv[i].v, 0);
311 }
312
313 zero = vec_splat_u8(0);
314 zeros = vec_splat_s16(0);
315
316
317 /*
318 When we're resampling, we'd ideally like both our input buffers,
319 and output buffers to be 16-byte aligned, so we can do both aligned
320 reads and writes. Sadly we can't always have this at the moment, so
321 we opt for aligned writes, as unaligned writes have a huge overhead.
322 To do this, do enough scalar resamples to get dst 16-byte aligned.
323 */
324 i = (-(int)dst) & 0xf;
325 while(i>0) {
326 sum = s[0 * wrap] * filter[0] +
327 s[1 * wrap] * filter[1] +
328 s[2 * wrap] * filter[2] +
329 s[3 * wrap] * filter[3];
330 sum = sum >> FILTER_BITS;
331 if (sum<0) sum = 0; else if (sum>255) sum=255;
332 dst[0] = sum;
333 dst++;
334 s++;
335 dst_width--;
336 i--;
337 }
338
339 /* Do our altivec resampling on 16 pixels at once. */
340 while(dst_width>=16) {
341 /*
342 Read 16 (potentially unaligned) bytes from each of
343 4 lines into 4 vectors, and split them into shorts.
344 Interleave the multipy/accumulate for the resample
345 filter with the loads to hide the 3 cycle latency
346 the vec_madds have.
347 */
348 tv = (vector unsigned char *) &s[0 * wrap];
349 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
350 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
351 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
352 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
353 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
354
355 tv = (vector unsigned char *) &s[1 * wrap];
356 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
357 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
358 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
359 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
360 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
361
362 tv = (vector unsigned char *) &s[2 * wrap];
363 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
364 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
365 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
366 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
367 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
368
369 tv = (vector unsigned char *) &s[3 * wrap];
370 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
371 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
372 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
373 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
374 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
375
376 /*
377 Pack the results into our destination vector,
378 and do an aligned write of that back to memory.
379 */
380 dstv = vec_packsu(sumhv, sumlv) ;
381 vec_st(dstv, 0, (vector unsigned char *) dst);
382
383 dst+=16;
384 s+=16;
385 dst_width-=16;
386 }
387
388 /*
389 If there are any leftover pixels, resample them
390 with the slow scalar method.
391 */
392 while(dst_width>0) {
393 sum = s[0 * wrap] * filter[0] +
394 s[1 * wrap] * filter[1] +
395 s[2 * wrap] * filter[2] +
396 s[3 * wrap] * filter[3];
397 sum = sum >> FILTER_BITS;
398 if (sum<0) sum = 0; else if (sum>255) sum=255;
399 dst[0] = sum;
400 dst++;
401 s++;
402 dst_width--;
403 }
404 }
405 #endif
406
407 /* slow version to handle limit cases. Does not need optimisation */
408 static void h_resample_slow(uint8_t *dst, int dst_width,
409 const uint8_t *src, int src_width,
410 int src_start, int src_incr, int16_t *filters)
411 {
412 int src_pos, phase, sum, j, v, i;
413 const uint8_t *s, *src_end;
414 int16_t *filter;
415
416 src_end = src + src_width;
417 src_pos = src_start;
418 for(i=0;i<dst_width;i++) {
419 s = src + (src_pos >> POS_FRAC_BITS);
420 phase = get_phase(src_pos);
421 filter = filters + phase * NB_TAPS;
422 sum = 0;
423 for(j=0;j<NB_TAPS;j++) {
424 if (s < src)
425 v = src[0];
426 else if (s >= src_end)
427 v = src_end[-1];
428 else
429 v = s[0];
430 sum += v * filter[j];
431 s++;
432 }
433 sum = sum >> FILTER_BITS;
434 if (sum < 0)
435 sum = 0;
436 else if (sum > 255)
437 sum = 255;
438 dst[0] = sum;
439 src_pos += src_incr;
440 dst++;
441 }
442 }
443
444 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
445 int src_width, int src_start, int src_incr,
446 int16_t *filters)
447 {
448 int n, src_end;
449
450 if (src_start < 0) {
451 n = (0 - src_start + src_incr - 1) / src_incr;
452 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
453 dst += n;
454 dst_width -= n;
455 src_start += n * src_incr;
456 }
457 src_end = src_start + dst_width * src_incr;
458 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
459 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
460 src_incr;
461 } else {
462 n = dst_width;
463 }
464 #ifdef HAVE_MMX
465 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
466 h_resample_fast4_mmx(dst, n,
467 src, src_width, src_start, src_incr, filters);
468 else
469 #endif
470 h_resample_fast(dst, n,
471 src, src_width, src_start, src_incr, filters);
472 if (n < dst_width) {
473 dst += n;
474 dst_width -= n;
475 src_start += n * src_incr;
476 h_resample_slow(dst, dst_width,
477 src, src_width, src_start, src_incr, filters);
478 }
479 }
480
481 static void component_resample(ImgReSampleContext *s,
482 uint8_t *output, int owrap, int owidth, int oheight,
483 uint8_t *input, int iwrap, int iwidth, int iheight)
484 {
485 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
486 uint8_t *new_line, *src_line;
487
488 last_src_y = - FCENTER - 1;
489 /* position of the bottom of the filter in the source image */
490 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
491 ring_y = NB_TAPS; /* position in ring buffer */
492 for(y=0;y<oheight;y++) {
493 /* apply horizontal filter on new lines from input if needed */
494 src_y1 = src_y >> POS_FRAC_BITS;
495 while (last_src_y < src_y1) {
496 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
497 ring_y = NB_TAPS;
498 last_src_y++;
499 /* handle limit conditions : replicate line (slightly
500 inefficient because we filter multiple times) */
501 y1 = last_src_y;
502 if (y1 < 0) {
503 y1 = 0;
504 } else if (y1 >= iheight) {
505 y1 = iheight - 1;
506 }
507 src_line = input + y1 * iwrap;
508 new_line = s->line_buf + ring_y * owidth;
509 /* apply filter and handle limit cases correctly */
510 h_resample(new_line, owidth,
511 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
512 &s->h_filters[0][0]);
513 /* handle ring buffer wraping */
514 if (ring_y >= LINE_BUF_HEIGHT) {
515 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
516 new_line, owidth);
517 }
518 }
519 /* apply vertical filter */
520 phase_y = get_phase(src_y);
521 #ifdef HAVE_MMX
522 /* desactivated MMX because loss of precision */
523 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
524 v_resample4_mmx(output, owidth,
525 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
526 &s->v_filters[phase_y][0]);
527 else
528 #endif
529 #ifdef HAVE_ALTIVEC
530 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
531 v_resample16_altivec(output, owidth,
532 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
533 &s->v_filters[phase_y][0]);
534 else
535 #endif
536 v_resample(output, owidth,
537 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
538 &s->v_filters[phase_y][0]);
539
540 src_y += s->v_incr;
541
542 output += owrap;
543 }
544 }
545
546 ImgReSampleContext *img_resample_init(int owidth, int oheight,
547 int iwidth, int iheight)
548 {
549 return img_resample_full_init(owidth, oheight, iwidth, iheight,
550 0, 0, 0, 0, 0, 0, 0, 0);
551 }
552
553 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
554 int iwidth, int iheight,
555 int topBand, int bottomBand,
556 int leftBand, int rightBand,
557 int padtop, int padbottom,
558 int padleft, int padright)
559 {
560 ImgReSampleContext *s;
561
562 if (!owidth || !oheight || !iwidth || !iheight)
563 return NULL;
564
565 s = av_mallocz(sizeof(ImgReSampleContext));
566 if (!s)
567 return NULL;
568 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
569 return NULL;
570 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
571 if (!s->line_buf)
572 goto fail;
573
574 s->owidth = owidth;
575 s->oheight = oheight;
576 s->iwidth = iwidth;
577 s->iheight = iheight;
578
579 s->topBand = topBand;
580 s->bottomBand = bottomBand;
581 s->leftBand = leftBand;
582 s->rightBand = rightBand;
583
584 s->padtop = padtop;
585 s->padbottom = padbottom;
586 s->padleft = padleft;
587 s->padright = padright;
588
589 s->pad_owidth = owidth - (padleft + padright);
590 s->pad_oheight = oheight - (padtop + padbottom);
591
592 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
593 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
594
595 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
596 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
597 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
598 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
599
600 return s;
601 fail:
602 av_free(s);
603 return NULL;
604 }
605
606 void img_resample(ImgReSampleContext *s,
607 AVPicture *output, const AVPicture *input)
608 {
609 int i, shift;
610 uint8_t* optr;
611
612 for (i=0;i<3;i++) {
613 shift = (i == 0) ? 0 : 1;
614
615 optr = output->data[i] + (((output->linesize[i] *
616 s->padtop) + s->padleft) >> shift);
617
618 component_resample(s, optr, output->linesize[i],
619 s->pad_owidth >> shift, s->pad_oheight >> shift,
620 input->data[i] + (input->linesize[i] *
621 (s->topBand >> shift)) + (s->leftBand >> shift),
622 input->linesize[i], ((s->iwidth - s->leftBand -
623 s->rightBand) >> shift),
624 (s->iheight - s->topBand - s->bottomBand) >> shift);
625 }
626 }
627
628 void img_resample_close(ImgReSampleContext *s)
629 {
630 av_free(s->line_buf);
631 av_free(s);
632 }
633
634 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
635 int dstW, int dstH, int dstFormat,
636 int flags, SwsFilter *srcFilter,
637 SwsFilter *dstFilter, double *param)
638 {
639 struct SwsContext *ctx;
640
641 ctx = av_malloc(sizeof(struct SwsContext));
642 if (ctx == NULL) {
643 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
644
645 return NULL;
646 }
647
648 if ((srcH != dstH) || (srcW != dstW)) {
649 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
650 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
651 }
652 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
653 } else {
654 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
655 ctx->resampling_ctx->iheight = srcH;
656 ctx->resampling_ctx->iwidth = srcW;
657 ctx->resampling_ctx->oheight = dstH;
658 ctx->resampling_ctx->owidth = dstW;
659 }
660 ctx->src_pix_fmt = srcFormat;
661 ctx->dst_pix_fmt = dstFormat;
662
663 return ctx;
664 }
665
666 void sws_freeContext(struct SwsContext *ctx)
667 {
668 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
669 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
670 img_resample_close(ctx->resampling_ctx);
671 } else {
672 av_free(ctx->resampling_ctx);
673 }
674 av_free(ctx);
675 }
676
677 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
678 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
679 {
680 AVPicture src_pict, dst_pict;
681 int i, res = 0;
682 AVPicture picture_format_temp;
683 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
684 uint8_t *buf1 = NULL, *buf2 = NULL;
685 enum PixelFormat current_pix_fmt;
686
687 for (i = 0; i < 3; i++) {
688 src_pict.data[i] = src[i];
689 src_pict.linesize[i] = srcStride[i];
690 dst_pict.data[i] = dst[i];
691 dst_pict.linesize[i] = dstStride[i];
692 }
693 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
694 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
695 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
696
697 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
698 int size;
699
700 /* create temporary picture for rescaling input*/
701 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
702 buf1 = av_malloc(size);
703 if (!buf1) {
704 res = -1;
705 goto the_end;
706 }
707 formatted_picture = &picture_format_temp;
708 avpicture_fill((AVPicture*)formatted_picture, buf1,
709 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
710
711 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
712 &src_pict, ctx->src_pix_fmt,
713 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
714
715 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
716 res = -1;
717 goto the_end;
718 }
719 } else {
720 formatted_picture = &src_pict;
721 }
722
723 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
724 int size;
725
726 /* create temporary picture for rescaling output*/
727 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
728 buf2 = av_malloc(size);
729 if (!buf2) {
730 res = -1;
731 goto the_end;
732 }
733 resampled_picture = &picture_resample_temp;
734 avpicture_fill((AVPicture*)resampled_picture, buf2,
735 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
736
737 } else {
738 resampled_picture = &dst_pict;
739 }
740
741 /* ...and finally rescale!!! */
742 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
743 current_pix_fmt = PIX_FMT_YUV420P;
744 } else {
745 resampled_picture = &src_pict;
746 current_pix_fmt = ctx->src_pix_fmt;
747 }
748
749 if (current_pix_fmt != ctx->dst_pix_fmt) {
750 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
751 resampled_picture, current_pix_fmt,
752 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
753
754 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
755
756 res = -1;
757 goto the_end;
758 }
759 } else if (resampled_picture != &dst_pict) {
760 img_copy(&dst_pict, resampled_picture, current_pix_fmt,
761 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
762 }
763
764 the_end:
765 av_free(buf1);
766 av_free(buf2);
767 return res;
768 }
769
770
771 #ifdef TEST
772 #include <stdio.h>
773
/* input: one XSIZE x YSIZE 8-bit plane */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output: two planes large enough for the biggest scale factor tested */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
784
/**
 * Write an 8-bit grayscale image as a binary PGM ("P5") file.
 *
 * Failures to open or to write the file are reported on stderr; the
 * function then returns without writing anything further.
 *
 * @param filename  path of the file to create
 * @param img       xsize * ysize pixels, one byte each, row by row
 * @param xsize     image width in pixels
 * @param ysize     image height in pixels
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
/* fprintf is needed for real here; it is poisoned again at the end */
#undef fprintf
    FILE *f;
    size_t count = (size_t)xsize * ysize;

    /* "wb": the pixel data is binary and must not be newline-translated */
    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "save_pgm: cannot open %s\n", filename);
    } else {
        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        if (fwrite(img, 1, count, f) != count)
            fprintf(stderr, "save_pgm: short write to %s\n", filename);
        fclose(f);
    }
#define fprintf please_use_av_log
}
795
796 static void dump_filter(int16_t *filter)
797 {
798 int i, ph;
799
800 for(ph=0;ph<NB_PHASES;ph++) {
801 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
802 for(i=0;i<NB_TAPS;i++) {
803 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
804 }
805 av_log(NULL, AV_LOG_INFO, "\n");
806 }
807 }
808
809 #ifdef HAVE_MMX
810 int mm_flags;
811 #endif
812
813 int main(int argc, char **argv)
814 {
815 int x, y, v, i, xsize, ysize;
816 ImgReSampleContext *s;
817 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
818 char buf[256];
819
820 /* build test image */
821 for(y=0;y<YSIZE;y++) {
822 for(x=0;x<XSIZE;x++) {
823 if (x < XSIZE/2 && y < YSIZE/2) {
824 if (x < XSIZE/4 && y < YSIZE/4) {
825 if ((x % 10) <= 6 &&
826 (y % 10) <= 6)
827 v = 0xff;
828 else
829 v = 0x00;
830 } else if (x < XSIZE/4) {
831 if (x & 1)
832 v = 0xff;
833 else
834 v = 0;
835 } else if (y < XSIZE/4) {
836 if (y & 1)
837 v = 0xff;
838 else
839 v = 0;
840 } else {
841 if (y < YSIZE*3/8) {
842 if ((y+x) & 1)
843 v = 0xff;
844 else
845 v = 0;
846 } else {
847 if (((x+3) % 4) <= 1 &&
848 ((y+3) % 4) <= 1)
849 v = 0xff;
850 else
851 v = 0x00;
852 }
853 }
854 } else if (x < XSIZE/2) {
855 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
856 } else if (y < XSIZE/2) {
857 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
858 } else {
859 v = ((x + y - XSIZE) * 255) / XSIZE;
860 }
861 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
862 }
863 }
864 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
865 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
866 fact = factors[i];
867 xsize = (int)(XSIZE * fact);
868 ysize = (int)((YSIZE - 100) * fact);
869 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
870 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
871 dump_filter(&s->h_filters[0][0]);
872 component_resample(s, img1, xsize, xsize, ysize,
873 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
874 img_resample_close(s);
875
876 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
877 save_pgm(buf, img1, xsize, ysize);
878 }
879
880 /* mmx test */
881 #ifdef HAVE_MMX
882 av_log(NULL, AV_LOG_INFO, "MMX test\n");
883 fact = 0.72;
884 xsize = (int)(XSIZE * fact);
885 ysize = (int)(YSIZE * fact);
886 mm_flags = MM_MMX;
887 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
888 component_resample(s, img1, xsize, xsize, ysize,
889 img, XSIZE, XSIZE, YSIZE);
890
891 mm_flags = 0;
892 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
893 component_resample(s, img2, xsize, xsize, ysize,
894 img, XSIZE, XSIZE, YSIZE);
895 if (memcmp(img1, img2, xsize * ysize) != 0) {
896 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
897 exit(1);
898 }
899 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
900 #endif
901 return 0;
902 }
903
904 #endif