add an AVClass pointer in SwsContext context
[libav.git] / libavcodec / imgresample.c
CommitLineData
/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include <stdint.h>

#include "avcodec.h"
#include "swscale.h"
#include "dsputil.h"

/* Number of picture planes. */
#define NB_COMPONENTS 3

/* Each filter bank holds NB_PHASES sub-pixel phases of NB_TAPS coefficients. */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST 1  /* Test it */

/* Source positions are fixed point with POS_FRAC_BITS fractional bits. */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* Filter coefficients are scaled by 1 << FILTER_BITS; filtered sums are
   shifted right by the same amount.
   6 bits precision is needed for MMX */
#define FILTER_BITS   8

/* Number of horizontally filtered lines kept in the ring buffer (the buffer
   is allocated with NB_TAPS additional lines). */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
45
a163ed1a 46struct SwsContext {
57ae779d 47 AVClass *av_class;
a163ed1a
LA
48 struct ImgReSampleContext *resampling_ctx;
49 enum PixelFormat src_pix_fmt, dst_pix_fmt;
50};
51
de6d9b64 52struct ImgReSampleContext {
1ff93ffc
TK
53 int iwidth, iheight, owidth, oheight;
54 int topBand, bottomBand, leftBand, rightBand;
55 int padtop, padbottom, padleft, padright;
56 int pad_owidth, pad_oheight;
de6d9b64 57 int h_incr, v_incr;
68b51e58
SH
58 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
59 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
0c1a9eda 60 uint8_t *line_buf;
de6d9b64
FB
61};
62
/* Filter bank builder defined outside this file. This file calls it with a
   destination table of phase_count * tap_count coefficients, the
   output/input size ratio as factor, and 1 << FILTER_BITS as scale.
   NOTE(review): the meaning of 'type' (always 0 here) is not visible from
   this file -- confirm against the definition. */
void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
64
de6d9b64
FB
65static inline int get_phase(int pos)
66{
67 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
68}
69
70/* This function must be optimized */
da64ecc3 71static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
72 int src_width, int src_start, int src_incr,
73 int16_t *filters)
de6d9b64
FB
74{
75 int src_pos, phase, sum, i;
da64ecc3 76 const uint8_t *s;
0c1a9eda 77 int16_t *filter;
de6d9b64
FB
78
79 src_pos = src_start;
80 for(i=0;i<dst_width;i++) {
81#ifdef TEST
82 /* test */
83 if ((src_pos >> POS_FRAC_BITS) < 0 ||
84 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
02ac3136 85 av_abort();
de6d9b64
FB
86#endif
87 s = src + (src_pos >> POS_FRAC_BITS);
88 phase = get_phase(src_pos);
89 filter = filters + phase * NB_TAPS;
90#if NB_TAPS == 4
91 sum = s[0] * filter[0] +
92 s[1] * filter[1] +
93 s[2] * filter[2] +
94 s[3] * filter[3];
95#else
96 {
97 int j;
98 sum = 0;
99 for(j=0;j<NB_TAPS;j++)
100 sum += s[j] * filter[j];
101 }
102#endif
103 sum = sum >> FILTER_BITS;
104 if (sum < 0)
105 sum = 0;
106 else if (sum > 255)
107 sum = 255;
108 dst[0] = sum;
109 src_pos += src_incr;
110 dst++;
111 }
112}
113
114/* This function must be optimized */
da64ecc3 115static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 116 int wrap, int16_t *filter)
de6d9b64
FB
117{
118 int sum, i;
da64ecc3 119 const uint8_t *s;
de6d9b64
FB
120
121 s = src;
122 for(i=0;i<dst_width;i++) {
123#if NB_TAPS == 4
124 sum = s[0 * wrap] * filter[0] +
125 s[1 * wrap] * filter[1] +
126 s[2 * wrap] * filter[2] +
127 s[3 * wrap] * filter[3];
128#else
129 {
130 int j;
0c1a9eda 131 uint8_t *s1 = s;
de6d9b64
FB
132
133 sum = 0;
134 for(j=0;j<NB_TAPS;j++) {
135 sum += s1[0] * filter[j];
136 s1 += wrap;
137 }
138 }
139#endif
140 sum = sum >> FILTER_BITS;
141 if (sum < 0)
142 sum = 0;
143 else if (sum > 255)
144 sum = 255;
145 dst[0] = sum;
146 dst++;
147 s++;
148 }
149}
150
980fc7b8 151#ifdef HAVE_MMX
de6d9b64
FB
152
153#include "i386/mmx.h"
154
/*
 * Compute one horizontally filtered sample into the low dword of 'reg'.
 *
 * Relies on the caller's locals s, phase, filter, filters, src, src_pos and
 * src_incr, and on mm7 having been cleared by the caller. mm6 is clobbered.
 * Note that movq loads 8 bytes from s although only the low 4 are used.
 */
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);               /* load source bytes */\
        punpcklbw_r2r(mm7, reg);         /* low 4 bytes -> 4 words */\
        movq_m2r(*filter, mm6);          /* 4 coefficients */\
        pmaddwd_r2r(reg, mm6);           /* two dword partial sums */\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);              /* move high partial sum down */\
        paddd_r2r(mm6, reg);             /* low dword = full sum */\
        psrad_i2r(FILTER_BITS, reg);     /* drop coefficient scale */\
        src_pos += src_incr;\
}

/* Debug helper: print the contents of an MMX register (needs a local mmx_t tmp). */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);

173/* XXX: do four pixels at a time */
da64ecc3 174static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
bb270c08 175 const uint8_t *src, int src_width,
0c1a9eda 176 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
177{
178 int src_pos, phase;
da64ecc3 179 const uint8_t *s;
0c1a9eda 180 int16_t *filter;
de6d9b64 181 mmx_t tmp;
115329f1 182
de6d9b64
FB
183 src_pos = src_start;
184 pxor_r2r(mm7, mm7);
185
186 while (dst_width >= 4) {
187
188 FILTER4(mm0);
189 FILTER4(mm1);
190 FILTER4(mm2);
191 FILTER4(mm3);
192
193 packuswb_r2r(mm7, mm0);
194 packuswb_r2r(mm7, mm1);
195 packuswb_r2r(mm7, mm3);
196 packuswb_r2r(mm7, mm2);
197 movq_r2m(mm0, tmp);
198 dst[0] = tmp.ub[0];
199 movq_r2m(mm1, tmp);
200 dst[1] = tmp.ub[0];
201 movq_r2m(mm2, tmp);
202 dst[2] = tmp.ub[0];
203 movq_r2m(mm3, tmp);
204 dst[3] = tmp.ub[0];
205 dst += 4;
206 dst_width -= 4;
207 }
208 while (dst_width > 0) {
209 FILTER4(mm0);
210 packuswb_r2r(mm7, mm0);
211 movq_r2m(mm0, tmp);
212 dst[0] = tmp.ub[0];
213 dst++;
214 dst_width--;
215 }
216 emms();
217}
218
da64ecc3 219static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 220 int wrap, int16_t *filter)
de6d9b64
FB
221{
222 int sum, i, v;
da64ecc3 223 const uint8_t *s;
de6d9b64
FB
224 mmx_t tmp;
225 mmx_t coefs[4];
115329f1 226
de6d9b64
FB
227 for(i=0;i<4;i++) {
228 v = filter[i];
229 coefs[i].uw[0] = v;
230 coefs[i].uw[1] = v;
231 coefs[i].uw[2] = v;
232 coefs[i].uw[3] = v;
233 }
115329f1 234
de6d9b64
FB
235 pxor_r2r(mm7, mm7);
236 s = src;
237 while (dst_width >= 4) {
238 movq_m2r(s[0 * wrap], mm0);
239 punpcklbw_r2r(mm7, mm0);
240 movq_m2r(s[1 * wrap], mm1);
241 punpcklbw_r2r(mm7, mm1);
242 movq_m2r(s[2 * wrap], mm2);
243 punpcklbw_r2r(mm7, mm2);
244 movq_m2r(s[3 * wrap], mm3);
245 punpcklbw_r2r(mm7, mm3);
246
247 pmullw_m2r(coefs[0], mm0);
248 pmullw_m2r(coefs[1], mm1);
249 pmullw_m2r(coefs[2], mm2);
250 pmullw_m2r(coefs[3], mm3);
251
252 paddw_r2r(mm1, mm0);
253 paddw_r2r(mm3, mm2);
254 paddw_r2r(mm2, mm0);
255 psraw_i2r(FILTER_BITS, mm0);
115329f1 256
de6d9b64
FB
257 packuswb_r2r(mm7, mm0);
258 movq_r2m(mm0, tmp);
259
0c1a9eda 260 *(uint32_t *)dst = tmp.ud[0];
de6d9b64
FB
261 dst += 4;
262 s += 4;
263 dst_width -= 4;
264 }
265 while (dst_width > 0) {
266 sum = s[0 * wrap] * filter[0] +
267 s[1 * wrap] * filter[1] +
268 s[2 * wrap] * filter[2] +
269 s[3 * wrap] * filter[3];
270 sum = sum >> FILTER_BITS;
271 if (sum < 0)
272 sum = 0;
273 else if (sum > 255)
274 sum = 255;
275 dst[0] = sum;
276 dst++;
277 s++;
278 dst_width--;
279 }
280 emms();
281}
fca6a0dd 282#endif /* HAVE_MMX */
de6d9b64 283
404d2241 284#ifdef HAVE_ALTIVEC
bb270c08 285typedef union {
404d2241
BF
286 vector unsigned char v;
287 unsigned char c[16];
288} vec_uc_t;
289
bb270c08 290typedef union {
404d2241
BF
291 vector signed short v;
292 signed short s[8];
293} vec_ss_t;
294
da64ecc3 295void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 296 int wrap, int16_t *filter)
404d2241
BF
297{
298 int sum, i;
da64ecc3 299 const uint8_t *s;
404d2241
BF
300 vector unsigned char *tv, tmp, dstv, zero;
301 vec_ss_t srchv[4], srclv[4], fv[4];
115329f1 302 vector signed short zeros, sumhv, sumlv;
404d2241
BF
303 s = src;
304
305 for(i=0;i<4;i++)
306 {
307 /*
308 The vec_madds later on does an implicit >>15 on the result.
309 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
310 a signed short, we have just enough bits to pre-shift our
311 filter constants <<7 to compensate for vec_madds.
312 */
313 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
314 fv[i].v = vec_splat(fv[i].v, 0);
315 }
115329f1 316
404d2241
BF
317 zero = vec_splat_u8(0);
318 zeros = vec_splat_s16(0);
319
320
321 /*
322 When we're resampling, we'd ideally like both our input buffers,
323 and output buffers to be 16-byte aligned, so we can do both aligned
324 reads and writes. Sadly we can't always have this at the moment, so
325 we opt for aligned writes, as unaligned writes have a huge overhead.
326 To do this, do enough scalar resamples to get dst 16-byte aligned.
327 */
9e4e1659 328 i = (-(int)dst) & 0xf;
404d2241
BF
329 while(i>0) {
330 sum = s[0 * wrap] * filter[0] +
331 s[1 * wrap] * filter[1] +
332 s[2 * wrap] * filter[2] +
333 s[3 * wrap] * filter[3];
334 sum = sum >> FILTER_BITS;
335 if (sum<0) sum = 0; else if (sum>255) sum=255;
336 dst[0] = sum;
337 dst++;
338 s++;
339 dst_width--;
340 i--;
341 }
115329f1 342
404d2241
BF
343 /* Do our altivec resampling on 16 pixels at once. */
344 while(dst_width>=16) {
345 /*
346 Read 16 (potentially unaligned) bytes from each of
347 4 lines into 4 vectors, and split them into shorts.
348 Interleave the multipy/accumulate for the resample
349 filter with the loads to hide the 3 cycle latency
350 the vec_madds have.
351 */
352 tv = (vector unsigned char *) &s[0 * wrap];
353 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
354 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
355 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
356 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
357 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
358
359 tv = (vector unsigned char *) &s[1 * wrap];
360 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
361 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
362 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
363 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
364 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
365
366 tv = (vector unsigned char *) &s[2 * wrap];
367 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
368 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
369 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
370 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
371 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
372
373 tv = (vector unsigned char *) &s[3 * wrap];
374 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
375 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
376 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
377 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
378 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
115329f1 379
404d2241
BF
380 /*
381 Pack the results into our destination vector,
382 and do an aligned write of that back to memory.
383 */
384 dstv = vec_packsu(sumhv, sumlv) ;
385 vec_st(dstv, 0, (vector unsigned char *) dst);
115329f1 386
404d2241
BF
387 dst+=16;
388 s+=16;
389 dst_width-=16;
390 }
391
392 /*
393 If there are any leftover pixels, resample them
394 with the slow scalar method.
395 */
396 while(dst_width>0) {
397 sum = s[0 * wrap] * filter[0] +
398 s[1 * wrap] * filter[1] +
399 s[2 * wrap] * filter[2] +
400 s[3 * wrap] * filter[3];
401 sum = sum >> FILTER_BITS;
402 if (sum<0) sum = 0; else if (sum>255) sum=255;
403 dst[0] = sum;
404 dst++;
405 s++;
406 dst_width--;
407 }
408}
fca6a0dd 409#endif /* HAVE_ALTIVEC */
404d2241 410
de6d9b64 411/* slow version to handle limit cases. Does not need optimisation */
da64ecc3 412static void h_resample_slow(uint8_t *dst, int dst_width,
bb270c08 413 const uint8_t *src, int src_width,
0c1a9eda 414 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
415{
416 int src_pos, phase, sum, j, v, i;
da64ecc3 417 const uint8_t *s, *src_end;
0c1a9eda 418 int16_t *filter;
de6d9b64
FB
419
420 src_end = src + src_width;
421 src_pos = src_start;
422 for(i=0;i<dst_width;i++) {
423 s = src + (src_pos >> POS_FRAC_BITS);
424 phase = get_phase(src_pos);
425 filter = filters + phase * NB_TAPS;
426 sum = 0;
427 for(j=0;j<NB_TAPS;j++) {
428 if (s < src)
429 v = src[0];
430 else if (s >= src_end)
431 v = src_end[-1];
432 else
433 v = s[0];
434 sum += v * filter[j];
435 s++;
436 }
437 sum = sum >> FILTER_BITS;
438 if (sum < 0)
439 sum = 0;
440 else if (sum > 255)
441 sum = 255;
442 dst[0] = sum;
443 src_pos += src_incr;
444 dst++;
445 }
446}
447
da64ecc3 448static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
449 int src_width, int src_start, int src_incr,
450 int16_t *filters)
de6d9b64
FB
451{
452 int n, src_end;
453
454 if (src_start < 0) {
455 n = (0 - src_start + src_incr - 1) / src_incr;
456 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
457 dst += n;
458 dst_width -= n;
459 src_start += n * src_incr;
460 }
461 src_end = src_start + dst_width * src_incr;
462 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
115329f1 463 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
de6d9b64
FB
464 src_incr;
465 } else {
466 n = dst_width;
467 }
980fc7b8 468#ifdef HAVE_MMX
486497e0 469 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
115329f1 470 h_resample_fast4_mmx(dst, n,
de6d9b64
FB
471 src, src_width, src_start, src_incr, filters);
472 else
473#endif
115329f1 474 h_resample_fast(dst, n,
de6d9b64
FB
475 src, src_width, src_start, src_incr, filters);
476 if (n < dst_width) {
477 dst += n;
478 dst_width -= n;
479 src_start += n * src_incr;
115329f1 480 h_resample_slow(dst, dst_width,
de6d9b64
FB
481 src, src_width, src_start, src_incr, filters);
482 }
483}
484
115329f1 485static void component_resample(ImgReSampleContext *s,
0c1a9eda
ZK
486 uint8_t *output, int owrap, int owidth, int oheight,
487 uint8_t *input, int iwrap, int iwidth, int iheight)
de6d9b64
FB
488{
489 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
0c1a9eda 490 uint8_t *new_line, *src_line;
de6d9b64
FB
491
492 last_src_y = - FCENTER - 1;
493 /* position of the bottom of the filter in the source image */
115329f1 494 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
de6d9b64
FB
495 ring_y = NB_TAPS; /* position in ring buffer */
496 for(y=0;y<oheight;y++) {
497 /* apply horizontal filter on new lines from input if needed */
498 src_y1 = src_y >> POS_FRAC_BITS;
499 while (last_src_y < src_y1) {
500 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
501 ring_y = NB_TAPS;
502 last_src_y++;
ab6d194a
MN
503 /* handle limit conditions : replicate line (slightly
504 inefficient because we filter multiple times) */
de6d9b64
FB
505 y1 = last_src_y;
506 if (y1 < 0) {
507 y1 = 0;
508 } else if (y1 >= iheight) {
509 y1 = iheight - 1;
510 }
511 src_line = input + y1 * iwrap;
512 new_line = s->line_buf + ring_y * owidth;
513 /* apply filter and handle limit cases correctly */
115329f1
DB
514 h_resample(new_line, owidth,
515 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
de6d9b64
FB
516 &s->h_filters[0][0]);
517 /* handle ring buffer wraping */
518 if (ring_y >= LINE_BUF_HEIGHT) {
519 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
520 new_line, owidth);
521 }
522 }
523 /* apply vertical filter */
524 phase_y = get_phase(src_y);
980fc7b8 525#ifdef HAVE_MMX
de6d9b64 526 /* desactivated MMX because loss of precision */
486497e0 527 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
115329f1
DB
528 v_resample4_mmx(output, owidth,
529 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 530 &s->v_filters[phase_y][0]);
404d2241
BF
531 else
532#endif
533#ifdef HAVE_ALTIVEC
486497e0 534 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
404d2241
BF
535 v_resample16_altivec(output, owidth,
536 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
537 &s->v_filters[phase_y][0]);
de6d9b64
FB
538 else
539#endif
115329f1
DB
540 v_resample(output, owidth,
541 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 542 &s->v_filters[phase_y][0]);
115329f1 543
de6d9b64 544 src_y += s->v_incr;
115329f1 545
de6d9b64
FB
546 output += owrap;
547 }
548}
549
de6d9b64
FB
550ImgReSampleContext *img_resample_init(int owidth, int oheight,
551 int iwidth, int iheight)
552{
115329f1 553 return img_resample_full_init(owidth, oheight, iwidth, iheight,
1ff93ffc 554 0, 0, 0, 0, 0, 0, 0, 0);
ab6d194a
MN
555}
556
557ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
558 int iwidth, int iheight,
559 int topBand, int bottomBand,
1ff93ffc
TK
560 int leftBand, int rightBand,
561 int padtop, int padbottom,
562 int padleft, int padright)
ab6d194a 563{
de6d9b64
FB
564 ImgReSampleContext *s;
565
d10dc616 566 if (!owidth || !oheight || !iwidth || !iheight)
bb270c08 567 return NULL;
d10dc616 568
de6d9b64
FB
569 s = av_mallocz(sizeof(ImgReSampleContext));
570 if (!s)
571 return NULL;
0ecca7a4
MN
572 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
573 return NULL;
de6d9b64 574 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
115329f1 575 if (!s->line_buf)
de6d9b64 576 goto fail;
115329f1 577
de6d9b64
FB
578 s->owidth = owidth;
579 s->oheight = oheight;
580 s->iwidth = iwidth;
581 s->iheight = iheight;
115329f1 582
ab6d194a
MN
583 s->topBand = topBand;
584 s->bottomBand = bottomBand;
585 s->leftBand = leftBand;
586 s->rightBand = rightBand;
115329f1 587
1ff93ffc
TK
588 s->padtop = padtop;
589 s->padbottom = padbottom;
590 s->padleft = padleft;
591 s->padright = padright;
592
593 s->pad_owidth = owidth - (padleft + padright);
594 s->pad_oheight = oheight - (padtop + padbottom);
595
596 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
115329f1 597 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
1ff93ffc 598
115329f1 599 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
aaaf1635 600 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
115329f1 601 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
aaaf1635 602 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
de6d9b64
FB
603
604 return s;
1ff93ffc 605fail:
6000abfa 606 av_free(s);
de6d9b64
FB
607 return NULL;
608}
609
115329f1 610void img_resample(ImgReSampleContext *s,
da64ecc3 611 AVPicture *output, const AVPicture *input)
de6d9b64
FB
612{
613 int i, shift;
1ff93ffc 614 uint8_t* optr;
de6d9b64 615
1ff93ffc 616 for (i=0;i<3;i++) {
de6d9b64 617 shift = (i == 0) ? 0 : 1;
1ff93ffc 618
115329f1 619 optr = output->data[i] + (((output->linesize[i] *
1ff93ffc
TK
620 s->padtop) + s->padleft) >> shift);
621
115329f1 622 component_resample(s, optr, output->linesize[i],
1ff93ffc 623 s->pad_owidth >> shift, s->pad_oheight >> shift,
115329f1 624 input->data[i] + (input->linesize[i] *
1ff93ffc 625 (s->topBand >> shift)) + (s->leftBand >> shift),
115329f1 626 input->linesize[i], ((s->iwidth - s->leftBand -
1ff93ffc 627 s->rightBand) >> shift),
ab6d194a 628 (s->iheight - s->topBand - s->bottomBand) >> shift);
de6d9b64
FB
629 }
630}
631
632void img_resample_close(ImgReSampleContext *s)
633{
6000abfa
FB
634 av_free(s->line_buf);
635 av_free(s);
de6d9b64
FB
636}
637
7b748aff
LA
638struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
639 int dstW, int dstH, int dstFormat,
640 int flags, SwsFilter *srcFilter,
641 SwsFilter *dstFilter, double *param)
642{
643 struct SwsContext *ctx;
644
645 ctx = av_malloc(sizeof(struct SwsContext));
57ae779d
BF
646 ctx->av_class = av_mallocz(sizeof(AVClass));
647 if (!ctx || !ctx->av_class) {
7b748aff
LA
648 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
649
650 return NULL;
651 }
652
653 if ((srcH != dstH) || (srcW != dstW)) {
654 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
655 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
656 }
657 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
658 } else {
659 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
660 ctx->resampling_ctx->iheight = srcH;
661 ctx->resampling_ctx->iwidth = srcW;
662 ctx->resampling_ctx->oheight = dstH;
663 ctx->resampling_ctx->owidth = dstW;
664 }
665 ctx->src_pix_fmt = srcFormat;
666 ctx->dst_pix_fmt = dstFormat;
667
668 return ctx;
669}
670
671void sws_freeContext(struct SwsContext *ctx)
672{
04675319
PI
673 if (!ctx)
674 return;
7b748aff
LA
675 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
676 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
677 img_resample_close(ctx->resampling_ctx);
678 } else {
679 av_free(ctx->resampling_ctx);
680 }
57ae779d 681 av_free(ctx->av_class);
7b748aff
LA
682 av_free(ctx);
683}
684
96db3808
LA
685
686/**
687 * Checks if context is valid or reallocs a new one instead.
688 * If context is NULL, just calls sws_getContext() to get a new one.
689 * Otherwise, checks if the parameters are the same already saved in context.
690 * If that is the case, returns the current context.
691 * Otherwise, frees context and gets a new one.
692 *
693 * Be warned that srcFilter, dstFilter are not checked, they are
694 * asumed to remain valid.
695 */
696struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
697 int srcW, int srcH, int srcFormat,
698 int dstW, int dstH, int dstFormat, int flags,
699 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
700{
701 if (ctx != NULL) {
702 if ((ctx->resampling_ctx->iwidth != srcW) ||
703 (ctx->resampling_ctx->iheight != srcH) ||
704 (ctx->src_pix_fmt != srcFormat) ||
705 (ctx->resampling_ctx->owidth != dstW) ||
706 (ctx->resampling_ctx->oheight != dstH) ||
707 (ctx->dst_pix_fmt != dstFormat))
708 {
709 sws_freeContext(ctx);
710 ctx = NULL;
711 }
712 }
713 if (ctx == NULL) {
714 return sws_getContext(srcW, srcH, srcFormat,
715 dstW, dstH, dstFormat, flags,
716 srcFilter, dstFilter, param);
717 }
718 return ctx;
719}
720
7b748aff
LA
721int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
722 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
723{
724 AVPicture src_pict, dst_pict;
725 int i, res = 0;
726 AVPicture picture_format_temp;
727 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
728 uint8_t *buf1 = NULL, *buf2 = NULL;
729 enum PixelFormat current_pix_fmt;
730
9c5d7c56 731 for (i = 0; i < 4; i++) {
7b748aff
LA
732 src_pict.data[i] = src[i];
733 src_pict.linesize[i] = srcStride[i];
734 dst_pict.data[i] = dst[i];
735 dst_pict.linesize[i] = dstStride[i];
736 }
737 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
738 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
739 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
740
741 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
742 int size;
743
744 /* create temporary picture for rescaling input*/
745 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
746 buf1 = av_malloc(size);
747 if (!buf1) {
748 res = -1;
749 goto the_end;
750 }
751 formatted_picture = &picture_format_temp;
752 avpicture_fill((AVPicture*)formatted_picture, buf1,
753 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
754
755 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
756 &src_pict, ctx->src_pix_fmt,
757 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
758
759 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
760 res = -1;
761 goto the_end;
762 }
763 } else {
764 formatted_picture = &src_pict;
765 }
766
767 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
768 int size;
769
770 /* create temporary picture for rescaling output*/
771 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
772 buf2 = av_malloc(size);
773 if (!buf2) {
774 res = -1;
775 goto the_end;
776 }
777 resampled_picture = &picture_resample_temp;
778 avpicture_fill((AVPicture*)resampled_picture, buf2,
779 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
780
781 } else {
782 resampled_picture = &dst_pict;
783 }
784
785 /* ...and finally rescale!!! */
786 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
787 current_pix_fmt = PIX_FMT_YUV420P;
788 } else {
789 resampled_picture = &src_pict;
790 current_pix_fmt = ctx->src_pix_fmt;
791 }
792
793 if (current_pix_fmt != ctx->dst_pix_fmt) {
794 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
795 resampled_picture, current_pix_fmt,
796 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
797
798 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
799
800 res = -1;
801 goto the_end;
802 }
2793096f 803 } else if (resampled_picture != &dst_pict) {
636d6a4a 804 av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
2793096f 805 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
7b748aff
LA
806 }
807
808the_end:
809 av_free(buf1);
810 av_free(buf2);
811 return res;
812}
813
814
de6d9b64 815#ifdef TEST
13160c07 816#include <stdio.h>
14eb0a2e 817#undef exit
ab6d194a 818
/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

0c1a9eda 830void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
de6d9b64 831{
95ae72b7 832#undef fprintf
de6d9b64
FB
833 FILE *f;
834 f=fopen(filename,"w");
835 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
836 fwrite(img,1, xsize * ysize,f);
837 fclose(f);
95ae72b7 838#define fprintf please_use_av_log
de6d9b64
FB
839}
840
0c1a9eda 841static void dump_filter(int16_t *filter)
de6d9b64
FB
842{
843 int i, ph;
844
845 for(ph=0;ph<NB_PHASES;ph++) {
13160c07 846 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
de6d9b64 847 for(i=0;i<NB_TAPS;i++) {
13160c07 848 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
de6d9b64 849 }
13160c07 850 av_log(NULL, AV_LOG_INFO, "\n");
de6d9b64
FB
851 }
852}
853
#ifdef HAVE_MMX
/* CPU feature flags tested by the resampling code; defined here for the
   stand-alone test program. */
int mm_flags;
#endif
857
858int main(int argc, char **argv)
859{
860 int x, y, v, i, xsize, ysize;
861 ImgReSampleContext *s;
862 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
863 char buf[256];
864
865 /* build test image */
866 for(y=0;y<YSIZE;y++) {
867 for(x=0;x<XSIZE;x++) {
868 if (x < XSIZE/2 && y < YSIZE/2) {
869 if (x < XSIZE/4 && y < YSIZE/4) {
870 if ((x % 10) <= 6 &&
871 (y % 10) <= 6)
872 v = 0xff;
873 else
874 v = 0x00;
875 } else if (x < XSIZE/4) {
115329f1 876 if (x & 1)
de6d9b64 877 v = 0xff;
115329f1 878 else
de6d9b64
FB
879 v = 0;
880 } else if (y < XSIZE/4) {
115329f1 881 if (y & 1)
de6d9b64 882 v = 0xff;
115329f1 883 else
de6d9b64
FB
884 v = 0;
885 } else {
886 if (y < YSIZE*3/8) {
115329f1 887 if ((y+x) & 1)
de6d9b64 888 v = 0xff;
115329f1 889 else
de6d9b64
FB
890 v = 0;
891 } else {
892 if (((x+3) % 4) <= 1 &&
893 ((y+3) % 4) <= 1)
894 v = 0xff;
895 else
896 v = 0x00;
897 }
898 }
899 } else if (x < XSIZE/2) {
900 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
901 } else if (y < XSIZE/2) {
902 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
903 } else {
904 v = ((x + y - XSIZE) * 255) / XSIZE;
905 }
ab6d194a 906 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
de6d9b64
FB
907 }
908 }
909 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
910 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
911 fact = factors[i];
912 xsize = (int)(XSIZE * fact);
ab6d194a 913 ysize = (int)((YSIZE - 100) * fact);
13160c07
PI
914 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
915 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
de6d9b64
FB
916 dump_filter(&s->h_filters[0][0]);
917 component_resample(s, img1, xsize, xsize, ysize,
ab6d194a 918 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
de6d9b64
FB
919 img_resample_close(s);
920
2fc8ea24 921 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
de6d9b64
FB
922 save_pgm(buf, img1, xsize, ysize);
923 }
924
925 /* mmx test */
980fc7b8 926#ifdef HAVE_MMX
13160c07 927 av_log(NULL, AV_LOG_INFO, "MMX test\n");
de6d9b64
FB
928 fact = 0.72;
929 xsize = (int)(XSIZE * fact);
930 ysize = (int)(YSIZE * fact);
931 mm_flags = MM_MMX;
932 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
933 component_resample(s, img1, xsize, xsize, ysize,
934 img, XSIZE, XSIZE, YSIZE);
935
936 mm_flags = 0;
937 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
938 component_resample(s, img2, xsize, xsize, ysize,
939 img, XSIZE, XSIZE, YSIZE);
940 if (memcmp(img1, img2, xsize * ysize) != 0) {
13160c07 941 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
de6d9b64
FB
942 exit(1);
943 }
13160c07 944 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
fca6a0dd 945#endif /* HAVE_MMX */
de6d9b64
FB
946 return 0;
947}
948
fca6a0dd 949#endif /* TEST */