Add the prefix "av_" to img_crop(), img_copy() and img_pad(), and rename "img"
[libav.git] / libavcodec / imgresample.c
CommitLineData
de6d9b64 1/*
115329f1 2 * High quality image resampling with polyphase filters
ff4ec49e 3 * Copyright (c) 2001 Fabrice Bellard.
de6d9b64 4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
ff4ec49e
FB
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
de6d9b64 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
de6d9b64 16 *
ff4ec49e 17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 20 */
115329f1 21
983e3246
MN
/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */
115329f1 26
de6d9b64 27#include "avcodec.h"
7b748aff 28#include "swscale.h"
6000abfa 29#include "dsputil.h"
de6d9b64 30
54329dd5 31#ifdef USE_FASTMEMCPY
f4bd289a 32#include "libvo/fastmemcpy.h"
54329dd5 33#endif
54329dd5 34
de6d9b64
FB
#define NB_COMPONENTS 3

/* polyphase filter geometry */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1 /* index of the center of the filter */
//#define TEST 1 /* Test it */

/* source positions are fixed point with POS_FRAC_BITS fractional bits */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

/* line_buf holds LINE_BUF_HEIGHT + NB_TAPS horizontally resampled lines */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
49
a163ed1a
LA
50struct SwsContext {
51 struct ImgReSampleContext *resampling_ctx;
52 enum PixelFormat src_pix_fmt, dst_pix_fmt;
53};
54
de6d9b64 55struct ImgReSampleContext {
1ff93ffc
TK
56 int iwidth, iheight, owidth, oheight;
57 int topBand, bottomBand, leftBand, rightBand;
58 int padtop, padbottom, padleft, padright;
59 int pad_owidth, pad_oheight;
de6d9b64 60 int h_incr, v_incr;
68b51e58
SH
61 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
62 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
0c1a9eda 63 uint8_t *line_buf;
de6d9b64
FB
64};
65
aaaf1635
MN
/*
 * Defined outside this file; used by img_resample_full_init() to fill the
 * h_filters / v_filters tables.
 */
void av_build_filter(int16_t *filter, double factor,
                     int tap_count, int phase_count,
                     int scale, int type);
67
de6d9b64
FB
68static inline int get_phase(int pos)
69{
70 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
71}
72
73/* This function must be optimized */
da64ecc3 74static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
75 int src_width, int src_start, int src_incr,
76 int16_t *filters)
de6d9b64
FB
77{
78 int src_pos, phase, sum, i;
da64ecc3 79 const uint8_t *s;
0c1a9eda 80 int16_t *filter;
de6d9b64
FB
81
82 src_pos = src_start;
83 for(i=0;i<dst_width;i++) {
84#ifdef TEST
85 /* test */
86 if ((src_pos >> POS_FRAC_BITS) < 0 ||
87 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
02ac3136 88 av_abort();
de6d9b64
FB
89#endif
90 s = src + (src_pos >> POS_FRAC_BITS);
91 phase = get_phase(src_pos);
92 filter = filters + phase * NB_TAPS;
93#if NB_TAPS == 4
94 sum = s[0] * filter[0] +
95 s[1] * filter[1] +
96 s[2] * filter[2] +
97 s[3] * filter[3];
98#else
99 {
100 int j;
101 sum = 0;
102 for(j=0;j<NB_TAPS;j++)
103 sum += s[j] * filter[j];
104 }
105#endif
106 sum = sum >> FILTER_BITS;
107 if (sum < 0)
108 sum = 0;
109 else if (sum > 255)
110 sum = 255;
111 dst[0] = sum;
112 src_pos += src_incr;
113 dst++;
114 }
115}
116
117/* This function must be optimized */
da64ecc3 118static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 119 int wrap, int16_t *filter)
de6d9b64
FB
120{
121 int sum, i;
da64ecc3 122 const uint8_t *s;
de6d9b64
FB
123
124 s = src;
125 for(i=0;i<dst_width;i++) {
126#if NB_TAPS == 4
127 sum = s[0 * wrap] * filter[0] +
128 s[1 * wrap] * filter[1] +
129 s[2 * wrap] * filter[2] +
130 s[3 * wrap] * filter[3];
131#else
132 {
133 int j;
0c1a9eda 134 uint8_t *s1 = s;
de6d9b64
FB
135
136 sum = 0;
137 for(j=0;j<NB_TAPS;j++) {
138 sum += s1[0] * filter[j];
139 s1 += wrap;
140 }
141 }
142#endif
143 sum = sum >> FILTER_BITS;
144 if (sum < 0)
145 sum = 0;
146 else if (sum > 255)
147 sum = 255;
148 dst[0] = sum;
149 dst++;
150 s++;
151 }
152}
153
980fc7b8 154#ifdef HAVE_MMX
de6d9b64
FB
155
156#include "i386/mmx.h"
157
/*
 * Filter one output pixel into MMX register `reg` and advance src_pos.
 * Relies on these names in the expanding scope: src, src_pos, src_incr,
 * filters, s, phase, filter; mm7 must hold zero and mm6 is clobbered.
 */
#define FILTER4(reg) \
do {\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
} while (0)
173
/* debugging aid: print an MMX register; needs an mmx_t named tmp in scope */
#define DUMP(reg) \
    movq_r2m(reg, tmp); \
    printf(#reg "=%016"PRIx64"\n", tmp.uq);
de6d9b64
FB
175
176/* XXX: do four pixels at a time */
da64ecc3 177static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
bb270c08 178 const uint8_t *src, int src_width,
0c1a9eda 179 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
180{
181 int src_pos, phase;
da64ecc3 182 const uint8_t *s;
0c1a9eda 183 int16_t *filter;
de6d9b64 184 mmx_t tmp;
115329f1 185
de6d9b64
FB
186 src_pos = src_start;
187 pxor_r2r(mm7, mm7);
188
189 while (dst_width >= 4) {
190
191 FILTER4(mm0);
192 FILTER4(mm1);
193 FILTER4(mm2);
194 FILTER4(mm3);
195
196 packuswb_r2r(mm7, mm0);
197 packuswb_r2r(mm7, mm1);
198 packuswb_r2r(mm7, mm3);
199 packuswb_r2r(mm7, mm2);
200 movq_r2m(mm0, tmp);
201 dst[0] = tmp.ub[0];
202 movq_r2m(mm1, tmp);
203 dst[1] = tmp.ub[0];
204 movq_r2m(mm2, tmp);
205 dst[2] = tmp.ub[0];
206 movq_r2m(mm3, tmp);
207 dst[3] = tmp.ub[0];
208 dst += 4;
209 dst_width -= 4;
210 }
211 while (dst_width > 0) {
212 FILTER4(mm0);
213 packuswb_r2r(mm7, mm0);
214 movq_r2m(mm0, tmp);
215 dst[0] = tmp.ub[0];
216 dst++;
217 dst_width--;
218 }
219 emms();
220}
221
da64ecc3 222static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 223 int wrap, int16_t *filter)
de6d9b64
FB
224{
225 int sum, i, v;
da64ecc3 226 const uint8_t *s;
de6d9b64
FB
227 mmx_t tmp;
228 mmx_t coefs[4];
115329f1 229
de6d9b64
FB
230 for(i=0;i<4;i++) {
231 v = filter[i];
232 coefs[i].uw[0] = v;
233 coefs[i].uw[1] = v;
234 coefs[i].uw[2] = v;
235 coefs[i].uw[3] = v;
236 }
115329f1 237
de6d9b64
FB
238 pxor_r2r(mm7, mm7);
239 s = src;
240 while (dst_width >= 4) {
241 movq_m2r(s[0 * wrap], mm0);
242 punpcklbw_r2r(mm7, mm0);
243 movq_m2r(s[1 * wrap], mm1);
244 punpcklbw_r2r(mm7, mm1);
245 movq_m2r(s[2 * wrap], mm2);
246 punpcklbw_r2r(mm7, mm2);
247 movq_m2r(s[3 * wrap], mm3);
248 punpcklbw_r2r(mm7, mm3);
249
250 pmullw_m2r(coefs[0], mm0);
251 pmullw_m2r(coefs[1], mm1);
252 pmullw_m2r(coefs[2], mm2);
253 pmullw_m2r(coefs[3], mm3);
254
255 paddw_r2r(mm1, mm0);
256 paddw_r2r(mm3, mm2);
257 paddw_r2r(mm2, mm0);
258 psraw_i2r(FILTER_BITS, mm0);
115329f1 259
de6d9b64
FB
260 packuswb_r2r(mm7, mm0);
261 movq_r2m(mm0, tmp);
262
0c1a9eda 263 *(uint32_t *)dst = tmp.ud[0];
de6d9b64
FB
264 dst += 4;
265 s += 4;
266 dst_width -= 4;
267 }
268 while (dst_width > 0) {
269 sum = s[0 * wrap] * filter[0] +
270 s[1 * wrap] * filter[1] +
271 s[2 * wrap] * filter[2] +
272 s[3 * wrap] * filter[3];
273 sum = sum >> FILTER_BITS;
274 if (sum < 0)
275 sum = 0;
276 else if (sum > 255)
277 sum = 255;
278 dst[0] = sum;
279 dst++;
280 s++;
281 dst_width--;
282 }
283 emms();
284}
285#endif
286
404d2241 287#ifdef HAVE_ALTIVEC
bb270c08 288typedef union {
404d2241
BF
289 vector unsigned char v;
290 unsigned char c[16];
291} vec_uc_t;
292
bb270c08 293typedef union {
404d2241
BF
294 vector signed short v;
295 signed short s[8];
296} vec_ss_t;
297
da64ecc3 298void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 299 int wrap, int16_t *filter)
404d2241
BF
300{
301 int sum, i;
da64ecc3 302 const uint8_t *s;
404d2241
BF
303 vector unsigned char *tv, tmp, dstv, zero;
304 vec_ss_t srchv[4], srclv[4], fv[4];
115329f1 305 vector signed short zeros, sumhv, sumlv;
404d2241
BF
306 s = src;
307
308 for(i=0;i<4;i++)
309 {
310 /*
311 The vec_madds later on does an implicit >>15 on the result.
312 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
313 a signed short, we have just enough bits to pre-shift our
314 filter constants <<7 to compensate for vec_madds.
315 */
316 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
317 fv[i].v = vec_splat(fv[i].v, 0);
318 }
115329f1 319
404d2241
BF
320 zero = vec_splat_u8(0);
321 zeros = vec_splat_s16(0);
322
323
324 /*
325 When we're resampling, we'd ideally like both our input buffers,
326 and output buffers to be 16-byte aligned, so we can do both aligned
327 reads and writes. Sadly we can't always have this at the moment, so
328 we opt for aligned writes, as unaligned writes have a huge overhead.
329 To do this, do enough scalar resamples to get dst 16-byte aligned.
330 */
9e4e1659 331 i = (-(int)dst) & 0xf;
404d2241
BF
332 while(i>0) {
333 sum = s[0 * wrap] * filter[0] +
334 s[1 * wrap] * filter[1] +
335 s[2 * wrap] * filter[2] +
336 s[3 * wrap] * filter[3];
337 sum = sum >> FILTER_BITS;
338 if (sum<0) sum = 0; else if (sum>255) sum=255;
339 dst[0] = sum;
340 dst++;
341 s++;
342 dst_width--;
343 i--;
344 }
115329f1 345
404d2241
BF
346 /* Do our altivec resampling on 16 pixels at once. */
347 while(dst_width>=16) {
348 /*
349 Read 16 (potentially unaligned) bytes from each of
350 4 lines into 4 vectors, and split them into shorts.
351 Interleave the multipy/accumulate for the resample
352 filter with the loads to hide the 3 cycle latency
353 the vec_madds have.
354 */
355 tv = (vector unsigned char *) &s[0 * wrap];
356 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
357 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
358 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
359 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
360 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
361
362 tv = (vector unsigned char *) &s[1 * wrap];
363 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
364 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
365 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
366 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
367 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
368
369 tv = (vector unsigned char *) &s[2 * wrap];
370 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
371 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
372 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
373 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
374 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
375
376 tv = (vector unsigned char *) &s[3 * wrap];
377 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
378 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
379 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
380 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
381 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
115329f1 382
404d2241
BF
383 /*
384 Pack the results into our destination vector,
385 and do an aligned write of that back to memory.
386 */
387 dstv = vec_packsu(sumhv, sumlv) ;
388 vec_st(dstv, 0, (vector unsigned char *) dst);
115329f1 389
404d2241
BF
390 dst+=16;
391 s+=16;
392 dst_width-=16;
393 }
394
395 /*
396 If there are any leftover pixels, resample them
397 with the slow scalar method.
398 */
399 while(dst_width>0) {
400 sum = s[0 * wrap] * filter[0] +
401 s[1 * wrap] * filter[1] +
402 s[2 * wrap] * filter[2] +
403 s[3 * wrap] * filter[3];
404 sum = sum >> FILTER_BITS;
405 if (sum<0) sum = 0; else if (sum>255) sum=255;
406 dst[0] = sum;
407 dst++;
408 s++;
409 dst_width--;
410 }
411}
412#endif
413
de6d9b64 414/* slow version to handle limit cases. Does not need optimisation */
da64ecc3 415static void h_resample_slow(uint8_t *dst, int dst_width,
bb270c08 416 const uint8_t *src, int src_width,
0c1a9eda 417 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
418{
419 int src_pos, phase, sum, j, v, i;
da64ecc3 420 const uint8_t *s, *src_end;
0c1a9eda 421 int16_t *filter;
de6d9b64
FB
422
423 src_end = src + src_width;
424 src_pos = src_start;
425 for(i=0;i<dst_width;i++) {
426 s = src + (src_pos >> POS_FRAC_BITS);
427 phase = get_phase(src_pos);
428 filter = filters + phase * NB_TAPS;
429 sum = 0;
430 for(j=0;j<NB_TAPS;j++) {
431 if (s < src)
432 v = src[0];
433 else if (s >= src_end)
434 v = src_end[-1];
435 else
436 v = s[0];
437 sum += v * filter[j];
438 s++;
439 }
440 sum = sum >> FILTER_BITS;
441 if (sum < 0)
442 sum = 0;
443 else if (sum > 255)
444 sum = 255;
445 dst[0] = sum;
446 src_pos += src_incr;
447 dst++;
448 }
449}
450
da64ecc3 451static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
452 int src_width, int src_start, int src_incr,
453 int16_t *filters)
de6d9b64
FB
454{
455 int n, src_end;
456
457 if (src_start < 0) {
458 n = (0 - src_start + src_incr - 1) / src_incr;
459 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
460 dst += n;
461 dst_width -= n;
462 src_start += n * src_incr;
463 }
464 src_end = src_start + dst_width * src_incr;
465 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
115329f1 466 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
de6d9b64
FB
467 src_incr;
468 } else {
469 n = dst_width;
470 }
980fc7b8 471#ifdef HAVE_MMX
486497e0 472 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
115329f1 473 h_resample_fast4_mmx(dst, n,
de6d9b64
FB
474 src, src_width, src_start, src_incr, filters);
475 else
476#endif
115329f1 477 h_resample_fast(dst, n,
de6d9b64
FB
478 src, src_width, src_start, src_incr, filters);
479 if (n < dst_width) {
480 dst += n;
481 dst_width -= n;
482 src_start += n * src_incr;
115329f1 483 h_resample_slow(dst, dst_width,
de6d9b64
FB
484 src, src_width, src_start, src_incr, filters);
485 }
486}
487
115329f1 488static void component_resample(ImgReSampleContext *s,
0c1a9eda
ZK
489 uint8_t *output, int owrap, int owidth, int oheight,
490 uint8_t *input, int iwrap, int iwidth, int iheight)
de6d9b64
FB
491{
492 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
0c1a9eda 493 uint8_t *new_line, *src_line;
de6d9b64
FB
494
495 last_src_y = - FCENTER - 1;
496 /* position of the bottom of the filter in the source image */
115329f1 497 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
de6d9b64
FB
498 ring_y = NB_TAPS; /* position in ring buffer */
499 for(y=0;y<oheight;y++) {
500 /* apply horizontal filter on new lines from input if needed */
501 src_y1 = src_y >> POS_FRAC_BITS;
502 while (last_src_y < src_y1) {
503 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
504 ring_y = NB_TAPS;
505 last_src_y++;
ab6d194a
MN
506 /* handle limit conditions : replicate line (slightly
507 inefficient because we filter multiple times) */
de6d9b64
FB
508 y1 = last_src_y;
509 if (y1 < 0) {
510 y1 = 0;
511 } else if (y1 >= iheight) {
512 y1 = iheight - 1;
513 }
514 src_line = input + y1 * iwrap;
515 new_line = s->line_buf + ring_y * owidth;
516 /* apply filter and handle limit cases correctly */
115329f1
DB
517 h_resample(new_line, owidth,
518 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
de6d9b64
FB
519 &s->h_filters[0][0]);
520 /* handle ring buffer wraping */
521 if (ring_y >= LINE_BUF_HEIGHT) {
522 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
523 new_line, owidth);
524 }
525 }
526 /* apply vertical filter */
527 phase_y = get_phase(src_y);
980fc7b8 528#ifdef HAVE_MMX
de6d9b64 529 /* desactivated MMX because loss of precision */
486497e0 530 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
115329f1
DB
531 v_resample4_mmx(output, owidth,
532 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 533 &s->v_filters[phase_y][0]);
404d2241
BF
534 else
535#endif
536#ifdef HAVE_ALTIVEC
486497e0 537 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
404d2241
BF
538 v_resample16_altivec(output, owidth,
539 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
540 &s->v_filters[phase_y][0]);
de6d9b64
FB
541 else
542#endif
115329f1
DB
543 v_resample(output, owidth,
544 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 545 &s->v_filters[phase_y][0]);
115329f1 546
de6d9b64 547 src_y += s->v_incr;
115329f1 548
de6d9b64
FB
549 output += owrap;
550 }
551}
552
de6d9b64
FB
553ImgReSampleContext *img_resample_init(int owidth, int oheight,
554 int iwidth, int iheight)
555{
115329f1 556 return img_resample_full_init(owidth, oheight, iwidth, iheight,
1ff93ffc 557 0, 0, 0, 0, 0, 0, 0, 0);
ab6d194a
MN
558}
559
560ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
561 int iwidth, int iheight,
562 int topBand, int bottomBand,
1ff93ffc
TK
563 int leftBand, int rightBand,
564 int padtop, int padbottom,
565 int padleft, int padright)
ab6d194a 566{
de6d9b64
FB
567 ImgReSampleContext *s;
568
d10dc616 569 if (!owidth || !oheight || !iwidth || !iheight)
bb270c08 570 return NULL;
d10dc616 571
de6d9b64
FB
572 s = av_mallocz(sizeof(ImgReSampleContext));
573 if (!s)
574 return NULL;
0ecca7a4
MN
575 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
576 return NULL;
de6d9b64 577 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
115329f1 578 if (!s->line_buf)
de6d9b64 579 goto fail;
115329f1 580
de6d9b64
FB
581 s->owidth = owidth;
582 s->oheight = oheight;
583 s->iwidth = iwidth;
584 s->iheight = iheight;
115329f1 585
ab6d194a
MN
586 s->topBand = topBand;
587 s->bottomBand = bottomBand;
588 s->leftBand = leftBand;
589 s->rightBand = rightBand;
115329f1 590
1ff93ffc
TK
591 s->padtop = padtop;
592 s->padbottom = padbottom;
593 s->padleft = padleft;
594 s->padright = padright;
595
596 s->pad_owidth = owidth - (padleft + padright);
597 s->pad_oheight = oheight - (padtop + padbottom);
598
599 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
115329f1 600 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
1ff93ffc 601
115329f1 602 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
aaaf1635 603 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
115329f1 604 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
aaaf1635 605 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
de6d9b64
FB
606
607 return s;
1ff93ffc 608fail:
6000abfa 609 av_free(s);
de6d9b64
FB
610 return NULL;
611}
612
115329f1 613void img_resample(ImgReSampleContext *s,
da64ecc3 614 AVPicture *output, const AVPicture *input)
de6d9b64
FB
615{
616 int i, shift;
1ff93ffc 617 uint8_t* optr;
de6d9b64 618
1ff93ffc 619 for (i=0;i<3;i++) {
de6d9b64 620 shift = (i == 0) ? 0 : 1;
1ff93ffc 621
115329f1 622 optr = output->data[i] + (((output->linesize[i] *
1ff93ffc
TK
623 s->padtop) + s->padleft) >> shift);
624
115329f1 625 component_resample(s, optr, output->linesize[i],
1ff93ffc 626 s->pad_owidth >> shift, s->pad_oheight >> shift,
115329f1 627 input->data[i] + (input->linesize[i] *
1ff93ffc 628 (s->topBand >> shift)) + (s->leftBand >> shift),
115329f1 629 input->linesize[i], ((s->iwidth - s->leftBand -
1ff93ffc 630 s->rightBand) >> shift),
ab6d194a 631 (s->iheight - s->topBand - s->bottomBand) >> shift);
de6d9b64
FB
632 }
633}
634
635void img_resample_close(ImgReSampleContext *s)
636{
6000abfa
FB
637 av_free(s->line_buf);
638 av_free(s);
de6d9b64
FB
639}
640
7b748aff
LA
641struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
642 int dstW, int dstH, int dstFormat,
643 int flags, SwsFilter *srcFilter,
644 SwsFilter *dstFilter, double *param)
645{
646 struct SwsContext *ctx;
647
648 ctx = av_malloc(sizeof(struct SwsContext));
649 if (ctx == NULL) {
650 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
651
652 return NULL;
653 }
654
655 if ((srcH != dstH) || (srcW != dstW)) {
656 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
657 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
658 }
659 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
660 } else {
661 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
662 ctx->resampling_ctx->iheight = srcH;
663 ctx->resampling_ctx->iwidth = srcW;
664 ctx->resampling_ctx->oheight = dstH;
665 ctx->resampling_ctx->owidth = dstW;
666 }
667 ctx->src_pix_fmt = srcFormat;
668 ctx->dst_pix_fmt = dstFormat;
669
670 return ctx;
671}
672
673void sws_freeContext(struct SwsContext *ctx)
674{
04675319
PI
675 if (!ctx)
676 return;
7b748aff
LA
677 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
678 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
679 img_resample_close(ctx->resampling_ctx);
680 } else {
681 av_free(ctx->resampling_ctx);
682 }
683 av_free(ctx);
684}
685
96db3808
LA
686
687/**
688 * Checks if context is valid or reallocs a new one instead.
689 * If context is NULL, just calls sws_getContext() to get a new one.
690 * Otherwise, checks if the parameters are the same already saved in context.
691 * If that is the case, returns the current context.
692 * Otherwise, frees context and gets a new one.
693 *
694 * Be warned that srcFilter, dstFilter are not checked, they are
695 * asumed to remain valid.
696 */
697struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
698 int srcW, int srcH, int srcFormat,
699 int dstW, int dstH, int dstFormat, int flags,
700 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
701{
702 if (ctx != NULL) {
703 if ((ctx->resampling_ctx->iwidth != srcW) ||
704 (ctx->resampling_ctx->iheight != srcH) ||
705 (ctx->src_pix_fmt != srcFormat) ||
706 (ctx->resampling_ctx->owidth != dstW) ||
707 (ctx->resampling_ctx->oheight != dstH) ||
708 (ctx->dst_pix_fmt != dstFormat))
709 {
710 sws_freeContext(ctx);
711 ctx = NULL;
712 }
713 }
714 if (ctx == NULL) {
715 return sws_getContext(srcW, srcH, srcFormat,
716 dstW, dstH, dstFormat, flags,
717 srcFilter, dstFilter, param);
718 }
719 return ctx;
720}
721
7b748aff
LA
722int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
723 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
724{
725 AVPicture src_pict, dst_pict;
726 int i, res = 0;
727 AVPicture picture_format_temp;
728 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
729 uint8_t *buf1 = NULL, *buf2 = NULL;
730 enum PixelFormat current_pix_fmt;
731
9c5d7c56 732 for (i = 0; i < 4; i++) {
7b748aff
LA
733 src_pict.data[i] = src[i];
734 src_pict.linesize[i] = srcStride[i];
735 dst_pict.data[i] = dst[i];
736 dst_pict.linesize[i] = dstStride[i];
737 }
738 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
739 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
740 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
741
742 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
743 int size;
744
745 /* create temporary picture for rescaling input*/
746 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
747 buf1 = av_malloc(size);
748 if (!buf1) {
749 res = -1;
750 goto the_end;
751 }
752 formatted_picture = &picture_format_temp;
753 avpicture_fill((AVPicture*)formatted_picture, buf1,
754 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
755
756 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
757 &src_pict, ctx->src_pix_fmt,
758 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
759
760 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
761 res = -1;
762 goto the_end;
763 }
764 } else {
765 formatted_picture = &src_pict;
766 }
767
768 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
769 int size;
770
771 /* create temporary picture for rescaling output*/
772 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
773 buf2 = av_malloc(size);
774 if (!buf2) {
775 res = -1;
776 goto the_end;
777 }
778 resampled_picture = &picture_resample_temp;
779 avpicture_fill((AVPicture*)resampled_picture, buf2,
780 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
781
782 } else {
783 resampled_picture = &dst_pict;
784 }
785
786 /* ...and finally rescale!!! */
787 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
788 current_pix_fmt = PIX_FMT_YUV420P;
789 } else {
790 resampled_picture = &src_pict;
791 current_pix_fmt = ctx->src_pix_fmt;
792 }
793
794 if (current_pix_fmt != ctx->dst_pix_fmt) {
795 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
796 resampled_picture, current_pix_fmt,
797 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
798
799 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
800
801 res = -1;
802 goto the_end;
803 }
2793096f 804 } else if (resampled_picture != &dst_pict) {
636d6a4a 805 av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
2793096f 806 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
7b748aff
LA
807 }
808
809the_end:
810 av_free(buf1);
811 av_free(buf2);
812 return res;
813}
814
815
de6d9b64 816#ifdef TEST
13160c07 817#include <stdio.h>
ab6d194a 818
de6d9b64
FB
/* input: one XSIZE x YSIZE 8-bit test image */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output: two buffers sized for the largest scaled result */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
de6d9b64 829
/**
 * Write an 8-bit grayscale image as a binary PGM ("P5") file.
 *
 * @param filename path of the file to create
 * @param img      xsize * ysize pixels, stored line by line
 * @param xsize    image width
 * @param ysize    image height
 *
 * If the file cannot be opened the error is reported on stderr and
 * nothing is written.
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;

    /* "wb": the pixel data is binary and must not be newline-translated */
    f=fopen(filename,"wb");
    if (!f) {
        perror(filename);
        return;
    }
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
#define fprintf please_use_av_log
}
840
0c1a9eda 841static void dump_filter(int16_t *filter)
de6d9b64
FB
842{
843 int i, ph;
844
845 for(ph=0;ph<NB_PHASES;ph++) {
13160c07 846 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
de6d9b64 847 for(i=0;i<NB_TAPS;i++) {
13160c07 848 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
de6d9b64 849 }
13160c07 850 av_log(NULL, AV_LOG_INFO, "\n");
de6d9b64
FB
851 }
852}
853
980fc7b8 854#ifdef HAVE_MMX
/* CPU feature flags tested by the resampler; main() sets them directly */
int mm_flags;
de6d9b64
FB
856#endif
857
858int main(int argc, char **argv)
859{
860 int x, y, v, i, xsize, ysize;
861 ImgReSampleContext *s;
862 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
863 char buf[256];
864
865 /* build test image */
866 for(y=0;y<YSIZE;y++) {
867 for(x=0;x<XSIZE;x++) {
868 if (x < XSIZE/2 && y < YSIZE/2) {
869 if (x < XSIZE/4 && y < YSIZE/4) {
870 if ((x % 10) <= 6 &&
871 (y % 10) <= 6)
872 v = 0xff;
873 else
874 v = 0x00;
875 } else if (x < XSIZE/4) {
115329f1 876 if (x & 1)
de6d9b64 877 v = 0xff;
115329f1 878 else
de6d9b64
FB
879 v = 0;
880 } else if (y < XSIZE/4) {
115329f1 881 if (y & 1)
de6d9b64 882 v = 0xff;
115329f1 883 else
de6d9b64
FB
884 v = 0;
885 } else {
886 if (y < YSIZE*3/8) {
115329f1 887 if ((y+x) & 1)
de6d9b64 888 v = 0xff;
115329f1 889 else
de6d9b64
FB
890 v = 0;
891 } else {
892 if (((x+3) % 4) <= 1 &&
893 ((y+3) % 4) <= 1)
894 v = 0xff;
895 else
896 v = 0x00;
897 }
898 }
899 } else if (x < XSIZE/2) {
900 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
901 } else if (y < XSIZE/2) {
902 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
903 } else {
904 v = ((x + y - XSIZE) * 255) / XSIZE;
905 }
ab6d194a 906 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
de6d9b64
FB
907 }
908 }
909 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
910 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
911 fact = factors[i];
912 xsize = (int)(XSIZE * fact);
ab6d194a 913 ysize = (int)((YSIZE - 100) * fact);
13160c07
PI
914 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
915 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
de6d9b64
FB
916 dump_filter(&s->h_filters[0][0]);
917 component_resample(s, img1, xsize, xsize, ysize,
ab6d194a 918 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
de6d9b64
FB
919 img_resample_close(s);
920
2fc8ea24 921 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
de6d9b64
FB
922 save_pgm(buf, img1, xsize, ysize);
923 }
924
925 /* mmx test */
980fc7b8 926#ifdef HAVE_MMX
13160c07 927 av_log(NULL, AV_LOG_INFO, "MMX test\n");
de6d9b64
FB
928 fact = 0.72;
929 xsize = (int)(XSIZE * fact);
930 ysize = (int)(YSIZE * fact);
931 mm_flags = MM_MMX;
932 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
933 component_resample(s, img1, xsize, xsize, ysize,
934 img, XSIZE, XSIZE, YSIZE);
935
936 mm_flags = 0;
937 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
938 component_resample(s, img2, xsize, xsize, ysize,
939 img, XSIZE, XSIZE, YSIZE);
940 if (memcmp(img1, img2, xsize * ysize) != 0) {
13160c07 941 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
de6d9b64
FB
942 exit(1);
943 }
13160c07 944 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
de6d9b64
FB
945#endif
946 return 0;
947}
948
949#endif