Implement sws_getCachedContext() in swscale emulation
[libav.git] / libavcodec / imgresample.c
CommitLineData
de6d9b64 1/*
115329f1 2 * High quality image resampling with polyphase filters
ff4ec49e 3 * Copyright (c) 2001 Fabrice Bellard.
de6d9b64 4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
ff4ec49e
FB
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
de6d9b64 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
de6d9b64 16 *
ff4ec49e 17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 20 */
115329f1 21
983e3246
MN
22/**
23 * @file imgresample.c
24 * High quality image resampling with polyphase filters .
25 */
115329f1 26
de6d9b64 27#include "avcodec.h"
7b748aff 28#include "swscale.h"
6000abfa 29#include "dsputil.h"
de6d9b64 30
54329dd5 31#ifdef USE_FASTMEMCPY
f4bd289a 32#include "libvo/fastmemcpy.h"
54329dd5 33#endif
54329dd5 34
de6d9b64
FB
35#define NB_COMPONENTS 3
36
37#define PHASE_BITS 4
38#define NB_PHASES (1 << PHASE_BITS)
39#define NB_TAPS 4
40#define FCENTER 1 /* index of the center of the filter */
ab6d194a 41//#define TEST 1 /* Test it */
de6d9b64
FB
42
43#define POS_FRAC_BITS 16
44#define POS_FRAC (1 << POS_FRAC_BITS)
45/* 6 bits precision is needed for MMX */
46#define FILTER_BITS 8
47
48#define LINE_BUF_HEIGHT (NB_TAPS * 4)
49
50struct ImgReSampleContext {
1ff93ffc
TK
51 int iwidth, iheight, owidth, oheight;
52 int topBand, bottomBand, leftBand, rightBand;
53 int padtop, padbottom, padleft, padright;
54 int pad_owidth, pad_oheight;
de6d9b64 55 int h_incr, v_incr;
68b51e58
SH
56 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
57 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
0c1a9eda 58 uint8_t *line_buf;
de6d9b64
FB
59};
60
aaaf1635
MN
61void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
62
de6d9b64
FB
63static inline int get_phase(int pos)
64{
65 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
66}
67
68/* This function must be optimized */
da64ecc3 69static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
70 int src_width, int src_start, int src_incr,
71 int16_t *filters)
de6d9b64
FB
72{
73 int src_pos, phase, sum, i;
da64ecc3 74 const uint8_t *s;
0c1a9eda 75 int16_t *filter;
de6d9b64
FB
76
77 src_pos = src_start;
78 for(i=0;i<dst_width;i++) {
79#ifdef TEST
80 /* test */
81 if ((src_pos >> POS_FRAC_BITS) < 0 ||
82 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
02ac3136 83 av_abort();
de6d9b64
FB
84#endif
85 s = src + (src_pos >> POS_FRAC_BITS);
86 phase = get_phase(src_pos);
87 filter = filters + phase * NB_TAPS;
88#if NB_TAPS == 4
89 sum = s[0] * filter[0] +
90 s[1] * filter[1] +
91 s[2] * filter[2] +
92 s[3] * filter[3];
93#else
94 {
95 int j;
96 sum = 0;
97 for(j=0;j<NB_TAPS;j++)
98 sum += s[j] * filter[j];
99 }
100#endif
101 sum = sum >> FILTER_BITS;
102 if (sum < 0)
103 sum = 0;
104 else if (sum > 255)
105 sum = 255;
106 dst[0] = sum;
107 src_pos += src_incr;
108 dst++;
109 }
110}
111
112/* This function must be optimized */
da64ecc3 113static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 114 int wrap, int16_t *filter)
de6d9b64
FB
115{
116 int sum, i;
da64ecc3 117 const uint8_t *s;
de6d9b64
FB
118
119 s = src;
120 for(i=0;i<dst_width;i++) {
121#if NB_TAPS == 4
122 sum = s[0 * wrap] * filter[0] +
123 s[1 * wrap] * filter[1] +
124 s[2 * wrap] * filter[2] +
125 s[3 * wrap] * filter[3];
126#else
127 {
128 int j;
0c1a9eda 129 uint8_t *s1 = s;
de6d9b64
FB
130
131 sum = 0;
132 for(j=0;j<NB_TAPS;j++) {
133 sum += s1[0] * filter[j];
134 s1 += wrap;
135 }
136 }
137#endif
138 sum = sum >> FILTER_BITS;
139 if (sum < 0)
140 sum = 0;
141 else if (sum > 255)
142 sum = 255;
143 dst[0] = sum;
144 dst++;
145 s++;
146 }
147}
148
980fc7b8 149#ifdef HAVE_MMX
de6d9b64
FB
150
151#include "i386/mmx.h"
152
/*
 * Compute one horizontally filtered sample for the current src_pos,
 * leave it in the low dword of reg and advance src_pos by src_incr.
 *
 * Relies on the caller's locals s, src, src_pos, src_incr, phase, filter
 * and filters, and on mm7 having been cleared beforehand (it is used to
 * widen the source bytes to words). mm6 is clobbered.
 * Note that movq fetches 8 bytes at s although only 4 are used.
 *
 * Wrapped in do { } while (0) so that "FILTER4(x);" is a single statement.
 */
#define FILTER4(reg) \
do {\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
} while (0)
168
169#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
170
171/* XXX: do four pixels at a time */
da64ecc3 172static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
bb270c08 173 const uint8_t *src, int src_width,
0c1a9eda 174 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
175{
176 int src_pos, phase;
da64ecc3 177 const uint8_t *s;
0c1a9eda 178 int16_t *filter;
de6d9b64 179 mmx_t tmp;
115329f1 180
de6d9b64
FB
181 src_pos = src_start;
182 pxor_r2r(mm7, mm7);
183
184 while (dst_width >= 4) {
185
186 FILTER4(mm0);
187 FILTER4(mm1);
188 FILTER4(mm2);
189 FILTER4(mm3);
190
191 packuswb_r2r(mm7, mm0);
192 packuswb_r2r(mm7, mm1);
193 packuswb_r2r(mm7, mm3);
194 packuswb_r2r(mm7, mm2);
195 movq_r2m(mm0, tmp);
196 dst[0] = tmp.ub[0];
197 movq_r2m(mm1, tmp);
198 dst[1] = tmp.ub[0];
199 movq_r2m(mm2, tmp);
200 dst[2] = tmp.ub[0];
201 movq_r2m(mm3, tmp);
202 dst[3] = tmp.ub[0];
203 dst += 4;
204 dst_width -= 4;
205 }
206 while (dst_width > 0) {
207 FILTER4(mm0);
208 packuswb_r2r(mm7, mm0);
209 movq_r2m(mm0, tmp);
210 dst[0] = tmp.ub[0];
211 dst++;
212 dst_width--;
213 }
214 emms();
215}
216
da64ecc3 217static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 218 int wrap, int16_t *filter)
de6d9b64
FB
219{
220 int sum, i, v;
da64ecc3 221 const uint8_t *s;
de6d9b64
FB
222 mmx_t tmp;
223 mmx_t coefs[4];
115329f1 224
de6d9b64
FB
225 for(i=0;i<4;i++) {
226 v = filter[i];
227 coefs[i].uw[0] = v;
228 coefs[i].uw[1] = v;
229 coefs[i].uw[2] = v;
230 coefs[i].uw[3] = v;
231 }
115329f1 232
de6d9b64
FB
233 pxor_r2r(mm7, mm7);
234 s = src;
235 while (dst_width >= 4) {
236 movq_m2r(s[0 * wrap], mm0);
237 punpcklbw_r2r(mm7, mm0);
238 movq_m2r(s[1 * wrap], mm1);
239 punpcklbw_r2r(mm7, mm1);
240 movq_m2r(s[2 * wrap], mm2);
241 punpcklbw_r2r(mm7, mm2);
242 movq_m2r(s[3 * wrap], mm3);
243 punpcklbw_r2r(mm7, mm3);
244
245 pmullw_m2r(coefs[0], mm0);
246 pmullw_m2r(coefs[1], mm1);
247 pmullw_m2r(coefs[2], mm2);
248 pmullw_m2r(coefs[3], mm3);
249
250 paddw_r2r(mm1, mm0);
251 paddw_r2r(mm3, mm2);
252 paddw_r2r(mm2, mm0);
253 psraw_i2r(FILTER_BITS, mm0);
115329f1 254
de6d9b64
FB
255 packuswb_r2r(mm7, mm0);
256 movq_r2m(mm0, tmp);
257
0c1a9eda 258 *(uint32_t *)dst = tmp.ud[0];
de6d9b64
FB
259 dst += 4;
260 s += 4;
261 dst_width -= 4;
262 }
263 while (dst_width > 0) {
264 sum = s[0 * wrap] * filter[0] +
265 s[1 * wrap] * filter[1] +
266 s[2 * wrap] * filter[2] +
267 s[3 * wrap] * filter[3];
268 sum = sum >> FILTER_BITS;
269 if (sum < 0)
270 sum = 0;
271 else if (sum > 255)
272 sum = 255;
273 dst[0] = sum;
274 dst++;
275 s++;
276 dst_width--;
277 }
278 emms();
279}
280#endif
281
404d2241 282#ifdef HAVE_ALTIVEC
bb270c08 283typedef union {
404d2241
BF
284 vector unsigned char v;
285 unsigned char c[16];
286} vec_uc_t;
287
bb270c08 288typedef union {
404d2241
BF
289 vector signed short v;
290 signed short s[8];
291} vec_ss_t;
292
da64ecc3 293void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 294 int wrap, int16_t *filter)
404d2241
BF
295{
296 int sum, i;
da64ecc3 297 const uint8_t *s;
404d2241
BF
298 vector unsigned char *tv, tmp, dstv, zero;
299 vec_ss_t srchv[4], srclv[4], fv[4];
115329f1 300 vector signed short zeros, sumhv, sumlv;
404d2241
BF
301 s = src;
302
303 for(i=0;i<4;i++)
304 {
305 /*
306 The vec_madds later on does an implicit >>15 on the result.
307 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
308 a signed short, we have just enough bits to pre-shift our
309 filter constants <<7 to compensate for vec_madds.
310 */
311 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
312 fv[i].v = vec_splat(fv[i].v, 0);
313 }
115329f1 314
404d2241
BF
315 zero = vec_splat_u8(0);
316 zeros = vec_splat_s16(0);
317
318
319 /*
320 When we're resampling, we'd ideally like both our input buffers,
321 and output buffers to be 16-byte aligned, so we can do both aligned
322 reads and writes. Sadly we can't always have this at the moment, so
323 we opt for aligned writes, as unaligned writes have a huge overhead.
324 To do this, do enough scalar resamples to get dst 16-byte aligned.
325 */
9e4e1659 326 i = (-(int)dst) & 0xf;
404d2241
BF
327 while(i>0) {
328 sum = s[0 * wrap] * filter[0] +
329 s[1 * wrap] * filter[1] +
330 s[2 * wrap] * filter[2] +
331 s[3 * wrap] * filter[3];
332 sum = sum >> FILTER_BITS;
333 if (sum<0) sum = 0; else if (sum>255) sum=255;
334 dst[0] = sum;
335 dst++;
336 s++;
337 dst_width--;
338 i--;
339 }
115329f1 340
404d2241
BF
341 /* Do our altivec resampling on 16 pixels at once. */
342 while(dst_width>=16) {
343 /*
344 Read 16 (potentially unaligned) bytes from each of
345 4 lines into 4 vectors, and split them into shorts.
346 Interleave the multipy/accumulate for the resample
347 filter with the loads to hide the 3 cycle latency
348 the vec_madds have.
349 */
350 tv = (vector unsigned char *) &s[0 * wrap];
351 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
352 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
353 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
354 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
355 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
356
357 tv = (vector unsigned char *) &s[1 * wrap];
358 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
359 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
360 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
361 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
362 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
363
364 tv = (vector unsigned char *) &s[2 * wrap];
365 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
366 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
367 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
368 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
369 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
370
371 tv = (vector unsigned char *) &s[3 * wrap];
372 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
373 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
374 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
375 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
376 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
115329f1 377
404d2241
BF
378 /*
379 Pack the results into our destination vector,
380 and do an aligned write of that back to memory.
381 */
382 dstv = vec_packsu(sumhv, sumlv) ;
383 vec_st(dstv, 0, (vector unsigned char *) dst);
115329f1 384
404d2241
BF
385 dst+=16;
386 s+=16;
387 dst_width-=16;
388 }
389
390 /*
391 If there are any leftover pixels, resample them
392 with the slow scalar method.
393 */
394 while(dst_width>0) {
395 sum = s[0 * wrap] * filter[0] +
396 s[1 * wrap] * filter[1] +
397 s[2 * wrap] * filter[2] +
398 s[3 * wrap] * filter[3];
399 sum = sum >> FILTER_BITS;
400 if (sum<0) sum = 0; else if (sum>255) sum=255;
401 dst[0] = sum;
402 dst++;
403 s++;
404 dst_width--;
405 }
406}
407#endif
408
de6d9b64 409/* slow version to handle limit cases. Does not need optimisation */
da64ecc3 410static void h_resample_slow(uint8_t *dst, int dst_width,
bb270c08 411 const uint8_t *src, int src_width,
0c1a9eda 412 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
413{
414 int src_pos, phase, sum, j, v, i;
da64ecc3 415 const uint8_t *s, *src_end;
0c1a9eda 416 int16_t *filter;
de6d9b64
FB
417
418 src_end = src + src_width;
419 src_pos = src_start;
420 for(i=0;i<dst_width;i++) {
421 s = src + (src_pos >> POS_FRAC_BITS);
422 phase = get_phase(src_pos);
423 filter = filters + phase * NB_TAPS;
424 sum = 0;
425 for(j=0;j<NB_TAPS;j++) {
426 if (s < src)
427 v = src[0];
428 else if (s >= src_end)
429 v = src_end[-1];
430 else
431 v = s[0];
432 sum += v * filter[j];
433 s++;
434 }
435 sum = sum >> FILTER_BITS;
436 if (sum < 0)
437 sum = 0;
438 else if (sum > 255)
439 sum = 255;
440 dst[0] = sum;
441 src_pos += src_incr;
442 dst++;
443 }
444}
445
da64ecc3 446static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
447 int src_width, int src_start, int src_incr,
448 int16_t *filters)
de6d9b64
FB
449{
450 int n, src_end;
451
452 if (src_start < 0) {
453 n = (0 - src_start + src_incr - 1) / src_incr;
454 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
455 dst += n;
456 dst_width -= n;
457 src_start += n * src_incr;
458 }
459 src_end = src_start + dst_width * src_incr;
460 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
115329f1 461 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
de6d9b64
FB
462 src_incr;
463 } else {
464 n = dst_width;
465 }
980fc7b8 466#ifdef HAVE_MMX
de6d9b64 467 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
115329f1 468 h_resample_fast4_mmx(dst, n,
de6d9b64
FB
469 src, src_width, src_start, src_incr, filters);
470 else
471#endif
115329f1 472 h_resample_fast(dst, n,
de6d9b64
FB
473 src, src_width, src_start, src_incr, filters);
474 if (n < dst_width) {
475 dst += n;
476 dst_width -= n;
477 src_start += n * src_incr;
115329f1 478 h_resample_slow(dst, dst_width,
de6d9b64
FB
479 src, src_width, src_start, src_incr, filters);
480 }
481}
482
115329f1 483static void component_resample(ImgReSampleContext *s,
0c1a9eda
ZK
484 uint8_t *output, int owrap, int owidth, int oheight,
485 uint8_t *input, int iwrap, int iwidth, int iheight)
de6d9b64
FB
486{
487 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
0c1a9eda 488 uint8_t *new_line, *src_line;
de6d9b64
FB
489
490 last_src_y = - FCENTER - 1;
491 /* position of the bottom of the filter in the source image */
115329f1 492 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
de6d9b64
FB
493 ring_y = NB_TAPS; /* position in ring buffer */
494 for(y=0;y<oheight;y++) {
495 /* apply horizontal filter on new lines from input if needed */
496 src_y1 = src_y >> POS_FRAC_BITS;
497 while (last_src_y < src_y1) {
498 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
499 ring_y = NB_TAPS;
500 last_src_y++;
ab6d194a
MN
501 /* handle limit conditions : replicate line (slightly
502 inefficient because we filter multiple times) */
de6d9b64
FB
503 y1 = last_src_y;
504 if (y1 < 0) {
505 y1 = 0;
506 } else if (y1 >= iheight) {
507 y1 = iheight - 1;
508 }
509 src_line = input + y1 * iwrap;
510 new_line = s->line_buf + ring_y * owidth;
511 /* apply filter and handle limit cases correctly */
115329f1
DB
512 h_resample(new_line, owidth,
513 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
de6d9b64
FB
514 &s->h_filters[0][0]);
515 /* handle ring buffer wraping */
516 if (ring_y >= LINE_BUF_HEIGHT) {
517 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
518 new_line, owidth);
519 }
520 }
521 /* apply vertical filter */
522 phase_y = get_phase(src_y);
980fc7b8 523#ifdef HAVE_MMX
de6d9b64
FB
524 /* desactivated MMX because loss of precision */
525 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
115329f1
DB
526 v_resample4_mmx(output, owidth,
527 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 528 &s->v_filters[phase_y][0]);
404d2241
BF
529 else
530#endif
531#ifdef HAVE_ALTIVEC
00a7d8d6 532 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
404d2241
BF
533 v_resample16_altivec(output, owidth,
534 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
535 &s->v_filters[phase_y][0]);
de6d9b64
FB
536 else
537#endif
115329f1
DB
538 v_resample(output, owidth,
539 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 540 &s->v_filters[phase_y][0]);
115329f1 541
de6d9b64 542 src_y += s->v_incr;
115329f1 543
de6d9b64
FB
544 output += owrap;
545 }
546}
547
de6d9b64
FB
548ImgReSampleContext *img_resample_init(int owidth, int oheight,
549 int iwidth, int iheight)
550{
115329f1 551 return img_resample_full_init(owidth, oheight, iwidth, iheight,
1ff93ffc 552 0, 0, 0, 0, 0, 0, 0, 0);
ab6d194a
MN
553}
554
555ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
556 int iwidth, int iheight,
557 int topBand, int bottomBand,
1ff93ffc
TK
558 int leftBand, int rightBand,
559 int padtop, int padbottom,
560 int padleft, int padright)
ab6d194a 561{
de6d9b64
FB
562 ImgReSampleContext *s;
563
d10dc616 564 if (!owidth || !oheight || !iwidth || !iheight)
bb270c08 565 return NULL;
d10dc616 566
de6d9b64
FB
567 s = av_mallocz(sizeof(ImgReSampleContext));
568 if (!s)
569 return NULL;
0ecca7a4
MN
570 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
571 return NULL;
de6d9b64 572 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
115329f1 573 if (!s->line_buf)
de6d9b64 574 goto fail;
115329f1 575
de6d9b64
FB
576 s->owidth = owidth;
577 s->oheight = oheight;
578 s->iwidth = iwidth;
579 s->iheight = iheight;
115329f1 580
ab6d194a
MN
581 s->topBand = topBand;
582 s->bottomBand = bottomBand;
583 s->leftBand = leftBand;
584 s->rightBand = rightBand;
115329f1 585
1ff93ffc
TK
586 s->padtop = padtop;
587 s->padbottom = padbottom;
588 s->padleft = padleft;
589 s->padright = padright;
590
591 s->pad_owidth = owidth - (padleft + padright);
592 s->pad_oheight = oheight - (padtop + padbottom);
593
594 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
115329f1 595 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
1ff93ffc 596
115329f1 597 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
aaaf1635 598 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
115329f1 599 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
aaaf1635 600 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
de6d9b64
FB
601
602 return s;
1ff93ffc 603fail:
6000abfa 604 av_free(s);
de6d9b64
FB
605 return NULL;
606}
607
115329f1 608void img_resample(ImgReSampleContext *s,
da64ecc3 609 AVPicture *output, const AVPicture *input)
de6d9b64
FB
610{
611 int i, shift;
1ff93ffc 612 uint8_t* optr;
de6d9b64 613
1ff93ffc 614 for (i=0;i<3;i++) {
de6d9b64 615 shift = (i == 0) ? 0 : 1;
1ff93ffc 616
115329f1 617 optr = output->data[i] + (((output->linesize[i] *
1ff93ffc
TK
618 s->padtop) + s->padleft) >> shift);
619
115329f1 620 component_resample(s, optr, output->linesize[i],
1ff93ffc 621 s->pad_owidth >> shift, s->pad_oheight >> shift,
115329f1 622 input->data[i] + (input->linesize[i] *
1ff93ffc 623 (s->topBand >> shift)) + (s->leftBand >> shift),
115329f1 624 input->linesize[i], ((s->iwidth - s->leftBand -
1ff93ffc 625 s->rightBand) >> shift),
ab6d194a 626 (s->iheight - s->topBand - s->bottomBand) >> shift);
de6d9b64
FB
627 }
628}
629
630void img_resample_close(ImgReSampleContext *s)
631{
6000abfa
FB
632 av_free(s->line_buf);
633 av_free(s);
de6d9b64
FB
634}
635
7b748aff
LA
636struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
637 int dstW, int dstH, int dstFormat,
638 int flags, SwsFilter *srcFilter,
639 SwsFilter *dstFilter, double *param)
640{
641 struct SwsContext *ctx;
642
643 ctx = av_malloc(sizeof(struct SwsContext));
644 if (ctx == NULL) {
645 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
646
647 return NULL;
648 }
649
650 if ((srcH != dstH) || (srcW != dstW)) {
651 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
652 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
653 }
654 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
655 } else {
656 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
657 ctx->resampling_ctx->iheight = srcH;
658 ctx->resampling_ctx->iwidth = srcW;
659 ctx->resampling_ctx->oheight = dstH;
660 ctx->resampling_ctx->owidth = dstW;
661 }
662 ctx->src_pix_fmt = srcFormat;
663 ctx->dst_pix_fmt = dstFormat;
664
665 return ctx;
666}
667
668void sws_freeContext(struct SwsContext *ctx)
669{
670 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
671 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
672 img_resample_close(ctx->resampling_ctx);
673 } else {
674 av_free(ctx->resampling_ctx);
675 }
676 av_free(ctx);
677}
678
96db3808
LA
679
680/**
681 * Checks if context is valid or reallocs a new one instead.
682 * If context is NULL, just calls sws_getContext() to get a new one.
683 * Otherwise, checks if the parameters are the same already saved in context.
684 * If that is the case, returns the current context.
685 * Otherwise, frees context and gets a new one.
686 *
687 * Be warned that srcFilter, dstFilter are not checked, they are
688 * asumed to remain valid.
689 */
690struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
691 int srcW, int srcH, int srcFormat,
692 int dstW, int dstH, int dstFormat, int flags,
693 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
694{
695 if (ctx != NULL) {
696 if ((ctx->resampling_ctx->iwidth != srcW) ||
697 (ctx->resampling_ctx->iheight != srcH) ||
698 (ctx->src_pix_fmt != srcFormat) ||
699 (ctx->resampling_ctx->owidth != dstW) ||
700 (ctx->resampling_ctx->oheight != dstH) ||
701 (ctx->dst_pix_fmt != dstFormat))
702 {
703 sws_freeContext(ctx);
704 ctx = NULL;
705 }
706 }
707 if (ctx == NULL) {
708 return sws_getContext(srcW, srcH, srcFormat,
709 dstW, dstH, dstFormat, flags,
710 srcFilter, dstFilter, param);
711 }
712 return ctx;
713}
714
7b748aff
LA
715int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
716 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
717{
718 AVPicture src_pict, dst_pict;
719 int i, res = 0;
720 AVPicture picture_format_temp;
721 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
722 uint8_t *buf1 = NULL, *buf2 = NULL;
723 enum PixelFormat current_pix_fmt;
724
725 for (i = 0; i < 3; i++) {
726 src_pict.data[i] = src[i];
727 src_pict.linesize[i] = srcStride[i];
728 dst_pict.data[i] = dst[i];
729 dst_pict.linesize[i] = dstStride[i];
730 }
731 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
732 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
733 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
734
735 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
736 int size;
737
738 /* create temporary picture for rescaling input*/
739 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
740 buf1 = av_malloc(size);
741 if (!buf1) {
742 res = -1;
743 goto the_end;
744 }
745 formatted_picture = &picture_format_temp;
746 avpicture_fill((AVPicture*)formatted_picture, buf1,
747 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
748
749 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
750 &src_pict, ctx->src_pix_fmt,
751 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
752
753 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
754 res = -1;
755 goto the_end;
756 }
757 } else {
758 formatted_picture = &src_pict;
759 }
760
761 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
762 int size;
763
764 /* create temporary picture for rescaling output*/
765 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
766 buf2 = av_malloc(size);
767 if (!buf2) {
768 res = -1;
769 goto the_end;
770 }
771 resampled_picture = &picture_resample_temp;
772 avpicture_fill((AVPicture*)resampled_picture, buf2,
773 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
774
775 } else {
776 resampled_picture = &dst_pict;
777 }
778
779 /* ...and finally rescale!!! */
780 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
781 current_pix_fmt = PIX_FMT_YUV420P;
782 } else {
783 resampled_picture = &src_pict;
784 current_pix_fmt = ctx->src_pix_fmt;
785 }
786
787 if (current_pix_fmt != ctx->dst_pix_fmt) {
788 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
789 resampled_picture, current_pix_fmt,
790 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
791
792 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
793
794 res = -1;
795 goto the_end;
796 }
2793096f
LA
797 } else if (resampled_picture != &dst_pict) {
798 img_copy(&dst_pict, resampled_picture, current_pix_fmt,
799 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
7b748aff
LA
800 }
801
802the_end:
803 av_free(buf1);
804 av_free(buf2);
805 return res;
806}
807
808
de6d9b64 809#ifdef TEST
13160c07 810#include <stdio.h>
ab6d194a 811
de6d9b64
FB
812/* input */
813#define XSIZE 256
814#define YSIZE 256
0c1a9eda 815uint8_t img[XSIZE * YSIZE];
de6d9b64
FB
816
817/* output */
818#define XSIZE1 512
819#define YSIZE1 512
0c1a9eda
ZK
820uint8_t img1[XSIZE1 * YSIZE1];
821uint8_t img2[XSIZE1 * YSIZE1];
de6d9b64 822
/**
 * Write an 8 bit grayscale image as a binary PGM ("P5") file.
 *
 * If the file cannot be opened a message is printed on stderr and
 * nothing is written.
 *
 * @param filename  path of the file to create
 * @param img       xsize * ysize pixels, stored line after line
 * @param xsize     image width
 * @param ysize     image height
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;

    /* "wb": the payload is raw bytes and must not be newline translated */
    f=fopen(filename,"wb");
    if (!f) {
        fprintf(stderr, "save_pgm: cannot open '%s'\n", filename);
        return;
    }
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
#define fprintf please_use_av_log
}
833
0c1a9eda 834static void dump_filter(int16_t *filter)
de6d9b64
FB
835{
836 int i, ph;
837
838 for(ph=0;ph<NB_PHASES;ph++) {
13160c07 839 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
de6d9b64 840 for(i=0;i<NB_TAPS;i++) {
13160c07 841 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
de6d9b64 842 }
13160c07 843 av_log(NULL, AV_LOG_INFO, "\n");
de6d9b64
FB
844 }
845}
846
980fc7b8 847#ifdef HAVE_MMX
6acce86b 848int mm_flags;
de6d9b64
FB
849#endif
850
851int main(int argc, char **argv)
852{
853 int x, y, v, i, xsize, ysize;
854 ImgReSampleContext *s;
855 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
856 char buf[256];
857
858 /* build test image */
859 for(y=0;y<YSIZE;y++) {
860 for(x=0;x<XSIZE;x++) {
861 if (x < XSIZE/2 && y < YSIZE/2) {
862 if (x < XSIZE/4 && y < YSIZE/4) {
863 if ((x % 10) <= 6 &&
864 (y % 10) <= 6)
865 v = 0xff;
866 else
867 v = 0x00;
868 } else if (x < XSIZE/4) {
115329f1 869 if (x & 1)
de6d9b64 870 v = 0xff;
115329f1 871 else
de6d9b64
FB
872 v = 0;
873 } else if (y < XSIZE/4) {
115329f1 874 if (y & 1)
de6d9b64 875 v = 0xff;
115329f1 876 else
de6d9b64
FB
877 v = 0;
878 } else {
879 if (y < YSIZE*3/8) {
115329f1 880 if ((y+x) & 1)
de6d9b64 881 v = 0xff;
115329f1 882 else
de6d9b64
FB
883 v = 0;
884 } else {
885 if (((x+3) % 4) <= 1 &&
886 ((y+3) % 4) <= 1)
887 v = 0xff;
888 else
889 v = 0x00;
890 }
891 }
892 } else if (x < XSIZE/2) {
893 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
894 } else if (y < XSIZE/2) {
895 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
896 } else {
897 v = ((x + y - XSIZE) * 255) / XSIZE;
898 }
ab6d194a 899 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
de6d9b64
FB
900 }
901 }
902 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
903 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
904 fact = factors[i];
905 xsize = (int)(XSIZE * fact);
ab6d194a 906 ysize = (int)((YSIZE - 100) * fact);
13160c07
PI
907 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
908 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
de6d9b64
FB
909 dump_filter(&s->h_filters[0][0]);
910 component_resample(s, img1, xsize, xsize, ysize,
ab6d194a 911 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
de6d9b64
FB
912 img_resample_close(s);
913
2fc8ea24 914 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
de6d9b64
FB
915 save_pgm(buf, img1, xsize, ysize);
916 }
917
918 /* mmx test */
980fc7b8 919#ifdef HAVE_MMX
13160c07 920 av_log(NULL, AV_LOG_INFO, "MMX test\n");
de6d9b64
FB
921 fact = 0.72;
922 xsize = (int)(XSIZE * fact);
923 ysize = (int)(YSIZE * fact);
924 mm_flags = MM_MMX;
925 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
926 component_resample(s, img1, xsize, xsize, ysize,
927 img, XSIZE, XSIZE, YSIZE);
928
929 mm_flags = 0;
930 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
931 component_resample(s, img2, xsize, xsize, ysize,
932 img, XSIZE, XSIZE, YSIZE);
933 if (memcmp(img1, img2, xsize * ysize) != 0) {
13160c07 934 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
de6d9b64
FB
935 exit(1);
936 }
13160c07 937 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
de6d9b64
FB
938#endif
939 return 0;
940}
941
942#endif